{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999804522056428, "eval_steps": 500, "global_step": 1918, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005212745161920897, "grad_norm": 5.11075803301647, "learning_rate": 5.208333333333333e-08, "loss": 0.4124, "step": 1 }, { "epoch": 0.0010425490323841794, "grad_norm": 5.17436422800092, "learning_rate": 1.0416666666666667e-07, "loss": 0.4261, "step": 2 }, { "epoch": 0.001563823548576269, "grad_norm": 5.345440991325531, "learning_rate": 1.5625e-07, "loss": 0.4092, "step": 3 }, { "epoch": 0.0020850980647683587, "grad_norm": 4.900834347782274, "learning_rate": 2.0833333333333333e-07, "loss": 0.3987, "step": 4 }, { "epoch": 0.002606372580960448, "grad_norm": 5.075426426426963, "learning_rate": 2.604166666666667e-07, "loss": 0.4086, "step": 5 }, { "epoch": 0.003127647097152538, "grad_norm": 4.851137328642362, "learning_rate": 3.125e-07, "loss": 0.3814, "step": 6 }, { "epoch": 0.0036489216133446275, "grad_norm": 4.849044652089594, "learning_rate": 3.6458333333333337e-07, "loss": 0.3795, "step": 7 }, { "epoch": 0.0041701961295367174, "grad_norm": 4.5784045386773755, "learning_rate": 4.1666666666666667e-07, "loss": 0.4008, "step": 8 }, { "epoch": 0.004691470645728807, "grad_norm": 4.7682806495033745, "learning_rate": 4.6875000000000006e-07, "loss": 0.4106, "step": 9 }, { "epoch": 0.005212745161920896, "grad_norm": 4.328666361858364, "learning_rate": 5.208333333333334e-07, "loss": 0.3986, "step": 10 }, { "epoch": 0.005734019678112986, "grad_norm": 4.283851136233341, "learning_rate": 5.729166666666667e-07, "loss": 0.4031, "step": 11 }, { "epoch": 0.006255294194305076, "grad_norm": 4.131111662127315, "learning_rate": 6.25e-07, "loss": 0.3776, "step": 12 }, { "epoch": 0.006776568710497166, "grad_norm": 3.759641197331778, "learning_rate": 6.770833333333333e-07, "loss": 0.3865, "step": 13 }, { "epoch": 0.007297843226689255, "grad_norm": 3.451804643766274, "learning_rate": 7.291666666666667e-07, "loss": 0.3748, "step": 14 }, { "epoch": 0.007819117742881345, "grad_norm": 3.135418097302639, "learning_rate": 7.8125e-07, "loss": 0.3665, "step": 15 }, { "epoch": 0.008340392259073435, "grad_norm": 2.7047102856884093, "learning_rate": 8.333333333333333e-07, "loss": 0.3597, "step": 16 }, { "epoch": 0.008861666775265524, "grad_norm": 2.772951802260474, "learning_rate": 8.854166666666668e-07, "loss": 0.3735, "step": 17 }, { "epoch": 0.009382941291457614, "grad_norm": 2.3198899618016138, "learning_rate": 9.375000000000001e-07, "loss": 0.3503, "step": 18 }, { "epoch": 0.009904215807649703, "grad_norm": 2.2447520592031704, "learning_rate": 9.895833333333333e-07, "loss": 0.3331, "step": 19 }, { "epoch": 0.010425490323841793, "grad_norm": 2.400538942817267, "learning_rate": 1.0416666666666667e-06, "loss": 0.3194, "step": 20 }, { "epoch": 0.010946764840033882, "grad_norm": 2.4839418106829743, "learning_rate": 1.0937500000000001e-06, "loss": 0.3059, "step": 21 }, { "epoch": 0.011468039356225972, "grad_norm": 2.072907404146958, "learning_rate": 1.1458333333333333e-06, "loss": 0.3251, "step": 22 }, { "epoch": 0.011989313872418063, "grad_norm": 1.9341789006266392, "learning_rate": 1.197916666666667e-06, "loss": 0.3095, "step": 23 }, { "epoch": 0.012510588388610152, "grad_norm": 1.7307141646883002, "learning_rate": 1.25e-06, "loss": 0.3193, "step": 24 }, { "epoch": 0.013031862904802242, "grad_norm": 1.7615841916776682, "learning_rate": 1.3020833333333335e-06, "loss": 0.3319, "step": 25 }, { "epoch": 0.013553137420994331, "grad_norm": 1.7136648054606347, "learning_rate": 1.3541666666666667e-06, "loss": 0.2944, "step": 26 }, { "epoch": 0.01407441193718642, "grad_norm": 1.9987205714740135, "learning_rate": 1.40625e-06, "loss": 0.3041, "step": 27 }, { "epoch": 0.01459568645337851, "grad_norm": 2.012277299697316, "learning_rate": 1.4583333333333335e-06, "loss": 0.3063, "step": 28 }, { "epoch": 0.0151169609695706, "grad_norm": 2.0782151715380746, "learning_rate": 1.5104166666666667e-06, "loss": 0.3091, "step": 29 }, { "epoch": 0.01563823548576269, "grad_norm": 1.7474712574781637, "learning_rate": 1.5625e-06, "loss": 0.2879, "step": 30 }, { "epoch": 0.01615951000195478, "grad_norm": 1.8944893639350486, "learning_rate": 1.6145833333333335e-06, "loss": 0.3016, "step": 31 }, { "epoch": 0.01668078451814687, "grad_norm": 1.6876038268436602, "learning_rate": 1.6666666666666667e-06, "loss": 0.3042, "step": 32 }, { "epoch": 0.017202059034338957, "grad_norm": 1.861475840222954, "learning_rate": 1.71875e-06, "loss": 0.3048, "step": 33 }, { "epoch": 0.01772333355053105, "grad_norm": 1.6018157378320321, "learning_rate": 1.7708333333333337e-06, "loss": 0.3043, "step": 34 }, { "epoch": 0.018244608066723136, "grad_norm": 1.7354154053640138, "learning_rate": 1.8229166666666666e-06, "loss": 0.2924, "step": 35 }, { "epoch": 0.018765882582915228, "grad_norm": 1.4987710469506477, "learning_rate": 1.8750000000000003e-06, "loss": 0.2771, "step": 36 }, { "epoch": 0.01928715709910732, "grad_norm": 1.567127836898921, "learning_rate": 1.9270833333333334e-06, "loss": 0.2838, "step": 37 }, { "epoch": 0.019808431615299407, "grad_norm": 1.5064764171139846, "learning_rate": 1.9791666666666666e-06, "loss": 0.2941, "step": 38 }, { "epoch": 0.020329706131491498, "grad_norm": 1.340604325707909, "learning_rate": 2.0312500000000002e-06, "loss": 0.2918, "step": 39 }, { "epoch": 0.020850980647683585, "grad_norm": 1.3211877381742252, "learning_rate": 2.0833333333333334e-06, "loss": 0.2762, "step": 40 }, { "epoch": 0.021372255163875677, "grad_norm": 1.4655683382115356, "learning_rate": 2.1354166666666666e-06, "loss": 0.2657, "step": 41 }, { "epoch": 0.021893529680067764, "grad_norm": 1.3423365858196985, "learning_rate": 2.1875000000000002e-06, "loss": 0.2897, "step": 42 }, { "epoch": 0.022414804196259856, "grad_norm": 1.3364845050339877, "learning_rate": 2.2395833333333334e-06, "loss": 0.2821, "step": 43 }, { "epoch": 0.022936078712451943, "grad_norm": 1.3593339435906422, "learning_rate": 2.2916666666666666e-06, "loss": 0.2599, "step": 44 }, { "epoch": 0.023457353228644034, "grad_norm": 1.3475417862019787, "learning_rate": 2.3437500000000002e-06, "loss": 0.2745, "step": 45 }, { "epoch": 0.023978627744836126, "grad_norm": 1.342587329740068, "learning_rate": 2.395833333333334e-06, "loss": 0.2626, "step": 46 }, { "epoch": 0.024499902261028213, "grad_norm": 1.2789760759653985, "learning_rate": 2.4479166666666666e-06, "loss": 0.2699, "step": 47 }, { "epoch": 0.025021176777220305, "grad_norm": 1.3631288905885437, "learning_rate": 2.5e-06, "loss": 0.2764, "step": 48 }, { "epoch": 0.025542451293412392, "grad_norm": 1.3230391864388509, "learning_rate": 2.5520833333333334e-06, "loss": 0.2774, "step": 49 }, { "epoch": 0.026063725809604484, "grad_norm": 1.21271476234125, "learning_rate": 2.604166666666667e-06, "loss": 0.2763, "step": 50 }, { "epoch": 0.02658500032579657, "grad_norm": 1.1932303504862625, "learning_rate": 2.65625e-06, "loss": 0.2586, "step": 51 }, { "epoch": 0.027106274841988662, "grad_norm": 1.2820424405047717, "learning_rate": 2.7083333333333334e-06, "loss": 0.273, "step": 52 }, { "epoch": 0.027627549358180754, "grad_norm": 1.2531931817054822, "learning_rate": 2.760416666666667e-06, "loss": 0.2633, "step": 53 }, { "epoch": 0.02814882387437284, "grad_norm": 1.4100197823383918, "learning_rate": 2.8125e-06, "loss": 0.2713, "step": 54 }, { "epoch": 0.028670098390564933, "grad_norm": 1.244230363037586, "learning_rate": 2.8645833333333334e-06, "loss": 0.2675, "step": 55 }, { "epoch": 0.02919137290675702, "grad_norm": 1.233721117946254, "learning_rate": 2.916666666666667e-06, "loss": 0.26, "step": 56 }, { "epoch": 0.02971264742294911, "grad_norm": 1.33074120780009, "learning_rate": 2.96875e-06, "loss": 0.2623, "step": 57 }, { "epoch": 0.0302339219391412, "grad_norm": 1.3279194320065162, "learning_rate": 3.0208333333333334e-06, "loss": 0.2709, "step": 58 }, { "epoch": 0.03075519645533329, "grad_norm": 1.4390908943253897, "learning_rate": 3.072916666666667e-06, "loss": 0.2519, "step": 59 }, { "epoch": 0.03127647097152538, "grad_norm": 1.4426086827285376, "learning_rate": 3.125e-06, "loss": 0.2683, "step": 60 }, { "epoch": 0.031797745487717466, "grad_norm": 1.2969321386639103, "learning_rate": 3.1770833333333333e-06, "loss": 0.2486, "step": 61 }, { "epoch": 0.03231902000390956, "grad_norm": 1.303884366809117, "learning_rate": 3.229166666666667e-06, "loss": 0.2589, "step": 62 }, { "epoch": 0.03284029452010165, "grad_norm": 1.4012507792386482, "learning_rate": 3.28125e-06, "loss": 0.2583, "step": 63 }, { "epoch": 0.03336156903629374, "grad_norm": 1.3811445006822614, "learning_rate": 3.3333333333333333e-06, "loss": 0.2596, "step": 64 }, { "epoch": 0.03388284355248583, "grad_norm": 1.4475370235278413, "learning_rate": 3.385416666666667e-06, "loss": 0.2605, "step": 65 }, { "epoch": 0.034404118068677915, "grad_norm": 1.322688630617171, "learning_rate": 3.4375e-06, "loss": 0.2477, "step": 66 }, { "epoch": 0.034925392584870006, "grad_norm": 1.3992892078555939, "learning_rate": 3.4895833333333333e-06, "loss": 0.2446, "step": 67 }, { "epoch": 0.0354466671010621, "grad_norm": 1.2681725892431805, "learning_rate": 3.5416666666666673e-06, "loss": 0.2478, "step": 68 }, { "epoch": 0.03596794161725419, "grad_norm": 1.4436847621832125, "learning_rate": 3.59375e-06, "loss": 0.2619, "step": 69 }, { "epoch": 0.03648921613344627, "grad_norm": 1.4010222407978898, "learning_rate": 3.6458333333333333e-06, "loss": 0.2458, "step": 70 }, { "epoch": 0.037010490649638364, "grad_norm": 1.1847301699214483, "learning_rate": 3.6979166666666673e-06, "loss": 0.2457, "step": 71 }, { "epoch": 0.037531765165830455, "grad_norm": 1.2656606340585645, "learning_rate": 3.7500000000000005e-06, "loss": 0.2496, "step": 72 }, { "epoch": 0.038053039682022546, "grad_norm": 1.4606373768036418, "learning_rate": 3.8020833333333333e-06, "loss": 0.2512, "step": 73 }, { "epoch": 0.03857431419821464, "grad_norm": 1.2166433050839274, "learning_rate": 3.854166666666667e-06, "loss": 0.2219, "step": 74 }, { "epoch": 0.03909558871440672, "grad_norm": 1.3292968759151849, "learning_rate": 3.90625e-06, "loss": 0.2579, "step": 75 }, { "epoch": 0.03961686323059881, "grad_norm": 1.4951315237751113, "learning_rate": 3.958333333333333e-06, "loss": 0.2447, "step": 76 }, { "epoch": 0.040138137746790904, "grad_norm": 1.3489871277817018, "learning_rate": 4.010416666666667e-06, "loss": 0.2575, "step": 77 }, { "epoch": 0.040659412262982995, "grad_norm": 1.3802154019982127, "learning_rate": 4.0625000000000005e-06, "loss": 0.2524, "step": 78 }, { "epoch": 0.04118068677917508, "grad_norm": 1.2213764719182099, "learning_rate": 4.114583333333334e-06, "loss": 0.2534, "step": 79 }, { "epoch": 0.04170196129536717, "grad_norm": 1.2310142856144946, "learning_rate": 4.166666666666667e-06, "loss": 0.2434, "step": 80 }, { "epoch": 0.04222323581155926, "grad_norm": 1.2489940345168304, "learning_rate": 4.21875e-06, "loss": 0.2224, "step": 81 }, { "epoch": 0.04274451032775135, "grad_norm": 1.5335069492862015, "learning_rate": 4.270833333333333e-06, "loss": 0.2646, "step": 82 }, { "epoch": 0.043265784843943444, "grad_norm": 1.448069380566151, "learning_rate": 4.322916666666667e-06, "loss": 0.2613, "step": 83 }, { "epoch": 0.04378705936013553, "grad_norm": 1.6356379211230665, "learning_rate": 4.3750000000000005e-06, "loss": 0.2616, "step": 84 }, { "epoch": 0.04430833387632762, "grad_norm": 1.7155523706338194, "learning_rate": 4.427083333333334e-06, "loss": 0.241, "step": 85 }, { "epoch": 0.04482960839251971, "grad_norm": 1.3286348110703599, "learning_rate": 4.479166666666667e-06, "loss": 0.2664, "step": 86 }, { "epoch": 0.0453508829087118, "grad_norm": 1.4003238573470502, "learning_rate": 4.53125e-06, "loss": 0.2364, "step": 87 }, { "epoch": 0.04587215742490389, "grad_norm": 1.3658849183921546, "learning_rate": 4.583333333333333e-06, "loss": 0.235, "step": 88 }, { "epoch": 0.04639343194109598, "grad_norm": 1.3866176657796205, "learning_rate": 4.635416666666667e-06, "loss": 0.2502, "step": 89 }, { "epoch": 0.04691470645728807, "grad_norm": 1.4329731217987849, "learning_rate": 4.6875000000000004e-06, "loss": 0.2591, "step": 90 }, { "epoch": 0.04743598097348016, "grad_norm": 1.258561927924062, "learning_rate": 4.739583333333334e-06, "loss": 0.2503, "step": 91 }, { "epoch": 0.04795725548967225, "grad_norm": 1.3076546779001206, "learning_rate": 4.791666666666668e-06, "loss": 0.2345, "step": 92 }, { "epoch": 0.048478530005864336, "grad_norm": 1.2960823334662572, "learning_rate": 4.84375e-06, "loss": 0.2258, "step": 93 }, { "epoch": 0.04899980452205643, "grad_norm": 1.210681753840828, "learning_rate": 4.895833333333333e-06, "loss": 0.2469, "step": 94 }, { "epoch": 0.04952107903824852, "grad_norm": 1.3727031474669664, "learning_rate": 4.947916666666667e-06, "loss": 0.2595, "step": 95 }, { "epoch": 0.05004235355444061, "grad_norm": 1.385506278655309, "learning_rate": 5e-06, "loss": 0.2357, "step": 96 }, { "epoch": 0.050563628070632693, "grad_norm": 1.3815222257014061, "learning_rate": 4.999996283681687e-06, "loss": 0.2617, "step": 97 }, { "epoch": 0.051084902586824785, "grad_norm": 1.327004917894176, "learning_rate": 4.9999851347377946e-06, "loss": 0.2476, "step": 98 }, { "epoch": 0.051606177103016876, "grad_norm": 1.3696537124053607, "learning_rate": 4.99996655320147e-06, "loss": 0.2392, "step": 99 }, { "epoch": 0.05212745161920897, "grad_norm": 1.3657512033991304, "learning_rate": 4.999940539127958e-06, "loss": 0.2608, "step": 100 }, { "epoch": 0.05264872613540106, "grad_norm": 1.4194965343026547, "learning_rate": 4.999907092594598e-06, "loss": 0.2442, "step": 101 }, { "epoch": 0.05317000065159314, "grad_norm": 1.2749089132199982, "learning_rate": 4.99986621370083e-06, "loss": 0.2226, "step": 102 }, { "epoch": 0.053691275167785234, "grad_norm": 1.1887113141245471, "learning_rate": 4.99981790256819e-06, "loss": 0.2432, "step": 103 }, { "epoch": 0.054212549683977325, "grad_norm": 1.1938219400507954, "learning_rate": 4.999762159340305e-06, "loss": 0.2283, "step": 104 }, { "epoch": 0.054733824200169416, "grad_norm": 1.2840348614013515, "learning_rate": 4.999698984182909e-06, "loss": 0.218, "step": 105 }, { "epoch": 0.05525509871636151, "grad_norm": 1.2803746740205806, "learning_rate": 4.999628377283821e-06, "loss": 0.2508, "step": 106 }, { "epoch": 0.05577637323255359, "grad_norm": 1.4168047008355111, "learning_rate": 4.99955033885296e-06, "loss": 0.227, "step": 107 }, { "epoch": 0.05629764774874568, "grad_norm": 1.3985330741489335, "learning_rate": 4.999464869122339e-06, "loss": 0.2654, "step": 108 }, { "epoch": 0.056818922264937774, "grad_norm": 1.3276357405175772, "learning_rate": 4.999371968346064e-06, "loss": 0.2559, "step": 109 }, { "epoch": 0.057340196781129865, "grad_norm": 1.3466763643818087, "learning_rate": 4.999271636800334e-06, "loss": 0.2453, "step": 110 }, { "epoch": 0.05786147129732195, "grad_norm": 1.306537355090405, "learning_rate": 4.999163874783441e-06, "loss": 0.2546, "step": 111 }, { "epoch": 0.05838274581351404, "grad_norm": 1.3217941323578062, "learning_rate": 4.999048682615766e-06, "loss": 0.2555, "step": 112 }, { "epoch": 0.05890402032970613, "grad_norm": 1.2438940840815147, "learning_rate": 4.9989260606397816e-06, "loss": 0.2449, "step": 113 }, { "epoch": 0.05942529484589822, "grad_norm": 1.4326367838877314, "learning_rate": 4.998796009220051e-06, "loss": 0.255, "step": 114 }, { "epoch": 0.059946569362090314, "grad_norm": 1.269352153923466, "learning_rate": 4.9986585287432236e-06, "loss": 0.2372, "step": 115 }, { "epoch": 0.0604678438782824, "grad_norm": 1.3071274325044602, "learning_rate": 4.998513619618036e-06, "loss": 0.2442, "step": 116 }, { "epoch": 0.06098911839447449, "grad_norm": 1.3637289863199187, "learning_rate": 4.998361282275311e-06, "loss": 0.2281, "step": 117 }, { "epoch": 0.06151039291066658, "grad_norm": 1.4354154013144573, "learning_rate": 4.998201517167956e-06, "loss": 0.2554, "step": 118 }, { "epoch": 0.06203166742685867, "grad_norm": 1.2686877262427592, "learning_rate": 4.998034324770962e-06, "loss": 0.2334, "step": 119 }, { "epoch": 0.06255294194305076, "grad_norm": 1.4652927159239066, "learning_rate": 4.997859705581399e-06, "loss": 0.2407, "step": 120 }, { "epoch": 0.06307421645924285, "grad_norm": 1.382408681054862, "learning_rate": 4.997677660118423e-06, "loss": 0.23, "step": 121 }, { "epoch": 0.06359549097543493, "grad_norm": 1.3083101827135726, "learning_rate": 4.997488188923262e-06, "loss": 0.2369, "step": 122 }, { "epoch": 0.06411676549162702, "grad_norm": 1.2680968491770188, "learning_rate": 4.9972912925592245e-06, "loss": 0.2322, "step": 123 }, { "epoch": 0.06463804000781911, "grad_norm": 1.3728476777387406, "learning_rate": 4.997086971611696e-06, "loss": 0.2254, "step": 124 }, { "epoch": 0.0651593145240112, "grad_norm": 1.2130340794808803, "learning_rate": 4.996875226688133e-06, "loss": 0.2322, "step": 125 }, { "epoch": 0.0656805890402033, "grad_norm": 1.1725670998162612, "learning_rate": 4.996656058418064e-06, "loss": 0.2511, "step": 126 }, { "epoch": 0.06620186355639539, "grad_norm": 1.2975213169142519, "learning_rate": 4.996429467453088e-06, "loss": 0.2359, "step": 127 }, { "epoch": 0.06672313807258748, "grad_norm": 1.2983892428971382, "learning_rate": 4.996195454466873e-06, "loss": 0.2176, "step": 128 }, { "epoch": 0.06724441258877957, "grad_norm": 1.3409928375397802, "learning_rate": 4.995954020155153e-06, "loss": 0.2409, "step": 129 }, { "epoch": 0.06776568710497166, "grad_norm": 1.2708811055521516, "learning_rate": 4.995705165235726e-06, "loss": 0.2472, "step": 130 }, { "epoch": 0.06828696162116374, "grad_norm": 1.2719045463078689, "learning_rate": 4.995448890448449e-06, "loss": 0.2446, "step": 131 }, { "epoch": 0.06880823613735583, "grad_norm": 1.2812681207614192, "learning_rate": 4.995185196555242e-06, "loss": 0.2364, "step": 132 }, { "epoch": 0.06932951065354792, "grad_norm": 1.2101435618357532, "learning_rate": 4.994914084340082e-06, "loss": 0.2469, "step": 133 }, { "epoch": 0.06985078516974001, "grad_norm": 1.233938197016408, "learning_rate": 4.994635554608999e-06, "loss": 0.2286, "step": 134 }, { "epoch": 0.0703720596859321, "grad_norm": 1.2730808235698976, "learning_rate": 4.994349608190079e-06, "loss": 0.24, "step": 135 }, { "epoch": 0.0708933342021242, "grad_norm": 1.384333360859674, "learning_rate": 4.994056245933454e-06, "loss": 0.2393, "step": 136 }, { "epoch": 0.07141460871831629, "grad_norm": 1.2385359011388384, "learning_rate": 4.993755468711308e-06, "loss": 0.2446, "step": 137 }, { "epoch": 0.07193588323450838, "grad_norm": 1.2478572413164555, "learning_rate": 4.993447277417867e-06, "loss": 0.2269, "step": 138 }, { "epoch": 0.07245715775070047, "grad_norm": 1.2105094944153947, "learning_rate": 4.993131672969402e-06, "loss": 0.2484, "step": 139 }, { "epoch": 0.07297843226689255, "grad_norm": 1.3486869348259192, "learning_rate": 4.992808656304221e-06, "loss": 0.238, "step": 140 }, { "epoch": 0.07349970678308464, "grad_norm": 1.20349798497798, "learning_rate": 4.99247822838267e-06, "loss": 0.231, "step": 141 }, { "epoch": 0.07402098129927673, "grad_norm": 1.1821840497420153, "learning_rate": 4.99214039018713e-06, "loss": 0.2373, "step": 142 }, { "epoch": 0.07454225581546882, "grad_norm": 1.452457587792453, "learning_rate": 4.991795142722012e-06, "loss": 0.2395, "step": 143 }, { "epoch": 0.07506353033166091, "grad_norm": 1.239932267685592, "learning_rate": 4.9914424870137565e-06, "loss": 0.235, "step": 144 }, { "epoch": 0.075584804847853, "grad_norm": 1.22718860947722, "learning_rate": 4.991082424110826e-06, "loss": 0.2427, "step": 145 }, { "epoch": 0.07610607936404509, "grad_norm": 1.146867050689835, "learning_rate": 4.990714955083709e-06, "loss": 0.2437, "step": 146 }, { "epoch": 0.07662735388023718, "grad_norm": 1.1165066351844564, "learning_rate": 4.9903400810249116e-06, "loss": 0.2237, "step": 147 }, { "epoch": 0.07714862839642928, "grad_norm": 1.22634913619575, "learning_rate": 4.9899578030489534e-06, "loss": 0.2277, "step": 148 }, { "epoch": 0.07766990291262135, "grad_norm": 1.1756420690402456, "learning_rate": 4.9895681222923685e-06, "loss": 0.2349, "step": 149 }, { "epoch": 0.07819117742881344, "grad_norm": 1.2768853313777127, "learning_rate": 4.989171039913698e-06, "loss": 0.2378, "step": 150 }, { "epoch": 0.07871245194500553, "grad_norm": 1.298030043050591, "learning_rate": 4.9887665570934905e-06, "loss": 0.2309, "step": 151 }, { "epoch": 0.07923372646119763, "grad_norm": 1.1324424296737856, "learning_rate": 4.988354675034296e-06, "loss": 0.2242, "step": 152 }, { "epoch": 0.07975500097738972, "grad_norm": 1.289394446194592, "learning_rate": 4.987935394960661e-06, "loss": 0.2285, "step": 153 }, { "epoch": 0.08027627549358181, "grad_norm": 1.2538484453836192, "learning_rate": 4.98750871811913e-06, "loss": 0.2545, "step": 154 }, { "epoch": 0.0807975500097739, "grad_norm": 1.2220953206167016, "learning_rate": 4.987074645778234e-06, "loss": 0.2277, "step": 155 }, { "epoch": 0.08131882452596599, "grad_norm": 1.187543706345677, "learning_rate": 4.986633179228495e-06, "loss": 0.2327, "step": 156 }, { "epoch": 0.08184009904215808, "grad_norm": 1.2934140935539535, "learning_rate": 4.986184319782418e-06, "loss": 0.2424, "step": 157 }, { "epoch": 0.08236137355835016, "grad_norm": 1.2219190172913152, "learning_rate": 4.9857280687744856e-06, "loss": 0.2508, "step": 158 }, { "epoch": 0.08288264807454225, "grad_norm": 1.2427749225301898, "learning_rate": 4.985264427561158e-06, "loss": 0.2297, "step": 159 }, { "epoch": 0.08340392259073434, "grad_norm": 1.2357587940866266, "learning_rate": 4.984793397520865e-06, "loss": 0.2307, "step": 160 }, { "epoch": 0.08392519710692643, "grad_norm": 1.2364383187414076, "learning_rate": 4.984314980054005e-06, "loss": 0.2323, "step": 161 }, { "epoch": 0.08444647162311852, "grad_norm": 1.2393212896003536, "learning_rate": 4.983829176582939e-06, "loss": 0.2473, "step": 162 }, { "epoch": 0.08496774613931062, "grad_norm": 1.3103587969557084, "learning_rate": 4.983335988551986e-06, "loss": 0.2344, "step": 163 }, { "epoch": 0.0854890206555027, "grad_norm": 1.2950039813259882, "learning_rate": 4.982835417427424e-06, "loss": 0.2447, "step": 164 }, { "epoch": 0.0860102951716948, "grad_norm": 1.1823175382955655, "learning_rate": 4.982327464697476e-06, "loss": 0.2339, "step": 165 }, { "epoch": 0.08653156968788689, "grad_norm": 1.3397014918648167, "learning_rate": 4.981812131872315e-06, "loss": 0.2421, "step": 166 }, { "epoch": 0.08705284420407897, "grad_norm": 1.1880479785297025, "learning_rate": 4.981289420484051e-06, "loss": 0.2063, "step": 167 }, { "epoch": 0.08757411872027106, "grad_norm": 1.1844610096859758, "learning_rate": 4.980759332086736e-06, "loss": 0.2223, "step": 168 }, { "epoch": 0.08809539323646315, "grad_norm": 1.1743257421756592, "learning_rate": 4.980221868256351e-06, "loss": 0.2518, "step": 169 }, { "epoch": 0.08861666775265524, "grad_norm": 1.2816501589273177, "learning_rate": 4.9796770305908045e-06, "loss": 0.2244, "step": 170 }, { "epoch": 0.08913794226884733, "grad_norm": 1.3332187743906725, "learning_rate": 4.979124820709931e-06, "loss": 0.2315, "step": 171 }, { "epoch": 0.08965921678503942, "grad_norm": 1.1756963489546968, "learning_rate": 4.978565240255477e-06, "loss": 0.226, "step": 172 }, { "epoch": 0.09018049130123151, "grad_norm": 1.4972378793543208, "learning_rate": 4.977998290891109e-06, "loss": 0.2314, "step": 173 }, { "epoch": 0.0907017658174236, "grad_norm": 1.3196899591714728, "learning_rate": 4.9774239743023975e-06, "loss": 0.2359, "step": 174 }, { "epoch": 0.0912230403336157, "grad_norm": 1.4208086181110644, "learning_rate": 4.976842292196817e-06, "loss": 0.2371, "step": 175 }, { "epoch": 0.09174431484980777, "grad_norm": 1.5874713119749235, "learning_rate": 4.9762532463037385e-06, "loss": 0.2346, "step": 176 }, { "epoch": 0.09226558936599986, "grad_norm": 1.3081464000461915, "learning_rate": 4.97565683837443e-06, "loss": 0.2377, "step": 177 }, { "epoch": 0.09278686388219196, "grad_norm": 1.4793193226923158, "learning_rate": 4.9750530701820446e-06, "loss": 0.2301, "step": 178 }, { "epoch": 0.09330813839838405, "grad_norm": 1.45104441094533, "learning_rate": 4.974441943521616e-06, "loss": 0.246, "step": 179 }, { "epoch": 0.09382941291457614, "grad_norm": 1.1712951871890278, "learning_rate": 4.9738234602100605e-06, "loss": 0.2255, "step": 180 }, { "epoch": 0.09435068743076823, "grad_norm": 1.4773502084469639, "learning_rate": 4.97319762208616e-06, "loss": 0.2326, "step": 181 }, { "epoch": 0.09487196194696032, "grad_norm": 1.306772468267686, "learning_rate": 4.972564431010567e-06, "loss": 0.245, "step": 182 }, { "epoch": 0.09539323646315241, "grad_norm": 1.209641545426326, "learning_rate": 4.971923888865792e-06, "loss": 0.2247, "step": 183 }, { "epoch": 0.0959145109793445, "grad_norm": 1.200834610292657, "learning_rate": 4.971275997556203e-06, "loss": 0.2073, "step": 184 }, { "epoch": 0.09643578549553658, "grad_norm": 1.266596376115279, "learning_rate": 4.970620759008015e-06, "loss": 0.2284, "step": 185 }, { "epoch": 0.09695706001172867, "grad_norm": 1.4118237346104745, "learning_rate": 4.969958175169291e-06, "loss": 0.2483, "step": 186 }, { "epoch": 0.09747833452792076, "grad_norm": 1.182726741808869, "learning_rate": 4.969288248009924e-06, "loss": 0.2184, "step": 187 }, { "epoch": 0.09799960904411285, "grad_norm": 1.2332451914842804, "learning_rate": 4.968610979521647e-06, "loss": 0.2229, "step": 188 }, { "epoch": 0.09852088356030494, "grad_norm": 1.296696985788193, "learning_rate": 4.967926371718017e-06, "loss": 0.2456, "step": 189 }, { "epoch": 0.09904215807649704, "grad_norm": 1.2471025773185285, "learning_rate": 4.96723442663441e-06, "loss": 0.2358, "step": 190 }, { "epoch": 0.09956343259268913, "grad_norm": 1.2081020978259593, "learning_rate": 4.966535146328014e-06, "loss": 0.2259, "step": 191 }, { "epoch": 0.10008470710888122, "grad_norm": 1.1747127101005468, "learning_rate": 4.965828532877831e-06, "loss": 0.2262, "step": 192 }, { "epoch": 0.10060598162507331, "grad_norm": 1.239720059272851, "learning_rate": 4.96511458838466e-06, "loss": 0.2475, "step": 193 }, { "epoch": 0.10112725614126539, "grad_norm": 1.3048056513019626, "learning_rate": 4.964393314971096e-06, "loss": 0.2311, "step": 194 }, { "epoch": 0.10164853065745748, "grad_norm": 1.183469548460636, "learning_rate": 4.963664714781525e-06, "loss": 0.2395, "step": 195 }, { "epoch": 0.10216980517364957, "grad_norm": 1.333533223581565, "learning_rate": 4.962928789982117e-06, "loss": 0.2191, "step": 196 }, { "epoch": 0.10269107968984166, "grad_norm": 1.196909258791872, "learning_rate": 4.9621855427608134e-06, "loss": 0.2208, "step": 197 }, { "epoch": 0.10321235420603375, "grad_norm": 1.2853727381550513, "learning_rate": 4.961434975327331e-06, "loss": 0.2459, "step": 198 }, { "epoch": 0.10373362872222584, "grad_norm": 1.1160859614220253, "learning_rate": 4.960677089913146e-06, "loss": 0.2328, "step": 199 }, { "epoch": 0.10425490323841793, "grad_norm": 1.1166526080209556, "learning_rate": 4.959911888771496e-06, "loss": 0.2312, "step": 200 }, { "epoch": 0.10477617775461003, "grad_norm": 1.0880873416190306, "learning_rate": 4.959139374177364e-06, "loss": 0.2074, "step": 201 }, { "epoch": 0.10529745227080212, "grad_norm": 1.2040328431798148, "learning_rate": 4.958359548427478e-06, "loss": 0.2357, "step": 202 }, { "epoch": 0.10581872678699421, "grad_norm": 1.0518301757015505, "learning_rate": 4.957572413840303e-06, "loss": 0.2182, "step": 203 }, { "epoch": 0.10634000130318629, "grad_norm": 1.154934427237305, "learning_rate": 4.956777972756033e-06, "loss": 0.2237, "step": 204 }, { "epoch": 0.10686127581937838, "grad_norm": 1.1276890679621758, "learning_rate": 4.955976227536584e-06, "loss": 0.2105, "step": 205 }, { "epoch": 0.10738255033557047, "grad_norm": 1.1058755543209657, "learning_rate": 4.95516718056559e-06, "loss": 0.2273, "step": 206 }, { "epoch": 0.10790382485176256, "grad_norm": 1.2126591248862537, "learning_rate": 4.95435083424839e-06, "loss": 0.239, "step": 207 }, { "epoch": 0.10842509936795465, "grad_norm": 1.2627590952539536, "learning_rate": 4.953527191012029e-06, "loss": 0.2417, "step": 208 }, { "epoch": 0.10894637388414674, "grad_norm": 1.2188135211116549, "learning_rate": 4.95269625330524e-06, "loss": 0.2282, "step": 209 }, { "epoch": 0.10946764840033883, "grad_norm": 1.087249434059975, "learning_rate": 4.951858023598448e-06, "loss": 0.2013, "step": 210 }, { "epoch": 0.10998892291653092, "grad_norm": 1.1192373478037494, "learning_rate": 4.951012504383756e-06, "loss": 0.2271, "step": 211 }, { "epoch": 0.11051019743272301, "grad_norm": 1.1846883599390468, "learning_rate": 4.9501596981749375e-06, "loss": 0.2393, "step": 212 }, { "epoch": 0.11103147194891509, "grad_norm": 1.2820136837325322, "learning_rate": 4.949299607507434e-06, "loss": 0.2311, "step": 213 }, { "epoch": 0.11155274646510718, "grad_norm": 1.0711936772195123, "learning_rate": 4.94843223493834e-06, "loss": 0.2029, "step": 214 }, { "epoch": 0.11207402098129927, "grad_norm": 1.1864803079209343, "learning_rate": 4.947557583046403e-06, "loss": 0.2342, "step": 215 }, { "epoch": 0.11259529549749137, "grad_norm": 1.1855853986108895, "learning_rate": 4.94667565443201e-06, "loss": 0.2188, "step": 216 }, { "epoch": 0.11311657001368346, "grad_norm": 1.2164131878100903, "learning_rate": 4.945786451717183e-06, "loss": 0.2376, "step": 217 }, { "epoch": 0.11363784452987555, "grad_norm": 1.2725613700670313, "learning_rate": 4.944889977545571e-06, "loss": 0.2353, "step": 218 }, { "epoch": 0.11415911904606764, "grad_norm": 1.2700871402224212, "learning_rate": 4.94398623458244e-06, "loss": 0.249, "step": 219 }, { "epoch": 0.11468039356225973, "grad_norm": 1.0477852266713497, "learning_rate": 4.943075225514667e-06, "loss": 0.2212, "step": 220 }, { "epoch": 0.11520166807845182, "grad_norm": 1.1949076765095417, "learning_rate": 4.942156953050733e-06, "loss": 0.2427, "step": 221 }, { "epoch": 0.1157229425946439, "grad_norm": 1.1910711165686847, "learning_rate": 4.94123141992071e-06, "loss": 0.2315, "step": 222 }, { "epoch": 0.11624421711083599, "grad_norm": 1.1171795697179367, "learning_rate": 4.940298628876261e-06, "loss": 0.2323, "step": 223 }, { "epoch": 0.11676549162702808, "grad_norm": 1.119061233540857, "learning_rate": 4.9393585826906245e-06, "loss": 0.2208, "step": 224 }, { "epoch": 0.11728676614322017, "grad_norm": 1.1501275918061766, "learning_rate": 4.938411284158608e-06, "loss": 0.219, "step": 225 }, { "epoch": 0.11780804065941226, "grad_norm": 1.2954253968552865, "learning_rate": 4.937456736096581e-06, "loss": 0.2482, "step": 226 }, { "epoch": 0.11832931517560435, "grad_norm": 1.2008217826774177, "learning_rate": 4.936494941342469e-06, "loss": 0.2373, "step": 227 }, { "epoch": 0.11885058969179645, "grad_norm": 1.2630056268710672, "learning_rate": 4.93552590275574e-06, "loss": 0.2323, "step": 228 }, { "epoch": 0.11937186420798854, "grad_norm": 1.1943668197100574, "learning_rate": 4.934549623217399e-06, "loss": 0.2266, "step": 229 }, { "epoch": 0.11989313872418063, "grad_norm": 1.2274448290359987, "learning_rate": 4.9335661056299755e-06, "loss": 0.2325, "step": 230 }, { "epoch": 0.1204144132403727, "grad_norm": 1.1933170165235585, "learning_rate": 4.932575352917524e-06, "loss": 0.2224, "step": 231 }, { "epoch": 0.1209356877565648, "grad_norm": 1.1015086989558849, "learning_rate": 4.931577368025607e-06, "loss": 0.2159, "step": 232 }, { "epoch": 0.12145696227275689, "grad_norm": 1.323236281530893, "learning_rate": 4.930572153921287e-06, "loss": 0.2418, "step": 233 }, { "epoch": 0.12197823678894898, "grad_norm": 1.328150537223152, "learning_rate": 4.92955971359312e-06, "loss": 0.2296, "step": 234 }, { "epoch": 0.12249951130514107, "grad_norm": 1.1592367964351038, "learning_rate": 4.928540050051146e-06, "loss": 0.211, "step": 235 }, { "epoch": 0.12302078582133316, "grad_norm": 1.1266688403160974, "learning_rate": 4.927513166326881e-06, "loss": 0.2147, "step": 236 }, { "epoch": 0.12354206033752525, "grad_norm": 1.3104950047973025, "learning_rate": 4.9264790654733076e-06, "loss": 0.2274, "step": 237 }, { "epoch": 0.12406333485371734, "grad_norm": 1.3857089010320038, "learning_rate": 4.925437750564863e-06, "loss": 0.235, "step": 238 }, { "epoch": 0.12458460936990944, "grad_norm": 1.0833873266614178, "learning_rate": 4.924389224697433e-06, "loss": 0.2272, "step": 239 }, { "epoch": 0.12510588388610153, "grad_norm": 1.2529871243213786, "learning_rate": 4.923333490988343e-06, "loss": 0.2412, "step": 240 }, { "epoch": 0.1256271584022936, "grad_norm": 1.118800053266488, "learning_rate": 4.922270552576347e-06, "loss": 0.2168, "step": 241 }, { "epoch": 0.1261484329184857, "grad_norm": 1.153865139835547, "learning_rate": 4.921200412621619e-06, "loss": 0.2133, "step": 242 }, { "epoch": 0.1266697074346778, "grad_norm": 1.1260326719408678, "learning_rate": 4.920123074305743e-06, "loss": 0.2246, "step": 243 }, { "epoch": 0.12719098195086986, "grad_norm": 1.2169366957395464, "learning_rate": 4.919038540831705e-06, "loss": 0.2182, "step": 244 }, { "epoch": 0.12771225646706197, "grad_norm": 1.106220335006384, "learning_rate": 4.917946815423883e-06, "loss": 0.2119, "step": 245 }, { "epoch": 0.12823353098325405, "grad_norm": 1.128901025595511, "learning_rate": 4.916847901328035e-06, "loss": 0.231, "step": 246 }, { "epoch": 0.12875480549944615, "grad_norm": 1.1605446687661631, "learning_rate": 4.915741801811294e-06, "loss": 0.2362, "step": 247 }, { "epoch": 0.12927608001563823, "grad_norm": 1.282977790360365, "learning_rate": 4.914628520162154e-06, "loss": 0.2375, "step": 248 }, { "epoch": 0.12979735453183033, "grad_norm": 1.0118063392484975, "learning_rate": 4.913508059690461e-06, "loss": 0.1983, "step": 249 }, { "epoch": 0.1303186290480224, "grad_norm": 1.1850969192532463, "learning_rate": 4.912380423727405e-06, "loss": 0.2208, "step": 250 }, { "epoch": 0.13083990356421452, "grad_norm": 1.1567774979095986, "learning_rate": 4.911245615625512e-06, "loss": 0.2163, "step": 251 }, { "epoch": 0.1313611780804066, "grad_norm": 1.2330915782132366, "learning_rate": 4.910103638758627e-06, "loss": 0.2379, "step": 252 }, { "epoch": 0.13188245259659867, "grad_norm": 1.1057712455940338, "learning_rate": 4.9089544965219095e-06, "loss": 0.224, "step": 253 }, { "epoch": 0.13240372711279078, "grad_norm": 1.1137495249197693, "learning_rate": 4.907798192331821e-06, "loss": 0.2088, "step": 254 }, { "epoch": 0.13292500162898285, "grad_norm": 1.183848047830925, "learning_rate": 4.90663472962612e-06, "loss": 0.2211, "step": 255 }, { "epoch": 0.13344627614517496, "grad_norm": 1.210251233263837, "learning_rate": 4.905464111863841e-06, "loss": 0.2287, "step": 256 }, { "epoch": 0.13396755066136704, "grad_norm": 1.2156314491143745, "learning_rate": 4.904286342525298e-06, "loss": 0.2339, "step": 257 }, { "epoch": 0.13448882517755914, "grad_norm": 1.1335883548375618, "learning_rate": 4.903101425112062e-06, "loss": 0.2327, "step": 258 }, { "epoch": 0.13501009969375122, "grad_norm": 1.1196076296130062, "learning_rate": 4.9019093631469575e-06, "loss": 0.2253, "step": 259 }, { "epoch": 0.13553137420994332, "grad_norm": 1.2260470187534245, "learning_rate": 4.900710160174048e-06, "loss": 0.248, "step": 260 }, { "epoch": 0.1360526487261354, "grad_norm": 1.1196565222861217, "learning_rate": 4.899503819758633e-06, "loss": 0.2174, "step": 261 }, { "epoch": 0.13657392324232748, "grad_norm": 1.0864136858861488, "learning_rate": 4.898290345487226e-06, "loss": 0.2219, "step": 262 }, { "epoch": 0.13709519775851958, "grad_norm": 1.0137672217351832, "learning_rate": 4.897069740967554e-06, "loss": 0.2131, "step": 263 }, { "epoch": 0.13761647227471166, "grad_norm": 1.1037519976554437, "learning_rate": 4.89584200982854e-06, "loss": 0.2284, "step": 264 }, { "epoch": 0.13813774679090376, "grad_norm": 1.1303347452809993, "learning_rate": 4.894607155720294e-06, "loss": 0.2418, "step": 265 }, { "epoch": 0.13865902130709584, "grad_norm": 1.1657801655573423, "learning_rate": 4.893365182314108e-06, "loss": 0.2385, "step": 266 }, { "epoch": 0.13918029582328795, "grad_norm": 1.1427107191789623, "learning_rate": 4.892116093302436e-06, "loss": 0.2229, "step": 267 }, { "epoch": 0.13970157033948002, "grad_norm": 1.1330210325415613, "learning_rate": 4.890859892398886e-06, "loss": 0.2355, "step": 268 }, { "epoch": 0.14022284485567213, "grad_norm": 1.105052560447426, "learning_rate": 4.889596583338213e-06, "loss": 0.2344, "step": 269 }, { "epoch": 0.1407441193718642, "grad_norm": 1.0965253134452662, "learning_rate": 4.8883261698763045e-06, "loss": 0.2188, "step": 270 }, { "epoch": 0.14126539388805628, "grad_norm": 1.147653006818557, "learning_rate": 4.887048655790169e-06, "loss": 0.2274, "step": 271 }, { "epoch": 0.1417866684042484, "grad_norm": 1.0895631770032759, "learning_rate": 4.8857640448779246e-06, "loss": 0.2398, "step": 272 }, { "epoch": 0.14230794292044047, "grad_norm": 1.0681929006676545, "learning_rate": 4.884472340958791e-06, "loss": 0.2221, "step": 273 }, { "epoch": 0.14282921743663257, "grad_norm": 1.1493300322422102, "learning_rate": 4.883173547873073e-06, "loss": 0.2318, "step": 274 }, { "epoch": 0.14335049195282465, "grad_norm": 1.178680600036612, "learning_rate": 4.881867669482157e-06, "loss": 0.2313, "step": 275 }, { "epoch": 0.14387176646901675, "grad_norm": 1.104816541922132, "learning_rate": 4.880554709668486e-06, "loss": 0.2332, "step": 276 }, { "epoch": 0.14439304098520883, "grad_norm": 1.1737237666373883, "learning_rate": 4.879234672335564e-06, "loss": 0.235, "step": 277 }, { "epoch": 0.14491431550140094, "grad_norm": 1.2092792760641806, "learning_rate": 4.8779075614079354e-06, "loss": 0.2395, "step": 278 }, { "epoch": 0.145435590017593, "grad_norm": 1.0839887894678346, "learning_rate": 4.87657338083117e-06, "loss": 0.2223, "step": 279 }, { "epoch": 0.1459568645337851, "grad_norm": 1.0999122775478736, "learning_rate": 4.875232134571863e-06, "loss": 0.2275, "step": 280 }, { "epoch": 0.1464781390499772, "grad_norm": 1.1636449280470644, "learning_rate": 4.8738838266176094e-06, "loss": 0.2211, "step": 281 }, { "epoch": 0.14699941356616927, "grad_norm": 1.1790289908665157, "learning_rate": 4.872528460977005e-06, "loss": 0.229, "step": 282 }, { "epoch": 0.14752068808236138, "grad_norm": 1.1989633537081639, "learning_rate": 4.871166041679626e-06, "loss": 0.2258, "step": 283 }, { "epoch": 0.14804196259855346, "grad_norm": 1.2423687257331835, "learning_rate": 4.869796572776018e-06, "loss": 0.2323, "step": 284 }, { "epoch": 0.14856323711474556, "grad_norm": 1.2228142589786297, "learning_rate": 4.868420058337687e-06, "loss": 0.2107, "step": 285 }, { "epoch": 0.14908451163093764, "grad_norm": 0.9926879454981049, "learning_rate": 4.867036502457087e-06, "loss": 0.2107, "step": 286 }, { "epoch": 0.14960578614712974, "grad_norm": 1.2008996239053435, "learning_rate": 4.865645909247604e-06, "loss": 0.2249, "step": 287 }, { "epoch": 0.15012706066332182, "grad_norm": 1.1542733944022654, "learning_rate": 4.864248282843548e-06, "loss": 0.2399, "step": 288 }, { "epoch": 0.1506483351795139, "grad_norm": 1.1691556753754107, "learning_rate": 4.862843627400139e-06, "loss": 0.2151, "step": 289 }, { "epoch": 0.151169609695706, "grad_norm": 1.3273619113907211, "learning_rate": 4.861431947093494e-06, "loss": 0.2175, "step": 290 }, { "epoch": 0.15169088421189808, "grad_norm": 1.2832476992194477, "learning_rate": 4.860013246120616e-06, "loss": 0.2298, "step": 291 }, { "epoch": 0.15221215872809019, "grad_norm": 1.084146667118211, "learning_rate": 4.85858752869938e-06, "loss": 0.2089, "step": 292 }, { "epoch": 0.15273343324428226, "grad_norm": 1.1508676857394082, "learning_rate": 4.857154799068522e-06, "loss": 0.2266, "step": 293 }, { "epoch": 0.15325470776047437, "grad_norm": 1.1639283738815098, "learning_rate": 4.855715061487626e-06, "loss": 0.2255, "step": 294 }, { "epoch": 0.15377598227666645, "grad_norm": 1.2554700518066422, "learning_rate": 4.8542683202371105e-06, "loss": 0.2223, "step": 295 }, { "epoch": 0.15429725679285855, "grad_norm": 1.3283787770601974, "learning_rate": 4.8528145796182155e-06, "loss": 0.2154, "step": 296 }, { "epoch": 0.15481853130905063, "grad_norm": 1.1601511824620128, "learning_rate": 4.851353843952992e-06, "loss": 0.2228, "step": 297 }, { "epoch": 0.1553398058252427, "grad_norm": 1.220795788128704, "learning_rate": 4.849886117584286e-06, "loss": 0.2335, "step": 298 }, { "epoch": 0.1558610803414348, "grad_norm": 1.2555523292108537, "learning_rate": 4.84841140487573e-06, "loss": 0.2097, "step": 299 }, { "epoch": 0.1563823548576269, "grad_norm": 1.164702238891539, "learning_rate": 4.846929710211724e-06, "loss": 0.2272, "step": 300 }, { "epoch": 0.156903629373819, "grad_norm": 1.183879685349843, "learning_rate": 4.845441037997428e-06, "loss": 0.2321, "step": 301 }, { "epoch": 0.15742490389001107, "grad_norm": 1.0808256708331534, "learning_rate": 4.843945392658744e-06, "loss": 0.2212, "step": 302 }, { "epoch": 0.15794617840620317, "grad_norm": 1.1207884103366483, "learning_rate": 4.84244277864231e-06, "loss": 0.2312, "step": 303 }, { "epoch": 0.15846745292239525, "grad_norm": 1.2420206921311776, "learning_rate": 4.840933200415479e-06, "loss": 0.2328, "step": 304 }, { "epoch": 0.15898872743858736, "grad_norm": 1.1302339000354495, "learning_rate": 4.839416662466307e-06, "loss": 0.2317, "step": 305 }, { "epoch": 0.15951000195477943, "grad_norm": 1.1663665978420636, "learning_rate": 4.837893169303548e-06, "loss": 0.2297, "step": 306 }, { "epoch": 0.1600312764709715, "grad_norm": 1.2551140786028032, "learning_rate": 4.836362725456628e-06, "loss": 0.2182, "step": 307 }, { "epoch": 0.16055255098716362, "grad_norm": 1.1278960330544754, "learning_rate": 4.834825335475641e-06, "loss": 0.2251, "step": 308 }, { "epoch": 0.1610738255033557, "grad_norm": 1.117111565112251, "learning_rate": 4.833281003931331e-06, "loss": 0.2165, "step": 309 }, { "epoch": 0.1615951000195478, "grad_norm": 1.0974889618517698, "learning_rate": 4.831729735415081e-06, "loss": 0.2333, "step": 310 }, { "epoch": 0.16211637453573988, "grad_norm": 1.1489672264426987, "learning_rate": 4.830171534538895e-06, "loss": 0.2408, "step": 311 }, { "epoch": 0.16263764905193198, "grad_norm": 1.0739457008165634, "learning_rate": 4.828606405935391e-06, "loss": 0.2118, "step": 312 }, { "epoch": 0.16315892356812406, "grad_norm": 1.1562271241695659, "learning_rate": 4.827034354257782e-06, "loss": 0.2259, "step": 313 }, { "epoch": 0.16368019808431616, "grad_norm": 1.146438735977002, "learning_rate": 4.825455384179864e-06, "loss": 0.2197, "step": 314 }, { "epoch": 0.16420147260050824, "grad_norm": 1.2160636638243234, "learning_rate": 4.823869500395999e-06, "loss": 0.2346, "step": 315 }, { "epoch": 0.16472274711670032, "grad_norm": 1.1626081501279093, "learning_rate": 4.822276707621109e-06, "loss": 0.2298, "step": 316 }, { "epoch": 0.16524402163289242, "grad_norm": 1.112204442031006, "learning_rate": 4.820677010590652e-06, "loss": 0.227, "step": 317 }, { "epoch": 0.1657652961490845, "grad_norm": 1.2103180677038683, "learning_rate": 4.819070414060616e-06, "loss": 0.2388, "step": 318 }, { "epoch": 0.1662865706652766, "grad_norm": 1.1743106791258793, "learning_rate": 4.817456922807499e-06, "loss": 0.2338, "step": 319 }, { "epoch": 0.16680784518146868, "grad_norm": 1.0896117602385076, "learning_rate": 4.815836541628299e-06, "loss": 0.2299, "step": 320 }, { "epoch": 0.1673291196976608, "grad_norm": 1.1892438430988612, "learning_rate": 4.814209275340498e-06, "loss": 0.2047, "step": 321 }, { "epoch": 0.16785039421385287, "grad_norm": 1.1452649056862731, "learning_rate": 4.8125751287820484e-06, "loss": 0.2189, "step": 322 }, { "epoch": 0.16837166873004497, "grad_norm": 1.1511519078648214, "learning_rate": 4.8109341068113566e-06, "loss": 0.2355, "step": 323 }, { "epoch": 0.16889294324623705, "grad_norm": 1.1351415607365043, "learning_rate": 4.8092862143072705e-06, "loss": 0.2311, "step": 324 }, { "epoch": 0.16941421776242913, "grad_norm": 1.1787027223084978, "learning_rate": 4.807631456169064e-06, "loss": 0.2316, "step": 325 }, { "epoch": 0.16993549227862123, "grad_norm": 1.1242332884299846, "learning_rate": 4.805969837316424e-06, "loss": 0.2108, "step": 326 }, { "epoch": 0.1704567667948133, "grad_norm": 1.1307767960947903, "learning_rate": 4.804301362689435e-06, "loss": 0.2292, "step": 327 }, { "epoch": 0.1709780413110054, "grad_norm": 1.1743519557918656, "learning_rate": 4.8026260372485625e-06, "loss": 0.2168, "step": 328 }, { "epoch": 0.1714993158271975, "grad_norm": 1.1654596044991834, "learning_rate": 4.8009438659746396e-06, "loss": 0.2119, "step": 329 }, { "epoch": 0.1720205903433896, "grad_norm": 1.0486212534056034, "learning_rate": 4.7992548538688554e-06, "loss": 0.2227, "step": 330 }, { "epoch": 0.17254186485958167, "grad_norm": 1.2639066123447578, "learning_rate": 4.797559005952733e-06, "loss": 0.2161, "step": 331 }, { "epoch": 0.17306313937577378, "grad_norm": 1.1820605824642645, "learning_rate": 4.795856327268124e-06, "loss": 0.2338, "step": 332 }, { "epoch": 0.17358441389196586, "grad_norm": 1.0878966501873548, "learning_rate": 4.794146822877182e-06, "loss": 0.2232, "step": 333 }, { "epoch": 0.17410568840815793, "grad_norm": 1.2325481338468462, "learning_rate": 4.792430497862358e-06, "loss": 0.2181, "step": 334 }, { "epoch": 0.17462696292435004, "grad_norm": 1.1599398884974863, "learning_rate": 4.790707357326381e-06, "loss": 0.2333, "step": 335 }, { "epoch": 0.17514823744054211, "grad_norm": 1.0837820673563148, "learning_rate": 4.788977406392242e-06, "loss": 0.2034, "step": 336 }, { "epoch": 0.17566951195673422, "grad_norm": 1.182057248623312, "learning_rate": 4.787240650203178e-06, "loss": 0.2272, "step": 337 }, { "epoch": 0.1761907864729263, "grad_norm": 1.2568951261399692, "learning_rate": 4.785497093922662e-06, "loss": 0.2376, "step": 338 }, { "epoch": 0.1767120609891184, "grad_norm": 1.2211840928254953, "learning_rate": 4.78374674273438e-06, "loss": 0.23, "step": 339 }, { "epoch": 0.17723333550531048, "grad_norm": 1.0705403957934083, "learning_rate": 4.781989601842224e-06, "loss": 0.2132, "step": 340 }, { "epoch": 0.17775461002150258, "grad_norm": 1.1202000299882193, "learning_rate": 4.780225676470268e-06, "loss": 0.1965, "step": 341 }, { "epoch": 0.17827588453769466, "grad_norm": 1.047916892629429, "learning_rate": 4.77845497186276e-06, "loss": 0.2076, "step": 342 }, { "epoch": 0.17879715905388674, "grad_norm": 1.1180752657115838, "learning_rate": 4.776677493284101e-06, "loss": 0.2195, "step": 343 }, { "epoch": 0.17931843357007884, "grad_norm": 1.1592347591044683, "learning_rate": 4.774893246018831e-06, "loss": 0.2293, "step": 344 }, { "epoch": 0.17983970808627092, "grad_norm": 1.088817972776965, "learning_rate": 4.773102235371617e-06, "loss": 0.218, "step": 345 }, { "epoch": 0.18036098260246303, "grad_norm": 1.2244227223086848, "learning_rate": 4.771304466667229e-06, "loss": 0.2174, "step": 346 }, { "epoch": 0.1808822571186551, "grad_norm": 1.1747749136702952, "learning_rate": 4.769499945250533e-06, "loss": 0.2237, "step": 347 }, { "epoch": 0.1814035316348472, "grad_norm": 1.0920641978765553, "learning_rate": 4.76768867648647e-06, "loss": 0.2323, "step": 348 }, { "epoch": 0.1819248061510393, "grad_norm": 1.0542260185395076, "learning_rate": 4.7658706657600395e-06, "loss": 0.2027, "step": 349 }, { "epoch": 0.1824460806672314, "grad_norm": 1.129211741927763, "learning_rate": 4.764045918476288e-06, "loss": 0.2298, "step": 350 }, { "epoch": 0.18296735518342347, "grad_norm": 1.1380740875574586, "learning_rate": 4.762214440060289e-06, "loss": 0.2217, "step": 351 }, { "epoch": 0.18348862969961555, "grad_norm": 1.136617520410274, "learning_rate": 4.760376235957127e-06, "loss": 0.2376, "step": 352 }, { "epoch": 0.18400990421580765, "grad_norm": 1.1274285674722149, "learning_rate": 4.758531311631884e-06, "loss": 0.2103, "step": 353 }, { "epoch": 0.18453117873199973, "grad_norm": 1.0510082657536814, "learning_rate": 4.756679672569621e-06, "loss": 0.2271, "step": 354 }, { "epoch": 0.18505245324819183, "grad_norm": 1.0689796820104887, "learning_rate": 4.7548213242753616e-06, "loss": 0.2176, "step": 355 }, { "epoch": 0.1855737277643839, "grad_norm": 1.0432439733778767, "learning_rate": 4.752956272274078e-06, "loss": 0.2211, "step": 356 }, { "epoch": 0.18609500228057602, "grad_norm": 1.1900272807021643, "learning_rate": 4.751084522110669e-06, "loss": 0.2225, "step": 357 }, { "epoch": 0.1866162767967681, "grad_norm": 1.074547857544035, "learning_rate": 4.749206079349952e-06, "loss": 0.2116, "step": 358 }, { "epoch": 0.1871375513129602, "grad_norm": 1.033594905483247, "learning_rate": 4.747320949576641e-06, "loss": 0.2142, "step": 359 }, { "epoch": 0.18765882582915228, "grad_norm": 1.1255484030942537, "learning_rate": 4.745429138395329e-06, "loss": 0.2107, "step": 360 }, { "epoch": 0.18818010034534435, "grad_norm": 1.1208575114062105, "learning_rate": 4.743530651430472e-06, "loss": 0.2199, "step": 361 }, { "epoch": 0.18870137486153646, "grad_norm": 1.1460239450962784, "learning_rate": 4.741625494326379e-06, "loss": 0.2232, "step": 362 }, { "epoch": 0.18922264937772854, "grad_norm": 1.1761939886658683, "learning_rate": 4.739713672747183e-06, "loss": 0.2193, "step": 363 }, { "epoch": 0.18974392389392064, "grad_norm": 1.19826819689747, "learning_rate": 4.737795192376836e-06, "loss": 0.226, "step": 364 }, { "epoch": 0.19026519841011272, "grad_norm": 1.103789018784415, "learning_rate": 4.735870058919084e-06, "loss": 0.2339, "step": 365 }, { "epoch": 0.19078647292630482, "grad_norm": 1.0832407294802735, "learning_rate": 4.733938278097456e-06, "loss": 0.2393, "step": 366 }, { "epoch": 0.1913077474424969, "grad_norm": 1.1848088227040534, "learning_rate": 4.731999855655239e-06, "loss": 0.224, "step": 367 }, { "epoch": 0.191829021958689, "grad_norm": 1.1838392460670388, "learning_rate": 4.730054797355471e-06, "loss": 0.2448, "step": 368 }, { "epoch": 0.19235029647488108, "grad_norm": 1.0617393105525352, "learning_rate": 4.728103108980915e-06, "loss": 0.2051, "step": 369 }, { "epoch": 0.19287157099107316, "grad_norm": 1.0844815033870783, "learning_rate": 4.726144796334049e-06, "loss": 0.2139, "step": 370 }, { "epoch": 0.19339284550726527, "grad_norm": 1.1834140709430048, "learning_rate": 4.724179865237042e-06, "loss": 0.2288, "step": 371 }, { "epoch": 0.19391412002345734, "grad_norm": 1.109820726429485, "learning_rate": 4.722208321531743e-06, "loss": 0.2107, "step": 372 }, { "epoch": 0.19443539453964945, "grad_norm": 1.1351904940122715, "learning_rate": 4.720230171079657e-06, "loss": 0.2192, "step": 373 }, { "epoch": 0.19495666905584152, "grad_norm": 1.2167214965769027, "learning_rate": 4.7182454197619355e-06, "loss": 0.2256, "step": 374 }, { "epoch": 0.19547794357203363, "grad_norm": 1.1604275914972113, "learning_rate": 4.716254073479352e-06, "loss": 0.2274, "step": 375 }, { "epoch": 0.1959992180882257, "grad_norm": 1.1426123348996993, "learning_rate": 4.714256138152287e-06, "loss": 0.2145, "step": 376 }, { "epoch": 0.1965204926044178, "grad_norm": 1.1541949632639865, "learning_rate": 4.712251619720712e-06, "loss": 0.2236, "step": 377 }, { "epoch": 0.1970417671206099, "grad_norm": 1.1680074383865588, "learning_rate": 4.71024052414417e-06, "loss": 0.224, "step": 378 }, { "epoch": 0.19756304163680197, "grad_norm": 1.093211547348792, "learning_rate": 4.70822285740176e-06, "loss": 0.2167, "step": 379 }, { "epoch": 0.19808431615299407, "grad_norm": 1.0429291132552085, "learning_rate": 4.706198625492111e-06, "loss": 0.225, "step": 380 }, { "epoch": 0.19860559066918615, "grad_norm": 1.1437201685657041, "learning_rate": 4.704167834433378e-06, "loss": 0.2154, "step": 381 }, { "epoch": 0.19912686518537825, "grad_norm": 1.0255764432733163, "learning_rate": 4.702130490263215e-06, "loss": 0.2211, "step": 382 }, { "epoch": 0.19964813970157033, "grad_norm": 1.079528796981029, "learning_rate": 4.700086599038755e-06, "loss": 0.2067, "step": 383 }, { "epoch": 0.20016941421776244, "grad_norm": 1.1198477995083633, "learning_rate": 4.698036166836598e-06, "loss": 0.2325, "step": 384 }, { "epoch": 0.20069068873395451, "grad_norm": 0.9961051646512427, "learning_rate": 4.695979199752794e-06, "loss": 0.2095, "step": 385 }, { "epoch": 0.20121196325014662, "grad_norm": 1.115542929205913, "learning_rate": 4.693915703902816e-06, "loss": 0.2222, "step": 386 }, { "epoch": 0.2017332377663387, "grad_norm": 1.0607550045367906, "learning_rate": 4.691845685421551e-06, "loss": 0.2103, "step": 387 }, { "epoch": 0.20225451228253077, "grad_norm": 1.0383685809934824, "learning_rate": 4.689769150463277e-06, "loss": 0.2196, "step": 388 }, { "epoch": 0.20277578679872288, "grad_norm": 1.1801982817474617, "learning_rate": 4.687686105201645e-06, "loss": 0.226, "step": 389 }, { "epoch": 0.20329706131491496, "grad_norm": 1.1154931717291245, "learning_rate": 4.685596555829664e-06, "loss": 0.2047, "step": 390 }, { "epoch": 0.20381833583110706, "grad_norm": 1.032527412329601, "learning_rate": 4.683500508559676e-06, "loss": 0.2083, "step": 391 }, { "epoch": 0.20433961034729914, "grad_norm": 1.2019006510748693, "learning_rate": 4.681397969623347e-06, "loss": 0.2065, "step": 392 }, { "epoch": 0.20486088486349124, "grad_norm": 1.1978210836606196, "learning_rate": 4.679288945271639e-06, "loss": 0.2444, "step": 393 }, { "epoch": 0.20538215937968332, "grad_norm": 1.0218474068950976, "learning_rate": 4.677173441774796e-06, "loss": 0.2236, "step": 394 }, { "epoch": 0.20590343389587543, "grad_norm": 1.1665523253520813, "learning_rate": 4.675051465422326e-06, "loss": 0.2337, "step": 395 }, { "epoch": 0.2064247084120675, "grad_norm": 1.132284782223571, "learning_rate": 4.6729230225229815e-06, "loss": 0.2195, "step": 396 }, { "epoch": 0.20694598292825958, "grad_norm": 1.193184872531427, "learning_rate": 4.670788119404739e-06, "loss": 0.2287, "step": 397 }, { "epoch": 0.20746725744445169, "grad_norm": 1.049891267841484, "learning_rate": 4.66864676241478e-06, "loss": 0.2071, "step": 398 }, { "epoch": 0.20798853196064376, "grad_norm": 1.0641364325724467, "learning_rate": 4.666498957919479e-06, "loss": 0.2168, "step": 399 }, { "epoch": 0.20850980647683587, "grad_norm": 1.0981712069511067, "learning_rate": 4.664344712304375e-06, "loss": 0.2176, "step": 400 }, { "epoch": 0.20903108099302795, "grad_norm": 1.1645379745244755, "learning_rate": 4.6621840319741576e-06, "loss": 0.2306, "step": 401 }, { "epoch": 0.20955235550922005, "grad_norm": 1.0935765067331227, "learning_rate": 4.660016923352648e-06, "loss": 0.219, "step": 402 }, { "epoch": 0.21007363002541213, "grad_norm": 1.0162370357327695, "learning_rate": 4.657843392882778e-06, "loss": 0.2164, "step": 403 }, { "epoch": 0.21059490454160423, "grad_norm": 1.0685832938670983, "learning_rate": 4.6556634470265725e-06, "loss": 0.2082, "step": 404 }, { "epoch": 0.2111161790577963, "grad_norm": 1.1207863558203524, "learning_rate": 4.6534770922651305e-06, "loss": 0.2209, "step": 405 }, { "epoch": 0.21163745357398842, "grad_norm": 1.2696785852584918, "learning_rate": 4.651284335098603e-06, "loss": 0.2335, "step": 406 }, { "epoch": 0.2121587280901805, "grad_norm": 1.1354128670205956, "learning_rate": 4.6490851820461785e-06, "loss": 0.2224, "step": 407 }, { "epoch": 0.21268000260637257, "grad_norm": 1.0975319534286776, "learning_rate": 4.646879639646058e-06, "loss": 0.212, "step": 408 }, { "epoch": 0.21320127712256468, "grad_norm": 1.095184660583565, "learning_rate": 4.64466771445544e-06, "loss": 0.2136, "step": 409 }, { "epoch": 0.21372255163875675, "grad_norm": 1.064612547677763, "learning_rate": 4.642449413050499e-06, "loss": 0.1937, "step": 410 }, { "epoch": 0.21424382615494886, "grad_norm": 1.1115362759791394, "learning_rate": 4.640224742026365e-06, "loss": 0.2254, "step": 411 }, { "epoch": 0.21476510067114093, "grad_norm": 1.0348862046404177, "learning_rate": 4.637993707997107e-06, "loss": 0.2112, "step": 412 }, { "epoch": 0.21528637518733304, "grad_norm": 1.0818172703394755, "learning_rate": 4.635756317595714e-06, "loss": 0.206, "step": 413 }, { "epoch": 0.21580764970352512, "grad_norm": 1.0260926284540925, "learning_rate": 4.6335125774740665e-06, "loss": 0.2076, "step": 414 }, { "epoch": 0.21632892421971722, "grad_norm": 1.0429305974716216, "learning_rate": 4.6312624943029275e-06, "loss": 0.2177, "step": 415 }, { "epoch": 0.2168501987359093, "grad_norm": 1.1280018957375995, "learning_rate": 4.629006074771918e-06, "loss": 0.2373, "step": 416 }, { "epoch": 0.21737147325210138, "grad_norm": 1.1275683844242368, "learning_rate": 4.626743325589496e-06, "loss": 0.211, "step": 417 }, { "epoch": 0.21789274776829348, "grad_norm": 1.07286210999913, "learning_rate": 4.624474253482938e-06, "loss": 0.2225, "step": 418 }, { "epoch": 0.21841402228448556, "grad_norm": 1.097889164710293, "learning_rate": 4.622198865198321e-06, "loss": 0.2151, "step": 419 }, { "epoch": 0.21893529680067766, "grad_norm": 1.1004095694814584, "learning_rate": 4.619917167500496e-06, "loss": 0.2089, "step": 420 }, { "epoch": 0.21945657131686974, "grad_norm": 1.1048350999571594, "learning_rate": 4.617629167173078e-06, "loss": 0.2266, "step": 421 }, { "epoch": 0.21997784583306185, "grad_norm": 1.0912873499468907, "learning_rate": 4.615334871018415e-06, "loss": 0.231, "step": 422 }, { "epoch": 0.22049912034925392, "grad_norm": 1.0408222898191029, "learning_rate": 4.613034285857575e-06, "loss": 0.1894, "step": 423 }, { "epoch": 0.22102039486544603, "grad_norm": 1.1377530366999036, "learning_rate": 4.610727418530324e-06, "loss": 0.2273, "step": 424 }, { "epoch": 0.2215416693816381, "grad_norm": 1.070826056404283, "learning_rate": 4.6084142758951055e-06, "loss": 0.2196, "step": 425 }, { "epoch": 0.22206294389783018, "grad_norm": 1.1473399605841177, "learning_rate": 4.606094864829016e-06, "loss": 0.2109, "step": 426 }, { "epoch": 0.2225842184140223, "grad_norm": 1.1432744764899827, "learning_rate": 4.603769192227795e-06, "loss": 0.2213, "step": 427 }, { "epoch": 0.22310549293021437, "grad_norm": 1.072285539296878, "learning_rate": 4.601437265005792e-06, "loss": 0.2166, "step": 428 }, { "epoch": 0.22362676744640647, "grad_norm": 1.1618829602109915, "learning_rate": 4.599099090095955e-06, "loss": 0.207, "step": 429 }, { "epoch": 0.22414804196259855, "grad_norm": 1.2500972677894722, "learning_rate": 4.5967546744498044e-06, "loss": 0.2193, "step": 430 }, { "epoch": 0.22466931647879065, "grad_norm": 1.1657729036776328, "learning_rate": 4.594404025037418e-06, "loss": 0.2113, "step": 431 }, { "epoch": 0.22519059099498273, "grad_norm": 1.249964132485446, "learning_rate": 4.592047148847404e-06, "loss": 0.2256, "step": 432 }, { "epoch": 0.22571186551117484, "grad_norm": 1.1644116306373535, "learning_rate": 4.589684052886884e-06, "loss": 0.2198, "step": 433 }, { "epoch": 0.2262331400273669, "grad_norm": 1.2065528009787465, "learning_rate": 4.587314744181471e-06, "loss": 0.231, "step": 434 }, { "epoch": 0.226754414543559, "grad_norm": 1.143504671323367, "learning_rate": 4.58493922977525e-06, "loss": 0.209, "step": 435 }, { "epoch": 0.2272756890597511, "grad_norm": 1.0472266892118371, "learning_rate": 4.582557516730755e-06, "loss": 0.2072, "step": 436 }, { "epoch": 0.22779696357594317, "grad_norm": 1.0361215331977902, "learning_rate": 4.58016961212895e-06, "loss": 0.196, "step": 437 }, { "epoch": 0.22831823809213528, "grad_norm": 1.1481051690496913, "learning_rate": 4.577775523069204e-06, "loss": 0.2277, "step": 438 }, { "epoch": 0.22883951260832736, "grad_norm": 1.2547761237172292, "learning_rate": 4.575375256669276e-06, "loss": 0.2407, "step": 439 }, { "epoch": 0.22936078712451946, "grad_norm": 0.9650748955192284, "learning_rate": 4.572968820065288e-06, "loss": 0.1848, "step": 440 }, { "epoch": 0.22988206164071154, "grad_norm": 1.1719657520615745, "learning_rate": 4.570556220411708e-06, "loss": 0.2163, "step": 441 }, { "epoch": 0.23040333615690364, "grad_norm": 1.2562943824109747, "learning_rate": 4.568137464881328e-06, "loss": 0.2093, "step": 442 }, { "epoch": 0.23092461067309572, "grad_norm": 1.0944459906859987, "learning_rate": 4.5657125606652385e-06, "loss": 0.2132, "step": 443 }, { "epoch": 0.2314458851892878, "grad_norm": 1.2155693533167746, "learning_rate": 4.563281514972814e-06, "loss": 0.2262, "step": 444 }, { "epoch": 0.2319671597054799, "grad_norm": 1.2337605671295995, "learning_rate": 4.560844335031684e-06, "loss": 0.2404, "step": 445 }, { "epoch": 0.23248843422167198, "grad_norm": 1.1757287729273904, "learning_rate": 4.55840102808772e-06, "loss": 0.2343, "step": 446 }, { "epoch": 0.23300970873786409, "grad_norm": 1.1150156127039372, "learning_rate": 4.555951601405005e-06, "loss": 0.2063, "step": 447 }, { "epoch": 0.23353098325405616, "grad_norm": 1.1301475531038054, "learning_rate": 4.55349606226582e-06, "loss": 0.2202, "step": 448 }, { "epoch": 0.23405225777024827, "grad_norm": 1.1388989584706561, "learning_rate": 4.551034417970616e-06, "loss": 0.2324, "step": 449 }, { "epoch": 0.23457353228644034, "grad_norm": 1.0575247489861925, "learning_rate": 4.548566675837996e-06, "loss": 0.2186, "step": 450 }, { "epoch": 0.23509480680263245, "grad_norm": 1.0204052134233565, "learning_rate": 4.546092843204694e-06, "loss": 0.2101, "step": 451 }, { "epoch": 0.23561608131882453, "grad_norm": 1.2251080937053365, "learning_rate": 4.543612927425547e-06, "loss": 0.2248, "step": 452 }, { "epoch": 0.2361373558350166, "grad_norm": 1.1642657736807371, "learning_rate": 4.541126935873481e-06, "loss": 0.2082, "step": 453 }, { "epoch": 0.2366586303512087, "grad_norm": 1.0770735004827978, "learning_rate": 4.538634875939486e-06, "loss": 0.2114, "step": 454 }, { "epoch": 0.2371799048674008, "grad_norm": 1.1584724096430439, "learning_rate": 4.536136755032592e-06, "loss": 0.2133, "step": 455 }, { "epoch": 0.2377011793835929, "grad_norm": 1.1493910338394913, "learning_rate": 4.5336325805798475e-06, "loss": 0.214, "step": 456 }, { "epoch": 0.23822245389978497, "grad_norm": 0.9889705744340543, "learning_rate": 4.5311223600263016e-06, "loss": 0.2074, "step": 457 }, { "epoch": 0.23874372841597707, "grad_norm": 1.0191092901261611, "learning_rate": 4.528606100834976e-06, "loss": 0.208, "step": 458 }, { "epoch": 0.23926500293216915, "grad_norm": 1.0516379799007636, "learning_rate": 4.526083810486848e-06, "loss": 0.2108, "step": 459 }, { "epoch": 0.23978627744836126, "grad_norm": 1.1330898073365478, "learning_rate": 4.523555496480824e-06, "loss": 0.2126, "step": 460 }, { "epoch": 0.24030755196455333, "grad_norm": 1.0796213356859856, "learning_rate": 4.5210211663337195e-06, "loss": 0.2218, "step": 461 }, { "epoch": 0.2408288264807454, "grad_norm": 1.0355251655845654, "learning_rate": 4.518480827580237e-06, "loss": 0.2115, "step": 462 }, { "epoch": 0.24135010099693752, "grad_norm": 1.153573794271069, "learning_rate": 4.515934487772942e-06, "loss": 0.2142, "step": 463 }, { "epoch": 0.2418713755131296, "grad_norm": 0.9815120251789137, "learning_rate": 4.513382154482242e-06, "loss": 0.198, "step": 464 }, { "epoch": 0.2423926500293217, "grad_norm": 1.1495741092058984, "learning_rate": 4.510823835296364e-06, "loss": 0.2272, "step": 465 }, { "epoch": 0.24291392454551378, "grad_norm": 1.102910606321348, "learning_rate": 4.50825953782133e-06, "loss": 0.2001, "step": 466 }, { "epoch": 0.24343519906170588, "grad_norm": 1.0645645708742124, "learning_rate": 4.505689269680937e-06, "loss": 0.2124, "step": 467 }, { "epoch": 0.24395647357789796, "grad_norm": 1.0189195608220585, "learning_rate": 4.503113038516732e-06, "loss": 0.2126, "step": 468 }, { "epoch": 0.24447774809409006, "grad_norm": 1.067814056299296, "learning_rate": 4.500530851987992e-06, "loss": 0.2225, "step": 469 }, { "epoch": 0.24499902261028214, "grad_norm": 1.061499241529969, "learning_rate": 4.4979427177716974e-06, "loss": 0.1993, "step": 470 }, { "epoch": 0.24552029712647422, "grad_norm": 1.0402414395794188, "learning_rate": 4.495348643562514e-06, "loss": 0.209, "step": 471 }, { "epoch": 0.24604157164266632, "grad_norm": 1.0418834782414723, "learning_rate": 4.4927486370727656e-06, "loss": 0.2205, "step": 472 }, { "epoch": 0.2465628461588584, "grad_norm": 1.0972428443151883, "learning_rate": 4.4901427060324135e-06, "loss": 0.2306, "step": 473 }, { "epoch": 0.2470841206750505, "grad_norm": 1.1155839876556997, "learning_rate": 4.487530858189033e-06, "loss": 0.2187, "step": 474 }, { "epoch": 0.24760539519124258, "grad_norm": 1.1300995264380476, "learning_rate": 4.4849131013077915e-06, "loss": 0.2314, "step": 475 }, { "epoch": 0.2481266697074347, "grad_norm": 1.028559227898691, "learning_rate": 4.482289443171421e-06, "loss": 0.2175, "step": 476 }, { "epoch": 0.24864794422362677, "grad_norm": 1.133823139878735, "learning_rate": 4.479659891580203e-06, "loss": 0.2151, "step": 477 }, { "epoch": 0.24916921873981887, "grad_norm": 1.1948210325504214, "learning_rate": 4.477024454351937e-06, "loss": 0.2243, "step": 478 }, { "epoch": 0.24969049325601095, "grad_norm": 1.1155669981436795, "learning_rate": 4.4743831393219215e-06, "loss": 0.2246, "step": 479 }, { "epoch": 0.25021176777220305, "grad_norm": 1.0870863630319363, "learning_rate": 4.471735954342932e-06, "loss": 0.2194, "step": 480 }, { "epoch": 0.2507330422883951, "grad_norm": 1.021947377733924, "learning_rate": 4.469082907285192e-06, "loss": 0.214, "step": 481 }, { "epoch": 0.2512543168045872, "grad_norm": 1.0696327117947075, "learning_rate": 4.4664240060363565e-06, "loss": 0.2195, "step": 482 }, { "epoch": 0.2517755913207793, "grad_norm": 1.080457437808401, "learning_rate": 4.463759258501485e-06, "loss": 0.2058, "step": 483 }, { "epoch": 0.2522968658369714, "grad_norm": 1.165235624176743, "learning_rate": 4.461088672603015e-06, "loss": 0.2198, "step": 484 }, { "epoch": 0.25281814035316347, "grad_norm": 1.1277534771234747, "learning_rate": 4.458412256280747e-06, "loss": 0.225, "step": 485 }, { "epoch": 0.2533394148693556, "grad_norm": 1.0599584663873574, "learning_rate": 4.455730017491812e-06, "loss": 0.2006, "step": 486 }, { "epoch": 0.2538606893855477, "grad_norm": 1.0249989199079323, "learning_rate": 4.453041964210653e-06, "loss": 0.2041, "step": 487 }, { "epoch": 0.2543819639017397, "grad_norm": 1.0814394140585897, "learning_rate": 4.450348104428998e-06, "loss": 0.2215, "step": 488 }, { "epoch": 0.25490323841793183, "grad_norm": 1.0858602425123827, "learning_rate": 4.447648446155841e-06, "loss": 0.222, "step": 489 }, { "epoch": 0.25542451293412394, "grad_norm": 1.0278147977821745, "learning_rate": 4.4449429974174115e-06, "loss": 0.2114, "step": 490 }, { "epoch": 0.25594578745031604, "grad_norm": 0.9653873167735169, "learning_rate": 4.442231766257159e-06, "loss": 0.1928, "step": 491 }, { "epoch": 0.2564670619665081, "grad_norm": 1.0141617541982602, "learning_rate": 4.43951476073572e-06, "loss": 0.2, "step": 492 }, { "epoch": 0.2569883364827002, "grad_norm": 1.0782517271477747, "learning_rate": 4.436791988930901e-06, "loss": 0.2182, "step": 493 }, { "epoch": 0.2575096109988923, "grad_norm": 1.0883211480385289, "learning_rate": 4.434063458937652e-06, "loss": 0.2096, "step": 494 }, { "epoch": 0.2580308855150844, "grad_norm": 1.0827340273369845, "learning_rate": 4.43132917886804e-06, "loss": 0.2257, "step": 495 }, { "epoch": 0.25855216003127646, "grad_norm": 1.0410081603932166, "learning_rate": 4.428589156851231e-06, "loss": 0.2181, "step": 496 }, { "epoch": 0.25907343454746856, "grad_norm": 1.0541847604693613, "learning_rate": 4.42584340103346e-06, "loss": 0.2162, "step": 497 }, { "epoch": 0.25959470906366067, "grad_norm": 1.143645122965762, "learning_rate": 4.423091919578008e-06, "loss": 0.2301, "step": 498 }, { "epoch": 0.2601159835798527, "grad_norm": 1.0639180365437577, "learning_rate": 4.4203347206651805e-06, "loss": 0.2355, "step": 499 }, { "epoch": 0.2606372580960448, "grad_norm": 0.9614473295811211, "learning_rate": 4.417571812492279e-06, "loss": 0.1954, "step": 500 }, { "epoch": 0.2606372580960448, "eval_loss": 0.21339768171310425, "eval_runtime": 52.4959, "eval_samples_per_second": 23.64, "eval_steps_per_second": 2.972, "step": 500 }, { "epoch": 0.2611585326122369, "grad_norm": 1.0841579950020523, "learning_rate": 4.4148032032735835e-06, "loss": 0.2233, "step": 501 }, { "epoch": 0.26167980712842903, "grad_norm": 1.0055568844787404, "learning_rate": 4.4120289012403185e-06, "loss": 0.205, "step": 502 }, { "epoch": 0.2622010816446211, "grad_norm": 1.0687056061935238, "learning_rate": 4.409248914640636e-06, "loss": 0.2156, "step": 503 }, { "epoch": 0.2627223561608132, "grad_norm": 1.0771229976941505, "learning_rate": 4.4064632517395875e-06, "loss": 0.2247, "step": 504 }, { "epoch": 0.2632436306770053, "grad_norm": 1.0481049423895688, "learning_rate": 4.4036719208191025e-06, "loss": 0.2234, "step": 505 }, { "epoch": 0.26376490519319734, "grad_norm": 1.162506231563762, "learning_rate": 4.400874930177959e-06, "loss": 0.2219, "step": 506 }, { "epoch": 0.26428617970938945, "grad_norm": 1.0912372610620908, "learning_rate": 4.398072288131763e-06, "loss": 0.1922, "step": 507 }, { "epoch": 0.26480745422558155, "grad_norm": 1.0867839888631574, "learning_rate": 4.395264003012924e-06, "loss": 0.2175, "step": 508 }, { "epoch": 0.26532872874177366, "grad_norm": 1.1805618069379247, "learning_rate": 4.392450083170625e-06, "loss": 0.218, "step": 509 }, { "epoch": 0.2658500032579657, "grad_norm": 1.0889255347942146, "learning_rate": 4.389630536970806e-06, "loss": 0.2064, "step": 510 }, { "epoch": 0.2663712777741578, "grad_norm": 1.1116894634969319, "learning_rate": 4.386805372796129e-06, "loss": 0.2181, "step": 511 }, { "epoch": 0.2668925522903499, "grad_norm": 1.0586197466332472, "learning_rate": 4.383974599045963e-06, "loss": 0.2093, "step": 512 }, { "epoch": 0.267413826806542, "grad_norm": 1.0145935887387179, "learning_rate": 4.3811382241363545e-06, "loss": 0.2113, "step": 513 }, { "epoch": 0.26793510132273407, "grad_norm": 1.0435500323197133, "learning_rate": 4.378296256499998e-06, "loss": 0.2103, "step": 514 }, { "epoch": 0.2684563758389262, "grad_norm": 1.0479347125521619, "learning_rate": 4.375448704586221e-06, "loss": 0.208, "step": 515 }, { "epoch": 0.2689776503551183, "grad_norm": 1.0710861477938967, "learning_rate": 4.37259557686095e-06, "loss": 0.2001, "step": 516 }, { "epoch": 0.26949892487131033, "grad_norm": 1.0438111670635168, "learning_rate": 4.369736881806691e-06, "loss": 0.2155, "step": 517 }, { "epoch": 0.27002019938750244, "grad_norm": 1.071314827976162, "learning_rate": 4.366872627922498e-06, "loss": 0.2049, "step": 518 }, { "epoch": 0.27054147390369454, "grad_norm": 1.1105126190630343, "learning_rate": 4.364002823723956e-06, "loss": 0.2161, "step": 519 }, { "epoch": 0.27106274841988665, "grad_norm": 1.1128881857208017, "learning_rate": 4.36112747774315e-06, "loss": 0.2178, "step": 520 }, { "epoch": 0.2715840229360787, "grad_norm": 1.127620184184549, "learning_rate": 4.358246598528641e-06, "loss": 0.2008, "step": 521 }, { "epoch": 0.2721052974522708, "grad_norm": 1.10004250359709, "learning_rate": 4.355360194645439e-06, "loss": 0.1985, "step": 522 }, { "epoch": 0.2726265719684629, "grad_norm": 1.0272279568989093, "learning_rate": 4.35246827467498e-06, "loss": 0.2138, "step": 523 }, { "epoch": 0.27314784648465495, "grad_norm": 1.106632586603704, "learning_rate": 4.349570847215104e-06, "loss": 0.2155, "step": 524 }, { "epoch": 0.27366912100084706, "grad_norm": 1.194736618683367, "learning_rate": 4.346667920880016e-06, "loss": 0.2197, "step": 525 }, { "epoch": 0.27419039551703916, "grad_norm": 1.0801091301725214, "learning_rate": 4.343759504300278e-06, "loss": 0.2074, "step": 526 }, { "epoch": 0.27471167003323127, "grad_norm": 1.0614194685397207, "learning_rate": 4.34084560612277e-06, "loss": 0.2136, "step": 527 }, { "epoch": 0.2752329445494233, "grad_norm": 1.0531834029204565, "learning_rate": 4.337926235010672e-06, "loss": 0.2053, "step": 528 }, { "epoch": 0.2757542190656154, "grad_norm": 1.1801906881402193, "learning_rate": 4.335001399643433e-06, "loss": 0.2194, "step": 529 }, { "epoch": 0.27627549358180753, "grad_norm": 1.0644262272232463, "learning_rate": 4.332071108716747e-06, "loss": 0.211, "step": 530 }, { "epoch": 0.27679676809799963, "grad_norm": 1.0440684724636122, "learning_rate": 4.329135370942531e-06, "loss": 0.2066, "step": 531 }, { "epoch": 0.2773180426141917, "grad_norm": 1.0480131480389971, "learning_rate": 4.326194195048894e-06, "loss": 0.2125, "step": 532 }, { "epoch": 0.2778393171303838, "grad_norm": 1.0574201215221624, "learning_rate": 4.323247589780111e-06, "loss": 0.2073, "step": 533 }, { "epoch": 0.2783605916465759, "grad_norm": 1.2097127844915045, "learning_rate": 4.320295563896601e-06, "loss": 0.2035, "step": 534 }, { "epoch": 0.27888186616276794, "grad_norm": 1.040708567287013, "learning_rate": 4.317338126174899e-06, "loss": 0.2262, "step": 535 }, { "epoch": 0.27940314067896005, "grad_norm": 1.0670609705734364, "learning_rate": 4.314375285407629e-06, "loss": 0.2178, "step": 536 }, { "epoch": 0.27992441519515215, "grad_norm": 1.0480234772110812, "learning_rate": 4.311407050403479e-06, "loss": 0.1976, "step": 537 }, { "epoch": 0.28044568971134426, "grad_norm": 1.0042327964128128, "learning_rate": 4.308433429987172e-06, "loss": 0.2111, "step": 538 }, { "epoch": 0.2809669642275363, "grad_norm": 1.138894159548414, "learning_rate": 4.305454432999445e-06, "loss": 0.2085, "step": 539 }, { "epoch": 0.2814882387437284, "grad_norm": 1.0516652868157126, "learning_rate": 4.302470068297019e-06, "loss": 0.2132, "step": 540 }, { "epoch": 0.2820095132599205, "grad_norm": 1.1303509110253085, "learning_rate": 4.2994803447525735e-06, "loss": 0.2119, "step": 541 }, { "epoch": 0.28253078777611257, "grad_norm": 0.990081843922128, "learning_rate": 4.29648527125472e-06, "loss": 0.1951, "step": 542 }, { "epoch": 0.2830520622923047, "grad_norm": 0.9709694070302939, "learning_rate": 4.2934848567079745e-06, "loss": 0.2034, "step": 543 }, { "epoch": 0.2835733368084968, "grad_norm": 1.1024012313744909, "learning_rate": 4.290479110032735e-06, "loss": 0.2195, "step": 544 }, { "epoch": 0.2840946113246889, "grad_norm": 0.9752023861970188, "learning_rate": 4.28746804016525e-06, "loss": 0.2013, "step": 545 }, { "epoch": 0.28461588584088093, "grad_norm": 1.051607491477118, "learning_rate": 4.284451656057595e-06, "loss": 0.2244, "step": 546 }, { "epoch": 0.28513716035707304, "grad_norm": 1.0474182907791254, "learning_rate": 4.281429966677644e-06, "loss": 0.2199, "step": 547 }, { "epoch": 0.28565843487326514, "grad_norm": 1.0251588658877104, "learning_rate": 4.2784029810090456e-06, "loss": 0.2042, "step": 548 }, { "epoch": 0.28617970938945725, "grad_norm": 1.046698681370147, "learning_rate": 4.275370708051194e-06, "loss": 0.2119, "step": 549 }, { "epoch": 0.2867009839056493, "grad_norm": 1.0824173335506142, "learning_rate": 4.2723331568192004e-06, "loss": 0.2214, "step": 550 }, { "epoch": 0.2872222584218414, "grad_norm": 1.0746888933028043, "learning_rate": 4.269290336343873e-06, "loss": 0.2169, "step": 551 }, { "epoch": 0.2877435329380335, "grad_norm": 1.0756144814428263, "learning_rate": 4.266242255671681e-06, "loss": 0.2117, "step": 552 }, { "epoch": 0.28826480745422556, "grad_norm": 1.0690384595853772, "learning_rate": 4.2631889238647375e-06, "loss": 0.2108, "step": 553 }, { "epoch": 0.28878608197041766, "grad_norm": 1.0994939303823907, "learning_rate": 4.260130350000763e-06, "loss": 0.2121, "step": 554 }, { "epoch": 0.28930735648660977, "grad_norm": 1.1317609730907072, "learning_rate": 4.257066543173064e-06, "loss": 0.2157, "step": 555 }, { "epoch": 0.2898286310028019, "grad_norm": 1.1807722421341038, "learning_rate": 4.253997512490507e-06, "loss": 0.2138, "step": 556 }, { "epoch": 0.2903499055189939, "grad_norm": 1.0766939997287335, "learning_rate": 4.250923267077489e-06, "loss": 0.2251, "step": 557 }, { "epoch": 0.290871180035186, "grad_norm": 1.015097333754172, "learning_rate": 4.247843816073909e-06, "loss": 0.2168, "step": 558 }, { "epoch": 0.29139245455137813, "grad_norm": 1.0611808878431113, "learning_rate": 4.2447591686351406e-06, "loss": 0.2151, "step": 559 }, { "epoch": 0.2919137290675702, "grad_norm": 1.081321925415324, "learning_rate": 4.2416693339320115e-06, "loss": 0.2137, "step": 560 }, { "epoch": 0.2924350035837623, "grad_norm": 1.0361316285970896, "learning_rate": 4.238574321150769e-06, "loss": 0.2001, "step": 561 }, { "epoch": 0.2929562780999544, "grad_norm": 1.0732683873731856, "learning_rate": 4.235474139493055e-06, "loss": 0.2126, "step": 562 }, { "epoch": 0.2934775526161465, "grad_norm": 1.042382014216991, "learning_rate": 4.23236879817588e-06, "loss": 0.2198, "step": 563 }, { "epoch": 0.29399882713233855, "grad_norm": 1.05888474945225, "learning_rate": 4.229258306431592e-06, "loss": 0.2026, "step": 564 }, { "epoch": 0.29452010164853065, "grad_norm": 1.096124873852566, "learning_rate": 4.226142673507852e-06, "loss": 0.2081, "step": 565 }, { "epoch": 0.29504137616472276, "grad_norm": 1.0846747928553437, "learning_rate": 4.22302190866761e-06, "loss": 0.2082, "step": 566 }, { "epoch": 0.29556265068091486, "grad_norm": 1.034256519119133, "learning_rate": 4.219896021189067e-06, "loss": 0.2058, "step": 567 }, { "epoch": 0.2960839251971069, "grad_norm": 1.0958509973103114, "learning_rate": 4.2167650203656605e-06, "loss": 0.2076, "step": 568 }, { "epoch": 0.296605199713299, "grad_norm": 1.0219802672482143, "learning_rate": 4.213628915506025e-06, "loss": 0.2098, "step": 569 }, { "epoch": 0.2971264742294911, "grad_norm": 1.0725760126979913, "learning_rate": 4.210487715933973e-06, "loss": 0.2004, "step": 570 }, { "epoch": 0.29764774874568317, "grad_norm": 1.1092050995305374, "learning_rate": 4.207341430988461e-06, "loss": 0.2179, "step": 571 }, { "epoch": 0.2981690232618753, "grad_norm": 1.0116440915769094, "learning_rate": 4.204190070023567e-06, "loss": 0.2103, "step": 572 }, { "epoch": 0.2986902977780674, "grad_norm": 1.0820906649909003, "learning_rate": 4.2010336424084596e-06, "loss": 0.2108, "step": 573 }, { "epoch": 0.2992115722942595, "grad_norm": 1.150843702137832, "learning_rate": 4.19787215752737e-06, "loss": 0.2114, "step": 574 }, { "epoch": 0.29973284681045154, "grad_norm": 1.0751806915419084, "learning_rate": 4.194705624779566e-06, "loss": 0.2185, "step": 575 }, { "epoch": 0.30025412132664364, "grad_norm": 1.0417628113570552, "learning_rate": 4.191534053579322e-06, "loss": 0.2115, "step": 576 }, { "epoch": 0.30077539584283575, "grad_norm": 1.2325769173664691, "learning_rate": 4.188357453355893e-06, "loss": 0.205, "step": 577 }, { "epoch": 0.3012966703590278, "grad_norm": 1.1461454843205805, "learning_rate": 4.1851758335534844e-06, "loss": 0.2223, "step": 578 }, { "epoch": 0.3018179448752199, "grad_norm": 1.0194539603948622, "learning_rate": 4.181989203631227e-06, "loss": 0.1904, "step": 579 }, { "epoch": 0.302339219391412, "grad_norm": 1.082325223841357, "learning_rate": 4.178797573063144e-06, "loss": 0.2093, "step": 580 }, { "epoch": 0.3028604939076041, "grad_norm": 1.0988623753150655, "learning_rate": 4.175600951338129e-06, "loss": 0.2124, "step": 581 }, { "epoch": 0.30338176842379616, "grad_norm": 1.1122185167686438, "learning_rate": 4.172399347959912e-06, "loss": 0.2293, "step": 582 }, { "epoch": 0.30390304293998827, "grad_norm": 1.1104488075802776, "learning_rate": 4.169192772447036e-06, "loss": 0.2094, "step": 583 }, { "epoch": 0.30442431745618037, "grad_norm": 1.1498498606575311, "learning_rate": 4.1659812343328246e-06, "loss": 0.2083, "step": 584 }, { "epoch": 0.3049455919723725, "grad_norm": 1.073342259019356, "learning_rate": 4.162764743165355e-06, "loss": 0.2062, "step": 585 }, { "epoch": 0.3054668664885645, "grad_norm": 1.0622125095205464, "learning_rate": 4.1595433085074334e-06, "loss": 0.2024, "step": 586 }, { "epoch": 0.30598814100475663, "grad_norm": 1.0668772967693896, "learning_rate": 4.156316939936559e-06, "loss": 0.2212, "step": 587 }, { "epoch": 0.30650941552094874, "grad_norm": 1.0675400676528097, "learning_rate": 4.153085647044904e-06, "loss": 0.2141, "step": 588 }, { "epoch": 0.3070306900371408, "grad_norm": 1.0197324992856283, "learning_rate": 4.149849439439277e-06, "loss": 0.1918, "step": 589 }, { "epoch": 0.3075519645533329, "grad_norm": 1.0208947839202611, "learning_rate": 4.146608326741101e-06, "loss": 0.1979, "step": 590 }, { "epoch": 0.308073239069525, "grad_norm": 1.0241897621000515, "learning_rate": 4.1433623185863805e-06, "loss": 0.2222, "step": 591 }, { "epoch": 0.3085945135857171, "grad_norm": 0.9958526236943049, "learning_rate": 4.140111424625676e-06, "loss": 0.2026, "step": 592 }, { "epoch": 0.30911578810190915, "grad_norm": 1.1010645029579753, "learning_rate": 4.1368556545240724e-06, "loss": 0.2111, "step": 593 }, { "epoch": 0.30963706261810126, "grad_norm": 1.020348361008944, "learning_rate": 4.133595017961152e-06, "loss": 0.2121, "step": 594 }, { "epoch": 0.31015833713429336, "grad_norm": 1.0634652896832968, "learning_rate": 4.130329524630966e-06, "loss": 0.2092, "step": 595 }, { "epoch": 0.3106796116504854, "grad_norm": 1.22594320649436, "learning_rate": 4.127059184242004e-06, "loss": 0.1888, "step": 596 }, { "epoch": 0.3112008861666775, "grad_norm": 1.084220606584145, "learning_rate": 4.123784006517166e-06, "loss": 0.2041, "step": 597 }, { "epoch": 0.3117221606828696, "grad_norm": 1.2076936308911805, "learning_rate": 4.120504001193737e-06, "loss": 0.2247, "step": 598 }, { "epoch": 0.3122434351990617, "grad_norm": 1.110425661526799, "learning_rate": 4.117219178023349e-06, "loss": 0.2187, "step": 599 }, { "epoch": 0.3127647097152538, "grad_norm": 1.0165222874522115, "learning_rate": 4.113929546771963e-06, "loss": 0.2212, "step": 600 }, { "epoch": 0.3132859842314459, "grad_norm": 1.01907832542922, "learning_rate": 4.1106351172198325e-06, "loss": 0.2059, "step": 601 }, { "epoch": 0.313807258747638, "grad_norm": 1.1113618660877311, "learning_rate": 4.1073358991614745e-06, "loss": 0.2056, "step": 602 }, { "epoch": 0.3143285332638301, "grad_norm": 1.061394484619993, "learning_rate": 4.1040319024056465e-06, "loss": 0.2157, "step": 603 }, { "epoch": 0.31484980778002214, "grad_norm": 1.0770711874914694, "learning_rate": 4.10072313677531e-06, "loss": 0.2267, "step": 604 }, { "epoch": 0.31537108229621424, "grad_norm": 1.1000479766093059, "learning_rate": 4.0974096121076076e-06, "loss": 0.2274, "step": 605 }, { "epoch": 0.31589235681240635, "grad_norm": 1.030867505172163, "learning_rate": 4.094091338253829e-06, "loss": 0.2042, "step": 606 }, { "epoch": 0.3164136313285984, "grad_norm": 1.0592868986180395, "learning_rate": 4.0907683250793814e-06, "loss": 0.1994, "step": 607 }, { "epoch": 0.3169349058447905, "grad_norm": 1.0149014356934085, "learning_rate": 4.0874405824637676e-06, "loss": 0.2021, "step": 608 }, { "epoch": 0.3174561803609826, "grad_norm": 1.0683812405742192, "learning_rate": 4.084108120300546e-06, "loss": 0.2047, "step": 609 }, { "epoch": 0.3179774548771747, "grad_norm": 1.1387780132206826, "learning_rate": 4.080770948497311e-06, "loss": 0.1883, "step": 610 }, { "epoch": 0.31849872939336676, "grad_norm": 1.1221245908523543, "learning_rate": 4.077429076975655e-06, "loss": 0.2095, "step": 611 }, { "epoch": 0.31902000390955887, "grad_norm": 1.0282758937773733, "learning_rate": 4.074082515671145e-06, "loss": 0.2145, "step": 612 }, { "epoch": 0.319541278425751, "grad_norm": 1.110630040223085, "learning_rate": 4.070731274533291e-06, "loss": 0.2049, "step": 613 }, { "epoch": 0.320062552941943, "grad_norm": 1.056095149339451, "learning_rate": 4.067375363525516e-06, "loss": 0.2137, "step": 614 }, { "epoch": 0.32058382745813513, "grad_norm": 1.0650979149433963, "learning_rate": 4.064014792625126e-06, "loss": 0.2003, "step": 615 }, { "epoch": 0.32110510197432723, "grad_norm": 1.099715369531199, "learning_rate": 4.060649571823284e-06, "loss": 0.2091, "step": 616 }, { "epoch": 0.32162637649051934, "grad_norm": 0.9805781333526467, "learning_rate": 4.057279711124973e-06, "loss": 0.2096, "step": 617 }, { "epoch": 0.3221476510067114, "grad_norm": 1.155776870795669, "learning_rate": 4.053905220548974e-06, "loss": 0.2212, "step": 618 }, { "epoch": 0.3226689255229035, "grad_norm": 1.0538004320056853, "learning_rate": 4.050526110127832e-06, "loss": 0.1766, "step": 619 }, { "epoch": 0.3231902000390956, "grad_norm": 1.0618488101877996, "learning_rate": 4.047142389907827e-06, "loss": 0.2056, "step": 620 }, { "epoch": 0.3237114745552877, "grad_norm": 1.0565037231122896, "learning_rate": 4.043754069948944e-06, "loss": 0.2175, "step": 621 }, { "epoch": 0.32423274907147975, "grad_norm": 1.0734547383525697, "learning_rate": 4.040361160324844e-06, "loss": 0.2108, "step": 622 }, { "epoch": 0.32475402358767186, "grad_norm": 1.1404843494285408, "learning_rate": 4.036963671122831e-06, "loss": 0.1984, "step": 623 }, { "epoch": 0.32527529810386396, "grad_norm": 1.0409356970425339, "learning_rate": 4.033561612443829e-06, "loss": 0.2186, "step": 624 }, { "epoch": 0.325796572620056, "grad_norm": 1.0275010010679515, "learning_rate": 4.030154994402341e-06, "loss": 0.202, "step": 625 }, { "epoch": 0.3263178471362481, "grad_norm": 1.0483642396947566, "learning_rate": 4.0267438271264304e-06, "loss": 0.1998, "step": 626 }, { "epoch": 0.3268391216524402, "grad_norm": 1.2195252537996888, "learning_rate": 4.023328120757685e-06, "loss": 0.2196, "step": 627 }, { "epoch": 0.32736039616863233, "grad_norm": 1.0662345771718675, "learning_rate": 4.019907885451184e-06, "loss": 0.2065, "step": 628 }, { "epoch": 0.3278816706848244, "grad_norm": 1.0183412109857055, "learning_rate": 4.016483131375476e-06, "loss": 0.1978, "step": 629 }, { "epoch": 0.3284029452010165, "grad_norm": 1.1051721887857624, "learning_rate": 4.01305386871254e-06, "loss": 0.2181, "step": 630 }, { "epoch": 0.3289242197172086, "grad_norm": 1.0671144442819984, "learning_rate": 4.009620107657763e-06, "loss": 0.2189, "step": 631 }, { "epoch": 0.32944549423340064, "grad_norm": 1.0747517554303343, "learning_rate": 4.006181858419905e-06, "loss": 0.203, "step": 632 }, { "epoch": 0.32996676874959274, "grad_norm": 0.9833069358480406, "learning_rate": 4.002739131221066e-06, "loss": 0.1977, "step": 633 }, { "epoch": 0.33048804326578485, "grad_norm": 1.075564747056274, "learning_rate": 3.999291936296664e-06, "loss": 0.2003, "step": 634 }, { "epoch": 0.33100931778197695, "grad_norm": 1.0211300435597885, "learning_rate": 3.995840283895399e-06, "loss": 0.1925, "step": 635 }, { "epoch": 0.331530592298169, "grad_norm": 1.055480781342074, "learning_rate": 3.99238418427922e-06, "loss": 0.2083, "step": 636 }, { "epoch": 0.3320518668143611, "grad_norm": 1.1455013890415642, "learning_rate": 3.988923647723301e-06, "loss": 0.2245, "step": 637 }, { "epoch": 0.3325731413305532, "grad_norm": 1.138804936681144, "learning_rate": 3.9854586845160055e-06, "loss": 0.2097, "step": 638 }, { "epoch": 0.3330944158467453, "grad_norm": 1.0391589155707512, "learning_rate": 3.981989304958861e-06, "loss": 0.1979, "step": 639 }, { "epoch": 0.33361569036293737, "grad_norm": 0.9846771488041198, "learning_rate": 3.978515519366519e-06, "loss": 0.191, "step": 640 }, { "epoch": 0.33413696487912947, "grad_norm": 1.160705167196543, "learning_rate": 3.975037338066736e-06, "loss": 0.2269, "step": 641 }, { "epoch": 0.3346582393953216, "grad_norm": 1.0341541389157343, "learning_rate": 3.9715547714003355e-06, "loss": 0.212, "step": 642 }, { "epoch": 0.3351795139115136, "grad_norm": 0.9669202329689861, "learning_rate": 3.968067829721178e-06, "loss": 0.1926, "step": 643 }, { "epoch": 0.33570078842770573, "grad_norm": 1.0646552233485294, "learning_rate": 3.96457652339613e-06, "loss": 0.2213, "step": 644 }, { "epoch": 0.33622206294389784, "grad_norm": 1.0267029973506987, "learning_rate": 3.961080862805039e-06, "loss": 0.2033, "step": 645 }, { "epoch": 0.33674333746008994, "grad_norm": 1.0386343031224388, "learning_rate": 3.9575808583406926e-06, "loss": 0.1999, "step": 646 }, { "epoch": 0.337264611976282, "grad_norm": 1.0609243483484754, "learning_rate": 3.954076520408796e-06, "loss": 0.2067, "step": 647 }, { "epoch": 0.3377858864924741, "grad_norm": 1.0156303481124798, "learning_rate": 3.950567859427938e-06, "loss": 0.1966, "step": 648 }, { "epoch": 0.3383071610086662, "grad_norm": 1.0521948883806702, "learning_rate": 3.947054885829559e-06, "loss": 0.1943, "step": 649 }, { "epoch": 0.33882843552485825, "grad_norm": 1.1434498924585015, "learning_rate": 3.943537610057921e-06, "loss": 0.2166, "step": 650 }, { "epoch": 0.33934971004105036, "grad_norm": 1.1505038673012769, "learning_rate": 3.940016042570079e-06, "loss": 0.2182, "step": 651 }, { "epoch": 0.33987098455724246, "grad_norm": 1.0440410659240964, "learning_rate": 3.936490193835843e-06, "loss": 0.2093, "step": 652 }, { "epoch": 0.34039225907343457, "grad_norm": 1.1208632283728355, "learning_rate": 3.932960074337755e-06, "loss": 0.2269, "step": 653 }, { "epoch": 0.3409135335896266, "grad_norm": 1.0861275993743262, "learning_rate": 3.929425694571055e-06, "loss": 0.2035, "step": 654 }, { "epoch": 0.3414348081058187, "grad_norm": 1.0290460624684266, "learning_rate": 3.925887065043643e-06, "loss": 0.2122, "step": 655 }, { "epoch": 0.3419560826220108, "grad_norm": 1.0884709264769998, "learning_rate": 3.922344196276063e-06, "loss": 0.2139, "step": 656 }, { "epoch": 0.34247735713820293, "grad_norm": 1.0532577216421508, "learning_rate": 3.918797098801453e-06, "loss": 0.2027, "step": 657 }, { "epoch": 0.342998631654395, "grad_norm": 1.0704967165140549, "learning_rate": 3.915245783165531e-06, "loss": 0.1975, "step": 658 }, { "epoch": 0.3435199061705871, "grad_norm": 1.1395825307815826, "learning_rate": 3.911690259926551e-06, "loss": 0.21, "step": 659 }, { "epoch": 0.3440411806867792, "grad_norm": 1.0578163243072147, "learning_rate": 3.908130539655278e-06, "loss": 0.2029, "step": 660 }, { "epoch": 0.34456245520297124, "grad_norm": 1.1280961630838429, "learning_rate": 3.904566632934955e-06, "loss": 0.2265, "step": 661 }, { "epoch": 0.34508372971916335, "grad_norm": 1.0471018683165267, "learning_rate": 3.900998550361271e-06, "loss": 0.2064, "step": 662 }, { "epoch": 0.34560500423535545, "grad_norm": 1.051587787475786, "learning_rate": 3.897426302542331e-06, "loss": 0.2009, "step": 663 }, { "epoch": 0.34612627875154756, "grad_norm": 1.07096724118746, "learning_rate": 3.893849900098623e-06, "loss": 0.1984, "step": 664 }, { "epoch": 0.3466475532677396, "grad_norm": 1.1311625175456812, "learning_rate": 3.890269353662987e-06, "loss": 0.2042, "step": 665 }, { "epoch": 0.3471688277839317, "grad_norm": 1.0603390750522113, "learning_rate": 3.886684673880583e-06, "loss": 0.2077, "step": 666 }, { "epoch": 0.3476901023001238, "grad_norm": 1.0002435941995667, "learning_rate": 3.8830958714088595e-06, "loss": 0.2036, "step": 667 }, { "epoch": 0.34821137681631587, "grad_norm": 1.09095658907915, "learning_rate": 3.879502956917524e-06, "loss": 0.2034, "step": 668 }, { "epoch": 0.34873265133250797, "grad_norm": 1.1310309501196332, "learning_rate": 3.875905941088505e-06, "loss": 0.2075, "step": 669 }, { "epoch": 0.3492539258487001, "grad_norm": 1.0418279096661451, "learning_rate": 3.872304834615929e-06, "loss": 0.1985, "step": 670 }, { "epoch": 0.3497752003648922, "grad_norm": 0.9684063606430465, "learning_rate": 3.868699648206081e-06, "loss": 0.1895, "step": 671 }, { "epoch": 0.35029647488108423, "grad_norm": 1.1204415636477791, "learning_rate": 3.8650903925773795e-06, "loss": 0.207, "step": 672 }, { "epoch": 0.35081774939727633, "grad_norm": 1.1243132412029433, "learning_rate": 3.861477078460337e-06, "loss": 0.2111, "step": 673 }, { "epoch": 0.35133902391346844, "grad_norm": 1.0979496907292703, "learning_rate": 3.857859716597534e-06, "loss": 0.2148, "step": 674 }, { "epoch": 0.35186029842966055, "grad_norm": 1.0443405543071218, "learning_rate": 3.854238317743586e-06, "loss": 0.2196, "step": 675 }, { "epoch": 0.3523815729458526, "grad_norm": 1.0762635593895937, "learning_rate": 3.8506128926651095e-06, "loss": 0.2105, "step": 676 }, { "epoch": 0.3529028474620447, "grad_norm": 1.0338762149355627, "learning_rate": 3.846983452140689e-06, "loss": 0.2132, "step": 677 }, { "epoch": 0.3534241219782368, "grad_norm": 1.0074710371606315, "learning_rate": 3.843350006960852e-06, "loss": 0.2053, "step": 678 }, { "epoch": 0.35394539649442885, "grad_norm": 1.0126329998732087, "learning_rate": 3.83971256792803e-06, "loss": 0.2102, "step": 679 }, { "epoch": 0.35446667101062096, "grad_norm": 1.0068494471356129, "learning_rate": 3.836071145856526e-06, "loss": 0.2021, "step": 680 }, { "epoch": 0.35498794552681306, "grad_norm": 1.0269809005666712, "learning_rate": 3.832425751572488e-06, "loss": 0.2034, "step": 681 }, { "epoch": 0.35550922004300517, "grad_norm": 0.9716053014003645, "learning_rate": 3.828776395913872e-06, "loss": 0.1993, "step": 682 }, { "epoch": 0.3560304945591972, "grad_norm": 1.0117095527596758, "learning_rate": 3.825123089730413e-06, "loss": 0.2096, "step": 683 }, { "epoch": 0.3565517690753893, "grad_norm": 1.0729107646685034, "learning_rate": 3.821465843883588e-06, "loss": 0.1915, "step": 684 }, { "epoch": 0.35707304359158143, "grad_norm": 1.0320361681933137, "learning_rate": 3.81780466924659e-06, "loss": 0.1905, "step": 685 }, { "epoch": 0.3575943181077735, "grad_norm": 0.9715658008355695, "learning_rate": 3.814139576704291e-06, "loss": 0.1969, "step": 686 }, { "epoch": 0.3581155926239656, "grad_norm": 1.0932036907834042, "learning_rate": 3.810470577153212e-06, "loss": 0.2115, "step": 687 }, { "epoch": 0.3586368671401577, "grad_norm": 1.029912994292444, "learning_rate": 3.8067976815014885e-06, "loss": 0.1997, "step": 688 }, { "epoch": 0.3591581416563498, "grad_norm": 1.0811060169767797, "learning_rate": 3.8031209006688397e-06, "loss": 0.2077, "step": 689 }, { "epoch": 0.35967941617254184, "grad_norm": 1.0421585599821286, "learning_rate": 3.7994402455865375e-06, "loss": 0.2079, "step": 690 }, { "epoch": 0.36020069068873395, "grad_norm": 1.0377992818502029, "learning_rate": 3.795755727197368e-06, "loss": 0.2017, "step": 691 }, { "epoch": 0.36072196520492605, "grad_norm": 1.0065660374049659, "learning_rate": 3.7920673564556083e-06, "loss": 0.2221, "step": 692 }, { "epoch": 0.36124323972111816, "grad_norm": 0.979282912950048, "learning_rate": 3.788375144326985e-06, "loss": 0.1955, "step": 693 }, { "epoch": 0.3617645142373102, "grad_norm": 1.0026392413250227, "learning_rate": 3.784679101788647e-06, "loss": 0.2116, "step": 694 }, { "epoch": 0.3622857887535023, "grad_norm": 0.9986548278232529, "learning_rate": 3.78097923982913e-06, "loss": 0.214, "step": 695 }, { "epoch": 0.3628070632696944, "grad_norm": 0.9585609430319894, "learning_rate": 3.7772755694483265e-06, "loss": 0.1905, "step": 696 }, { "epoch": 0.36332833778588647, "grad_norm": 1.0401759307528902, "learning_rate": 3.7735681016574504e-06, "loss": 0.1971, "step": 697 }, { "epoch": 0.3638496123020786, "grad_norm": 1.0360631930811912, "learning_rate": 3.7698568474790064e-06, "loss": 0.1996, "step": 698 }, { "epoch": 0.3643708868182707, "grad_norm": 1.0178019461687884, "learning_rate": 3.766141817946757e-06, "loss": 0.2161, "step": 699 }, { "epoch": 0.3648921613344628, "grad_norm": 0.9971708011450119, "learning_rate": 3.7624230241056854e-06, "loss": 0.2008, "step": 700 }, { "epoch": 0.36541343585065483, "grad_norm": 1.1371013279219415, "learning_rate": 3.7587004770119716e-06, "loss": 0.2293, "step": 701 }, { "epoch": 0.36593471036684694, "grad_norm": 1.0637851468087032, "learning_rate": 3.7549741877329504e-06, "loss": 0.2015, "step": 702 }, { "epoch": 0.36645598488303904, "grad_norm": 1.0182788467476143, "learning_rate": 3.7512441673470836e-06, "loss": 0.2097, "step": 703 }, { "epoch": 0.3669772593992311, "grad_norm": 0.9873903086378643, "learning_rate": 3.747510426943925e-06, "loss": 0.1945, "step": 704 }, { "epoch": 0.3674985339154232, "grad_norm": 1.066187464643768, "learning_rate": 3.7437729776240894e-06, "loss": 0.2033, "step": 705 }, { "epoch": 0.3680198084316153, "grad_norm": 1.0109132335088629, "learning_rate": 3.740031830499219e-06, "loss": 0.187, "step": 706 }, { "epoch": 0.3685410829478074, "grad_norm": 1.087352920662628, "learning_rate": 3.7362869966919467e-06, "loss": 0.2017, "step": 707 }, { "epoch": 0.36906235746399946, "grad_norm": 1.0923626972489895, "learning_rate": 3.7325384873358695e-06, "loss": 0.2112, "step": 708 }, { "epoch": 0.36958363198019156, "grad_norm": 1.078860234772305, "learning_rate": 3.7287863135755098e-06, "loss": 0.2117, "step": 709 }, { "epoch": 0.37010490649638367, "grad_norm": 1.1449021033180053, "learning_rate": 3.7250304865662857e-06, "loss": 0.1984, "step": 710 }, { "epoch": 0.3706261810125758, "grad_norm": 1.0614686156604147, "learning_rate": 3.7212710174744753e-06, "loss": 0.2079, "step": 711 }, { "epoch": 0.3711474555287678, "grad_norm": 1.037377272701006, "learning_rate": 3.7175079174771872e-06, "loss": 0.2022, "step": 712 }, { "epoch": 0.3716687300449599, "grad_norm": 1.0516583337554275, "learning_rate": 3.713741197762323e-06, "loss": 0.2009, "step": 713 }, { "epoch": 0.37219000456115203, "grad_norm": 1.0268348370799318, "learning_rate": 3.7099708695285436e-06, "loss": 0.1955, "step": 714 }, { "epoch": 0.3727112790773441, "grad_norm": 0.9891691053873646, "learning_rate": 3.706196943985245e-06, "loss": 0.198, "step": 715 }, { "epoch": 0.3732325535935362, "grad_norm": 1.0507470048468373, "learning_rate": 3.7024194323525115e-06, "loss": 0.2183, "step": 716 }, { "epoch": 0.3737538281097283, "grad_norm": 1.0441076323714367, "learning_rate": 3.6986383458610925e-06, "loss": 0.1982, "step": 717 }, { "epoch": 0.3742751026259204, "grad_norm": 0.9343537658012983, "learning_rate": 3.694853695752364e-06, "loss": 0.1944, "step": 718 }, { "epoch": 0.37479637714211245, "grad_norm": 0.9997766034036779, "learning_rate": 3.6910654932782984e-06, "loss": 0.2017, "step": 719 }, { "epoch": 0.37531765165830455, "grad_norm": 1.028401152906009, "learning_rate": 3.6872737497014286e-06, "loss": 0.2002, "step": 720 }, { "epoch": 0.37583892617449666, "grad_norm": 1.0087687788014903, "learning_rate": 3.6834784762948156e-06, "loss": 0.1982, "step": 721 }, { "epoch": 0.3763602006906887, "grad_norm": 0.9997736649547044, "learning_rate": 3.6796796843420134e-06, "loss": 0.2071, "step": 722 }, { "epoch": 0.3768814752068808, "grad_norm": 0.9721162464369297, "learning_rate": 3.67587738513704e-06, "loss": 0.1884, "step": 723 }, { "epoch": 0.3774027497230729, "grad_norm": 1.0304064094853025, "learning_rate": 3.672071589984337e-06, "loss": 0.1995, "step": 724 }, { "epoch": 0.377924024239265, "grad_norm": 1.0589375873312883, "learning_rate": 3.6682623101987423e-06, "loss": 0.1981, "step": 725 }, { "epoch": 0.37844529875545707, "grad_norm": 0.9710396112430536, "learning_rate": 3.664449557105454e-06, "loss": 0.2, "step": 726 }, { "epoch": 0.3789665732716492, "grad_norm": 1.0522926935967352, "learning_rate": 3.6606333420399933e-06, "loss": 0.2091, "step": 727 }, { "epoch": 0.3794878477878413, "grad_norm": 1.0852598572018897, "learning_rate": 3.6568136763481766e-06, "loss": 0.2079, "step": 728 }, { "epoch": 0.3800091223040334, "grad_norm": 1.0492806894049111, "learning_rate": 3.652990571386079e-06, "loss": 0.2074, "step": 729 }, { "epoch": 0.38053039682022544, "grad_norm": 1.0085185599351503, "learning_rate": 3.649164038520001e-06, "loss": 0.1935, "step": 730 }, { "epoch": 0.38105167133641754, "grad_norm": 1.0857364772225881, "learning_rate": 3.6453340891264344e-06, "loss": 0.2052, "step": 731 }, { "epoch": 0.38157294585260965, "grad_norm": 1.1224287433484814, "learning_rate": 3.641500734592026e-06, "loss": 0.205, "step": 732 }, { "epoch": 0.3820942203688017, "grad_norm": 1.155690013988157, "learning_rate": 3.63766398631355e-06, "loss": 0.2281, "step": 733 }, { "epoch": 0.3826154948849938, "grad_norm": 0.9944633480040708, "learning_rate": 3.633823855697869e-06, "loss": 0.1834, "step": 734 }, { "epoch": 0.3831367694011859, "grad_norm": 1.016517755609509, "learning_rate": 3.6299803541619e-06, "loss": 0.2101, "step": 735 }, { "epoch": 0.383658043917378, "grad_norm": 0.965254600627704, "learning_rate": 3.6261334931325833e-06, "loss": 0.1819, "step": 736 }, { "epoch": 0.38417931843357006, "grad_norm": 1.0509533639633712, "learning_rate": 3.622283284046847e-06, "loss": 0.1969, "step": 737 }, { "epoch": 0.38470059294976217, "grad_norm": 1.0273270859307988, "learning_rate": 3.618429738351574e-06, "loss": 0.1955, "step": 738 }, { "epoch": 0.38522186746595427, "grad_norm": 1.0828612063888188, "learning_rate": 3.6145728675035658e-06, "loss": 0.2083, "step": 739 }, { "epoch": 0.3857431419821463, "grad_norm": 1.037973214218959, "learning_rate": 3.6107126829695093e-06, "loss": 0.1959, "step": 740 }, { "epoch": 0.3862644164983384, "grad_norm": 0.9740301727879055, "learning_rate": 3.6068491962259457e-06, "loss": 0.1981, "step": 741 }, { "epoch": 0.38678569101453053, "grad_norm": 0.9675934276994784, "learning_rate": 3.6029824187592304e-06, "loss": 0.1905, "step": 742 }, { "epoch": 0.38730696553072264, "grad_norm": 0.9985279581622839, "learning_rate": 3.599112362065506e-06, "loss": 0.1946, "step": 743 }, { "epoch": 0.3878282400469147, "grad_norm": 1.0065347198646675, "learning_rate": 3.5952390376506614e-06, "loss": 0.1982, "step": 744 }, { "epoch": 0.3883495145631068, "grad_norm": 1.0684404764047237, "learning_rate": 3.591362457030302e-06, "loss": 0.2241, "step": 745 }, { "epoch": 0.3888707890792989, "grad_norm": 1.0386178441721692, "learning_rate": 3.5874826317297135e-06, "loss": 0.2158, "step": 746 }, { "epoch": 0.389392063595491, "grad_norm": 1.1467068947450896, "learning_rate": 3.5835995732838292e-06, "loss": 0.2187, "step": 747 }, { "epoch": 0.38991333811168305, "grad_norm": 0.9978113760767837, "learning_rate": 3.579713293237194e-06, "loss": 0.1909, "step": 748 }, { "epoch": 0.39043461262787515, "grad_norm": 1.0577765356298126, "learning_rate": 3.5758238031439306e-06, "loss": 0.2048, "step": 749 }, { "epoch": 0.39095588714406726, "grad_norm": 1.0270887257797516, "learning_rate": 3.5719311145677057e-06, "loss": 0.195, "step": 750 }, { "epoch": 0.3914771616602593, "grad_norm": 1.0201002842834375, "learning_rate": 3.5680352390816942e-06, "loss": 0.2078, "step": 751 }, { "epoch": 0.3919984361764514, "grad_norm": 1.069239425622886, "learning_rate": 3.5641361882685487e-06, "loss": 0.2055, "step": 752 }, { "epoch": 0.3925197106926435, "grad_norm": 0.999820669699393, "learning_rate": 3.5602339737203593e-06, "loss": 0.2047, "step": 753 }, { "epoch": 0.3930409852088356, "grad_norm": 1.0700111846850282, "learning_rate": 3.5563286070386237e-06, "loss": 0.2145, "step": 754 }, { "epoch": 0.3935622597250277, "grad_norm": 1.1382464786486353, "learning_rate": 3.5524200998342095e-06, "loss": 0.2122, "step": 755 }, { "epoch": 0.3940835342412198, "grad_norm": 1.072823077451666, "learning_rate": 3.5485084637273225e-06, "loss": 0.1982, "step": 756 }, { "epoch": 0.3946048087574119, "grad_norm": 1.0198723117083601, "learning_rate": 3.5445937103474713e-06, "loss": 0.2047, "step": 757 }, { "epoch": 0.39512608327360393, "grad_norm": 0.9377945180547989, "learning_rate": 3.5406758513334316e-06, "loss": 0.194, "step": 758 }, { "epoch": 0.39564735778979604, "grad_norm": 1.054102417249867, "learning_rate": 3.536754898333211e-06, "loss": 0.2125, "step": 759 }, { "epoch": 0.39616863230598814, "grad_norm": 1.0932943063554317, "learning_rate": 3.532830863004018e-06, "loss": 0.2018, "step": 760 }, { "epoch": 0.39668990682218025, "grad_norm": 1.0453261209434133, "learning_rate": 3.5289037570122246e-06, "loss": 0.1793, "step": 761 }, { "epoch": 0.3972111813383723, "grad_norm": 1.085250928485593, "learning_rate": 3.5249735920333312e-06, "loss": 0.2215, "step": 762 }, { "epoch": 0.3977324558545644, "grad_norm": 1.0207448526376532, "learning_rate": 3.521040379751933e-06, "loss": 0.212, "step": 763 }, { "epoch": 0.3982537303707565, "grad_norm": 1.06545180546958, "learning_rate": 3.517104131861685e-06, "loss": 0.2093, "step": 764 }, { "epoch": 0.3987750048869486, "grad_norm": 1.015376416999948, "learning_rate": 3.513164860065267e-06, "loss": 0.2047, "step": 765 }, { "epoch": 0.39929627940314066, "grad_norm": 1.0885519954151794, "learning_rate": 3.509222576074349e-06, "loss": 0.2164, "step": 766 }, { "epoch": 0.39981755391933277, "grad_norm": 1.0221092011588429, "learning_rate": 3.5052772916095584e-06, "loss": 0.197, "step": 767 }, { "epoch": 0.4003388284355249, "grad_norm": 1.0074564985446492, "learning_rate": 3.50132901840044e-06, "loss": 0.1848, "step": 768 }, { "epoch": 0.4008601029517169, "grad_norm": 1.124944009301868, "learning_rate": 3.4973777681854265e-06, "loss": 0.227, "step": 769 }, { "epoch": 0.40138137746790903, "grad_norm": 0.9933224411295778, "learning_rate": 3.4934235527118e-06, "loss": 0.2007, "step": 770 }, { "epoch": 0.40190265198410113, "grad_norm": 1.1140553396131367, "learning_rate": 3.4894663837356607e-06, "loss": 0.2062, "step": 771 }, { "epoch": 0.40242392650029324, "grad_norm": 1.07900121401784, "learning_rate": 3.485506273021887e-06, "loss": 0.2174, "step": 772 }, { "epoch": 0.4029452010164853, "grad_norm": 1.0396822700390023, "learning_rate": 3.4815432323441043e-06, "loss": 0.2265, "step": 773 }, { "epoch": 0.4034664755326774, "grad_norm": 1.0063749840646985, "learning_rate": 3.47757727348465e-06, "loss": 0.2058, "step": 774 }, { "epoch": 0.4039877500488695, "grad_norm": 1.0351010851739715, "learning_rate": 3.4736084082345355e-06, "loss": 0.2093, "step": 775 }, { "epoch": 0.40450902456506155, "grad_norm": 0.9589611297953635, "learning_rate": 3.4696366483934156e-06, "loss": 0.2004, "step": 776 }, { "epoch": 0.40503029908125365, "grad_norm": 1.052474173402005, "learning_rate": 3.465662005769548e-06, "loss": 0.205, "step": 777 }, { "epoch": 0.40555157359744576, "grad_norm": 0.9712723540245317, "learning_rate": 3.461684492179763e-06, "loss": 0.195, "step": 778 }, { "epoch": 0.40607284811363786, "grad_norm": 1.0335824947872163, "learning_rate": 3.4577041194494253e-06, "loss": 0.1955, "step": 779 }, { "epoch": 0.4065941226298299, "grad_norm": 0.9782579425499615, "learning_rate": 3.4537208994124015e-06, "loss": 0.1829, "step": 780 }, { "epoch": 0.407115397146022, "grad_norm": 1.011734636363808, "learning_rate": 3.449734843911022e-06, "loss": 0.2034, "step": 781 }, { "epoch": 0.4076366716622141, "grad_norm": 0.9975323909301046, "learning_rate": 3.4457459647960477e-06, "loss": 0.208, "step": 782 }, { "epoch": 0.40815794617840623, "grad_norm": 1.004285481609286, "learning_rate": 3.441754273926634e-06, "loss": 0.1993, "step": 783 }, { "epoch": 0.4086792206945983, "grad_norm": 1.0152260185286432, "learning_rate": 3.4377597831702962e-06, "loss": 0.202, "step": 784 }, { "epoch": 0.4092004952107904, "grad_norm": 1.029762874421123, "learning_rate": 3.433762504402874e-06, "loss": 0.1923, "step": 785 }, { "epoch": 0.4097217697269825, "grad_norm": 1.0259363482780564, "learning_rate": 3.429762449508495e-06, "loss": 0.1877, "step": 786 }, { "epoch": 0.41024304424317454, "grad_norm": 0.9708063683587468, "learning_rate": 3.425759630379541e-06, "loss": 0.1939, "step": 787 }, { "epoch": 0.41076431875936664, "grad_norm": 1.0125745012513352, "learning_rate": 3.421754058916612e-06, "loss": 0.1932, "step": 788 }, { "epoch": 0.41128559327555875, "grad_norm": 1.0539642303638106, "learning_rate": 3.4177457470284916e-06, "loss": 0.2186, "step": 789 }, { "epoch": 0.41180686779175085, "grad_norm": 1.0106759073395906, "learning_rate": 3.4137347066321097e-06, "loss": 0.2025, "step": 790 }, { "epoch": 0.4123281423079429, "grad_norm": 1.047283205204026, "learning_rate": 3.4097209496525087e-06, "loss": 0.2136, "step": 791 }, { "epoch": 0.412849416824135, "grad_norm": 1.0288785017939832, "learning_rate": 3.4057044880228064e-06, "loss": 0.1987, "step": 792 }, { "epoch": 0.4133706913403271, "grad_norm": 1.100836923285089, "learning_rate": 3.4016853336841638e-06, "loss": 0.2165, "step": 793 }, { "epoch": 0.41389196585651916, "grad_norm": 1.07422690447585, "learning_rate": 3.397663498585747e-06, "loss": 0.2091, "step": 794 }, { "epoch": 0.41441324037271127, "grad_norm": 0.9652362696809133, "learning_rate": 3.39363899468469e-06, "loss": 0.1996, "step": 795 }, { "epoch": 0.41493451488890337, "grad_norm": 0.9534402042629027, "learning_rate": 3.3896118339460635e-06, "loss": 0.1855, "step": 796 }, { "epoch": 0.4154557894050955, "grad_norm": 1.0497663522609355, "learning_rate": 3.385582028342837e-06, "loss": 0.2087, "step": 797 }, { "epoch": 0.4159770639212875, "grad_norm": 1.0863225389912796, "learning_rate": 3.3815495898558424e-06, "loss": 0.2102, "step": 798 }, { "epoch": 0.41649833843747963, "grad_norm": 0.9880252641524678, "learning_rate": 3.377514530473739e-06, "loss": 0.1834, "step": 799 }, { "epoch": 0.41701961295367174, "grad_norm": 1.0933466253103774, "learning_rate": 3.3734768621929805e-06, "loss": 0.1974, "step": 800 }, { "epoch": 0.41754088746986384, "grad_norm": 1.0467600398315684, "learning_rate": 3.369436597017774e-06, "loss": 0.1978, "step": 801 }, { "epoch": 0.4180621619860559, "grad_norm": 1.0397173376484643, "learning_rate": 3.3653937469600483e-06, "loss": 0.2013, "step": 802 }, { "epoch": 0.418583436502248, "grad_norm": 0.9815507411522406, "learning_rate": 3.361348324039419e-06, "loss": 0.1954, "step": 803 }, { "epoch": 0.4191047110184401, "grad_norm": 0.9774944041763228, "learning_rate": 3.3573003402831487e-06, "loss": 0.1963, "step": 804 }, { "epoch": 0.41962598553463215, "grad_norm": 1.1198416721294486, "learning_rate": 3.353249807726115e-06, "loss": 0.2094, "step": 805 }, { "epoch": 0.42014726005082426, "grad_norm": 1.0790995136876083, "learning_rate": 3.349196738410771e-06, "loss": 0.2095, "step": 806 }, { "epoch": 0.42066853456701636, "grad_norm": 1.0111985446162226, "learning_rate": 3.3451411443871145e-06, "loss": 0.2022, "step": 807 }, { "epoch": 0.42118980908320847, "grad_norm": 1.1014100834244736, "learning_rate": 3.341083037712649e-06, "loss": 0.2184, "step": 808 }, { "epoch": 0.4217110835994005, "grad_norm": 1.001885982166375, "learning_rate": 3.337022430452346e-06, "loss": 0.1967, "step": 809 }, { "epoch": 0.4222323581155926, "grad_norm": 0.9377950692781114, "learning_rate": 3.3329593346786125e-06, "loss": 0.1877, "step": 810 }, { "epoch": 0.4227536326317847, "grad_norm": 1.096886871024171, "learning_rate": 3.328893762471255e-06, "loss": 0.2083, "step": 811 }, { "epoch": 0.42327490714797683, "grad_norm": 1.0227673076035053, "learning_rate": 3.324825725917442e-06, "loss": 0.1969, "step": 812 }, { "epoch": 0.4237961816641689, "grad_norm": 1.129454857460604, "learning_rate": 3.320755237111669e-06, "loss": 0.2024, "step": 813 }, { "epoch": 0.424317456180361, "grad_norm": 1.0491617495855319, "learning_rate": 3.316682308155721e-06, "loss": 0.2061, "step": 814 }, { "epoch": 0.4248387306965531, "grad_norm": 1.1595335723790747, "learning_rate": 3.312606951158638e-06, "loss": 0.2134, "step": 815 }, { "epoch": 0.42536000521274514, "grad_norm": 1.1081813559550924, "learning_rate": 3.308529178236679e-06, "loss": 0.1918, "step": 816 }, { "epoch": 0.42588127972893725, "grad_norm": 1.0946308608610222, "learning_rate": 3.304449001513287e-06, "loss": 0.1989, "step": 817 }, { "epoch": 0.42640255424512935, "grad_norm": 1.075048101820695, "learning_rate": 3.3003664331190487e-06, "loss": 0.1989, "step": 818 }, { "epoch": 0.42692382876132146, "grad_norm": 1.0734187102713864, "learning_rate": 3.296281485191665e-06, "loss": 0.1978, "step": 819 }, { "epoch": 0.4274451032775135, "grad_norm": 1.130789104429466, "learning_rate": 3.292194169875908e-06, "loss": 0.215, "step": 820 }, { "epoch": 0.4279663777937056, "grad_norm": 1.0291523352304348, "learning_rate": 3.2881044993235893e-06, "loss": 0.2011, "step": 821 }, { "epoch": 0.4284876523098977, "grad_norm": 1.0057993810989465, "learning_rate": 3.284012485693524e-06, "loss": 0.2068, "step": 822 }, { "epoch": 0.42900892682608976, "grad_norm": 1.046396865883086, "learning_rate": 3.279918141151492e-06, "loss": 0.2105, "step": 823 }, { "epoch": 0.42953020134228187, "grad_norm": 0.9922656623291862, "learning_rate": 3.2758214778702026e-06, "loss": 0.1845, "step": 824 }, { "epoch": 0.430051475858474, "grad_norm": 1.0565367939693868, "learning_rate": 3.2717225080292598e-06, "loss": 0.2034, "step": 825 }, { "epoch": 0.4305727503746661, "grad_norm": 1.058708383674746, "learning_rate": 3.2676212438151256e-06, "loss": 0.1993, "step": 826 }, { "epoch": 0.43109402489085813, "grad_norm": 0.9815918898679389, "learning_rate": 3.2635176974210824e-06, "loss": 0.1974, "step": 827 }, { "epoch": 0.43161529940705023, "grad_norm": 1.1067842143201054, "learning_rate": 3.2594118810471982e-06, "loss": 0.2066, "step": 828 }, { "epoch": 0.43213657392324234, "grad_norm": 1.0059867992897973, "learning_rate": 3.2553038069002885e-06, "loss": 0.2067, "step": 829 }, { "epoch": 0.43265784843943444, "grad_norm": 1.0777338938551524, "learning_rate": 3.2511934871938825e-06, "loss": 0.2033, "step": 830 }, { "epoch": 0.4331791229556265, "grad_norm": 1.104533496737417, "learning_rate": 3.247080934148186e-06, "loss": 0.199, "step": 831 }, { "epoch": 0.4337003974718186, "grad_norm": 1.0144271815214954, "learning_rate": 3.242966159990044e-06, "loss": 0.201, "step": 832 }, { "epoch": 0.4342216719880107, "grad_norm": 1.0028965037997253, "learning_rate": 3.238849176952904e-06, "loss": 0.1937, "step": 833 }, { "epoch": 0.43474294650420275, "grad_norm": 1.0804865893646987, "learning_rate": 3.2347299972767824e-06, "loss": 0.1908, "step": 834 }, { "epoch": 0.43526422102039486, "grad_norm": 1.1742256638405077, "learning_rate": 3.230608633208225e-06, "loss": 0.1956, "step": 835 }, { "epoch": 0.43578549553658696, "grad_norm": 1.0960493756055618, "learning_rate": 3.226485097000273e-06, "loss": 0.195, "step": 836 }, { "epoch": 0.43630677005277907, "grad_norm": 1.1756701100327795, "learning_rate": 3.2223594009124247e-06, "loss": 0.2202, "step": 837 }, { "epoch": 0.4368280445689711, "grad_norm": 0.962954960634096, "learning_rate": 3.2182315572105995e-06, "loss": 0.1916, "step": 838 }, { "epoch": 0.4373493190851632, "grad_norm": 1.096035560134522, "learning_rate": 3.2141015781671025e-06, "loss": 0.2037, "step": 839 }, { "epoch": 0.43787059360135533, "grad_norm": 1.1022461563067543, "learning_rate": 3.209969476060587e-06, "loss": 0.1969, "step": 840 }, { "epoch": 0.4383918681175474, "grad_norm": 1.060989135942518, "learning_rate": 3.2058352631760198e-06, "loss": 0.216, "step": 841 }, { "epoch": 0.4389131426337395, "grad_norm": 1.0795040679735655, "learning_rate": 3.2016989518046397e-06, "loss": 0.1972, "step": 842 }, { "epoch": 0.4394344171499316, "grad_norm": 1.01155187615578, "learning_rate": 3.1975605542439276e-06, "loss": 0.1958, "step": 843 }, { "epoch": 0.4399556916661237, "grad_norm": 0.960593686621856, "learning_rate": 3.1934200827975654e-06, "loss": 0.2063, "step": 844 }, { "epoch": 0.44047696618231574, "grad_norm": 1.052348833707456, "learning_rate": 3.1892775497754014e-06, "loss": 0.1977, "step": 845 }, { "epoch": 0.44099824069850785, "grad_norm": 1.122152577144792, "learning_rate": 3.1851329674934116e-06, "loss": 0.2075, "step": 846 }, { "epoch": 0.44151951521469995, "grad_norm": 1.0257719745756821, "learning_rate": 3.1809863482736663e-06, "loss": 0.1982, "step": 847 }, { "epoch": 0.44204078973089206, "grad_norm": 0.9494321503502252, "learning_rate": 3.176837704444291e-06, "loss": 0.1961, "step": 848 }, { "epoch": 0.4425620642470841, "grad_norm": 1.1099523946512315, "learning_rate": 3.1726870483394312e-06, "loss": 0.2122, "step": 849 }, { "epoch": 0.4430833387632762, "grad_norm": 1.073386369078645, "learning_rate": 3.168534392299214e-06, "loss": 0.2044, "step": 850 }, { "epoch": 0.4436046132794683, "grad_norm": 0.9859176069316815, "learning_rate": 3.1643797486697116e-06, "loss": 0.1938, "step": 851 }, { "epoch": 0.44412588779566037, "grad_norm": 0.9386602709194345, "learning_rate": 3.1602231298029074e-06, "loss": 0.1754, "step": 852 }, { "epoch": 0.4446471623118525, "grad_norm": 0.9948808105890651, "learning_rate": 3.1560645480566566e-06, "loss": 0.1934, "step": 853 }, { "epoch": 0.4451684368280446, "grad_norm": 1.0814129445631278, "learning_rate": 3.15190401579465e-06, "loss": 0.2066, "step": 854 }, { "epoch": 0.4456897113442367, "grad_norm": 0.9719582900510382, "learning_rate": 3.1477415453863772e-06, "loss": 0.184, "step": 855 }, { "epoch": 0.44621098586042873, "grad_norm": 0.9329010427938074, "learning_rate": 3.143577149207091e-06, "loss": 0.1899, "step": 856 }, { "epoch": 0.44673226037662084, "grad_norm": 0.9921945412172163, "learning_rate": 3.139410839637767e-06, "loss": 0.1962, "step": 857 }, { "epoch": 0.44725353489281294, "grad_norm": 1.0485901926630037, "learning_rate": 3.135242629065073e-06, "loss": 0.2018, "step": 858 }, { "epoch": 0.447774809409005, "grad_norm": 1.0379569766914887, "learning_rate": 3.131072529881326e-06, "loss": 0.2001, "step": 859 }, { "epoch": 0.4482960839251971, "grad_norm": 1.0305130275174221, "learning_rate": 3.126900554484459e-06, "loss": 0.1917, "step": 860 }, { "epoch": 0.4488173584413892, "grad_norm": 1.0437881640236402, "learning_rate": 3.122726715277983e-06, "loss": 0.1945, "step": 861 }, { "epoch": 0.4493386329575813, "grad_norm": 0.9799689803224048, "learning_rate": 3.1185510246709487e-06, "loss": 0.1887, "step": 862 }, { "epoch": 0.44985990747377336, "grad_norm": 1.0533266466186548, "learning_rate": 3.1143734950779155e-06, "loss": 0.201, "step": 863 }, { "epoch": 0.45038118198996546, "grad_norm": 1.004136035506477, "learning_rate": 3.1101941389189045e-06, "loss": 0.1978, "step": 864 }, { "epoch": 0.45090245650615757, "grad_norm": 1.0401781545267712, "learning_rate": 3.106012968619371e-06, "loss": 0.2033, "step": 865 }, { "epoch": 0.4514237310223497, "grad_norm": 1.0734661861751236, "learning_rate": 3.1018299966101624e-06, "loss": 0.2013, "step": 866 }, { "epoch": 0.4519450055385417, "grad_norm": 1.0052701698748918, "learning_rate": 3.097645235327483e-06, "loss": 0.187, "step": 867 }, { "epoch": 0.4524662800547338, "grad_norm": 1.0362538608122422, "learning_rate": 3.0934586972128574e-06, "loss": 0.197, "step": 868 }, { "epoch": 0.45298755457092593, "grad_norm": 1.060588394413901, "learning_rate": 3.0892703947130914e-06, "loss": 0.184, "step": 869 }, { "epoch": 0.453508829087118, "grad_norm": 1.1062613356935505, "learning_rate": 3.085080340280239e-06, "loss": 0.1827, "step": 870 }, { "epoch": 0.4540301036033101, "grad_norm": 0.983203748560144, "learning_rate": 3.0808885463715584e-06, "loss": 0.1935, "step": 871 }, { "epoch": 0.4545513781195022, "grad_norm": 0.9737122112212313, "learning_rate": 3.076695025449484e-06, "loss": 0.1976, "step": 872 }, { "epoch": 0.4550726526356943, "grad_norm": 1.0014155379159426, "learning_rate": 3.072499789981582e-06, "loss": 0.1962, "step": 873 }, { "epoch": 0.45559392715188635, "grad_norm": 1.0341459320964297, "learning_rate": 3.068302852440517e-06, "loss": 0.2036, "step": 874 }, { "epoch": 0.45611520166807845, "grad_norm": 1.0894399997153617, "learning_rate": 3.064104225304013e-06, "loss": 0.2023, "step": 875 }, { "epoch": 0.45663647618427056, "grad_norm": 1.0494724208473707, "learning_rate": 3.059903921054818e-06, "loss": 0.2098, "step": 876 }, { "epoch": 0.4571577507004626, "grad_norm": 0.9897158428149795, "learning_rate": 3.0557019521806667e-06, "loss": 0.1967, "step": 877 }, { "epoch": 0.4576790252166547, "grad_norm": 1.0264984042834973, "learning_rate": 3.0514983311742426e-06, "loss": 0.1978, "step": 878 }, { "epoch": 0.4582002997328468, "grad_norm": 1.0461074830254988, "learning_rate": 3.04729307053314e-06, "loss": 0.2004, "step": 879 }, { "epoch": 0.4587215742490389, "grad_norm": 1.051395189628469, "learning_rate": 3.0430861827598277e-06, "loss": 0.2061, "step": 880 }, { "epoch": 0.45924284876523097, "grad_norm": 0.9590732108822055, "learning_rate": 3.0388776803616138e-06, "loss": 0.1846, "step": 881 }, { "epoch": 0.4597641232814231, "grad_norm": 1.0023302855138927, "learning_rate": 3.034667575850607e-06, "loss": 0.186, "step": 882 }, { "epoch": 0.4602853977976152, "grad_norm": 1.0649714751792867, "learning_rate": 3.0304558817436767e-06, "loss": 0.2038, "step": 883 }, { "epoch": 0.4608066723138073, "grad_norm": 1.0031794140387544, "learning_rate": 3.02624261056242e-06, "loss": 0.1934, "step": 884 }, { "epoch": 0.46132794682999934, "grad_norm": 0.9583181588579257, "learning_rate": 3.0220277748331223e-06, "loss": 0.1846, "step": 885 }, { "epoch": 0.46184922134619144, "grad_norm": 1.10019469721715, "learning_rate": 3.017811387086721e-06, "loss": 0.1889, "step": 886 }, { "epoch": 0.46237049586238355, "grad_norm": 1.0282877540715634, "learning_rate": 3.013593459858767e-06, "loss": 0.176, "step": 887 }, { "epoch": 0.4628917703785756, "grad_norm": 0.9807399162917946, "learning_rate": 3.0093740056893882e-06, "loss": 0.1887, "step": 888 }, { "epoch": 0.4634130448947677, "grad_norm": 0.9486313009420617, "learning_rate": 3.005153037123253e-06, "loss": 0.1909, "step": 889 }, { "epoch": 0.4639343194109598, "grad_norm": 1.1228149064440365, "learning_rate": 3.000930566709531e-06, "loss": 0.1968, "step": 890 }, { "epoch": 0.4644555939271519, "grad_norm": 0.9838279802105361, "learning_rate": 2.996706607001858e-06, "loss": 0.1984, "step": 891 }, { "epoch": 0.46497686844334396, "grad_norm": 1.0285867670220825, "learning_rate": 2.9924811705582966e-06, "loss": 0.1898, "step": 892 }, { "epoch": 0.46549814295953607, "grad_norm": 1.0748194071023658, "learning_rate": 2.988254269941302e-06, "loss": 0.2047, "step": 893 }, { "epoch": 0.46601941747572817, "grad_norm": 0.9802690541168912, "learning_rate": 2.984025917717678e-06, "loss": 0.1903, "step": 894 }, { "epoch": 0.4665406919919202, "grad_norm": 1.0156827732220404, "learning_rate": 2.979796126458548e-06, "loss": 0.1996, "step": 895 }, { "epoch": 0.4670619665081123, "grad_norm": 1.036042762325025, "learning_rate": 2.975564908739313e-06, "loss": 0.1927, "step": 896 }, { "epoch": 0.46758324102430443, "grad_norm": 0.9790007561286332, "learning_rate": 2.9713322771396147e-06, "loss": 0.1947, "step": 897 }, { "epoch": 0.46810451554049654, "grad_norm": 0.9961185982546604, "learning_rate": 2.967098244243297e-06, "loss": 0.1938, "step": 898 }, { "epoch": 0.4686257900566886, "grad_norm": 0.9990853654605351, "learning_rate": 2.962862822638372e-06, "loss": 0.2085, "step": 899 }, { "epoch": 0.4691470645728807, "grad_norm": 1.0057041227395545, "learning_rate": 2.95862602491698e-06, "loss": 0.177, "step": 900 }, { "epoch": 0.4696683390890728, "grad_norm": 1.0330314786302779, "learning_rate": 2.9543878636753514e-06, "loss": 0.2002, "step": 901 }, { "epoch": 0.4701896136052649, "grad_norm": 1.0126061115045026, "learning_rate": 2.950148351513771e-06, "loss": 0.207, "step": 902 }, { "epoch": 0.47071088812145695, "grad_norm": 1.012115005303386, "learning_rate": 2.9459075010365406e-06, "loss": 0.1902, "step": 903 }, { "epoch": 0.47123216263764905, "grad_norm": 1.0790139527402576, "learning_rate": 2.9416653248519404e-06, "loss": 0.1977, "step": 904 }, { "epoch": 0.47175343715384116, "grad_norm": 0.9547667716226275, "learning_rate": 2.9374218355721925e-06, "loss": 0.1864, "step": 905 }, { "epoch": 0.4722747116700332, "grad_norm": 0.9569343156592153, "learning_rate": 2.933177045813421e-06, "loss": 0.1872, "step": 906 }, { "epoch": 0.4727959861862253, "grad_norm": 1.0609203952713406, "learning_rate": 2.9289309681956194e-06, "loss": 0.2047, "step": 907 }, { "epoch": 0.4733172607024174, "grad_norm": 1.0431365337991538, "learning_rate": 2.924683615342607e-06, "loss": 0.1969, "step": 908 }, { "epoch": 0.4738385352186095, "grad_norm": 1.0527870765085736, "learning_rate": 2.920434999881998e-06, "loss": 0.2129, "step": 909 }, { "epoch": 0.4743598097348016, "grad_norm": 0.9369500309949982, "learning_rate": 2.9161851344451563e-06, "loss": 0.1817, "step": 910 }, { "epoch": 0.4748810842509937, "grad_norm": 0.9738920669778763, "learning_rate": 2.9119340316671663e-06, "loss": 0.1979, "step": 911 }, { "epoch": 0.4754023587671858, "grad_norm": 0.9329665064906625, "learning_rate": 2.9076817041867863e-06, "loss": 0.1922, "step": 912 }, { "epoch": 0.47592363328337783, "grad_norm": 0.9597372603106097, "learning_rate": 2.9034281646464197e-06, "loss": 0.1732, "step": 913 }, { "epoch": 0.47644490779956994, "grad_norm": 1.0198109368789043, "learning_rate": 2.8991734256920723e-06, "loss": 0.2053, "step": 914 }, { "epoch": 0.47696618231576204, "grad_norm": 0.950279779039853, "learning_rate": 2.894917499973315e-06, "loss": 0.1949, "step": 915 }, { "epoch": 0.47748745683195415, "grad_norm": 1.061855943908999, "learning_rate": 2.890660400143248e-06, "loss": 0.1928, "step": 916 }, { "epoch": 0.4780087313481462, "grad_norm": 1.094038115550398, "learning_rate": 2.8864021388584606e-06, "loss": 0.2108, "step": 917 }, { "epoch": 0.4785300058643383, "grad_norm": 0.962986987019024, "learning_rate": 2.882142728778997e-06, "loss": 0.2022, "step": 918 }, { "epoch": 0.4790512803805304, "grad_norm": 0.9886755913465457, "learning_rate": 2.877882182568317e-06, "loss": 0.1911, "step": 919 }, { "epoch": 0.4795725548967225, "grad_norm": 0.9420184069269022, "learning_rate": 2.873620512893257e-06, "loss": 0.1821, "step": 920 }, { "epoch": 0.48009382941291456, "grad_norm": 0.9907105831800497, "learning_rate": 2.8693577324239925e-06, "loss": 0.1901, "step": 921 }, { "epoch": 0.48061510392910667, "grad_norm": 1.1281804493203011, "learning_rate": 2.865093853834004e-06, "loss": 0.1941, "step": 922 }, { "epoch": 0.4811363784452988, "grad_norm": 1.0324726305012064, "learning_rate": 2.8608288898000356e-06, "loss": 0.2062, "step": 923 }, { "epoch": 0.4816576529614908, "grad_norm": 0.9998082294529183, "learning_rate": 2.8565628530020584e-06, "loss": 0.1969, "step": 924 }, { "epoch": 0.48217892747768293, "grad_norm": 0.9410194841828444, "learning_rate": 2.8522957561232323e-06, "loss": 0.1889, "step": 925 }, { "epoch": 0.48270020199387503, "grad_norm": 0.9981084233938012, "learning_rate": 2.84802761184987e-06, "loss": 0.1923, "step": 926 }, { "epoch": 0.48322147651006714, "grad_norm": 1.009744428574062, "learning_rate": 2.8437584328713976e-06, "loss": 0.2002, "step": 927 }, { "epoch": 0.4837427510262592, "grad_norm": 1.0191370091693872, "learning_rate": 2.8394882318803174e-06, "loss": 0.1917, "step": 928 }, { "epoch": 0.4842640255424513, "grad_norm": 0.9907623471199559, "learning_rate": 2.835217021572171e-06, "loss": 0.1932, "step": 929 }, { "epoch": 0.4847853000586434, "grad_norm": 0.9493877146289591, "learning_rate": 2.8309448146454993e-06, "loss": 0.1861, "step": 930 }, { "epoch": 0.48530657457483545, "grad_norm": 0.9851626213583532, "learning_rate": 2.8266716238018065e-06, "loss": 0.1877, "step": 931 }, { "epoch": 0.48582784909102755, "grad_norm": 1.0361940174011144, "learning_rate": 2.822397461745524e-06, "loss": 0.2025, "step": 932 }, { "epoch": 0.48634912360721966, "grad_norm": 1.0421742004935162, "learning_rate": 2.8181223411839686e-06, "loss": 0.2009, "step": 933 }, { "epoch": 0.48687039812341176, "grad_norm": 1.0040799179404327, "learning_rate": 2.8138462748273072e-06, "loss": 0.1994, "step": 934 }, { "epoch": 0.4873916726396038, "grad_norm": 0.9213913695963329, "learning_rate": 2.8095692753885177e-06, "loss": 0.1752, "step": 935 }, { "epoch": 0.4879129471557959, "grad_norm": 1.1012263545067054, "learning_rate": 2.805291355583355e-06, "loss": 0.2082, "step": 936 }, { "epoch": 0.488434221671988, "grad_norm": 1.0309678892259206, "learning_rate": 2.8010125281303076e-06, "loss": 0.1888, "step": 937 }, { "epoch": 0.4889554961881801, "grad_norm": 0.9436212370983429, "learning_rate": 2.7967328057505637e-06, "loss": 0.1876, "step": 938 }, { "epoch": 0.4894767707043722, "grad_norm": 1.0588743350687662, "learning_rate": 2.792452201167971e-06, "loss": 0.194, "step": 939 }, { "epoch": 0.4899980452205643, "grad_norm": 1.0231800414996428, "learning_rate": 2.7881707271090018e-06, "loss": 0.2057, "step": 940 }, { "epoch": 0.4905193197367564, "grad_norm": 1.0270862744387954, "learning_rate": 2.7838883963027118e-06, "loss": 0.2023, "step": 941 }, { "epoch": 0.49104059425294844, "grad_norm": 1.0566276299374076, "learning_rate": 2.779605221480706e-06, "loss": 0.2014, "step": 942 }, { "epoch": 0.49156186876914054, "grad_norm": 1.0569768214284592, "learning_rate": 2.7753212153770947e-06, "loss": 0.1861, "step": 943 }, { "epoch": 0.49208314328533265, "grad_norm": 0.9787126303296814, "learning_rate": 2.7710363907284643e-06, "loss": 0.1863, "step": 944 }, { "epoch": 0.49260441780152475, "grad_norm": 0.9767411490508309, "learning_rate": 2.766750760273831e-06, "loss": 0.188, "step": 945 }, { "epoch": 0.4931256923177168, "grad_norm": 1.0939658351889692, "learning_rate": 2.76246433675461e-06, "loss": 0.1951, "step": 946 }, { "epoch": 0.4936469668339089, "grad_norm": 1.172523118347166, "learning_rate": 2.7581771329145713e-06, "loss": 0.203, "step": 947 }, { "epoch": 0.494168241350101, "grad_norm": 1.0061442496305424, "learning_rate": 2.7538891614998074e-06, "loss": 0.2, "step": 948 }, { "epoch": 0.49468951586629306, "grad_norm": 1.041944208690642, "learning_rate": 2.749600435258691e-06, "loss": 0.197, "step": 949 }, { "epoch": 0.49521079038248517, "grad_norm": 1.0130983804244988, "learning_rate": 2.745310966941839e-06, "loss": 0.1904, "step": 950 }, { "epoch": 0.49573206489867727, "grad_norm": 1.0351990108914926, "learning_rate": 2.741020769302077e-06, "loss": 0.1853, "step": 951 }, { "epoch": 0.4962533394148694, "grad_norm": 1.0210238359665258, "learning_rate": 2.7367298550943954e-06, "loss": 0.1875, "step": 952 }, { "epoch": 0.4967746139310614, "grad_norm": 0.9970390003149875, "learning_rate": 2.7324382370759174e-06, "loss": 0.1855, "step": 953 }, { "epoch": 0.49729588844725353, "grad_norm": 1.0879513566840098, "learning_rate": 2.7281459280058563e-06, "loss": 0.2108, "step": 954 }, { "epoch": 0.49781716296344564, "grad_norm": 1.1430957619253195, "learning_rate": 2.7238529406454834e-06, "loss": 0.1819, "step": 955 }, { "epoch": 0.49833843747963774, "grad_norm": 0.9771932290793619, "learning_rate": 2.719559287758085e-06, "loss": 0.1796, "step": 956 }, { "epoch": 0.4988597119958298, "grad_norm": 1.0436141215860535, "learning_rate": 2.7152649821089245e-06, "loss": 0.1943, "step": 957 }, { "epoch": 0.4993809865120219, "grad_norm": 0.9892708407682, "learning_rate": 2.7109700364652075e-06, "loss": 0.1905, "step": 958 }, { "epoch": 0.499902261028214, "grad_norm": 0.9916434111409589, "learning_rate": 2.7066744635960423e-06, "loss": 0.1858, "step": 959 }, { "epoch": 0.5004235355444061, "grad_norm": 0.9748778629471573, "learning_rate": 2.702378276272402e-06, "loss": 0.2101, "step": 960 }, { "epoch": 0.5009448100605982, "grad_norm": 1.0520038494784436, "learning_rate": 2.6980814872670863e-06, "loss": 0.1945, "step": 961 }, { "epoch": 0.5014660845767902, "grad_norm": 1.019315172372774, "learning_rate": 2.6937841093546842e-06, "loss": 0.1931, "step": 962 }, { "epoch": 0.5019873590929823, "grad_norm": 1.0290239491636743, "learning_rate": 2.6894861553115337e-06, "loss": 0.2038, "step": 963 }, { "epoch": 0.5025086336091744, "grad_norm": 0.9753619324088938, "learning_rate": 2.6851876379156884e-06, "loss": 0.1858, "step": 964 }, { "epoch": 0.5030299081253665, "grad_norm": 1.073883143404841, "learning_rate": 2.680888569946874e-06, "loss": 0.2225, "step": 965 }, { "epoch": 0.5035511826415586, "grad_norm": 0.9492011107722095, "learning_rate": 2.6765889641864562e-06, "loss": 0.1881, "step": 966 }, { "epoch": 0.5040724571577507, "grad_norm": 0.9673199856485143, "learning_rate": 2.6722888334173974e-06, "loss": 0.1936, "step": 967 }, { "epoch": 0.5045937316739428, "grad_norm": 1.0076521636183045, "learning_rate": 2.6679881904242198e-06, "loss": 0.2029, "step": 968 }, { "epoch": 0.5051150061901348, "grad_norm": 0.9686161019023214, "learning_rate": 2.663687047992972e-06, "loss": 0.1841, "step": 969 }, { "epoch": 0.5056362807063269, "grad_norm": 1.0250906895196519, "learning_rate": 2.6593854189111857e-06, "loss": 0.1981, "step": 970 }, { "epoch": 0.506157555222519, "grad_norm": 0.9616510423370317, "learning_rate": 2.655083315967838e-06, "loss": 0.1869, "step": 971 }, { "epoch": 0.5066788297387111, "grad_norm": 0.9930176109805637, "learning_rate": 2.650780751953316e-06, "loss": 0.1873, "step": 972 }, { "epoch": 0.5072001042549033, "grad_norm": 0.9967805328060277, "learning_rate": 2.646477739659378e-06, "loss": 0.1952, "step": 973 }, { "epoch": 0.5077213787710954, "grad_norm": 0.9871931304412768, "learning_rate": 2.6421742918791155e-06, "loss": 0.1983, "step": 974 }, { "epoch": 0.5082426532872875, "grad_norm": 1.0323661862589764, "learning_rate": 2.6378704214069133e-06, "loss": 0.1804, "step": 975 }, { "epoch": 0.5087639278034795, "grad_norm": 1.0229802891072548, "learning_rate": 2.633566141038413e-06, "loss": 0.2017, "step": 976 }, { "epoch": 0.5092852023196716, "grad_norm": 1.0240567212255771, "learning_rate": 2.629261463570476e-06, "loss": 0.1983, "step": 977 }, { "epoch": 0.5098064768358637, "grad_norm": 1.0186229069169201, "learning_rate": 2.6249564018011437e-06, "loss": 0.1878, "step": 978 }, { "epoch": 0.5103277513520558, "grad_norm": 0.9399329028559811, "learning_rate": 2.6206509685296e-06, "loss": 0.1957, "step": 979 }, { "epoch": 0.5108490258682479, "grad_norm": 1.0070801784398393, "learning_rate": 2.6163451765561324e-06, "loss": 0.1963, "step": 980 }, { "epoch": 0.51137030038444, "grad_norm": 0.9756463630914503, "learning_rate": 2.6120390386820975e-06, "loss": 0.1999, "step": 981 }, { "epoch": 0.5118915749006321, "grad_norm": 1.0959003656589381, "learning_rate": 2.607732567709877e-06, "loss": 0.1931, "step": 982 }, { "epoch": 0.5124128494168242, "grad_norm": 1.0262562081196058, "learning_rate": 2.6034257764428456e-06, "loss": 0.1997, "step": 983 }, { "epoch": 0.5129341239330162, "grad_norm": 0.9511566278900171, "learning_rate": 2.5991186776853277e-06, "loss": 0.1849, "step": 984 }, { "epoch": 0.5134553984492083, "grad_norm": 0.958443517567957, "learning_rate": 2.594811284242565e-06, "loss": 0.1783, "step": 985 }, { "epoch": 0.5139766729654004, "grad_norm": 0.9842691568175002, "learning_rate": 2.590503608920672e-06, "loss": 0.1944, "step": 986 }, { "epoch": 0.5144979474815925, "grad_norm": 1.0055481610107908, "learning_rate": 2.5861956645266036e-06, "loss": 0.1949, "step": 987 }, { "epoch": 0.5150192219977846, "grad_norm": 0.9527635866518458, "learning_rate": 2.581887463868114e-06, "loss": 0.1887, "step": 988 }, { "epoch": 0.5155404965139767, "grad_norm": 1.025332524858524, "learning_rate": 2.57757901975372e-06, "loss": 0.1996, "step": 989 }, { "epoch": 0.5160617710301688, "grad_norm": 1.0680557620822233, "learning_rate": 2.5732703449926595e-06, "loss": 0.2055, "step": 990 }, { "epoch": 0.5165830455463608, "grad_norm": 1.0380369075475906, "learning_rate": 2.568961452394859e-06, "loss": 0.1906, "step": 991 }, { "epoch": 0.5171043200625529, "grad_norm": 0.9456688282630512, "learning_rate": 2.564652354770892e-06, "loss": 0.1865, "step": 992 }, { "epoch": 0.517625594578745, "grad_norm": 1.0840975960140231, "learning_rate": 2.560343064931941e-06, "loss": 0.2152, "step": 993 }, { "epoch": 0.5181468690949371, "grad_norm": 0.9754317560093736, "learning_rate": 2.5560335956897603e-06, "loss": 0.1862, "step": 994 }, { "epoch": 0.5186681436111292, "grad_norm": 0.975666731751929, "learning_rate": 2.551723959856637e-06, "loss": 0.1889, "step": 995 }, { "epoch": 0.5191894181273213, "grad_norm": 0.9498987479759115, "learning_rate": 2.5474141702453536e-06, "loss": 0.1894, "step": 996 }, { "epoch": 0.5197106926435134, "grad_norm": 1.0287537604691805, "learning_rate": 2.543104239669152e-06, "loss": 0.2087, "step": 997 }, { "epoch": 0.5202319671597054, "grad_norm": 0.9942554093772231, "learning_rate": 2.5387941809416895e-06, "loss": 0.1795, "step": 998 }, { "epoch": 0.5207532416758975, "grad_norm": 1.0290989991698256, "learning_rate": 2.5344840068770076e-06, "loss": 0.1873, "step": 999 }, { "epoch": 0.5212745161920896, "grad_norm": 1.062500241588878, "learning_rate": 2.530173730289488e-06, "loss": 0.1946, "step": 1000 }, { "epoch": 0.5212745161920896, "eval_loss": 0.19480524957180023, "eval_runtime": 52.2734, "eval_samples_per_second": 23.741, "eval_steps_per_second": 2.984, "step": 1000 }, { "epoch": 0.5217957907082817, "grad_norm": 1.0383174062236034, "learning_rate": 2.5258633639938195e-06, "loss": 0.1995, "step": 1001 }, { "epoch": 0.5223170652244739, "grad_norm": 1.0619346174205255, "learning_rate": 2.521552920804956e-06, "loss": 0.2085, "step": 1002 }, { "epoch": 0.522838339740666, "grad_norm": 1.1587513971294219, "learning_rate": 2.5172424135380817e-06, "loss": 0.2047, "step": 1003 }, { "epoch": 0.5233596142568581, "grad_norm": 1.0254346556986893, "learning_rate": 2.51293185500857e-06, "loss": 0.1905, "step": 1004 }, { "epoch": 0.5238808887730501, "grad_norm": 1.007732855163628, "learning_rate": 2.5086212580319457e-06, "loss": 0.1995, "step": 1005 }, { "epoch": 0.5244021632892422, "grad_norm": 1.0065215395102691, "learning_rate": 2.50431063542385e-06, "loss": 0.1931, "step": 1006 }, { "epoch": 0.5249234378054343, "grad_norm": 1.1028563144125332, "learning_rate": 2.5e-06, "loss": 0.2001, "step": 1007 }, { "epoch": 0.5254447123216264, "grad_norm": 1.0547991596381279, "learning_rate": 2.49568936457615e-06, "loss": 0.197, "step": 1008 }, { "epoch": 0.5259659868378185, "grad_norm": 1.0501645268059348, "learning_rate": 2.491378741968055e-06, "loss": 0.1898, "step": 1009 }, { "epoch": 0.5264872613540106, "grad_norm": 1.0453727558531773, "learning_rate": 2.487068144991431e-06, "loss": 0.1935, "step": 1010 }, { "epoch": 0.5270085358702027, "grad_norm": 0.9784268100081941, "learning_rate": 2.4827575864619183e-06, "loss": 0.2068, "step": 1011 }, { "epoch": 0.5275298103863947, "grad_norm": 0.9489702371591282, "learning_rate": 2.4784470791950442e-06, "loss": 0.1952, "step": 1012 }, { "epoch": 0.5280510849025868, "grad_norm": 1.066317094816582, "learning_rate": 2.4741366360061813e-06, "loss": 0.1911, "step": 1013 }, { "epoch": 0.5285723594187789, "grad_norm": 1.0310100518300547, "learning_rate": 2.4698262697105128e-06, "loss": 0.1986, "step": 1014 }, { "epoch": 0.529093633934971, "grad_norm": 0.983376330407415, "learning_rate": 2.4655159931229932e-06, "loss": 0.1794, "step": 1015 }, { "epoch": 0.5296149084511631, "grad_norm": 0.9973472750235451, "learning_rate": 2.461205819058311e-06, "loss": 0.2025, "step": 1016 }, { "epoch": 0.5301361829673552, "grad_norm": 0.9963200987254586, "learning_rate": 2.4568957603308494e-06, "loss": 0.1891, "step": 1017 }, { "epoch": 0.5306574574835473, "grad_norm": 1.03843991512085, "learning_rate": 2.452585829754647e-06, "loss": 0.1957, "step": 1018 }, { "epoch": 0.5311787319997394, "grad_norm": 1.0284403625647769, "learning_rate": 2.448276040143364e-06, "loss": 0.212, "step": 1019 }, { "epoch": 0.5317000065159314, "grad_norm": 1.0375123811085925, "learning_rate": 2.4439664043102414e-06, "loss": 0.2078, "step": 1020 }, { "epoch": 0.5322212810321235, "grad_norm": 1.0154917946726163, "learning_rate": 2.43965693506806e-06, "loss": 0.1905, "step": 1021 }, { "epoch": 0.5327425555483156, "grad_norm": 1.0771383672005177, "learning_rate": 2.4353476452291086e-06, "loss": 0.1882, "step": 1022 }, { "epoch": 0.5332638300645077, "grad_norm": 1.0576240219047033, "learning_rate": 2.431038547605142e-06, "loss": 0.1753, "step": 1023 }, { "epoch": 0.5337851045806998, "grad_norm": 1.0111187689876087, "learning_rate": 2.4267296550073413e-06, "loss": 0.1985, "step": 1024 }, { "epoch": 0.5343063790968919, "grad_norm": 1.0250830102181434, "learning_rate": 2.4224209802462818e-06, "loss": 0.1889, "step": 1025 }, { "epoch": 0.534827653613084, "grad_norm": 1.0333860347256845, "learning_rate": 2.4181125361318868e-06, "loss": 0.1883, "step": 1026 }, { "epoch": 0.535348928129276, "grad_norm": 1.052906861325362, "learning_rate": 2.413804335473397e-06, "loss": 0.1869, "step": 1027 }, { "epoch": 0.5358702026454681, "grad_norm": 1.0145844359267704, "learning_rate": 2.409496391079329e-06, "loss": 0.1856, "step": 1028 }, { "epoch": 0.5363914771616602, "grad_norm": 1.0461114420942073, "learning_rate": 2.4051887157574356e-06, "loss": 0.1971, "step": 1029 }, { "epoch": 0.5369127516778524, "grad_norm": 0.975138981928165, "learning_rate": 2.4008813223146723e-06, "loss": 0.1774, "step": 1030 }, { "epoch": 0.5374340261940445, "grad_norm": 1.079976404735243, "learning_rate": 2.3965742235571557e-06, "loss": 0.2078, "step": 1031 }, { "epoch": 0.5379553007102366, "grad_norm": 1.0856031527562608, "learning_rate": 2.3922674322901236e-06, "loss": 0.2026, "step": 1032 }, { "epoch": 0.5384765752264287, "grad_norm": 1.0705003957337433, "learning_rate": 2.387960961317903e-06, "loss": 0.2027, "step": 1033 }, { "epoch": 0.5389978497426207, "grad_norm": 1.0248865414814123, "learning_rate": 2.383654823443868e-06, "loss": 0.1992, "step": 1034 }, { "epoch": 0.5395191242588128, "grad_norm": 1.0290121022581171, "learning_rate": 2.3793490314704005e-06, "loss": 0.1914, "step": 1035 }, { "epoch": 0.5400403987750049, "grad_norm": 1.1076737668760843, "learning_rate": 2.3750435981988576e-06, "loss": 0.2115, "step": 1036 }, { "epoch": 0.540561673291197, "grad_norm": 1.0214795198933087, "learning_rate": 2.3707385364295245e-06, "loss": 0.2009, "step": 1037 }, { "epoch": 0.5410829478073891, "grad_norm": 0.9830881085492219, "learning_rate": 2.366433858961587e-06, "loss": 0.195, "step": 1038 }, { "epoch": 0.5416042223235812, "grad_norm": 1.0182317451712537, "learning_rate": 2.362129578593088e-06, "loss": 0.1926, "step": 1039 }, { "epoch": 0.5421254968397733, "grad_norm": 1.0927879125581443, "learning_rate": 2.3578257081208853e-06, "loss": 0.1947, "step": 1040 }, { "epoch": 0.5426467713559653, "grad_norm": 1.1605323377220922, "learning_rate": 2.3535222603406223e-06, "loss": 0.2107, "step": 1041 }, { "epoch": 0.5431680458721574, "grad_norm": 0.994353490468453, "learning_rate": 2.3492192480466845e-06, "loss": 0.1857, "step": 1042 }, { "epoch": 0.5436893203883495, "grad_norm": 0.9673106256287484, "learning_rate": 2.344916684032163e-06, "loss": 0.1784, "step": 1043 }, { "epoch": 0.5442105949045416, "grad_norm": 1.0775673476430663, "learning_rate": 2.3406145810888143e-06, "loss": 0.1993, "step": 1044 }, { "epoch": 0.5447318694207337, "grad_norm": 1.0071909867214548, "learning_rate": 2.3363129520070286e-06, "loss": 0.1858, "step": 1045 }, { "epoch": 0.5452531439369258, "grad_norm": 1.0486545569572066, "learning_rate": 2.3320118095757806e-06, "loss": 0.2105, "step": 1046 }, { "epoch": 0.5457744184531179, "grad_norm": 1.081028929707012, "learning_rate": 2.327711166582604e-06, "loss": 0.1985, "step": 1047 }, { "epoch": 0.5462956929693099, "grad_norm": 1.037851463032111, "learning_rate": 2.3234110358135446e-06, "loss": 0.1802, "step": 1048 }, { "epoch": 0.546816967485502, "grad_norm": 0.9955744873005768, "learning_rate": 2.319111430053126e-06, "loss": 0.19, "step": 1049 }, { "epoch": 0.5473382420016941, "grad_norm": 0.9625839485821711, "learning_rate": 2.3148123620843132e-06, "loss": 0.1903, "step": 1050 }, { "epoch": 0.5478595165178862, "grad_norm": 0.98467636090744, "learning_rate": 2.310513844688467e-06, "loss": 0.1858, "step": 1051 }, { "epoch": 0.5483807910340783, "grad_norm": 1.0085351128724585, "learning_rate": 2.306215890645316e-06, "loss": 0.1864, "step": 1052 }, { "epoch": 0.5489020655502704, "grad_norm": 0.997147524038755, "learning_rate": 2.3019185127329145e-06, "loss": 0.1871, "step": 1053 }, { "epoch": 0.5494233400664625, "grad_norm": 1.0495981467979099, "learning_rate": 2.2976217237275983e-06, "loss": 0.1896, "step": 1054 }, { "epoch": 0.5499446145826546, "grad_norm": 0.9845529574139438, "learning_rate": 2.293325536403958e-06, "loss": 0.2015, "step": 1055 }, { "epoch": 0.5504658890988466, "grad_norm": 1.052070335880451, "learning_rate": 2.2890299635347933e-06, "loss": 0.1978, "step": 1056 }, { "epoch": 0.5509871636150387, "grad_norm": 1.0192406532778389, "learning_rate": 2.2847350178910763e-06, "loss": 0.199, "step": 1057 }, { "epoch": 0.5515084381312308, "grad_norm": 0.9255815167749964, "learning_rate": 2.2804407122419165e-06, "loss": 0.1743, "step": 1058 }, { "epoch": 0.552029712647423, "grad_norm": 0.9425920036365858, "learning_rate": 2.276147059354517e-06, "loss": 0.1915, "step": 1059 }, { "epoch": 0.5525509871636151, "grad_norm": 0.9756852726978495, "learning_rate": 2.271854071994144e-06, "loss": 0.1851, "step": 1060 }, { "epoch": 0.5530722616798072, "grad_norm": 1.0319509062414018, "learning_rate": 2.2675617629240842e-06, "loss": 0.2026, "step": 1061 }, { "epoch": 0.5535935361959993, "grad_norm": 1.0469736880808636, "learning_rate": 2.2632701449056054e-06, "loss": 0.1985, "step": 1062 }, { "epoch": 0.5541148107121913, "grad_norm": 1.0154988381567833, "learning_rate": 2.258979230697923e-06, "loss": 0.184, "step": 1063 }, { "epoch": 0.5546360852283834, "grad_norm": 1.0706994099193394, "learning_rate": 2.2546890330581616e-06, "loss": 0.1861, "step": 1064 }, { "epoch": 0.5551573597445755, "grad_norm": 1.0465560677474248, "learning_rate": 2.25039956474131e-06, "loss": 0.1878, "step": 1065 }, { "epoch": 0.5556786342607676, "grad_norm": 1.029253640779406, "learning_rate": 2.246110838500194e-06, "loss": 0.1914, "step": 1066 }, { "epoch": 0.5561999087769597, "grad_norm": 1.0753625496287706, "learning_rate": 2.241822867085429e-06, "loss": 0.1935, "step": 1067 }, { "epoch": 0.5567211832931518, "grad_norm": 1.0894709655635972, "learning_rate": 2.2375356632453906e-06, "loss": 0.1893, "step": 1068 }, { "epoch": 0.5572424578093439, "grad_norm": 1.1026529187740728, "learning_rate": 2.2332492397261695e-06, "loss": 0.1951, "step": 1069 }, { "epoch": 0.5577637323255359, "grad_norm": 0.9641199594525882, "learning_rate": 2.2289636092715365e-06, "loss": 0.1834, "step": 1070 }, { "epoch": 0.558285006841728, "grad_norm": 1.0402753633451525, "learning_rate": 2.2246787846229057e-06, "loss": 0.1834, "step": 1071 }, { "epoch": 0.5588062813579201, "grad_norm": 1.081780488844143, "learning_rate": 2.2203947785192954e-06, "loss": 0.1878, "step": 1072 }, { "epoch": 0.5593275558741122, "grad_norm": 1.0234654215462535, "learning_rate": 2.2161116036972886e-06, "loss": 0.1943, "step": 1073 }, { "epoch": 0.5598488303903043, "grad_norm": 1.0411803692422346, "learning_rate": 2.2118292728909986e-06, "loss": 0.2069, "step": 1074 }, { "epoch": 0.5603701049064964, "grad_norm": 0.9713387589599493, "learning_rate": 2.2075477988320295e-06, "loss": 0.1849, "step": 1075 }, { "epoch": 0.5608913794226885, "grad_norm": 1.0246645842699202, "learning_rate": 2.2032671942494367e-06, "loss": 0.1844, "step": 1076 }, { "epoch": 0.5614126539388805, "grad_norm": 0.9611141873928662, "learning_rate": 2.1989874718696936e-06, "loss": 0.1926, "step": 1077 }, { "epoch": 0.5619339284550726, "grad_norm": 1.0452694949791048, "learning_rate": 2.194708644416646e-06, "loss": 0.1971, "step": 1078 }, { "epoch": 0.5624552029712647, "grad_norm": 0.96241502887155, "learning_rate": 2.1904307246114827e-06, "loss": 0.1917, "step": 1079 }, { "epoch": 0.5629764774874568, "grad_norm": 1.0410217584016626, "learning_rate": 2.1861537251726944e-06, "loss": 0.2003, "step": 1080 }, { "epoch": 0.5634977520036489, "grad_norm": 0.9990407192557624, "learning_rate": 2.1818776588160323e-06, "loss": 0.1872, "step": 1081 }, { "epoch": 0.564019026519841, "grad_norm": 1.0543177739241163, "learning_rate": 2.1776025382544765e-06, "loss": 0.2082, "step": 1082 }, { "epoch": 0.5645403010360331, "grad_norm": 1.0179741092006622, "learning_rate": 2.173328376198194e-06, "loss": 0.1894, "step": 1083 }, { "epoch": 0.5650615755522251, "grad_norm": 1.1269521168293783, "learning_rate": 2.1690551853545016e-06, "loss": 0.2155, "step": 1084 }, { "epoch": 0.5655828500684172, "grad_norm": 0.9797289046446224, "learning_rate": 2.1647829784278294e-06, "loss": 0.1908, "step": 1085 }, { "epoch": 0.5661041245846093, "grad_norm": 1.0049914572735903, "learning_rate": 2.1605117681196834e-06, "loss": 0.1878, "step": 1086 }, { "epoch": 0.5666253991008015, "grad_norm": 1.0936325945307788, "learning_rate": 2.1562415671286032e-06, "loss": 0.2054, "step": 1087 }, { "epoch": 0.5671466736169936, "grad_norm": 1.0392537049431012, "learning_rate": 2.151972388150131e-06, "loss": 0.2041, "step": 1088 }, { "epoch": 0.5676679481331857, "grad_norm": 1.0644432398734358, "learning_rate": 2.1477042438767685e-06, "loss": 0.2039, "step": 1089 }, { "epoch": 0.5681892226493778, "grad_norm": 0.9914071825710259, "learning_rate": 2.1434371469979424e-06, "loss": 0.1968, "step": 1090 }, { "epoch": 0.5687104971655699, "grad_norm": 1.0280714916865434, "learning_rate": 2.1391711101999656e-06, "loss": 0.2044, "step": 1091 }, { "epoch": 0.5692317716817619, "grad_norm": 1.015323078566454, "learning_rate": 2.1349061461659966e-06, "loss": 0.1994, "step": 1092 }, { "epoch": 0.569753046197954, "grad_norm": 1.0029348080456495, "learning_rate": 2.130642267576008e-06, "loss": 0.1848, "step": 1093 }, { "epoch": 0.5702743207141461, "grad_norm": 1.0256878184254634, "learning_rate": 2.1263794871067443e-06, "loss": 0.1999, "step": 1094 }, { "epoch": 0.5707955952303382, "grad_norm": 0.8964762595915654, "learning_rate": 2.1221178174316833e-06, "loss": 0.1693, "step": 1095 }, { "epoch": 0.5713168697465303, "grad_norm": 0.9850586416445359, "learning_rate": 2.117857271221003e-06, "loss": 0.1859, "step": 1096 }, { "epoch": 0.5718381442627224, "grad_norm": 1.052259025949159, "learning_rate": 2.11359786114154e-06, "loss": 0.1982, "step": 1097 }, { "epoch": 0.5723594187789145, "grad_norm": 0.9277809484196436, "learning_rate": 2.1093395998567527e-06, "loss": 0.1798, "step": 1098 }, { "epoch": 0.5728806932951065, "grad_norm": 0.983532854304064, "learning_rate": 2.1050825000266862e-06, "loss": 0.1765, "step": 1099 }, { "epoch": 0.5734019678112986, "grad_norm": 0.9631298651135378, "learning_rate": 2.1008265743079286e-06, "loss": 0.1958, "step": 1100 }, { "epoch": 0.5739232423274907, "grad_norm": 1.1033429405561814, "learning_rate": 2.0965718353535807e-06, "loss": 0.2055, "step": 1101 }, { "epoch": 0.5744445168436828, "grad_norm": 1.112349427593408, "learning_rate": 2.0923182958132146e-06, "loss": 0.2047, "step": 1102 }, { "epoch": 0.5749657913598749, "grad_norm": 0.9959652770009602, "learning_rate": 2.0880659683328346e-06, "loss": 0.187, "step": 1103 }, { "epoch": 0.575487065876067, "grad_norm": 1.0564422259924786, "learning_rate": 2.0838148655548433e-06, "loss": 0.2004, "step": 1104 }, { "epoch": 0.5760083403922591, "grad_norm": 1.0130265251843205, "learning_rate": 2.0795650001180028e-06, "loss": 0.191, "step": 1105 }, { "epoch": 0.5765296149084511, "grad_norm": 0.9988574601298932, "learning_rate": 2.0753163846573933e-06, "loss": 0.1997, "step": 1106 }, { "epoch": 0.5770508894246432, "grad_norm": 1.0298772663531115, "learning_rate": 2.0710690318043814e-06, "loss": 0.1913, "step": 1107 }, { "epoch": 0.5775721639408353, "grad_norm": 0.9523722397681231, "learning_rate": 2.0668229541865796e-06, "loss": 0.1857, "step": 1108 }, { "epoch": 0.5780934384570274, "grad_norm": 0.9797070321569168, "learning_rate": 2.0625781644278083e-06, "loss": 0.1943, "step": 1109 }, { "epoch": 0.5786147129732195, "grad_norm": 0.9838905532587987, "learning_rate": 2.058334675148061e-06, "loss": 0.1885, "step": 1110 }, { "epoch": 0.5791359874894116, "grad_norm": 0.9885106191955585, "learning_rate": 2.05409249896346e-06, "loss": 0.186, "step": 1111 }, { "epoch": 0.5796572620056037, "grad_norm": 0.9871469852945931, "learning_rate": 2.049851648486229e-06, "loss": 0.1935, "step": 1112 }, { "epoch": 0.5801785365217957, "grad_norm": 0.9966312159771141, "learning_rate": 2.04561213632465e-06, "loss": 0.1774, "step": 1113 }, { "epoch": 0.5806998110379878, "grad_norm": 1.0016101012425886, "learning_rate": 2.041373975083021e-06, "loss": 0.1978, "step": 1114 }, { "epoch": 0.58122108555418, "grad_norm": 1.0772865636871705, "learning_rate": 2.037137177361628e-06, "loss": 0.2117, "step": 1115 }, { "epoch": 0.581742360070372, "grad_norm": 1.031443556302358, "learning_rate": 2.0329017557567034e-06, "loss": 0.1997, "step": 1116 }, { "epoch": 0.5822636345865642, "grad_norm": 0.9954142013373319, "learning_rate": 2.028667722860386e-06, "loss": 0.195, "step": 1117 }, { "epoch": 0.5827849091027563, "grad_norm": 1.08950454301798, "learning_rate": 2.024435091260687e-06, "loss": 0.1965, "step": 1118 }, { "epoch": 0.5833061836189484, "grad_norm": 1.09820177135685, "learning_rate": 2.0202038735414532e-06, "loss": 0.2055, "step": 1119 }, { "epoch": 0.5838274581351404, "grad_norm": 1.0309431487311989, "learning_rate": 2.0159740822823233e-06, "loss": 0.1887, "step": 1120 }, { "epoch": 0.5843487326513325, "grad_norm": 0.9988968546634137, "learning_rate": 2.0117457300586996e-06, "loss": 0.1761, "step": 1121 }, { "epoch": 0.5848700071675246, "grad_norm": 1.0316307028818723, "learning_rate": 2.007518829441704e-06, "loss": 0.1849, "step": 1122 }, { "epoch": 0.5853912816837167, "grad_norm": 1.083940789434207, "learning_rate": 2.0032933929981425e-06, "loss": 0.1974, "step": 1123 }, { "epoch": 0.5859125561999088, "grad_norm": 1.0055559369202263, "learning_rate": 1.9990694332904705e-06, "loss": 0.1903, "step": 1124 }, { "epoch": 0.5864338307161009, "grad_norm": 1.1098092742660377, "learning_rate": 1.9948469628767475e-06, "loss": 0.197, "step": 1125 }, { "epoch": 0.586955105232293, "grad_norm": 0.9883818948839239, "learning_rate": 1.990625994310612e-06, "loss": 0.1956, "step": 1126 }, { "epoch": 0.5874763797484851, "grad_norm": 0.9848938775649817, "learning_rate": 1.986406540141234e-06, "loss": 0.1961, "step": 1127 }, { "epoch": 0.5879976542646771, "grad_norm": 1.0458643023845622, "learning_rate": 1.98218861291328e-06, "loss": 0.2008, "step": 1128 }, { "epoch": 0.5885189287808692, "grad_norm": 1.007080661415992, "learning_rate": 1.977972225166878e-06, "loss": 0.1744, "step": 1129 }, { "epoch": 0.5890402032970613, "grad_norm": 0.9341279253901913, "learning_rate": 1.973757389437581e-06, "loss": 0.1786, "step": 1130 }, { "epoch": 0.5895614778132534, "grad_norm": 1.0046180442150527, "learning_rate": 1.9695441182563237e-06, "loss": 0.201, "step": 1131 }, { "epoch": 0.5900827523294455, "grad_norm": 1.0791793317015532, "learning_rate": 1.965332424149394e-06, "loss": 0.1978, "step": 1132 }, { "epoch": 0.5906040268456376, "grad_norm": 0.9524534239736143, "learning_rate": 1.9611223196383866e-06, "loss": 0.1875, "step": 1133 }, { "epoch": 0.5911253013618297, "grad_norm": 0.9955184698085288, "learning_rate": 1.956913817240173e-06, "loss": 0.199, "step": 1134 }, { "epoch": 0.5916465758780217, "grad_norm": 0.9558706784940049, "learning_rate": 1.9527069294668617e-06, "loss": 0.1784, "step": 1135 }, { "epoch": 0.5921678503942138, "grad_norm": 0.9597385949296013, "learning_rate": 1.9485016688257578e-06, "loss": 0.1849, "step": 1136 }, { "epoch": 0.5926891249104059, "grad_norm": 1.0197141801144376, "learning_rate": 1.9442980478193332e-06, "loss": 0.1965, "step": 1137 }, { "epoch": 0.593210399426598, "grad_norm": 1.0512916670594386, "learning_rate": 1.9400960789451827e-06, "loss": 0.1944, "step": 1138 }, { "epoch": 0.5937316739427901, "grad_norm": 1.0086948626775847, "learning_rate": 1.935895774695988e-06, "loss": 0.1932, "step": 1139 }, { "epoch": 0.5942529484589822, "grad_norm": 1.008689795216177, "learning_rate": 1.9316971475594835e-06, "loss": 0.187, "step": 1140 }, { "epoch": 0.5947742229751743, "grad_norm": 0.9794699211499784, "learning_rate": 1.9275002100184186e-06, "loss": 0.1755, "step": 1141 }, { "epoch": 0.5952954974913663, "grad_norm": 0.969416416794242, "learning_rate": 1.9233049745505167e-06, "loss": 0.1822, "step": 1142 }, { "epoch": 0.5958167720075584, "grad_norm": 1.081025759730669, "learning_rate": 1.919111453628442e-06, "loss": 0.2018, "step": 1143 }, { "epoch": 0.5963380465237506, "grad_norm": 0.9710783893843369, "learning_rate": 1.914919659719762e-06, "loss": 0.1943, "step": 1144 }, { "epoch": 0.5968593210399427, "grad_norm": 1.0048716650665384, "learning_rate": 1.9107296052869086e-06, "loss": 0.1993, "step": 1145 }, { "epoch": 0.5973805955561348, "grad_norm": 1.0270218106767095, "learning_rate": 1.9065413027871437e-06, "loss": 0.2039, "step": 1146 }, { "epoch": 0.5979018700723269, "grad_norm": 0.9785024434468242, "learning_rate": 1.902354764672518e-06, "loss": 0.1835, "step": 1147 }, { "epoch": 0.598423144588519, "grad_norm": 0.974720285983276, "learning_rate": 1.8981700033898387e-06, "loss": 0.1916, "step": 1148 }, { "epoch": 0.598944419104711, "grad_norm": 0.9702604223153065, "learning_rate": 1.8939870313806302e-06, "loss": 0.1709, "step": 1149 }, { "epoch": 0.5994656936209031, "grad_norm": 0.989097265484285, "learning_rate": 1.8898058610810963e-06, "loss": 0.1976, "step": 1150 }, { "epoch": 0.5999869681370952, "grad_norm": 0.9488843498461761, "learning_rate": 1.8856265049220852e-06, "loss": 0.1763, "step": 1151 }, { "epoch": 0.6005082426532873, "grad_norm": 0.9596820229292397, "learning_rate": 1.8814489753290517e-06, "loss": 0.1905, "step": 1152 }, { "epoch": 0.6010295171694794, "grad_norm": 1.0384689717698112, "learning_rate": 1.8772732847220182e-06, "loss": 0.1805, "step": 1153 }, { "epoch": 0.6015507916856715, "grad_norm": 1.0290105611946982, "learning_rate": 1.873099445515542e-06, "loss": 0.1988, "step": 1154 }, { "epoch": 0.6020720662018636, "grad_norm": 1.025034490780247, "learning_rate": 1.868927470118675e-06, "loss": 0.1873, "step": 1155 }, { "epoch": 0.6025933407180556, "grad_norm": 1.025733485025965, "learning_rate": 1.8647573709349275e-06, "loss": 0.19, "step": 1156 }, { "epoch": 0.6031146152342477, "grad_norm": 0.9609127405778992, "learning_rate": 1.860589160362234e-06, "loss": 0.1855, "step": 1157 }, { "epoch": 0.6036358897504398, "grad_norm": 0.995953626450909, "learning_rate": 1.8564228507929099e-06, "loss": 0.1885, "step": 1158 }, { "epoch": 0.6041571642666319, "grad_norm": 0.9578995736358235, "learning_rate": 1.852258454613623e-06, "loss": 0.1839, "step": 1159 }, { "epoch": 0.604678438782824, "grad_norm": 1.039678404554969, "learning_rate": 1.8480959842053508e-06, "loss": 0.1871, "step": 1160 }, { "epoch": 0.6051997132990161, "grad_norm": 1.0276613326638602, "learning_rate": 1.843935451943344e-06, "loss": 0.1863, "step": 1161 }, { "epoch": 0.6057209878152082, "grad_norm": 1.0166903319135137, "learning_rate": 1.839776870197093e-06, "loss": 0.1895, "step": 1162 }, { "epoch": 0.6062422623314003, "grad_norm": 1.0187451093130595, "learning_rate": 1.8356202513302896e-06, "loss": 0.1851, "step": 1163 }, { "epoch": 0.6067635368475923, "grad_norm": 1.0097296058564171, "learning_rate": 1.831465607700787e-06, "loss": 0.1845, "step": 1164 }, { "epoch": 0.6072848113637844, "grad_norm": 1.0328200061175161, "learning_rate": 1.8273129516605698e-06, "loss": 0.1938, "step": 1165 }, { "epoch": 0.6078060858799765, "grad_norm": 1.052763044966393, "learning_rate": 1.8231622955557094e-06, "loss": 0.1871, "step": 1166 }, { "epoch": 0.6083273603961686, "grad_norm": 0.9365124362744572, "learning_rate": 1.819013651726334e-06, "loss": 0.183, "step": 1167 }, { "epoch": 0.6088486349123607, "grad_norm": 0.9824999481894984, "learning_rate": 1.8148670325065893e-06, "loss": 0.192, "step": 1168 }, { "epoch": 0.6093699094285528, "grad_norm": 1.0403686506787184, "learning_rate": 1.8107224502245997e-06, "loss": 0.1932, "step": 1169 }, { "epoch": 0.609891183944745, "grad_norm": 0.9876137785006327, "learning_rate": 1.806579917202435e-06, "loss": 0.1843, "step": 1170 }, { "epoch": 0.610412458460937, "grad_norm": 1.0298525415804813, "learning_rate": 1.802439445756073e-06, "loss": 0.1998, "step": 1171 }, { "epoch": 0.610933732977129, "grad_norm": 1.0240435196918738, "learning_rate": 1.7983010481953605e-06, "loss": 0.2012, "step": 1172 }, { "epoch": 0.6114550074933212, "grad_norm": 0.9783351242362962, "learning_rate": 1.7941647368239806e-06, "loss": 0.1767, "step": 1173 }, { "epoch": 0.6119762820095133, "grad_norm": 1.075799158466481, "learning_rate": 1.7900305239394134e-06, "loss": 0.1962, "step": 1174 }, { "epoch": 0.6124975565257054, "grad_norm": 1.0272437821538145, "learning_rate": 1.785898421832898e-06, "loss": 0.1991, "step": 1175 }, { "epoch": 0.6130188310418975, "grad_norm": 0.9289145434032438, "learning_rate": 1.7817684427894016e-06, "loss": 0.175, "step": 1176 }, { "epoch": 0.6135401055580896, "grad_norm": 1.0215094809266112, "learning_rate": 1.7776405990875761e-06, "loss": 0.2054, "step": 1177 }, { "epoch": 0.6140613800742816, "grad_norm": 1.006967541928481, "learning_rate": 1.7735149029997273e-06, "loss": 0.1742, "step": 1178 }, { "epoch": 0.6145826545904737, "grad_norm": 0.9941973792423793, "learning_rate": 1.7693913667917757e-06, "loss": 0.1904, "step": 1179 }, { "epoch": 0.6151039291066658, "grad_norm": 1.0478055612749158, "learning_rate": 1.7652700027232184e-06, "loss": 0.19, "step": 1180 }, { "epoch": 0.6156252036228579, "grad_norm": 0.9806980882496268, "learning_rate": 1.7611508230470963e-06, "loss": 0.1918, "step": 1181 }, { "epoch": 0.61614647813905, "grad_norm": 0.9472007679518236, "learning_rate": 1.7570338400099569e-06, "loss": 0.1811, "step": 1182 }, { "epoch": 0.6166677526552421, "grad_norm": 1.0323067946116886, "learning_rate": 1.7529190658518142e-06, "loss": 0.1946, "step": 1183 }, { "epoch": 0.6171890271714342, "grad_norm": 0.9486288320757825, "learning_rate": 1.7488065128061187e-06, "loss": 0.184, "step": 1184 }, { "epoch": 0.6177103016876262, "grad_norm": 0.9846912548005768, "learning_rate": 1.7446961930997126e-06, "loss": 0.1832, "step": 1185 }, { "epoch": 0.6182315762038183, "grad_norm": 1.0191390866118366, "learning_rate": 1.7405881189528024e-06, "loss": 0.2049, "step": 1186 }, { "epoch": 0.6187528507200104, "grad_norm": 1.0498041064818562, "learning_rate": 1.7364823025789184e-06, "loss": 0.195, "step": 1187 }, { "epoch": 0.6192741252362025, "grad_norm": 1.004045862782643, "learning_rate": 1.732378756184875e-06, "loss": 0.1893, "step": 1188 }, { "epoch": 0.6197953997523946, "grad_norm": 0.9919733197061383, "learning_rate": 1.7282774919707406e-06, "loss": 0.1851, "step": 1189 }, { "epoch": 0.6203166742685867, "grad_norm": 1.0767337921705271, "learning_rate": 1.7241785221297984e-06, "loss": 0.1955, "step": 1190 }, { "epoch": 0.6208379487847788, "grad_norm": 1.0235629614034516, "learning_rate": 1.7200818588485088e-06, "loss": 0.1835, "step": 1191 }, { "epoch": 0.6213592233009708, "grad_norm": 1.0357338095764963, "learning_rate": 1.715987514306476e-06, "loss": 0.192, "step": 1192 }, { "epoch": 0.6218804978171629, "grad_norm": 1.0642681157967024, "learning_rate": 1.7118955006764116e-06, "loss": 0.1832, "step": 1193 }, { "epoch": 0.622401772333355, "grad_norm": 0.9887558170762537, "learning_rate": 1.707805830124093e-06, "loss": 0.1851, "step": 1194 }, { "epoch": 0.6229230468495471, "grad_norm": 0.9869295102295927, "learning_rate": 1.703718514808336e-06, "loss": 0.1854, "step": 1195 }, { "epoch": 0.6234443213657392, "grad_norm": 1.0395086127510296, "learning_rate": 1.6996335668809515e-06, "loss": 0.1872, "step": 1196 }, { "epoch": 0.6239655958819313, "grad_norm": 1.001948089481037, "learning_rate": 1.6955509984867136e-06, "loss": 0.2012, "step": 1197 }, { "epoch": 0.6244868703981235, "grad_norm": 1.0298879796915752, "learning_rate": 1.691470821763322e-06, "loss": 0.1918, "step": 1198 }, { "epoch": 0.6250081449143156, "grad_norm": 1.0513920418408271, "learning_rate": 1.6873930488413628e-06, "loss": 0.2038, "step": 1199 }, { "epoch": 0.6255294194305075, "grad_norm": 0.9574789209372537, "learning_rate": 1.6833176918442796e-06, "loss": 0.172, "step": 1200 }, { "epoch": 0.6260506939466997, "grad_norm": 0.9811702658841635, "learning_rate": 1.6792447628883318e-06, "loss": 0.1931, "step": 1201 }, { "epoch": 0.6265719684628918, "grad_norm": 0.9521761233434269, "learning_rate": 1.6751742740825583e-06, "loss": 0.1859, "step": 1202 }, { "epoch": 0.6270932429790839, "grad_norm": 0.9755265815503429, "learning_rate": 1.6711062375287451e-06, "loss": 0.1886, "step": 1203 }, { "epoch": 0.627614517495276, "grad_norm": 0.9957314201963082, "learning_rate": 1.6670406653213883e-06, "loss": 0.1879, "step": 1204 }, { "epoch": 0.6281357920114681, "grad_norm": 0.985722885510226, "learning_rate": 1.6629775695476551e-06, "loss": 0.1888, "step": 1205 }, { "epoch": 0.6286570665276602, "grad_norm": 0.9638328517939708, "learning_rate": 1.6589169622873524e-06, "loss": 0.1768, "step": 1206 }, { "epoch": 0.6291783410438522, "grad_norm": 1.0202343212469378, "learning_rate": 1.654858855612886e-06, "loss": 0.1854, "step": 1207 }, { "epoch": 0.6296996155600443, "grad_norm": 1.0969397772782907, "learning_rate": 1.6508032615892294e-06, "loss": 0.1941, "step": 1208 }, { "epoch": 0.6302208900762364, "grad_norm": 1.1023320539818071, "learning_rate": 1.6467501922738862e-06, "loss": 0.2016, "step": 1209 }, { "epoch": 0.6307421645924285, "grad_norm": 1.0065554440090716, "learning_rate": 1.6426996597168517e-06, "loss": 0.2032, "step": 1210 }, { "epoch": 0.6312634391086206, "grad_norm": 0.9810314552742739, "learning_rate": 1.6386516759605813e-06, "loss": 0.2042, "step": 1211 }, { "epoch": 0.6317847136248127, "grad_norm": 1.0224511831624452, "learning_rate": 1.6346062530399525e-06, "loss": 0.1806, "step": 1212 }, { "epoch": 0.6323059881410048, "grad_norm": 1.0894152231671272, "learning_rate": 1.6305634029822267e-06, "loss": 0.1953, "step": 1213 }, { "epoch": 0.6328272626571968, "grad_norm": 1.0801698994360478, "learning_rate": 1.6265231378070197e-06, "loss": 0.1896, "step": 1214 }, { "epoch": 0.6333485371733889, "grad_norm": 1.0133080206136096, "learning_rate": 1.6224854695262616e-06, "loss": 0.1931, "step": 1215 }, { "epoch": 0.633869811689581, "grad_norm": 1.0103400995536298, "learning_rate": 1.6184504101441584e-06, "loss": 0.1796, "step": 1216 }, { "epoch": 0.6343910862057731, "grad_norm": 0.9640891691753638, "learning_rate": 1.614417971657164e-06, "loss": 0.1785, "step": 1217 }, { "epoch": 0.6349123607219652, "grad_norm": 1.0045197351637396, "learning_rate": 1.6103881660539369e-06, "loss": 0.1989, "step": 1218 }, { "epoch": 0.6354336352381573, "grad_norm": 1.0001304471152574, "learning_rate": 1.6063610053153106e-06, "loss": 0.1953, "step": 1219 }, { "epoch": 0.6359549097543494, "grad_norm": 1.046094461507422, "learning_rate": 1.6023365014142544e-06, "loss": 0.1887, "step": 1220 }, { "epoch": 0.6364761842705414, "grad_norm": 1.0184247756475113, "learning_rate": 1.5983146663158368e-06, "loss": 0.1912, "step": 1221 }, { "epoch": 0.6369974587867335, "grad_norm": 0.9979630692334167, "learning_rate": 1.5942955119771942e-06, "loss": 0.1931, "step": 1222 }, { "epoch": 0.6375187333029256, "grad_norm": 1.0610360668546952, "learning_rate": 1.5902790503474928e-06, "loss": 0.2011, "step": 1223 }, { "epoch": 0.6380400078191177, "grad_norm": 1.0520291292243582, "learning_rate": 1.586265293367891e-06, "loss": 0.1898, "step": 1224 }, { "epoch": 0.6385612823353098, "grad_norm": 1.1004751904858854, "learning_rate": 1.5822542529715084e-06, "loss": 0.193, "step": 1225 }, { "epoch": 0.639082556851502, "grad_norm": 0.9426792149238837, "learning_rate": 1.578245941083389e-06, "loss": 0.1713, "step": 1226 }, { "epoch": 0.639603831367694, "grad_norm": 1.0373871973414508, "learning_rate": 1.57424036962046e-06, "loss": 0.1879, "step": 1227 }, { "epoch": 0.640125105883886, "grad_norm": 0.9881065573845652, "learning_rate": 1.5702375504915062e-06, "loss": 0.1802, "step": 1228 }, { "epoch": 0.6406463804000782, "grad_norm": 1.0347702489584836, "learning_rate": 1.5662374955971268e-06, "loss": 0.1995, "step": 1229 }, { "epoch": 0.6411676549162703, "grad_norm": 1.0019968601970073, "learning_rate": 1.562240216829704e-06, "loss": 0.1839, "step": 1230 }, { "epoch": 0.6416889294324624, "grad_norm": 1.0410722029026276, "learning_rate": 1.5582457260733664e-06, "loss": 0.2073, "step": 1231 }, { "epoch": 0.6422102039486545, "grad_norm": 0.9942687899851895, "learning_rate": 1.5542540352039525e-06, "loss": 0.1994, "step": 1232 }, { "epoch": 0.6427314784648466, "grad_norm": 1.0482151628235123, "learning_rate": 1.550265156088978e-06, "loss": 0.1987, "step": 1233 }, { "epoch": 0.6432527529810387, "grad_norm": 0.9939264455923658, "learning_rate": 1.5462791005875994e-06, "loss": 0.1968, "step": 1234 }, { "epoch": 0.6437740274972308, "grad_norm": 1.0121688496454913, "learning_rate": 1.542295880550575e-06, "loss": 0.1856, "step": 1235 }, { "epoch": 0.6442953020134228, "grad_norm": 0.9801231914876962, "learning_rate": 1.5383155078202377e-06, "loss": 0.1789, "step": 1236 }, { "epoch": 0.6448165765296149, "grad_norm": 0.950154507161884, "learning_rate": 1.534337994230453e-06, "loss": 0.1833, "step": 1237 }, { "epoch": 0.645337851045807, "grad_norm": 1.065207980128952, "learning_rate": 1.5303633516065852e-06, "loss": 0.2029, "step": 1238 }, { "epoch": 0.6458591255619991, "grad_norm": 1.0117872135317376, "learning_rate": 1.5263915917654654e-06, "loss": 0.1895, "step": 1239 }, { "epoch": 0.6463804000781912, "grad_norm": 0.9920726473045625, "learning_rate": 1.5224227265153512e-06, "loss": 0.1843, "step": 1240 }, { "epoch": 0.6469016745943833, "grad_norm": 0.9517933928073986, "learning_rate": 1.5184567676558965e-06, "loss": 0.177, "step": 1241 }, { "epoch": 0.6474229491105754, "grad_norm": 1.0080575561411724, "learning_rate": 1.5144937269781142e-06, "loss": 0.1902, "step": 1242 }, { "epoch": 0.6479442236267674, "grad_norm": 1.0107231835819686, "learning_rate": 1.5105336162643403e-06, "loss": 0.1879, "step": 1243 }, { "epoch": 0.6484654981429595, "grad_norm": 1.0925930567014153, "learning_rate": 1.5065764472882e-06, "loss": 0.1991, "step": 1244 }, { "epoch": 0.6489867726591516, "grad_norm": 1.0814704323195263, "learning_rate": 1.5026222318145745e-06, "loss": 0.1776, "step": 1245 }, { "epoch": 0.6495080471753437, "grad_norm": 1.0148219928164357, "learning_rate": 1.4986709815995604e-06, "loss": 0.1811, "step": 1246 }, { "epoch": 0.6500293216915358, "grad_norm": 0.9554614027381633, "learning_rate": 1.494722708390442e-06, "loss": 0.1829, "step": 1247 }, { "epoch": 0.6505505962077279, "grad_norm": 1.0642386717671874, "learning_rate": 1.4907774239256517e-06, "loss": 0.2026, "step": 1248 }, { "epoch": 0.65107187072392, "grad_norm": 1.0345865561234546, "learning_rate": 1.486835139934734e-06, "loss": 0.1875, "step": 1249 }, { "epoch": 0.651593145240112, "grad_norm": 0.9737287029963221, "learning_rate": 1.4828958681383163e-06, "loss": 0.1806, "step": 1250 }, { "epoch": 0.6521144197563041, "grad_norm": 1.0602613532722558, "learning_rate": 1.4789596202480678e-06, "loss": 0.1954, "step": 1251 }, { "epoch": 0.6526356942724962, "grad_norm": 1.0691301072685224, "learning_rate": 1.475026407966669e-06, "loss": 0.1919, "step": 1252 }, { "epoch": 0.6531569687886883, "grad_norm": 0.9584321444229563, "learning_rate": 1.4710962429877763e-06, "loss": 0.1794, "step": 1253 }, { "epoch": 0.6536782433048804, "grad_norm": 0.9266776818848917, "learning_rate": 1.4671691369959826e-06, "loss": 0.1858, "step": 1254 }, { "epoch": 0.6541995178210726, "grad_norm": 0.9960819403397485, "learning_rate": 1.4632451016667899e-06, "loss": 0.1879, "step": 1255 }, { "epoch": 0.6547207923372647, "grad_norm": 0.9468971247993424, "learning_rate": 1.4593241486665705e-06, "loss": 0.17, "step": 1256 }, { "epoch": 0.6552420668534567, "grad_norm": 1.007504071715992, "learning_rate": 1.4554062896525295e-06, "loss": 0.1882, "step": 1257 }, { "epoch": 0.6557633413696488, "grad_norm": 1.0514437808565542, "learning_rate": 1.4514915362726773e-06, "loss": 0.196, "step": 1258 }, { "epoch": 0.6562846158858409, "grad_norm": 0.950780660352539, "learning_rate": 1.447579900165792e-06, "loss": 0.1737, "step": 1259 }, { "epoch": 0.656805890402033, "grad_norm": 0.996415800183811, "learning_rate": 1.4436713929613771e-06, "loss": 0.1839, "step": 1260 }, { "epoch": 0.6573271649182251, "grad_norm": 0.9380132689198648, "learning_rate": 1.4397660262796411e-06, "loss": 0.1874, "step": 1261 }, { "epoch": 0.6578484394344172, "grad_norm": 0.9607994317507901, "learning_rate": 1.4358638117314521e-06, "loss": 0.1735, "step": 1262 }, { "epoch": 0.6583697139506093, "grad_norm": 1.0481774247515332, "learning_rate": 1.4319647609183058e-06, "loss": 0.1877, "step": 1263 }, { "epoch": 0.6588909884668013, "grad_norm": 1.0605765278321324, "learning_rate": 1.428068885432296e-06, "loss": 0.1832, "step": 1264 }, { "epoch": 0.6594122629829934, "grad_norm": 0.935028645038226, "learning_rate": 1.4241761968560703e-06, "loss": 0.1707, "step": 1265 }, { "epoch": 0.6599335374991855, "grad_norm": 1.088512403659691, "learning_rate": 1.4202867067628068e-06, "loss": 0.1935, "step": 1266 }, { "epoch": 0.6604548120153776, "grad_norm": 1.1050024584482743, "learning_rate": 1.4164004267161718e-06, "loss": 0.1972, "step": 1267 }, { "epoch": 0.6609760865315697, "grad_norm": 1.082962753309766, "learning_rate": 1.4125173682702869e-06, "loss": 0.2, "step": 1268 }, { "epoch": 0.6614973610477618, "grad_norm": 1.0872379502189071, "learning_rate": 1.4086375429696987e-06, "loss": 0.184, "step": 1269 }, { "epoch": 0.6620186355639539, "grad_norm": 1.0164027128987314, "learning_rate": 1.4047609623493396e-06, "loss": 0.1728, "step": 1270 }, { "epoch": 0.662539910080146, "grad_norm": 1.027205814045248, "learning_rate": 1.400887637934495e-06, "loss": 0.1812, "step": 1271 }, { "epoch": 0.663061184596338, "grad_norm": 0.9821499317534202, "learning_rate": 1.3970175812407705e-06, "loss": 0.1807, "step": 1272 }, { "epoch": 0.6635824591125301, "grad_norm": 0.9665906056557528, "learning_rate": 1.393150803774055e-06, "loss": 0.1776, "step": 1273 }, { "epoch": 0.6641037336287222, "grad_norm": 1.035799794747972, "learning_rate": 1.3892873170304913e-06, "loss": 0.192, "step": 1274 }, { "epoch": 0.6646250081449143, "grad_norm": 1.0872770993767031, "learning_rate": 1.3854271324964353e-06, "loss": 0.2011, "step": 1275 }, { "epoch": 0.6651462826611064, "grad_norm": 0.9635813935447365, "learning_rate": 1.3815702616484262e-06, "loss": 0.1862, "step": 1276 }, { "epoch": 0.6656675571772985, "grad_norm": 1.002332685225423, "learning_rate": 1.3777167159531532e-06, "loss": 0.1731, "step": 1277 }, { "epoch": 0.6661888316934906, "grad_norm": 0.9888613603357296, "learning_rate": 1.3738665068674176e-06, "loss": 0.1947, "step": 1278 }, { "epoch": 0.6667101062096826, "grad_norm": 0.9740172319261662, "learning_rate": 1.3700196458381012e-06, "loss": 0.1804, "step": 1279 }, { "epoch": 0.6672313807258747, "grad_norm": 1.0305686003951755, "learning_rate": 1.3661761443021316e-06, "loss": 0.1943, "step": 1280 }, { "epoch": 0.6677526552420668, "grad_norm": 0.9511757362279438, "learning_rate": 1.36233601368645e-06, "loss": 0.1842, "step": 1281 }, { "epoch": 0.6682739297582589, "grad_norm": 1.013556966120342, "learning_rate": 1.3584992654079742e-06, "loss": 0.1867, "step": 1282 }, { "epoch": 0.668795204274451, "grad_norm": 0.9860617236699549, "learning_rate": 1.3546659108735666e-06, "loss": 0.178, "step": 1283 }, { "epoch": 0.6693164787906432, "grad_norm": 1.0204468647005256, "learning_rate": 1.3508359614799998e-06, "loss": 0.1867, "step": 1284 }, { "epoch": 0.6698377533068353, "grad_norm": 1.005403169634692, "learning_rate": 1.3470094286139213e-06, "loss": 0.2016, "step": 1285 }, { "epoch": 0.6703590278230273, "grad_norm": 0.9842739318151207, "learning_rate": 1.3431863236518242e-06, "loss": 0.1878, "step": 1286 }, { "epoch": 0.6708803023392194, "grad_norm": 0.9967671972856613, "learning_rate": 1.3393666579600078e-06, "loss": 0.1778, "step": 1287 }, { "epoch": 0.6714015768554115, "grad_norm": 1.0979134267108357, "learning_rate": 1.3355504428945464e-06, "loss": 0.1897, "step": 1288 }, { "epoch": 0.6719228513716036, "grad_norm": 1.0286269914897095, "learning_rate": 1.3317376898012573e-06, "loss": 0.1864, "step": 1289 }, { "epoch": 0.6724441258877957, "grad_norm": 1.1079206380471165, "learning_rate": 1.3279284100156633e-06, "loss": 0.2057, "step": 1290 }, { "epoch": 0.6729654004039878, "grad_norm": 0.9777662583461869, "learning_rate": 1.32412261486296e-06, "loss": 0.1848, "step": 1291 }, { "epoch": 0.6734866749201799, "grad_norm": 0.9233332390149452, "learning_rate": 1.3203203156579875e-06, "loss": 0.1712, "step": 1292 }, { "epoch": 0.6740079494363719, "grad_norm": 0.9886048566457575, "learning_rate": 1.316521523705185e-06, "loss": 0.1836, "step": 1293 }, { "epoch": 0.674529223952564, "grad_norm": 0.9565948337244332, "learning_rate": 1.3127262502985722e-06, "loss": 0.1837, "step": 1294 }, { "epoch": 0.6750504984687561, "grad_norm": 1.0257962365139226, "learning_rate": 1.3089345067217025e-06, "loss": 0.1881, "step": 1295 }, { "epoch": 0.6755717729849482, "grad_norm": 0.9891148901824458, "learning_rate": 1.3051463042476358e-06, "loss": 0.1729, "step": 1296 }, { "epoch": 0.6760930475011403, "grad_norm": 0.9216274264750317, "learning_rate": 1.301361654138909e-06, "loss": 0.1734, "step": 1297 }, { "epoch": 0.6766143220173324, "grad_norm": 1.1038391115681303, "learning_rate": 1.297580567647489e-06, "loss": 0.1936, "step": 1298 }, { "epoch": 0.6771355965335245, "grad_norm": 1.0276297679110642, "learning_rate": 1.2938030560147558e-06, "loss": 0.1824, "step": 1299 }, { "epoch": 0.6776568710497165, "grad_norm": 1.04852485176481, "learning_rate": 1.2900291304714568e-06, "loss": 0.1937, "step": 1300 }, { "epoch": 0.6781781455659086, "grad_norm": 1.0424297328699568, "learning_rate": 1.2862588022376782e-06, "loss": 0.1999, "step": 1301 }, { "epoch": 0.6786994200821007, "grad_norm": 0.9759091830528064, "learning_rate": 1.2824920825228132e-06, "loss": 0.1801, "step": 1302 }, { "epoch": 0.6792206945982928, "grad_norm": 1.0620848679730106, "learning_rate": 1.278728982525525e-06, "loss": 0.2058, "step": 1303 }, { "epoch": 0.6797419691144849, "grad_norm": 1.0090625327508094, "learning_rate": 1.2749695134337149e-06, "loss": 0.1842, "step": 1304 }, { "epoch": 0.680263243630677, "grad_norm": 0.9242576575771541, "learning_rate": 1.2712136864244917e-06, "loss": 0.1608, "step": 1305 }, { "epoch": 0.6807845181468691, "grad_norm": 0.9933430990539174, "learning_rate": 1.2674615126641313e-06, "loss": 0.1855, "step": 1306 }, { "epoch": 0.6813057926630612, "grad_norm": 0.9469833404696006, "learning_rate": 1.2637130033080541e-06, "loss": 0.1778, "step": 1307 }, { "epoch": 0.6818270671792532, "grad_norm": 1.014407572677528, "learning_rate": 1.2599681695007822e-06, "loss": 0.1908, "step": 1308 }, { "epoch": 0.6823483416954453, "grad_norm": 1.0462359805351626, "learning_rate": 1.2562270223759104e-06, "loss": 0.1896, "step": 1309 }, { "epoch": 0.6828696162116374, "grad_norm": 0.9820482321193924, "learning_rate": 1.2524895730560755e-06, "loss": 0.188, "step": 1310 }, { "epoch": 0.6833908907278295, "grad_norm": 0.9857078784821011, "learning_rate": 1.2487558326529177e-06, "loss": 0.1936, "step": 1311 }, { "epoch": 0.6839121652440217, "grad_norm": 0.9560315476350769, "learning_rate": 1.2450258122670508e-06, "loss": 0.1765, "step": 1312 }, { "epoch": 0.6844334397602138, "grad_norm": 0.9715559877348052, "learning_rate": 1.2412995229880295e-06, "loss": 0.1826, "step": 1313 }, { "epoch": 0.6849547142764059, "grad_norm": 1.036183134119075, "learning_rate": 1.2375769758943148e-06, "loss": 0.1991, "step": 1314 }, { "epoch": 0.6854759887925979, "grad_norm": 1.0175228043953535, "learning_rate": 1.2338581820532442e-06, "loss": 0.1942, "step": 1315 }, { "epoch": 0.68599726330879, "grad_norm": 0.9369199956509254, "learning_rate": 1.2301431525209942e-06, "loss": 0.1787, "step": 1316 }, { "epoch": 0.6865185378249821, "grad_norm": 0.9634845863264452, "learning_rate": 1.2264318983425498e-06, "loss": 0.1861, "step": 1317 }, { "epoch": 0.6870398123411742, "grad_norm": 0.9397809158825329, "learning_rate": 1.222724430551674e-06, "loss": 0.1753, "step": 1318 }, { "epoch": 0.6875610868573663, "grad_norm": 0.9675975576563132, "learning_rate": 1.2190207601708707e-06, "loss": 0.1778, "step": 1319 }, { "epoch": 0.6880823613735584, "grad_norm": 1.0333022331875106, "learning_rate": 1.215320898211354e-06, "loss": 0.1705, "step": 1320 }, { "epoch": 0.6886036358897505, "grad_norm": 0.9675011004283072, "learning_rate": 1.2116248556730151e-06, "loss": 0.1853, "step": 1321 }, { "epoch": 0.6891249104059425, "grad_norm": 1.2644809920198241, "learning_rate": 1.207932643544392e-06, "loss": 0.1869, "step": 1322 }, { "epoch": 0.6896461849221346, "grad_norm": 0.9943646922901057, "learning_rate": 1.2042442728026325e-06, "loss": 0.1817, "step": 1323 }, { "epoch": 0.6901674594383267, "grad_norm": 1.0673514818907925, "learning_rate": 1.200559754413464e-06, "loss": 0.2027, "step": 1324 }, { "epoch": 0.6906887339545188, "grad_norm": 1.0287772854680546, "learning_rate": 1.1968790993311613e-06, "loss": 0.1875, "step": 1325 }, { "epoch": 0.6912100084707109, "grad_norm": 1.0051001942800528, "learning_rate": 1.193202318498512e-06, "loss": 0.1863, "step": 1326 }, { "epoch": 0.691731282986903, "grad_norm": 0.9998296525007605, "learning_rate": 1.1895294228467886e-06, "loss": 0.1888, "step": 1327 }, { "epoch": 0.6922525575030951, "grad_norm": 1.0858913140394213, "learning_rate": 1.1858604232957096e-06, "loss": 0.1849, "step": 1328 }, { "epoch": 0.6927738320192871, "grad_norm": 1.0530841322620519, "learning_rate": 1.18219533075341e-06, "loss": 0.1911, "step": 1329 }, { "epoch": 0.6932951065354792, "grad_norm": 0.9766766830693403, "learning_rate": 1.1785341561164135e-06, "loss": 0.1748, "step": 1330 }, { "epoch": 0.6938163810516713, "grad_norm": 0.9730885083934125, "learning_rate": 1.1748769102695882e-06, "loss": 0.1812, "step": 1331 }, { "epoch": 0.6943376555678634, "grad_norm": 0.9801328173252677, "learning_rate": 1.1712236040861278e-06, "loss": 0.1847, "step": 1332 }, { "epoch": 0.6948589300840555, "grad_norm": 1.0299850610113763, "learning_rate": 1.1675742484275132e-06, "loss": 0.1868, "step": 1333 }, { "epoch": 0.6953802046002476, "grad_norm": 0.9980716124589071, "learning_rate": 1.1639288541434745e-06, "loss": 0.1892, "step": 1334 }, { "epoch": 0.6959014791164397, "grad_norm": 0.9331877883209707, "learning_rate": 1.160287432071971e-06, "loss": 0.1736, "step": 1335 }, { "epoch": 0.6964227536326317, "grad_norm": 0.9134410030075388, "learning_rate": 1.1566499930391484e-06, "loss": 0.1714, "step": 1336 }, { "epoch": 0.6969440281488238, "grad_norm": 1.0091365360794284, "learning_rate": 1.153016547859311e-06, "loss": 0.1859, "step": 1337 }, { "epoch": 0.6974653026650159, "grad_norm": 1.060667791878475, "learning_rate": 1.1493871073348926e-06, "loss": 0.1839, "step": 1338 }, { "epoch": 0.697986577181208, "grad_norm": 0.9791411501305487, "learning_rate": 1.1457616822564145e-06, "loss": 0.1862, "step": 1339 }, { "epoch": 0.6985078516974002, "grad_norm": 0.9968145078955214, "learning_rate": 1.1421402834024662e-06, "loss": 0.1773, "step": 1340 }, { "epoch": 0.6990291262135923, "grad_norm": 0.9968965667803882, "learning_rate": 1.1385229215396638e-06, "loss": 0.1854, "step": 1341 }, { "epoch": 0.6995504007297844, "grad_norm": 0.9841396870056438, "learning_rate": 1.1349096074226205e-06, "loss": 0.1813, "step": 1342 }, { "epoch": 0.7000716752459765, "grad_norm": 1.0163737558040131, "learning_rate": 1.1313003517939189e-06, "loss": 0.1893, "step": 1343 }, { "epoch": 0.7005929497621685, "grad_norm": 0.9657072281019743, "learning_rate": 1.127695165384072e-06, "loss": 0.1781, "step": 1344 }, { "epoch": 0.7011142242783606, "grad_norm": 1.0059536067719912, "learning_rate": 1.1240940589114953e-06, "loss": 0.1937, "step": 1345 }, { "epoch": 0.7016354987945527, "grad_norm": 1.0220451516359383, "learning_rate": 1.1204970430824782e-06, "loss": 0.188, "step": 1346 }, { "epoch": 0.7021567733107448, "grad_norm": 1.2826564607452682, "learning_rate": 1.1169041285911411e-06, "loss": 0.1978, "step": 1347 }, { "epoch": 0.7026780478269369, "grad_norm": 0.9652970950610329, "learning_rate": 1.113315326119418e-06, "loss": 0.1869, "step": 1348 }, { "epoch": 0.703199322343129, "grad_norm": 1.0123161734090071, "learning_rate": 1.1097306463370142e-06, "loss": 0.1939, "step": 1349 }, { "epoch": 0.7037205968593211, "grad_norm": 0.9817702021580154, "learning_rate": 1.1061500999013771e-06, "loss": 0.1764, "step": 1350 }, { "epoch": 0.7042418713755131, "grad_norm": 0.9729692552593922, "learning_rate": 1.1025736974576693e-06, "loss": 0.1818, "step": 1351 }, { "epoch": 0.7047631458917052, "grad_norm": 0.9431936624148214, "learning_rate": 1.0990014496387296e-06, "loss": 0.1705, "step": 1352 }, { "epoch": 0.7052844204078973, "grad_norm": 1.0042776662576223, "learning_rate": 1.0954333670650461e-06, "loss": 0.1925, "step": 1353 }, { "epoch": 0.7058056949240894, "grad_norm": 1.0977586396369685, "learning_rate": 1.0918694603447221e-06, "loss": 0.1837, "step": 1354 }, { "epoch": 0.7063269694402815, "grad_norm": 1.0168679210111984, "learning_rate": 1.0883097400734494e-06, "loss": 0.1962, "step": 1355 }, { "epoch": 0.7068482439564736, "grad_norm": 1.0292920840070696, "learning_rate": 1.0847542168344695e-06, "loss": 0.1801, "step": 1356 }, { "epoch": 0.7073695184726657, "grad_norm": 1.0207478308048166, "learning_rate": 1.0812029011985472e-06, "loss": 0.1976, "step": 1357 }, { "epoch": 0.7078907929888577, "grad_norm": 1.0183283198559208, "learning_rate": 1.0776558037239385e-06, "loss": 0.1817, "step": 1358 }, { "epoch": 0.7084120675050498, "grad_norm": 1.0247674891113325, "learning_rate": 1.0741129349563567e-06, "loss": 0.1893, "step": 1359 }, { "epoch": 0.7089333420212419, "grad_norm": 0.9495105171569985, "learning_rate": 1.0705743054289464e-06, "loss": 0.1691, "step": 1360 }, { "epoch": 0.709454616537434, "grad_norm": 0.997066256080277, "learning_rate": 1.0670399256622455e-06, "loss": 0.1811, "step": 1361 }, { "epoch": 0.7099758910536261, "grad_norm": 1.0041061371700566, "learning_rate": 1.0635098061641572e-06, "loss": 0.1951, "step": 1362 }, { "epoch": 0.7104971655698182, "grad_norm": 0.9971517329809013, "learning_rate": 1.0599839574299217e-06, "loss": 0.1877, "step": 1363 }, { "epoch": 0.7110184400860103, "grad_norm": 0.933411114504205, "learning_rate": 1.056462389942079e-06, "loss": 0.1644, "step": 1364 }, { "epoch": 0.7115397146022023, "grad_norm": 1.0074540227246198, "learning_rate": 1.052945114170441e-06, "loss": 0.1759, "step": 1365 }, { "epoch": 0.7120609891183944, "grad_norm": 0.9771710556827581, "learning_rate": 1.0494321405720627e-06, "loss": 0.1829, "step": 1366 }, { "epoch": 0.7125822636345865, "grad_norm": 1.0241608798558146, "learning_rate": 1.045923479591204e-06, "loss": 0.1684, "step": 1367 }, { "epoch": 0.7131035381507786, "grad_norm": 1.0344869658445373, "learning_rate": 1.042419141659308e-06, "loss": 0.1912, "step": 1368 }, { "epoch": 0.7136248126669708, "grad_norm": 1.0488873275095165, "learning_rate": 1.038919137194962e-06, "loss": 0.1941, "step": 1369 }, { "epoch": 0.7141460871831629, "grad_norm": 0.9613500411786476, "learning_rate": 1.0354234766038696e-06, "loss": 0.1791, "step": 1370 }, { "epoch": 0.714667361699355, "grad_norm": 0.9873531305786635, "learning_rate": 1.0319321702788234e-06, "loss": 0.1962, "step": 1371 }, { "epoch": 0.715188636215547, "grad_norm": 1.0233494605971423, "learning_rate": 1.028445228599665e-06, "loss": 0.1905, "step": 1372 }, { "epoch": 0.7157099107317391, "grad_norm": 1.0121219803666412, "learning_rate": 1.0249626619332642e-06, "loss": 0.1848, "step": 1373 }, { "epoch": 0.7162311852479312, "grad_norm": 1.0829372012716103, "learning_rate": 1.0214844806334817e-06, "loss": 0.1794, "step": 1374 }, { "epoch": 0.7167524597641233, "grad_norm": 1.022954391315338, "learning_rate": 1.0180106950411397e-06, "loss": 0.1918, "step": 1375 }, { "epoch": 0.7172737342803154, "grad_norm": 0.9591593243680496, "learning_rate": 1.0145413154839945e-06, "loss": 0.1701, "step": 1376 }, { "epoch": 0.7177950087965075, "grad_norm": 1.0345938222382716, "learning_rate": 1.0110763522767e-06, "loss": 0.1876, "step": 1377 }, { "epoch": 0.7183162833126996, "grad_norm": 0.9570590329423115, "learning_rate": 1.0076158157207801e-06, "loss": 0.1768, "step": 1378 }, { "epoch": 0.7188375578288917, "grad_norm": 1.0197234887133053, "learning_rate": 1.0041597161046025e-06, "loss": 0.1765, "step": 1379 }, { "epoch": 0.7193588323450837, "grad_norm": 1.0473051909557438, "learning_rate": 1.0007080637033359e-06, "loss": 0.192, "step": 1380 }, { "epoch": 0.7198801068612758, "grad_norm": 0.9992399137729582, "learning_rate": 9.972608687789346e-07, "loss": 0.1785, "step": 1381 }, { "epoch": 0.7204013813774679, "grad_norm": 0.968324575355683, "learning_rate": 9.938181415800966e-07, "loss": 0.1815, "step": 1382 }, { "epoch": 0.72092265589366, "grad_norm": 0.9070465078823997, "learning_rate": 9.903798923422369e-07, "loss": 0.1688, "step": 1383 }, { "epoch": 0.7214439304098521, "grad_norm": 1.0229224399463046, "learning_rate": 9.869461312874603e-07, "loss": 0.1782, "step": 1384 }, { "epoch": 0.7219652049260442, "grad_norm": 0.983129325811245, "learning_rate": 9.835168686245252e-07, "loss": 0.1847, "step": 1385 }, { "epoch": 0.7224864794422363, "grad_norm": 0.9887481799923892, "learning_rate": 9.800921145488171e-07, "loss": 0.1933, "step": 1386 }, { "epoch": 0.7230077539584283, "grad_norm": 1.0022753881190445, "learning_rate": 9.766718792423159e-07, "loss": 0.1772, "step": 1387 }, { "epoch": 0.7235290284746204, "grad_norm": 0.9848977927448296, "learning_rate": 9.732561728735698e-07, "loss": 0.1914, "step": 1388 }, { "epoch": 0.7240503029908125, "grad_norm": 1.0227529951142291, "learning_rate": 9.698450055976596e-07, "loss": 0.1804, "step": 1389 }, { "epoch": 0.7245715775070046, "grad_norm": 0.9645113903876174, "learning_rate": 9.664383875561726e-07, "loss": 0.1887, "step": 1390 }, { "epoch": 0.7250928520231967, "grad_norm": 0.9861088156398282, "learning_rate": 9.630363288771689e-07, "loss": 0.1806, "step": 1391 }, { "epoch": 0.7256141265393888, "grad_norm": 0.9742197798488345, "learning_rate": 9.596388396751567e-07, "loss": 0.1752, "step": 1392 }, { "epoch": 0.7261354010555809, "grad_norm": 1.0016811389435136, "learning_rate": 9.562459300510562e-07, "loss": 0.1963, "step": 1393 }, { "epoch": 0.7266566755717729, "grad_norm": 0.9925381490501447, "learning_rate": 9.528576100921736e-07, "loss": 0.1914, "step": 1394 }, { "epoch": 0.727177950087965, "grad_norm": 0.974247719423833, "learning_rate": 9.494738898721681e-07, "loss": 0.1888, "step": 1395 }, { "epoch": 0.7276992246041571, "grad_norm": 0.937358269563298, "learning_rate": 9.460947794510264e-07, "loss": 0.199, "step": 1396 }, { "epoch": 0.7282204991203493, "grad_norm": 0.9831205201102619, "learning_rate": 9.427202888750278e-07, "loss": 0.1906, "step": 1397 }, { "epoch": 0.7287417736365414, "grad_norm": 0.9354371339815937, "learning_rate": 9.393504281767163e-07, "loss": 0.1728, "step": 1398 }, { "epoch": 0.7292630481527335, "grad_norm": 0.9256419264427953, "learning_rate": 9.359852073748746e-07, "loss": 0.1757, "step": 1399 }, { "epoch": 0.7297843226689256, "grad_norm": 0.9729037248406467, "learning_rate": 9.326246364744845e-07, "loss": 0.1774, "step": 1400 }, { "epoch": 0.7303055971851176, "grad_norm": 0.9810766776155514, "learning_rate": 9.292687254667096e-07, "loss": 0.1655, "step": 1401 }, { "epoch": 0.7308268717013097, "grad_norm": 1.0321001659719977, "learning_rate": 9.259174843288557e-07, "loss": 0.1858, "step": 1402 }, { "epoch": 0.7313481462175018, "grad_norm": 0.9208300723484822, "learning_rate": 9.225709230243455e-07, "loss": 0.1643, "step": 1403 }, { "epoch": 0.7318694207336939, "grad_norm": 0.9530814587607274, "learning_rate": 9.192290515026903e-07, "loss": 0.1656, "step": 1404 }, { "epoch": 0.732390695249886, "grad_norm": 1.0161767095557832, "learning_rate": 9.158918796994543e-07, "loss": 0.1882, "step": 1405 }, { "epoch": 0.7329119697660781, "grad_norm": 0.9466451255581054, "learning_rate": 9.125594175362326e-07, "loss": 0.1671, "step": 1406 }, { "epoch": 0.7334332442822702, "grad_norm": 0.9745495623256282, "learning_rate": 9.092316749206198e-07, "loss": 0.1761, "step": 1407 }, { "epoch": 0.7339545187984622, "grad_norm": 0.9933921444812821, "learning_rate": 9.059086617461723e-07, "loss": 0.199, "step": 1408 }, { "epoch": 0.7344757933146543, "grad_norm": 0.9846422781950723, "learning_rate": 9.025903878923934e-07, "loss": 0.1887, "step": 1409 }, { "epoch": 0.7349970678308464, "grad_norm": 1.0878272099998363, "learning_rate": 8.992768632246907e-07, "loss": 0.211, "step": 1410 }, { "epoch": 0.7355183423470385, "grad_norm": 0.9673466071846137, "learning_rate": 8.959680975943541e-07, "loss": 0.1875, "step": 1411 }, { "epoch": 0.7360396168632306, "grad_norm": 1.0541385848385707, "learning_rate": 8.926641008385268e-07, "loss": 0.1819, "step": 1412 }, { "epoch": 0.7365608913794227, "grad_norm": 1.0655569570932815, "learning_rate": 8.893648827801685e-07, "loss": 0.1946, "step": 1413 }, { "epoch": 0.7370821658956148, "grad_norm": 0.9489331305334471, "learning_rate": 8.860704532280373e-07, "loss": 0.1786, "step": 1414 }, { "epoch": 0.7376034404118069, "grad_norm": 0.9626760271408069, "learning_rate": 8.827808219766513e-07, "loss": 0.1729, "step": 1415 }, { "epoch": 0.7381247149279989, "grad_norm": 0.9608225208461724, "learning_rate": 8.794959988062632e-07, "loss": 0.1749, "step": 1416 }, { "epoch": 0.738645989444191, "grad_norm": 0.9933305652995392, "learning_rate": 8.762159934828337e-07, "loss": 0.1777, "step": 1417 }, { "epoch": 0.7391672639603831, "grad_norm": 0.9939192332322994, "learning_rate": 8.729408157579968e-07, "loss": 0.1918, "step": 1418 }, { "epoch": 0.7396885384765752, "grad_norm": 1.018471566040822, "learning_rate": 8.696704753690344e-07, "loss": 0.1819, "step": 1419 }, { "epoch": 0.7402098129927673, "grad_norm": 1.0614581746374432, "learning_rate": 8.664049820388492e-07, "loss": 0.1968, "step": 1420 }, { "epoch": 0.7407310875089594, "grad_norm": 1.0222909779878884, "learning_rate": 8.631443454759283e-07, "loss": 0.1915, "step": 1421 }, { "epoch": 0.7412523620251515, "grad_norm": 1.0488852837468665, "learning_rate": 8.598885753743247e-07, "loss": 0.1858, "step": 1422 }, { "epoch": 0.7417736365413435, "grad_norm": 0.9724091270831652, "learning_rate": 8.566376814136201e-07, "loss": 0.1759, "step": 1423 }, { "epoch": 0.7422949110575356, "grad_norm": 1.0285374753801173, "learning_rate": 8.533916732588996e-07, "loss": 0.1965, "step": 1424 }, { "epoch": 0.7428161855737277, "grad_norm": 1.032068131509467, "learning_rate": 8.501505605607236e-07, "loss": 0.1918, "step": 1425 }, { "epoch": 0.7433374600899199, "grad_norm": 0.9604042665606899, "learning_rate": 8.469143529550968e-07, "loss": 0.1882, "step": 1426 }, { "epoch": 0.743858734606112, "grad_norm": 0.9805324536722502, "learning_rate": 8.436830600634416e-07, "loss": 0.1785, "step": 1427 }, { "epoch": 0.7443800091223041, "grad_norm": 0.9804859313469502, "learning_rate": 8.404566914925672e-07, "loss": 0.1726, "step": 1428 }, { "epoch": 0.7449012836384962, "grad_norm": 1.0411249495355863, "learning_rate": 8.372352568346453e-07, "loss": 0.1942, "step": 1429 }, { "epoch": 0.7454225581546882, "grad_norm": 1.065983987357002, "learning_rate": 8.340187656671767e-07, "loss": 0.1898, "step": 1430 }, { "epoch": 0.7459438326708803, "grad_norm": 0.997010556310911, "learning_rate": 8.308072275529652e-07, "loss": 0.1793, "step": 1431 }, { "epoch": 0.7464651071870724, "grad_norm": 0.9721936402384388, "learning_rate": 8.27600652040089e-07, "loss": 0.195, "step": 1432 }, { "epoch": 0.7469863817032645, "grad_norm": 0.9805292308865531, "learning_rate": 8.243990486618717e-07, "loss": 0.1837, "step": 1433 }, { "epoch": 0.7475076562194566, "grad_norm": 0.9925749768449906, "learning_rate": 8.212024269368565e-07, "loss": 0.1723, "step": 1434 }, { "epoch": 0.7480289307356487, "grad_norm": 0.9735123460764685, "learning_rate": 8.180107963687741e-07, "loss": 0.1743, "step": 1435 }, { "epoch": 0.7485502052518408, "grad_norm": 0.9538754854147554, "learning_rate": 8.148241664465157e-07, "loss": 0.1703, "step": 1436 }, { "epoch": 0.7490714797680328, "grad_norm": 0.9671414937233449, "learning_rate": 8.116425466441077e-07, "loss": 0.1702, "step": 1437 }, { "epoch": 0.7495927542842249, "grad_norm": 0.9358794599973769, "learning_rate": 8.084659464206787e-07, "loss": 0.1754, "step": 1438 }, { "epoch": 0.750114028800417, "grad_norm": 1.0328819574667487, "learning_rate": 8.052943752204339e-07, "loss": 0.1951, "step": 1439 }, { "epoch": 0.7506353033166091, "grad_norm": 0.9391446564466408, "learning_rate": 8.021278424726308e-07, "loss": 0.182, "step": 1440 }, { "epoch": 0.7511565778328012, "grad_norm": 0.9973077636582282, "learning_rate": 7.989663575915407e-07, "loss": 0.183, "step": 1441 }, { "epoch": 0.7516778523489933, "grad_norm": 0.994948001820505, "learning_rate": 7.958099299764332e-07, "loss": 0.1816, "step": 1442 }, { "epoch": 0.7521991268651854, "grad_norm": 0.9745039882273455, "learning_rate": 7.926585690115396e-07, "loss": 0.1849, "step": 1443 }, { "epoch": 0.7527204013813774, "grad_norm": 1.0585792440522037, "learning_rate": 7.895122840660272e-07, "loss": 0.1891, "step": 1444 }, { "epoch": 0.7532416758975695, "grad_norm": 1.0072933672269964, "learning_rate": 7.863710844939759e-07, "loss": 0.1813, "step": 1445 }, { "epoch": 0.7537629504137616, "grad_norm": 1.046799665395406, "learning_rate": 7.8323497963434e-07, "loss": 0.196, "step": 1446 }, { "epoch": 0.7542842249299537, "grad_norm": 0.9996936215535054, "learning_rate": 7.801039788109332e-07, "loss": 0.1818, "step": 1447 }, { "epoch": 0.7548054994461458, "grad_norm": 1.0324011507953594, "learning_rate": 7.769780913323916e-07, "loss": 0.1862, "step": 1448 }, { "epoch": 0.7553267739623379, "grad_norm": 0.9634685781762996, "learning_rate": 7.738573264921481e-07, "loss": 0.1837, "step": 1449 }, { "epoch": 0.75584804847853, "grad_norm": 0.9954983995056319, "learning_rate": 7.707416935684092e-07, "loss": 0.1841, "step": 1450 }, { "epoch": 0.7563693229947221, "grad_norm": 0.9465965431404901, "learning_rate": 7.676312018241211e-07, "loss": 0.182, "step": 1451 }, { "epoch": 0.7568905975109141, "grad_norm": 0.9844735426559792, "learning_rate": 7.645258605069444e-07, "loss": 0.1822, "step": 1452 }, { "epoch": 0.7574118720271062, "grad_norm": 0.9918860172082978, "learning_rate": 7.614256788492316e-07, "loss": 0.1748, "step": 1453 }, { "epoch": 0.7579331465432984, "grad_norm": 0.9681618564765927, "learning_rate": 7.583306660679888e-07, "loss": 0.1759, "step": 1454 }, { "epoch": 0.7584544210594905, "grad_norm": 1.0081967981460622, "learning_rate": 7.552408313648602e-07, "loss": 0.1874, "step": 1455 }, { "epoch": 0.7589756955756826, "grad_norm": 0.9932265000906999, "learning_rate": 7.521561839260927e-07, "loss": 0.1733, "step": 1456 }, { "epoch": 0.7594969700918747, "grad_norm": 1.037111313956083, "learning_rate": 7.49076732922511e-07, "loss": 0.1912, "step": 1457 }, { "epoch": 0.7600182446080668, "grad_norm": 0.9698497048428991, "learning_rate": 7.460024875094926e-07, "loss": 0.1791, "step": 1458 }, { "epoch": 0.7605395191242588, "grad_norm": 1.000100859624916, "learning_rate": 7.429334568269364e-07, "loss": 0.1951, "step": 1459 }, { "epoch": 0.7610607936404509, "grad_norm": 0.9698217611279528, "learning_rate": 7.398696499992386e-07, "loss": 0.1678, "step": 1460 }, { "epoch": 0.761582068156643, "grad_norm": 0.9813749452116728, "learning_rate": 7.368110761352631e-07, "loss": 0.1872, "step": 1461 }, { "epoch": 0.7621033426728351, "grad_norm": 0.9916765810612727, "learning_rate": 7.337577443283192e-07, "loss": 0.1775, "step": 1462 }, { "epoch": 0.7626246171890272, "grad_norm": 0.9499274567356653, "learning_rate": 7.307096636561281e-07, "loss": 0.1801, "step": 1463 }, { "epoch": 0.7631458917052193, "grad_norm": 0.981945431179099, "learning_rate": 7.276668431808004e-07, "loss": 0.1828, "step": 1464 }, { "epoch": 0.7636671662214114, "grad_norm": 0.9566783558292398, "learning_rate": 7.246292919488068e-07, "loss": 0.1752, "step": 1465 }, { "epoch": 0.7641884407376034, "grad_norm": 0.992884475254835, "learning_rate": 7.215970189909546e-07, "loss": 0.1814, "step": 1466 }, { "epoch": 0.7647097152537955, "grad_norm": 1.052003115861817, "learning_rate": 7.185700333223564e-07, "loss": 0.2006, "step": 1467 }, { "epoch": 0.7652309897699876, "grad_norm": 0.9624719382988022, "learning_rate": 7.155483439424058e-07, "loss": 0.1843, "step": 1468 }, { "epoch": 0.7657522642861797, "grad_norm": 1.0339494657208437, "learning_rate": 7.1253195983475e-07, "loss": 0.1867, "step": 1469 }, { "epoch": 0.7662735388023718, "grad_norm": 1.0270868784537983, "learning_rate": 7.09520889967265e-07, "loss": 0.1819, "step": 1470 }, { "epoch": 0.7667948133185639, "grad_norm": 0.9934453354538934, "learning_rate": 7.065151432920258e-07, "loss": 0.1841, "step": 1471 }, { "epoch": 0.767316087834756, "grad_norm": 1.1315817573018363, "learning_rate": 7.035147287452803e-07, "loss": 0.1995, "step": 1472 }, { "epoch": 0.767837362350948, "grad_norm": 1.018463046969739, "learning_rate": 7.005196552474275e-07, "loss": 0.1871, "step": 1473 }, { "epoch": 0.7683586368671401, "grad_norm": 1.0421420164623663, "learning_rate": 6.975299317029813e-07, "loss": 0.1922, "step": 1474 }, { "epoch": 0.7688799113833322, "grad_norm": 1.002838203747042, "learning_rate": 6.945455670005558e-07, "loss": 0.191, "step": 1475 }, { "epoch": 0.7694011858995243, "grad_norm": 0.9810312260984373, "learning_rate": 6.915665700128291e-07, "loss": 0.1786, "step": 1476 }, { "epoch": 0.7699224604157164, "grad_norm": 0.980439512239371, "learning_rate": 6.885929495965216e-07, "loss": 0.188, "step": 1477 }, { "epoch": 0.7704437349319085, "grad_norm": 1.022741021825978, "learning_rate": 6.856247145923708e-07, "loss": 0.1857, "step": 1478 }, { "epoch": 0.7709650094481006, "grad_norm": 0.9557018945651466, "learning_rate": 6.82661873825101e-07, "loss": 0.176, "step": 1479 }, { "epoch": 0.7714862839642926, "grad_norm": 1.0289243917403224, "learning_rate": 6.797044361033986e-07, "loss": 0.195, "step": 1480 }, { "epoch": 0.7720075584804847, "grad_norm": 1.0648986089824894, "learning_rate": 6.767524102198903e-07, "loss": 0.1812, "step": 1481 }, { "epoch": 0.7725288329966769, "grad_norm": 1.0620665115031733, "learning_rate": 6.738058049511067e-07, "loss": 0.2003, "step": 1482 }, { "epoch": 0.773050107512869, "grad_norm": 1.0327996964046404, "learning_rate": 6.708646290574694e-07, "loss": 0.1962, "step": 1483 }, { "epoch": 0.7735713820290611, "grad_norm": 1.0114282559427161, "learning_rate": 6.679288912832538e-07, "loss": 0.1913, "step": 1484 }, { "epoch": 0.7740926565452532, "grad_norm": 0.9816443250943625, "learning_rate": 6.649986003565678e-07, "loss": 0.1768, "step": 1485 }, { "epoch": 0.7746139310614453, "grad_norm": 0.9331501300978031, "learning_rate": 6.620737649893291e-07, "loss": 0.1678, "step": 1486 }, { "epoch": 0.7751352055776374, "grad_norm": 1.0566663982761528, "learning_rate": 6.591543938772302e-07, "loss": 0.1949, "step": 1487 }, { "epoch": 0.7756564800938294, "grad_norm": 0.967850497825642, "learning_rate": 6.562404956997229e-07, "loss": 0.1736, "step": 1488 }, { "epoch": 0.7761777546100215, "grad_norm": 1.0157020122014457, "learning_rate": 6.533320791199848e-07, "loss": 0.1779, "step": 1489 }, { "epoch": 0.7766990291262136, "grad_norm": 1.045591809870396, "learning_rate": 6.504291527848972e-07, "loss": 0.1834, "step": 1490 }, { "epoch": 0.7772203036424057, "grad_norm": 1.0434125948047916, "learning_rate": 6.475317253250196e-07, "loss": 0.1854, "step": 1491 }, { "epoch": 0.7777415781585978, "grad_norm": 0.9525166005684407, "learning_rate": 6.446398053545619e-07, "loss": 0.1679, "step": 1492 }, { "epoch": 0.7782628526747899, "grad_norm": 0.9809309122145046, "learning_rate": 6.417534014713594e-07, "loss": 0.182, "step": 1493 }, { "epoch": 0.778784127190982, "grad_norm": 1.0183445694309556, "learning_rate": 6.388725222568498e-07, "loss": 0.1922, "step": 1494 }, { "epoch": 0.779305401707174, "grad_norm": 1.030579937127978, "learning_rate": 6.359971762760442e-07, "loss": 0.182, "step": 1495 }, { "epoch": 0.7798266762233661, "grad_norm": 0.9551228877332707, "learning_rate": 6.331273720775028e-07, "loss": 0.1754, "step": 1496 }, { "epoch": 0.7803479507395582, "grad_norm": 1.0195208828277464, "learning_rate": 6.302631181933106e-07, "loss": 0.1863, "step": 1497 }, { "epoch": 0.7808692252557503, "grad_norm": 0.9720903989483768, "learning_rate": 6.274044231390503e-07, "loss": 0.1845, "step": 1498 }, { "epoch": 0.7813904997719424, "grad_norm": 0.9897645284030194, "learning_rate": 6.245512954137795e-07, "loss": 0.187, "step": 1499 }, { "epoch": 0.7819117742881345, "grad_norm": 0.9745175491361323, "learning_rate": 6.217037435000028e-07, "loss": 0.1811, "step": 1500 }, { "epoch": 0.7819117742881345, "eval_loss": 0.18243278563022614, "eval_runtime": 52.0221, "eval_samples_per_second": 23.855, "eval_steps_per_second": 2.999, "step": 1500 }, { "epoch": 0.7824330488043266, "grad_norm": 0.9624283120304148, "learning_rate": 6.18861775863647e-07, "loss": 0.1843, "step": 1501 }, { "epoch": 0.7829543233205186, "grad_norm": 0.9997178294787541, "learning_rate": 6.16025400954037e-07, "loss": 0.1844, "step": 1502 }, { "epoch": 0.7834755978367107, "grad_norm": 0.9690170298588008, "learning_rate": 6.131946272038719e-07, "loss": 0.1747, "step": 1503 }, { "epoch": 0.7839968723529028, "grad_norm": 0.9872530531107133, "learning_rate": 6.103694630291954e-07, "loss": 0.1889, "step": 1504 }, { "epoch": 0.7845181468690949, "grad_norm": 1.0002803187642446, "learning_rate": 6.07549916829375e-07, "loss": 0.1693, "step": 1505 }, { "epoch": 0.785039421385287, "grad_norm": 0.9866149594139076, "learning_rate": 6.047359969870773e-07, "loss": 0.1752, "step": 1506 }, { "epoch": 0.7855606959014791, "grad_norm": 1.0156582051001866, "learning_rate": 6.019277118682371e-07, "loss": 0.1932, "step": 1507 }, { "epoch": 0.7860819704176712, "grad_norm": 0.9848342276162435, "learning_rate": 5.991250698220416e-07, "loss": 0.185, "step": 1508 }, { "epoch": 0.7866032449338632, "grad_norm": 0.984391137727102, "learning_rate": 5.963280791808984e-07, "loss": 0.1962, "step": 1509 }, { "epoch": 0.7871245194500553, "grad_norm": 0.9483291063857775, "learning_rate": 5.935367482604124e-07, "loss": 0.1802, "step": 1510 }, { "epoch": 0.7876457939662475, "grad_norm": 1.0418162412838423, "learning_rate": 5.907510853593642e-07, "loss": 0.1857, "step": 1511 }, { "epoch": 0.7881670684824396, "grad_norm": 1.0343511985551024, "learning_rate": 5.879710987596818e-07, "loss": 0.1887, "step": 1512 }, { "epoch": 0.7886883429986317, "grad_norm": 0.9773227817505601, "learning_rate": 5.851967967264164e-07, "loss": 0.1718, "step": 1513 }, { "epoch": 0.7892096175148238, "grad_norm": 0.9692717128786674, "learning_rate": 5.824281875077212e-07, "loss": 0.1747, "step": 1514 }, { "epoch": 0.7897308920310159, "grad_norm": 0.9592152925255154, "learning_rate": 5.796652793348203e-07, "loss": 0.1698, "step": 1515 }, { "epoch": 0.7902521665472079, "grad_norm": 1.0199630611548223, "learning_rate": 5.769080804219929e-07, "loss": 0.1823, "step": 1516 }, { "epoch": 0.7907734410634, "grad_norm": 0.993001420164248, "learning_rate": 5.741565989665413e-07, "loss": 0.1914, "step": 1517 }, { "epoch": 0.7912947155795921, "grad_norm": 1.0044001865259375, "learning_rate": 5.714108431487692e-07, "loss": 0.1919, "step": 1518 }, { "epoch": 0.7918159900957842, "grad_norm": 1.009348827060207, "learning_rate": 5.68670821131961e-07, "loss": 0.1886, "step": 1519 }, { "epoch": 0.7923372646119763, "grad_norm": 1.0193106668548484, "learning_rate": 5.659365410623491e-07, "loss": 0.1932, "step": 1520 }, { "epoch": 0.7928585391281684, "grad_norm": 0.9849031420188077, "learning_rate": 5.63208011069099e-07, "loss": 0.1841, "step": 1521 }, { "epoch": 0.7933798136443605, "grad_norm": 0.9564397362264664, "learning_rate": 5.604852392642813e-07, "loss": 0.179, "step": 1522 }, { "epoch": 0.7939010881605526, "grad_norm": 0.9832377238334014, "learning_rate": 5.577682337428417e-07, "loss": 0.1882, "step": 1523 }, { "epoch": 0.7944223626767446, "grad_norm": 0.9964927270554703, "learning_rate": 5.550570025825888e-07, "loss": 0.185, "step": 1524 }, { "epoch": 0.7949436371929367, "grad_norm": 1.078579571117194, "learning_rate": 5.523515538441604e-07, "loss": 0.1885, "step": 1525 }, { "epoch": 0.7954649117091288, "grad_norm": 0.9423733562321697, "learning_rate": 5.496518955710023e-07, "loss": 0.1642, "step": 1526 }, { "epoch": 0.7959861862253209, "grad_norm": 0.9932650132284885, "learning_rate": 5.469580357893484e-07, "loss": 0.1834, "step": 1527 }, { "epoch": 0.796507460741513, "grad_norm": 0.9874453522435449, "learning_rate": 5.442699825081885e-07, "loss": 0.1781, "step": 1528 }, { "epoch": 0.7970287352577051, "grad_norm": 0.9898112554414138, "learning_rate": 5.415877437192535e-07, "loss": 0.1871, "step": 1529 }, { "epoch": 0.7975500097738972, "grad_norm": 1.0099309005097243, "learning_rate": 5.389113273969857e-07, "loss": 0.1916, "step": 1530 }, { "epoch": 0.7980712842900892, "grad_norm": 0.9194794514622183, "learning_rate": 5.36240741498516e-07, "loss": 0.1715, "step": 1531 }, { "epoch": 0.7985925588062813, "grad_norm": 0.9525100925930671, "learning_rate": 5.33575993963644e-07, "loss": 0.1677, "step": 1532 }, { "epoch": 0.7991138333224734, "grad_norm": 1.0095716989433727, "learning_rate": 5.309170927148088e-07, "loss": 0.173, "step": 1533 }, { "epoch": 0.7996351078386655, "grad_norm": 1.0361628682209612, "learning_rate": 5.282640456570692e-07, "loss": 0.19, "step": 1534 }, { "epoch": 0.8001563823548576, "grad_norm": 1.0032246075672515, "learning_rate": 5.256168606780784e-07, "loss": 0.1826, "step": 1535 }, { "epoch": 0.8006776568710497, "grad_norm": 0.9910274150150304, "learning_rate": 5.229755456480635e-07, "loss": 0.1661, "step": 1536 }, { "epoch": 0.8011989313872419, "grad_norm": 1.0188050211844513, "learning_rate": 5.203401084197973e-07, "loss": 0.1888, "step": 1537 }, { "epoch": 0.8017202059034338, "grad_norm": 0.9812489181895766, "learning_rate": 5.177105568285793e-07, "loss": 0.1745, "step": 1538 }, { "epoch": 0.802241480419626, "grad_norm": 1.0094080576230375, "learning_rate": 5.150868986922092e-07, "loss": 0.1841, "step": 1539 }, { "epoch": 0.8027627549358181, "grad_norm": 0.9390952886758952, "learning_rate": 5.124691418109673e-07, "loss": 0.1698, "step": 1540 }, { "epoch": 0.8032840294520102, "grad_norm": 1.0670290135128153, "learning_rate": 5.09857293967587e-07, "loss": 0.1911, "step": 1541 }, { "epoch": 0.8038053039682023, "grad_norm": 0.9622567515885702, "learning_rate": 5.072513629272352e-07, "loss": 0.1792, "step": 1542 }, { "epoch": 0.8043265784843944, "grad_norm": 1.0833879216384623, "learning_rate": 5.046513564374861e-07, "loss": 0.1918, "step": 1543 }, { "epoch": 0.8048478530005865, "grad_norm": 1.071394833175433, "learning_rate": 5.020572822283027e-07, "loss": 0.2082, "step": 1544 }, { "epoch": 0.8053691275167785, "grad_norm": 0.9465583168284061, "learning_rate": 4.994691480120086e-07, "loss": 0.1778, "step": 1545 }, { "epoch": 0.8058904020329706, "grad_norm": 0.9692484651932467, "learning_rate": 4.968869614832681e-07, "loss": 0.1616, "step": 1546 }, { "epoch": 0.8064116765491627, "grad_norm": 1.0420699332824392, "learning_rate": 4.943107303190639e-07, "loss": 0.1946, "step": 1547 }, { "epoch": 0.8069329510653548, "grad_norm": 0.9720960517462183, "learning_rate": 4.917404621786703e-07, "loss": 0.1729, "step": 1548 }, { "epoch": 0.8074542255815469, "grad_norm": 0.9837707057560047, "learning_rate": 4.891761647036364e-07, "loss": 0.1881, "step": 1549 }, { "epoch": 0.807975500097739, "grad_norm": 0.978722167032406, "learning_rate": 4.866178455177584e-07, "loss": 0.1739, "step": 1550 }, { "epoch": 0.8084967746139311, "grad_norm": 0.9409676553020354, "learning_rate": 4.840655122270582e-07, "loss": 0.1649, "step": 1551 }, { "epoch": 0.8090180491301231, "grad_norm": 0.9685240805958721, "learning_rate": 4.815191724197634e-07, "loss": 0.1782, "step": 1552 }, { "epoch": 0.8095393236463152, "grad_norm": 0.9901929023181778, "learning_rate": 4.78978833666281e-07, "loss": 0.171, "step": 1553 }, { "epoch": 0.8100605981625073, "grad_norm": 1.0737101425140607, "learning_rate": 4.7644450351917617e-07, "loss": 0.1779, "step": 1554 }, { "epoch": 0.8105818726786994, "grad_norm": 1.0148124979347264, "learning_rate": 4.7391618951315277e-07, "loss": 0.1831, "step": 1555 }, { "epoch": 0.8111031471948915, "grad_norm": 1.038320233035674, "learning_rate": 4.713938991650241e-07, "loss": 0.1961, "step": 1556 }, { "epoch": 0.8116244217110836, "grad_norm": 0.9697118349993694, "learning_rate": 4.688776399736991e-07, "loss": 0.1789, "step": 1557 }, { "epoch": 0.8121456962272757, "grad_norm": 0.9976052679774774, "learning_rate": 4.6636741942015314e-07, "loss": 0.1812, "step": 1558 }, { "epoch": 0.8126669707434678, "grad_norm": 0.9351359560800986, "learning_rate": 4.6386324496740855e-07, "loss": 0.174, "step": 1559 }, { "epoch": 0.8131882452596598, "grad_norm": 0.996276397945659, "learning_rate": 4.6136512406051495e-07, "loss": 0.1804, "step": 1560 }, { "epoch": 0.8137095197758519, "grad_norm": 1.0250044182372304, "learning_rate": 4.5887306412651933e-07, "loss": 0.1897, "step": 1561 }, { "epoch": 0.814230794292044, "grad_norm": 0.9542320294681652, "learning_rate": 4.563870725744543e-07, "loss": 0.186, "step": 1562 }, { "epoch": 0.8147520688082361, "grad_norm": 0.9837597440865188, "learning_rate": 4.539071567953077e-07, "loss": 0.1911, "step": 1563 }, { "epoch": 0.8152733433244282, "grad_norm": 0.981422035168465, "learning_rate": 4.514333241620045e-07, "loss": 0.1864, "step": 1564 }, { "epoch": 0.8157946178406204, "grad_norm": 0.9991362993949138, "learning_rate": 4.4896558202938505e-07, "loss": 0.1849, "step": 1565 }, { "epoch": 0.8163158923568125, "grad_norm": 0.9329728006170872, "learning_rate": 4.4650393773418123e-07, "loss": 0.1726, "step": 1566 }, { "epoch": 0.8168371668730044, "grad_norm": 1.0092147678909589, "learning_rate": 4.440483985949953e-07, "loss": 0.1772, "step": 1567 }, { "epoch": 0.8173584413891966, "grad_norm": 0.936495487087966, "learning_rate": 4.4159897191228096e-07, "loss": 0.1784, "step": 1568 }, { "epoch": 0.8178797159053887, "grad_norm": 1.032928505703867, "learning_rate": 4.3915566496831654e-07, "loss": 0.1889, "step": 1569 }, { "epoch": 0.8184009904215808, "grad_norm": 1.0075404329801207, "learning_rate": 4.367184850271872e-07, "loss": 0.1879, "step": 1570 }, { "epoch": 0.8189222649377729, "grad_norm": 0.9624770935173103, "learning_rate": 4.3428743933476207e-07, "loss": 0.1717, "step": 1571 }, { "epoch": 0.819443539453965, "grad_norm": 1.012318148309551, "learning_rate": 4.3186253511867227e-07, "loss": 0.1798, "step": 1572 }, { "epoch": 0.8199648139701571, "grad_norm": 1.0103015687227486, "learning_rate": 4.294437795882919e-07, "loss": 0.1855, "step": 1573 }, { "epoch": 0.8204860884863491, "grad_norm": 1.0291823813828462, "learning_rate": 4.2703117993471295e-07, "loss": 0.1857, "step": 1574 }, { "epoch": 0.8210073630025412, "grad_norm": 1.0164999339217138, "learning_rate": 4.2462474333072544e-07, "loss": 0.1813, "step": 1575 }, { "epoch": 0.8215286375187333, "grad_norm": 1.1952579229840181, "learning_rate": 4.222244769307965e-07, "loss": 0.1835, "step": 1576 }, { "epoch": 0.8220499120349254, "grad_norm": 1.036232662086274, "learning_rate": 4.198303878710508e-07, "loss": 0.1791, "step": 1577 }, { "epoch": 0.8225711865511175, "grad_norm": 0.9831689035420226, "learning_rate": 4.174424832692453e-07, "loss": 0.1799, "step": 1578 }, { "epoch": 0.8230924610673096, "grad_norm": 1.0187121088831226, "learning_rate": 4.1506077022475e-07, "loss": 0.1864, "step": 1579 }, { "epoch": 0.8236137355835017, "grad_norm": 0.9756861806562158, "learning_rate": 4.1268525581853015e-07, "loss": 0.1797, "step": 1580 }, { "epoch": 0.8241350100996937, "grad_norm": 0.9987252492688348, "learning_rate": 4.1031594711311686e-07, "loss": 0.1792, "step": 1581 }, { "epoch": 0.8246562846158858, "grad_norm": 1.0161464474871202, "learning_rate": 4.079528511525968e-07, "loss": 0.1886, "step": 1582 }, { "epoch": 0.8251775591320779, "grad_norm": 0.9675388822474331, "learning_rate": 4.0559597496258263e-07, "loss": 0.1861, "step": 1583 }, { "epoch": 0.82569883364827, "grad_norm": 0.9620631526320462, "learning_rate": 4.032453255501956e-07, "loss": 0.1828, "step": 1584 }, { "epoch": 0.8262201081644621, "grad_norm": 0.9659011244034107, "learning_rate": 4.0090090990404563e-07, "loss": 0.1763, "step": 1585 }, { "epoch": 0.8267413826806542, "grad_norm": 1.0028308830055879, "learning_rate": 3.9856273499420837e-07, "loss": 0.1872, "step": 1586 }, { "epoch": 0.8272626571968463, "grad_norm": 0.9981885758468215, "learning_rate": 3.962308077722049e-07, "loss": 0.1806, "step": 1587 }, { "epoch": 0.8277839317130383, "grad_norm": 0.9135455364846385, "learning_rate": 3.9390513517098413e-07, "loss": 0.1691, "step": 1588 }, { "epoch": 0.8283052062292304, "grad_norm": 0.9733994445538949, "learning_rate": 3.9158572410489537e-07, "loss": 0.153, "step": 1589 }, { "epoch": 0.8288264807454225, "grad_norm": 0.9694813007897026, "learning_rate": 3.8927258146967626e-07, "loss": 0.1703, "step": 1590 }, { "epoch": 0.8293477552616146, "grad_norm": 1.0508545379309457, "learning_rate": 3.8696571414242546e-07, "loss": 0.1929, "step": 1591 }, { "epoch": 0.8298690297778067, "grad_norm": 0.9910207028835948, "learning_rate": 3.846651289815856e-07, "loss": 0.1699, "step": 1592 }, { "epoch": 0.8303903042939988, "grad_norm": 0.970422630360107, "learning_rate": 3.823708328269232e-07, "loss": 0.183, "step": 1593 }, { "epoch": 0.830911578810191, "grad_norm": 1.0433065768511562, "learning_rate": 3.800828324995043e-07, "loss": 0.1776, "step": 1594 }, { "epoch": 0.8314328533263831, "grad_norm": 0.9840284379510608, "learning_rate": 3.7780113480167967e-07, "loss": 0.1698, "step": 1595 }, { "epoch": 0.831954127842575, "grad_norm": 1.0584347815355786, "learning_rate": 3.7552574651706253e-07, "loss": 0.187, "step": 1596 }, { "epoch": 0.8324754023587672, "grad_norm": 1.0497713676498555, "learning_rate": 3.7325667441050457e-07, "loss": 0.189, "step": 1597 }, { "epoch": 0.8329966768749593, "grad_norm": 0.9679001486261071, "learning_rate": 3.7099392522808253e-07, "loss": 0.1814, "step": 1598 }, { "epoch": 0.8335179513911514, "grad_norm": 0.969875902904243, "learning_rate": 3.6873750569707297e-07, "loss": 0.1837, "step": 1599 }, { "epoch": 0.8340392259073435, "grad_norm": 1.000730042756625, "learning_rate": 3.664874225259338e-07, "loss": 0.1926, "step": 1600 }, { "epoch": 0.8345605004235356, "grad_norm": 0.9799204124672941, "learning_rate": 3.6424368240428666e-07, "loss": 0.1805, "step": 1601 }, { "epoch": 0.8350817749397277, "grad_norm": 0.9720793205092597, "learning_rate": 3.62006292002893e-07, "loss": 0.1822, "step": 1602 }, { "epoch": 0.8356030494559197, "grad_norm": 0.9414624324828083, "learning_rate": 3.597752579736363e-07, "loss": 0.1773, "step": 1603 }, { "epoch": 0.8361243239721118, "grad_norm": 0.9704325170983187, "learning_rate": 3.57550586949503e-07, "loss": 0.1773, "step": 1604 }, { "epoch": 0.8366455984883039, "grad_norm": 1.0278784833228696, "learning_rate": 3.5533228554456095e-07, "loss": 0.1749, "step": 1605 }, { "epoch": 0.837166873004496, "grad_norm": 0.9698076444384477, "learning_rate": 3.5312036035394263e-07, "loss": 0.1668, "step": 1606 }, { "epoch": 0.8376881475206881, "grad_norm": 0.9865415755933876, "learning_rate": 3.5091481795382216e-07, "loss": 0.1908, "step": 1607 }, { "epoch": 0.8382094220368802, "grad_norm": 0.9841444286679596, "learning_rate": 3.487156649013973e-07, "loss": 0.175, "step": 1608 }, { "epoch": 0.8387306965530723, "grad_norm": 1.0613086922617954, "learning_rate": 3.4652290773487003e-07, "loss": 0.2001, "step": 1609 }, { "epoch": 0.8392519710692643, "grad_norm": 0.9987120923297828, "learning_rate": 3.44336552973428e-07, "loss": 0.1861, "step": 1610 }, { "epoch": 0.8397732455854564, "grad_norm": 0.9957523602327897, "learning_rate": 3.4215660711722276e-07, "loss": 0.1882, "step": 1611 }, { "epoch": 0.8402945201016485, "grad_norm": 1.0417753308685644, "learning_rate": 3.399830766473525e-07, "loss": 0.2053, "step": 1612 }, { "epoch": 0.8408157946178406, "grad_norm": 0.994487304090861, "learning_rate": 3.378159680258428e-07, "loss": 0.1869, "step": 1613 }, { "epoch": 0.8413370691340327, "grad_norm": 0.986003967741045, "learning_rate": 3.356552876956257e-07, "loss": 0.1716, "step": 1614 }, { "epoch": 0.8418583436502248, "grad_norm": 0.9510075016608932, "learning_rate": 3.3350104208052153e-07, "loss": 0.1794, "step": 1615 }, { "epoch": 0.8423796181664169, "grad_norm": 1.0680380304167463, "learning_rate": 3.3135323758522047e-07, "loss": 0.179, "step": 1616 }, { "epoch": 0.8429008926826089, "grad_norm": 0.9344897888121284, "learning_rate": 3.292118805952621e-07, "loss": 0.1666, "step": 1617 }, { "epoch": 0.843422167198801, "grad_norm": 0.9995816237383274, "learning_rate": 3.2707697747701913e-07, "loss": 0.1817, "step": 1618 }, { "epoch": 0.8439434417149931, "grad_norm": 0.9527368112369238, "learning_rate": 3.249485345776743e-07, "loss": 0.1818, "step": 1619 }, { "epoch": 0.8444647162311852, "grad_norm": 0.9563185247038841, "learning_rate": 3.228265582252041e-07, "loss": 0.1842, "step": 1620 }, { "epoch": 0.8449859907473773, "grad_norm": 0.9676686730390603, "learning_rate": 3.2071105472836207e-07, "loss": 0.1842, "step": 1621 }, { "epoch": 0.8455072652635695, "grad_norm": 0.945759746107448, "learning_rate": 3.186020303766532e-07, "loss": 0.1678, "step": 1622 }, { "epoch": 0.8460285397797616, "grad_norm": 0.9897130171138304, "learning_rate": 3.164994914403241e-07, "loss": 0.1692, "step": 1623 }, { "epoch": 0.8465498142959537, "grad_norm": 0.9858965780032267, "learning_rate": 3.1440344417033754e-07, "loss": 0.1798, "step": 1624 }, { "epoch": 0.8470710888121457, "grad_norm": 1.006916832075123, "learning_rate": 3.123138947983556e-07, "loss": 0.1724, "step": 1625 }, { "epoch": 0.8475923633283378, "grad_norm": 0.9659860247439446, "learning_rate": 3.102308495367243e-07, "loss": 0.173, "step": 1626 }, { "epoch": 0.8481136378445299, "grad_norm": 0.9804528945179373, "learning_rate": 3.081543145784499e-07, "loss": 0.1792, "step": 1627 }, { "epoch": 0.848634912360722, "grad_norm": 1.0081587481145602, "learning_rate": 3.060842960971844e-07, "loss": 0.1919, "step": 1628 }, { "epoch": 0.8491561868769141, "grad_norm": 1.0093772965493921, "learning_rate": 3.0402080024720724e-07, "loss": 0.1862, "step": 1629 }, { "epoch": 0.8496774613931062, "grad_norm": 0.9873162646732845, "learning_rate": 3.0196383316340226e-07, "loss": 0.1857, "step": 1630 }, { "epoch": 0.8501987359092983, "grad_norm": 0.9265922477417696, "learning_rate": 2.9991340096124644e-07, "loss": 0.1734, "step": 1631 }, { "epoch": 0.8507200104254903, "grad_norm": 1.0215134877772702, "learning_rate": 2.978695097367862e-07, "loss": 0.2025, "step": 1632 }, { "epoch": 0.8512412849416824, "grad_norm": 0.9613814433527633, "learning_rate": 2.958321655666219e-07, "loss": 0.1762, "step": 1633 }, { "epoch": 0.8517625594578745, "grad_norm": 0.9939851036657399, "learning_rate": 2.938013745078899e-07, "loss": 0.1817, "step": 1634 }, { "epoch": 0.8522838339740666, "grad_norm": 0.9361338440124887, "learning_rate": 2.917771425982413e-07, "loss": 0.1582, "step": 1635 }, { "epoch": 0.8528051084902587, "grad_norm": 0.9335310987532818, "learning_rate": 2.897594758558298e-07, "loss": 0.1726, "step": 1636 }, { "epoch": 0.8533263830064508, "grad_norm": 0.9993168548254366, "learning_rate": 2.8774838027928854e-07, "loss": 0.1754, "step": 1637 }, { "epoch": 0.8538476575226429, "grad_norm": 0.9662051256266582, "learning_rate": 2.857438618477135e-07, "loss": 0.1734, "step": 1638 }, { "epoch": 0.8543689320388349, "grad_norm": 0.9798016098663039, "learning_rate": 2.837459265206491e-07, "loss": 0.1781, "step": 1639 }, { "epoch": 0.854890206555027, "grad_norm": 1.083203468022416, "learning_rate": 2.817545802380653e-07, "loss": 0.2084, "step": 1640 }, { "epoch": 0.8554114810712191, "grad_norm": 0.9702767949680986, "learning_rate": 2.797698289203432e-07, "loss": 0.1814, "step": 1641 }, { "epoch": 0.8559327555874112, "grad_norm": 0.9864042489800868, "learning_rate": 2.7779167846825797e-07, "loss": 0.1742, "step": 1642 }, { "epoch": 0.8564540301036033, "grad_norm": 0.9525936742900207, "learning_rate": 2.7582013476295826e-07, "loss": 0.1826, "step": 1643 }, { "epoch": 0.8569753046197954, "grad_norm": 0.975678936997716, "learning_rate": 2.738552036659517e-07, "loss": 0.1838, "step": 1644 }, { "epoch": 0.8574965791359875, "grad_norm": 0.9472599686570292, "learning_rate": 2.718968910190853e-07, "loss": 0.1674, "step": 1645 }, { "epoch": 0.8580178536521795, "grad_norm": 0.959904368853958, "learning_rate": 2.6994520264452957e-07, "loss": 0.1753, "step": 1646 }, { "epoch": 0.8585391281683716, "grad_norm": 0.9094487422267438, "learning_rate": 2.6800014434476137e-07, "loss": 0.1621, "step": 1647 }, { "epoch": 0.8590604026845637, "grad_norm": 0.9597770843306208, "learning_rate": 2.6606172190254467e-07, "loss": 0.1722, "step": 1648 }, { "epoch": 0.8595816772007558, "grad_norm": 1.0318500304093965, "learning_rate": 2.6412994108091586e-07, "loss": 0.1856, "step": 1649 }, { "epoch": 0.860102951716948, "grad_norm": 1.0407206119375378, "learning_rate": 2.6220480762316396e-07, "loss": 0.1905, "step": 1650 }, { "epoch": 0.86062422623314, "grad_norm": 0.9806484699231431, "learning_rate": 2.6028632725281723e-07, "loss": 0.192, "step": 1651 }, { "epoch": 0.8611455007493322, "grad_norm": 1.0021777700354728, "learning_rate": 2.58374505673622e-07, "loss": 0.2076, "step": 1652 }, { "epoch": 0.8616667752655242, "grad_norm": 1.0056279393326901, "learning_rate": 2.56469348569528e-07, "loss": 0.1964, "step": 1653 }, { "epoch": 0.8621880497817163, "grad_norm": 0.9696793333836914, "learning_rate": 2.5457086160467187e-07, "loss": 0.1753, "step": 1654 }, { "epoch": 0.8627093242979084, "grad_norm": 0.9772770309895201, "learning_rate": 2.526790504233592e-07, "loss": 0.176, "step": 1655 }, { "epoch": 0.8632305988141005, "grad_norm": 0.9821643376455257, "learning_rate": 2.507939206500479e-07, "loss": 0.1791, "step": 1656 }, { "epoch": 0.8637518733302926, "grad_norm": 0.9905289226333893, "learning_rate": 2.4891547788933185e-07, "loss": 0.1931, "step": 1657 }, { "epoch": 0.8642731478464847, "grad_norm": 0.96401523470504, "learning_rate": 2.4704372772592326e-07, "loss": 0.1797, "step": 1658 }, { "epoch": 0.8647944223626768, "grad_norm": 0.9499449664889251, "learning_rate": 2.4517867572463884e-07, "loss": 0.1743, "step": 1659 }, { "epoch": 0.8653156968788689, "grad_norm": 0.9850999754121548, "learning_rate": 2.433203274303794e-07, "loss": 0.1689, "step": 1660 }, { "epoch": 0.8658369713950609, "grad_norm": 1.01772409958344, "learning_rate": 2.414686883681158e-07, "loss": 0.1753, "step": 1661 }, { "epoch": 0.866358245911253, "grad_norm": 0.9691772600585687, "learning_rate": 2.3962376404287365e-07, "loss": 0.1825, "step": 1662 }, { "epoch": 0.8668795204274451, "grad_norm": 1.0124591744101696, "learning_rate": 2.3778555993971164e-07, "loss": 0.1866, "step": 1663 }, { "epoch": 0.8674007949436372, "grad_norm": 0.9965123836345188, "learning_rate": 2.359540815237124e-07, "loss": 0.1811, "step": 1664 }, { "epoch": 0.8679220694598293, "grad_norm": 1.0446021288366747, "learning_rate": 2.3412933423996114e-07, "loss": 0.1909, "step": 1665 }, { "epoch": 0.8684433439760214, "grad_norm": 0.9880596622848659, "learning_rate": 2.3231132351353075e-07, "loss": 0.1809, "step": 1666 }, { "epoch": 0.8689646184922135, "grad_norm": 1.014446771053952, "learning_rate": 2.305000547494679e-07, "loss": 0.1878, "step": 1667 }, { "epoch": 0.8694858930084055, "grad_norm": 1.0142208291598611, "learning_rate": 2.2869553333277145e-07, "loss": 0.1852, "step": 1668 }, { "epoch": 0.8700071675245976, "grad_norm": 0.9751837375695704, "learning_rate": 2.2689776462838348e-07, "loss": 0.1733, "step": 1669 }, { "epoch": 0.8705284420407897, "grad_norm": 0.9939676929319553, "learning_rate": 2.251067539811691e-07, "loss": 0.1897, "step": 1670 }, { "epoch": 0.8710497165569818, "grad_norm": 0.9773698978757089, "learning_rate": 2.233225067158995e-07, "loss": 0.1735, "step": 1671 }, { "epoch": 0.8715709910731739, "grad_norm": 0.9841460047764211, "learning_rate": 2.2154502813724026e-07, "loss": 0.1938, "step": 1672 }, { "epoch": 0.872092265589366, "grad_norm": 1.02614898643778, "learning_rate": 2.1977432352973226e-07, "loss": 0.1958, "step": 1673 }, { "epoch": 0.8726135401055581, "grad_norm": 0.9629274090579781, "learning_rate": 2.1801039815777637e-07, "loss": 0.184, "step": 1674 }, { "epoch": 0.8731348146217501, "grad_norm": 0.9458912450222245, "learning_rate": 2.1625325726562009e-07, "loss": 0.1737, "step": 1675 }, { "epoch": 0.8736560891379422, "grad_norm": 0.9854272942394587, "learning_rate": 2.1450290607733903e-07, "loss": 0.1892, "step": 1676 }, { "epoch": 0.8741773636541343, "grad_norm": 0.9170439224566707, "learning_rate": 2.127593497968225e-07, "loss": 0.1625, "step": 1677 }, { "epoch": 0.8746986381703264, "grad_norm": 0.9912800697939179, "learning_rate": 2.1102259360775895e-07, "loss": 0.1869, "step": 1678 }, { "epoch": 0.8752199126865186, "grad_norm": 1.0010841890382143, "learning_rate": 2.0929264267361888e-07, "loss": 0.1876, "step": 1679 }, { "epoch": 0.8757411872027107, "grad_norm": 0.982385353388596, "learning_rate": 2.07569502137642e-07, "loss": 0.1662, "step": 1680 }, { "epoch": 0.8762624617189028, "grad_norm": 1.0012670470341332, "learning_rate": 2.0585317712281866e-07, "loss": 0.1799, "step": 1681 }, { "epoch": 0.8767837362350948, "grad_norm": 0.9876128950810698, "learning_rate": 2.0414367273187713e-07, "loss": 0.1897, "step": 1682 }, { "epoch": 0.8773050107512869, "grad_norm": 1.0244502518557284, "learning_rate": 2.0244099404726686e-07, "loss": 0.1916, "step": 1683 }, { "epoch": 0.877826285267479, "grad_norm": 0.9310013268689511, "learning_rate": 2.007451461311455e-07, "loss": 0.1784, "step": 1684 }, { "epoch": 0.8783475597836711, "grad_norm": 1.0023999616194614, "learning_rate": 1.990561340253608e-07, "loss": 0.1858, "step": 1685 }, { "epoch": 0.8788688342998632, "grad_norm": 1.0212257693639213, "learning_rate": 1.9737396275143816e-07, "loss": 0.1876, "step": 1686 }, { "epoch": 0.8793901088160553, "grad_norm": 0.9863311854678503, "learning_rate": 1.9569863731056544e-07, "loss": 0.1865, "step": 1687 }, { "epoch": 0.8799113833322474, "grad_norm": 1.0293010657590143, "learning_rate": 1.940301626835761e-07, "loss": 0.1725, "step": 1688 }, { "epoch": 0.8804326578484394, "grad_norm": 0.9900962579192577, "learning_rate": 1.9236854383093668e-07, "loss": 0.1896, "step": 1689 }, { "epoch": 0.8809539323646315, "grad_norm": 0.990946654309898, "learning_rate": 1.9071378569273047e-07, "loss": 0.1713, "step": 1690 }, { "epoch": 0.8814752068808236, "grad_norm": 0.9571615778995071, "learning_rate": 1.8906589318864384e-07, "loss": 0.1791, "step": 1691 }, { "epoch": 0.8819964813970157, "grad_norm": 1.0306381407381788, "learning_rate": 1.8742487121795188e-07, "loss": 0.1839, "step": 1692 }, { "epoch": 0.8825177559132078, "grad_norm": 0.9923315907519844, "learning_rate": 1.8579072465950197e-07, "loss": 0.1829, "step": 1693 }, { "epoch": 0.8830390304293999, "grad_norm": 1.0142772500987272, "learning_rate": 1.8416345837170114e-07, "loss": 0.1887, "step": 1694 }, { "epoch": 0.883560304945592, "grad_norm": 1.0235689781648594, "learning_rate": 1.8254307719250214e-07, "loss": 0.1876, "step": 1695 }, { "epoch": 0.8840815794617841, "grad_norm": 1.0182076394574577, "learning_rate": 1.8092958593938476e-07, "loss": 0.178, "step": 1696 }, { "epoch": 0.8846028539779761, "grad_norm": 0.9805280925563719, "learning_rate": 1.793229894093479e-07, "loss": 0.1771, "step": 1697 }, { "epoch": 0.8851241284941682, "grad_norm": 0.9615291181545174, "learning_rate": 1.7772329237889158e-07, "loss": 0.1782, "step": 1698 }, { "epoch": 0.8856454030103603, "grad_norm": 0.9507665938359553, "learning_rate": 1.7613049960400057e-07, "loss": 0.1761, "step": 1699 }, { "epoch": 0.8861666775265524, "grad_norm": 0.9922955728775525, "learning_rate": 1.7454461582013665e-07, "loss": 0.1656, "step": 1700 }, { "epoch": 0.8866879520427445, "grad_norm": 0.9994874120091833, "learning_rate": 1.7296564574221796e-07, "loss": 0.1829, "step": 1701 }, { "epoch": 0.8872092265589366, "grad_norm": 1.0136266446019506, "learning_rate": 1.7139359406460888e-07, "loss": 0.174, "step": 1702 }, { "epoch": 0.8877305010751287, "grad_norm": 0.9290349996522272, "learning_rate": 1.698284654611057e-07, "loss": 0.1709, "step": 1703 }, { "epoch": 0.8882517755913207, "grad_norm": 0.9395954285051222, "learning_rate": 1.6827026458492018e-07, "loss": 0.1808, "step": 1704 }, { "epoch": 0.8887730501075128, "grad_norm": 0.9943901878207128, "learning_rate": 1.6671899606866966e-07, "loss": 0.1835, "step": 1705 }, { "epoch": 0.889294324623705, "grad_norm": 0.967900298914638, "learning_rate": 1.651746645243596e-07, "loss": 0.1679, "step": 1706 }, { "epoch": 0.889815599139897, "grad_norm": 0.9619624947520601, "learning_rate": 1.6363727454337202e-07, "loss": 0.1767, "step": 1707 }, { "epoch": 0.8903368736560892, "grad_norm": 1.0538594805661374, "learning_rate": 1.6210683069645212e-07, "loss": 0.1799, "step": 1708 }, { "epoch": 0.8908581481722813, "grad_norm": 0.9162046996093606, "learning_rate": 1.6058333753369265e-07, "loss": 0.1651, "step": 1709 }, { "epoch": 0.8913794226884734, "grad_norm": 1.0302723932161726, "learning_rate": 1.5906679958452215e-07, "loss": 0.1893, "step": 1710 }, { "epoch": 0.8919006972046654, "grad_norm": 0.9303753947237247, "learning_rate": 1.5755722135769036e-07, "loss": 0.178, "step": 1711 }, { "epoch": 0.8924219717208575, "grad_norm": 0.9865120547157974, "learning_rate": 1.5605460734125582e-07, "loss": 0.1841, "step": 1712 }, { "epoch": 0.8929432462370496, "grad_norm": 0.9679394859027707, "learning_rate": 1.545589620025731e-07, "loss": 0.1772, "step": 1713 }, { "epoch": 0.8934645207532417, "grad_norm": 0.9911898955173043, "learning_rate": 1.530702897882766e-07, "loss": 0.1743, "step": 1714 }, { "epoch": 0.8939857952694338, "grad_norm": 0.9569232485846784, "learning_rate": 1.5158859512427044e-07, "loss": 0.1797, "step": 1715 }, { "epoch": 0.8945070697856259, "grad_norm": 1.03339302500057, "learning_rate": 1.501138824157139e-07, "loss": 0.1722, "step": 1716 }, { "epoch": 0.895028344301818, "grad_norm": 0.9977760722380256, "learning_rate": 1.4864615604700872e-07, "loss": 0.1839, "step": 1717 }, { "epoch": 0.89554961881801, "grad_norm": 1.0410265865550448, "learning_rate": 1.471854203817852e-07, "loss": 0.2006, "step": 1718 }, { "epoch": 0.8960708933342021, "grad_norm": 0.9508714325641775, "learning_rate": 1.4573167976288994e-07, "loss": 0.1812, "step": 1719 }, { "epoch": 0.8965921678503942, "grad_norm": 0.9661676662319102, "learning_rate": 1.4428493851237403e-07, "loss": 0.1803, "step": 1720 }, { "epoch": 0.8971134423665863, "grad_norm": 1.0269137740020284, "learning_rate": 1.4284520093147818e-07, "loss": 0.1837, "step": 1721 }, { "epoch": 0.8976347168827784, "grad_norm": 1.0052857327761444, "learning_rate": 1.4141247130062069e-07, "loss": 0.1856, "step": 1722 }, { "epoch": 0.8981559913989705, "grad_norm": 0.9634382762365437, "learning_rate": 1.3998675387938476e-07, "loss": 0.1864, "step": 1723 }, { "epoch": 0.8986772659151626, "grad_norm": 0.8783811648251456, "learning_rate": 1.3856805290650644e-07, "loss": 0.1592, "step": 1724 }, { "epoch": 0.8991985404313546, "grad_norm": 0.9966480441661861, "learning_rate": 1.3715637259986147e-07, "loss": 0.1749, "step": 1725 }, { "epoch": 0.8997198149475467, "grad_norm": 1.0294818185014007, "learning_rate": 1.3575171715645235e-07, "loss": 0.1798, "step": 1726 }, { "epoch": 0.9002410894637388, "grad_norm": 0.9459331203996059, "learning_rate": 1.343540907523963e-07, "loss": 0.1691, "step": 1727 }, { "epoch": 0.9007623639799309, "grad_norm": 0.9905884624653489, "learning_rate": 1.3296349754291366e-07, "loss": 0.1848, "step": 1728 }, { "epoch": 0.901283638496123, "grad_norm": 0.9777004946205554, "learning_rate": 1.3157994166231342e-07, "loss": 0.1821, "step": 1729 }, { "epoch": 0.9018049130123151, "grad_norm": 0.9862503242538797, "learning_rate": 1.3020342722398273e-07, "loss": 0.179, "step": 1730 }, { "epoch": 0.9023261875285072, "grad_norm": 0.9425193881670688, "learning_rate": 1.2883395832037516e-07, "loss": 0.1847, "step": 1731 }, { "epoch": 0.9028474620446993, "grad_norm": 0.9716954562376383, "learning_rate": 1.2747153902299524e-07, "loss": 0.1866, "step": 1732 }, { "epoch": 0.9033687365608913, "grad_norm": 1.0551193782368478, "learning_rate": 1.2611617338239097e-07, "loss": 0.1985, "step": 1733 }, { "epoch": 0.9038900110770834, "grad_norm": 0.9661387222553628, "learning_rate": 1.247678654281384e-07, "loss": 0.1728, "step": 1734 }, { "epoch": 0.9044112855932755, "grad_norm": 0.9355653605767951, "learning_rate": 1.234266191688302e-07, "loss": 0.1624, "step": 1735 }, { "epoch": 0.9049325601094677, "grad_norm": 1.0231771733704602, "learning_rate": 1.2209243859206577e-07, "loss": 0.1828, "step": 1736 }, { "epoch": 0.9054538346256598, "grad_norm": 0.9316307065559123, "learning_rate": 1.2076532766443578e-07, "loss": 0.1718, "step": 1737 }, { "epoch": 0.9059751091418519, "grad_norm": 0.9809448672654858, "learning_rate": 1.1944529033151436e-07, "loss": 0.1793, "step": 1738 }, { "epoch": 0.906496383658044, "grad_norm": 0.929916278700842, "learning_rate": 1.1813233051784417e-07, "loss": 0.1548, "step": 1739 }, { "epoch": 0.907017658174236, "grad_norm": 0.9390839502307686, "learning_rate": 1.1682645212692634e-07, "loss": 0.1735, "step": 1740 }, { "epoch": 0.9075389326904281, "grad_norm": 0.9234037263108668, "learning_rate": 1.1552765904120966e-07, "loss": 0.1723, "step": 1741 }, { "epoch": 0.9080602072066202, "grad_norm": 0.9300393036632397, "learning_rate": 1.1423595512207591e-07, "loss": 0.1707, "step": 1742 }, { "epoch": 0.9085814817228123, "grad_norm": 0.9172461012788677, "learning_rate": 1.1295134420983179e-07, "loss": 0.1727, "step": 1743 }, { "epoch": 0.9091027562390044, "grad_norm": 0.9728819849512029, "learning_rate": 1.1167383012369609e-07, "loss": 0.1719, "step": 1744 }, { "epoch": 0.9096240307551965, "grad_norm": 0.9265239942171128, "learning_rate": 1.1040341666178733e-07, "loss": 0.1732, "step": 1745 }, { "epoch": 0.9101453052713886, "grad_norm": 0.9663796287818257, "learning_rate": 1.0914010760111476e-07, "loss": 0.1814, "step": 1746 }, { "epoch": 0.9106665797875806, "grad_norm": 0.9621454320512921, "learning_rate": 1.078839066975651e-07, "loss": 0.1755, "step": 1747 }, { "epoch": 0.9111878543037727, "grad_norm": 0.9923823819866243, "learning_rate": 1.0663481768589196e-07, "loss": 0.1845, "step": 1748 }, { "epoch": 0.9117091288199648, "grad_norm": 0.9525261178445472, "learning_rate": 1.0539284427970559e-07, "loss": 0.1709, "step": 1749 }, { "epoch": 0.9122304033361569, "grad_norm": 0.9096762584124705, "learning_rate": 1.0415799017146094e-07, "loss": 0.1746, "step": 1750 }, { "epoch": 0.912751677852349, "grad_norm": 0.9350011193890286, "learning_rate": 1.029302590324463e-07, "loss": 0.173, "step": 1751 }, { "epoch": 0.9132729523685411, "grad_norm": 0.9554852724445152, "learning_rate": 1.0170965451277382e-07, "loss": 0.1766, "step": 1752 }, { "epoch": 0.9137942268847332, "grad_norm": 0.9392313516485675, "learning_rate": 1.0049618024136703e-07, "loss": 0.1831, "step": 1753 }, { "epoch": 0.9143155014009252, "grad_norm": 0.9347402829794793, "learning_rate": 9.928983982595175e-08, "loss": 0.1649, "step": 1754 }, { "epoch": 0.9148367759171173, "grad_norm": 1.0157651703635753, "learning_rate": 9.809063685304376e-08, "loss": 0.192, "step": 1755 }, { "epoch": 0.9153580504333094, "grad_norm": 0.9877484103720534, "learning_rate": 9.689857488793891e-08, "loss": 0.1837, "step": 1756 }, { "epoch": 0.9158793249495015, "grad_norm": 0.972763311198933, "learning_rate": 9.571365747470251e-08, "loss": 0.1837, "step": 1757 }, { "epoch": 0.9164005994656936, "grad_norm": 0.9461520910270284, "learning_rate": 9.45358881361591e-08, "loss": 0.1731, "step": 1758 }, { "epoch": 0.9169218739818857, "grad_norm": 0.9798701003507027, "learning_rate": 9.336527037388132e-08, "loss": 0.1756, "step": 1759 }, { "epoch": 0.9174431484980778, "grad_norm": 0.947510382571863, "learning_rate": 9.22018076681791e-08, "loss": 0.1826, "step": 1760 }, { "epoch": 0.9179644230142698, "grad_norm": 0.9414920161240843, "learning_rate": 9.10455034780916e-08, "loss": 0.1721, "step": 1761 }, { "epoch": 0.9184856975304619, "grad_norm": 1.0507502280842818, "learning_rate": 8.989636124137363e-08, "loss": 0.1827, "step": 1762 }, { "epoch": 0.919006972046654, "grad_norm": 1.012501147988642, "learning_rate": 8.875438437448813e-08, "loss": 0.1785, "step": 1763 }, { "epoch": 0.9195282465628462, "grad_norm": 0.9943000289098797, "learning_rate": 8.761957627259509e-08, "loss": 0.1718, "step": 1764 }, { "epoch": 0.9200495210790383, "grad_norm": 0.9614370675987417, "learning_rate": 8.649194030953989e-08, "loss": 0.1744, "step": 1765 }, { "epoch": 0.9205707955952304, "grad_norm": 0.9802822351352877, "learning_rate": 8.537147983784716e-08, "loss": 0.1751, "step": 1766 }, { "epoch": 0.9210920701114225, "grad_norm": 0.987961849964845, "learning_rate": 8.425819818870667e-08, "loss": 0.1757, "step": 1767 }, { "epoch": 0.9216133446276146, "grad_norm": 0.9858588747564501, "learning_rate": 8.3152098671965e-08, "loss": 0.1854, "step": 1768 }, { "epoch": 0.9221346191438066, "grad_norm": 0.9862525017141891, "learning_rate": 8.205318457611772e-08, "loss": 0.1859, "step": 1769 }, { "epoch": 0.9226558936599987, "grad_norm": 1.0116874533509572, "learning_rate": 8.096145916829529e-08, "loss": 0.1756, "step": 1770 }, { "epoch": 0.9231771681761908, "grad_norm": 0.966563812364647, "learning_rate": 7.98769256942572e-08, "loss": 0.1679, "step": 1771 }, { "epoch": 0.9236984426923829, "grad_norm": 0.9882146281688113, "learning_rate": 7.879958737838201e-08, "loss": 0.1877, "step": 1772 }, { "epoch": 0.924219717208575, "grad_norm": 0.9573779183902307, "learning_rate": 7.77294474236534e-08, "loss": 0.1773, "step": 1773 }, { "epoch": 0.9247409917247671, "grad_norm": 1.0125029487857253, "learning_rate": 7.66665090116575e-08, "loss": 0.1889, "step": 1774 }, { "epoch": 0.9252622662409592, "grad_norm": 0.9814908494118568, "learning_rate": 7.561077530256755e-08, "loss": 0.1837, "step": 1775 }, { "epoch": 0.9257835407571512, "grad_norm": 1.0057087926289285, "learning_rate": 7.456224943513779e-08, "loss": 0.1952, "step": 1776 }, { "epoch": 0.9263048152733433, "grad_norm": 0.9491641116301497, "learning_rate": 7.352093452669324e-08, "loss": 0.1787, "step": 1777 }, { "epoch": 0.9268260897895354, "grad_norm": 0.9733883352714918, "learning_rate": 7.248683367311937e-08, "loss": 0.1841, "step": 1778 }, { "epoch": 0.9273473643057275, "grad_norm": 1.0650855434205495, "learning_rate": 7.145994994885497e-08, "loss": 0.1821, "step": 1779 }, { "epoch": 0.9278686388219196, "grad_norm": 0.9395412545960169, "learning_rate": 7.044028640688122e-08, "loss": 0.1768, "step": 1780 }, { "epoch": 0.9283899133381117, "grad_norm": 0.939290034198391, "learning_rate": 6.942784607871373e-08, "loss": 0.1759, "step": 1781 }, { "epoch": 0.9289111878543038, "grad_norm": 0.975299666327165, "learning_rate": 6.842263197439303e-08, "loss": 0.1776, "step": 1782 }, { "epoch": 0.9294324623704958, "grad_norm": 0.9578243465235713, "learning_rate": 6.742464708247548e-08, "loss": 0.1889, "step": 1783 }, { "epoch": 0.9299537368866879, "grad_norm": 0.976760719372984, "learning_rate": 6.643389437002462e-08, "loss": 0.1671, "step": 1784 }, { "epoch": 0.93047501140288, "grad_norm": 0.9681019698776531, "learning_rate": 6.545037678260257e-08, "loss": 0.1823, "step": 1785 }, { "epoch": 0.9309962859190721, "grad_norm": 0.9418208808797467, "learning_rate": 6.447409724426063e-08, "loss": 0.1688, "step": 1786 }, { "epoch": 0.9315175604352642, "grad_norm": 0.9542619947417413, "learning_rate": 6.350505865753142e-08, "loss": 0.1722, "step": 1787 }, { "epoch": 0.9320388349514563, "grad_norm": 0.9039000863194366, "learning_rate": 6.254326390341958e-08, "loss": 0.1577, "step": 1788 }, { "epoch": 0.9325601094676484, "grad_norm": 1.0115354290432894, "learning_rate": 6.158871584139303e-08, "loss": 0.1828, "step": 1789 }, { "epoch": 0.9330813839838404, "grad_norm": 0.9691949292307307, "learning_rate": 6.06414173093764e-08, "loss": 0.1841, "step": 1790 }, { "epoch": 0.9336026585000325, "grad_norm": 0.9779034851126065, "learning_rate": 5.970137112373903e-08, "loss": 0.1789, "step": 1791 }, { "epoch": 0.9341239330162247, "grad_norm": 1.0407528483833595, "learning_rate": 5.876858007929004e-08, "loss": 0.1953, "step": 1792 }, { "epoch": 0.9346452075324168, "grad_norm": 0.9502877127875533, "learning_rate": 5.784304694926773e-08, "loss": 0.1655, "step": 1793 }, { "epoch": 0.9351664820486089, "grad_norm": 0.9913769974126586, "learning_rate": 5.692477448533351e-08, "loss": 0.1764, "step": 1794 }, { "epoch": 0.935687756564801, "grad_norm": 0.9549107760173099, "learning_rate": 5.601376541756076e-08, "loss": 0.1748, "step": 1795 }, { "epoch": 0.9362090310809931, "grad_norm": 0.929469099400491, "learning_rate": 5.511002245442987e-08, "loss": 0.1738, "step": 1796 }, { "epoch": 0.9367303055971851, "grad_norm": 0.9376408086550527, "learning_rate": 5.4213548282817664e-08, "loss": 0.1787, "step": 1797 }, { "epoch": 0.9372515801133772, "grad_norm": 0.9752712888369567, "learning_rate": 5.3324345567990485e-08, "loss": 0.1811, "step": 1798 }, { "epoch": 0.9377728546295693, "grad_norm": 0.9339109536282504, "learning_rate": 5.244241695359753e-08, "loss": 0.177, "step": 1799 }, { "epoch": 0.9382941291457614, "grad_norm": 0.9362640052206154, "learning_rate": 5.156776506166028e-08, "loss": 0.1651, "step": 1800 }, { "epoch": 0.9388154036619535, "grad_norm": 1.023708920025889, "learning_rate": 5.070039249256642e-08, "loss": 0.2017, "step": 1801 }, { "epoch": 0.9393366781781456, "grad_norm": 1.0157857758661593, "learning_rate": 4.984030182506233e-08, "loss": 0.1755, "step": 1802 }, { "epoch": 0.9398579526943377, "grad_norm": 0.9717374919195858, "learning_rate": 4.89874956162445e-08, "loss": 0.183, "step": 1803 }, { "epoch": 0.9403792272105298, "grad_norm": 0.9900306970894935, "learning_rate": 4.814197640155199e-08, "loss": 0.1795, "step": 1804 }, { "epoch": 0.9409005017267218, "grad_norm": 0.967636437974983, "learning_rate": 4.73037466947604e-08, "loss": 0.1775, "step": 1805 }, { "epoch": 0.9414217762429139, "grad_norm": 0.9676033944229104, "learning_rate": 4.64728089879718e-08, "loss": 0.1862, "step": 1806 }, { "epoch": 0.941943050759106, "grad_norm": 0.9423911448551001, "learning_rate": 4.564916575160977e-08, "loss": 0.1729, "step": 1807 }, { "epoch": 0.9424643252752981, "grad_norm": 0.9835107361519987, "learning_rate": 4.4832819434410535e-08, "loss": 0.1844, "step": 1808 }, { "epoch": 0.9429855997914902, "grad_norm": 0.9705696207920599, "learning_rate": 4.402377246341627e-08, "loss": 0.17, "step": 1809 }, { "epoch": 0.9435068743076823, "grad_norm": 1.011121337318435, "learning_rate": 4.322202724396818e-08, "loss": 0.1765, "step": 1810 }, { "epoch": 0.9440281488238744, "grad_norm": 0.9970207191170647, "learning_rate": 4.2427586159697886e-08, "loss": 0.1811, "step": 1811 }, { "epoch": 0.9445494233400664, "grad_norm": 0.9538788894330599, "learning_rate": 4.164045157252272e-08, "loss": 0.1796, "step": 1812 }, { "epoch": 0.9450706978562585, "grad_norm": 0.9552600819442721, "learning_rate": 4.086062582263656e-08, "loss": 0.1669, "step": 1813 }, { "epoch": 0.9455919723724506, "grad_norm": 1.0041872565936156, "learning_rate": 4.008811122850426e-08, "loss": 0.1843, "step": 1814 }, { "epoch": 0.9461132468886427, "grad_norm": 1.0088670031477684, "learning_rate": 3.9322910086853625e-08, "loss": 0.1894, "step": 1815 }, { "epoch": 0.9466345214048348, "grad_norm": 0.9739660114381572, "learning_rate": 3.856502467266987e-08, "loss": 0.1737, "step": 1816 }, { "epoch": 0.9471557959210269, "grad_norm": 0.9807421580490452, "learning_rate": 3.7814457239187255e-08, "loss": 0.1792, "step": 1817 }, { "epoch": 0.947677070437219, "grad_norm": 1.0412592625400767, "learning_rate": 3.707121001788438e-08, "loss": 0.1973, "step": 1818 }, { "epoch": 0.948198344953411, "grad_norm": 0.9606151347345824, "learning_rate": 3.633528521847507e-08, "loss": 0.1765, "step": 1819 }, { "epoch": 0.9487196194696031, "grad_norm": 0.9910087858061905, "learning_rate": 3.5606685028904686e-08, "loss": 0.1896, "step": 1820 }, { "epoch": 0.9492408939857953, "grad_norm": 1.0248547952029403, "learning_rate": 3.4885411615341034e-08, "loss": 0.1827, "step": 1821 }, { "epoch": 0.9497621685019874, "grad_norm": 0.9804471832119095, "learning_rate": 3.4171467122169344e-08, "loss": 0.1791, "step": 1822 }, { "epoch": 0.9502834430181795, "grad_norm": 0.9426961210980224, "learning_rate": 3.346485367198588e-08, "loss": 0.1821, "step": 1823 }, { "epoch": 0.9508047175343716, "grad_norm": 0.9666961026314688, "learning_rate": 3.2765573365591e-08, "loss": 0.1771, "step": 1824 }, { "epoch": 0.9513259920505637, "grad_norm": 0.9843902692560007, "learning_rate": 3.207362828198307e-08, "loss": 0.1812, "step": 1825 }, { "epoch": 0.9518472665667557, "grad_norm": 0.954584741810203, "learning_rate": 3.138902047835263e-08, "loss": 0.1781, "step": 1826 }, { "epoch": 0.9523685410829478, "grad_norm": 0.9758177238201036, "learning_rate": 3.071175199007653e-08, "loss": 0.1763, "step": 1827 }, { "epoch": 0.9528898155991399, "grad_norm": 0.9565731632841014, "learning_rate": 3.00418248307105e-08, "loss": 0.1762, "step": 1828 }, { "epoch": 0.953411090115332, "grad_norm": 0.982455562401073, "learning_rate": 2.9379240991984635e-08, "loss": 0.1769, "step": 1829 }, { "epoch": 0.9539323646315241, "grad_norm": 0.9777036199887083, "learning_rate": 2.8724002443797083e-08, "loss": 0.1694, "step": 1830 }, { "epoch": 0.9544536391477162, "grad_norm": 0.9873801352066159, "learning_rate": 2.807611113420816e-08, "loss": 0.1901, "step": 1831 }, { "epoch": 0.9549749136639083, "grad_norm": 0.9383563774784727, "learning_rate": 2.743556898943345e-08, "loss": 0.1732, "step": 1832 }, { "epoch": 0.9554961881801003, "grad_norm": 0.9941922540262297, "learning_rate": 2.680237791384044e-08, "loss": 0.1687, "step": 1833 }, { "epoch": 0.9560174626962924, "grad_norm": 0.9811859829646321, "learning_rate": 2.6176539789939947e-08, "loss": 0.1787, "step": 1834 }, { "epoch": 0.9565387372124845, "grad_norm": 0.9723634244862422, "learning_rate": 2.5558056478383887e-08, "loss": 0.1721, "step": 1835 }, { "epoch": 0.9570600117286766, "grad_norm": 0.9529231554509275, "learning_rate": 2.4946929817956376e-08, "loss": 0.1726, "step": 1836 }, { "epoch": 0.9575812862448687, "grad_norm": 0.9847804287996057, "learning_rate": 2.4343161625570433e-08, "loss": 0.1799, "step": 1837 }, { "epoch": 0.9581025607610608, "grad_norm": 0.9941204892407931, "learning_rate": 2.3746753696262113e-08, "loss": 0.1767, "step": 1838 }, { "epoch": 0.9586238352772529, "grad_norm": 0.9577619056788892, "learning_rate": 2.3157707803184438e-08, "loss": 0.1735, "step": 1839 }, { "epoch": 0.959145109793445, "grad_norm": 1.0057338275939725, "learning_rate": 2.2576025697603198e-08, "loss": 0.179, "step": 1840 }, { "epoch": 0.959666384309637, "grad_norm": 0.9547311082594001, "learning_rate": 2.2001709108891155e-08, "loss": 0.1721, "step": 1841 }, { "epoch": 0.9601876588258291, "grad_norm": 0.9520610446159212, "learning_rate": 2.143475974452275e-08, "loss": 0.1816, "step": 1842 }, { "epoch": 0.9607089333420212, "grad_norm": 0.9653141350324072, "learning_rate": 2.0875179290069934e-08, "loss": 0.1759, "step": 1843 }, { "epoch": 0.9612302078582133, "grad_norm": 0.9856672742904604, "learning_rate": 2.032296940919526e-08, "loss": 0.1792, "step": 1844 }, { "epoch": 0.9617514823744054, "grad_norm": 1.0179941740243892, "learning_rate": 1.9778131743649064e-08, "loss": 0.1823, "step": 1845 }, { "epoch": 0.9622727568905975, "grad_norm": 0.9494362255500836, "learning_rate": 1.9240667913264233e-08, "loss": 0.1639, "step": 1846 }, { "epoch": 0.9627940314067897, "grad_norm": 0.9549890993068286, "learning_rate": 1.8710579515948957e-08, "loss": 0.1877, "step": 1847 }, { "epoch": 0.9633153059229816, "grad_norm": 0.9899460282043995, "learning_rate": 1.8187868127685914e-08, "loss": 0.1812, "step": 1848 }, { "epoch": 0.9638365804391738, "grad_norm": 0.9220303796166097, "learning_rate": 1.767253530252422e-08, "loss": 0.165, "step": 1849 }, { "epoch": 0.9643578549553659, "grad_norm": 0.9530499549516781, "learning_rate": 1.716458257257636e-08, "loss": 0.1846, "step": 1850 }, { "epoch": 0.964879129471558, "grad_norm": 0.9523111335894338, "learning_rate": 1.666401144801405e-08, "loss": 0.1845, "step": 1851 }, { "epoch": 0.9654004039877501, "grad_norm": 0.9562990194496472, "learning_rate": 1.6170823417062386e-08, "loss": 0.1809, "step": 1852 }, { "epoch": 0.9659216785039422, "grad_norm": 0.9631481610629716, "learning_rate": 1.568501994599625e-08, "loss": 0.1865, "step": 1853 }, { "epoch": 0.9664429530201343, "grad_norm": 1.0239927293856008, "learning_rate": 1.5206602479135857e-08, "loss": 0.1912, "step": 1854 }, { "epoch": 0.9669642275363263, "grad_norm": 0.9467044982817666, "learning_rate": 1.4735572438842605e-08, "loss": 0.1788, "step": 1855 }, { "epoch": 0.9674855020525184, "grad_norm": 0.9330536711923619, "learning_rate": 1.4271931225514624e-08, "loss": 0.1653, "step": 1856 }, { "epoch": 0.9680067765687105, "grad_norm": 0.9853895262009704, "learning_rate": 1.381568021758234e-08, "loss": 0.1838, "step": 1857 }, { "epoch": 0.9685280510849026, "grad_norm": 0.9829637868018, "learning_rate": 1.336682077150514e-08, "loss": 0.1801, "step": 1858 }, { "epoch": 0.9690493256010947, "grad_norm": 1.019613063122009, "learning_rate": 1.2925354221766661e-08, "loss": 0.1944, "step": 1859 }, { "epoch": 0.9695706001172868, "grad_norm": 0.9769879106201981, "learning_rate": 1.2491281880871175e-08, "loss": 0.1866, "step": 1860 }, { "epoch": 0.9700918746334789, "grad_norm": 0.9529620322406079, "learning_rate": 1.206460503933915e-08, "loss": 0.1726, "step": 1861 }, { "epoch": 0.9706131491496709, "grad_norm": 0.9871500196046391, "learning_rate": 1.1645324965704473e-08, "loss": 0.1764, "step": 1862 }, { "epoch": 0.971134423665863, "grad_norm": 0.9920422198495212, "learning_rate": 1.1233442906509462e-08, "loss": 0.1856, "step": 1863 }, { "epoch": 0.9716556981820551, "grad_norm": 0.9926789149113099, "learning_rate": 1.0828960086302075e-08, "loss": 0.1735, "step": 1864 }, { "epoch": 0.9721769726982472, "grad_norm": 1.0341066215790733, "learning_rate": 1.0431877707632043e-08, "loss": 0.1865, "step": 1865 }, { "epoch": 0.9726982472144393, "grad_norm": 0.9854581872034558, "learning_rate": 1.0042196951046968e-08, "loss": 0.1834, "step": 1866 }, { "epoch": 0.9732195217306314, "grad_norm": 1.1297948530399125, "learning_rate": 9.659918975088444e-09, "loss": 0.1955, "step": 1867 }, { "epoch": 0.9737407962468235, "grad_norm": 1.0247512856595942, "learning_rate": 9.285044916290675e-09, "loss": 0.1891, "step": 1868 }, { "epoch": 0.9742620707630155, "grad_norm": 1.004117650985569, "learning_rate": 8.91757588917408e-09, "loss": 0.1802, "step": 1869 }, { "epoch": 0.9747833452792076, "grad_norm": 0.9811005564124128, "learning_rate": 8.557512986244464e-09, "loss": 0.1824, "step": 1870 }, { "epoch": 0.9753046197953997, "grad_norm": 1.0235065678803086, "learning_rate": 8.20485727798831e-09, "loss": 0.1766, "step": 1871 }, { "epoch": 0.9758258943115918, "grad_norm": 0.9684314864439314, "learning_rate": 7.85960981287026e-09, "loss": 0.173, "step": 1872 }, { "epoch": 0.9763471688277839, "grad_norm": 1.0244425663418408, "learning_rate": 7.521771617330365e-09, "loss": 0.1688, "step": 1873 }, { "epoch": 0.976868443343976, "grad_norm": 1.0859697598329392, "learning_rate": 7.191343695779618e-09, "loss": 0.1881, "step": 1874 }, { "epoch": 0.9773897178601682, "grad_norm": 1.0027582554431416, "learning_rate": 6.86832703059831e-09, "loss": 0.1907, "step": 1875 }, { "epoch": 0.9779109923763603, "grad_norm": 0.966843277209175, "learning_rate": 6.552722582132687e-09, "loss": 0.1774, "step": 1876 }, { "epoch": 0.9784322668925522, "grad_norm": 0.9805017694840367, "learning_rate": 6.24453128869218e-09, "loss": 0.1806, "step": 1877 }, { "epoch": 0.9789535414087444, "grad_norm": 0.9485250193876247, "learning_rate": 5.94375406654607e-09, "loss": 0.1768, "step": 1878 }, { "epoch": 0.9794748159249365, "grad_norm": 0.9889744757779998, "learning_rate": 5.650391809922107e-09, "loss": 0.1861, "step": 1879 }, { "epoch": 0.9799960904411286, "grad_norm": 0.9203513698825401, "learning_rate": 5.364445391001228e-09, "loss": 0.1728, "step": 1880 }, { "epoch": 0.9805173649573207, "grad_norm": 0.9577200856228432, "learning_rate": 5.085915659918672e-09, "loss": 0.1743, "step": 1881 }, { "epoch": 0.9810386394735128, "grad_norm": 1.0558998893036378, "learning_rate": 4.814803444758431e-09, "loss": 0.1763, "step": 1882 }, { "epoch": 0.9815599139897049, "grad_norm": 1.030845679885158, "learning_rate": 4.551109551551581e-09, "loss": 0.1911, "step": 1883 }, { "epoch": 0.9820811885058969, "grad_norm": 1.0383646331449794, "learning_rate": 4.294834764274614e-09, "loss": 0.175, "step": 1884 }, { "epoch": 0.982602463022089, "grad_norm": 0.9543632616013837, "learning_rate": 4.04597984484667e-09, "loss": 0.1782, "step": 1885 }, { "epoch": 0.9831237375382811, "grad_norm": 0.9717416279826157, "learning_rate": 3.804545533126758e-09, "loss": 0.183, "step": 1886 }, { "epoch": 0.9836450120544732, "grad_norm": 0.9729548931314076, "learning_rate": 3.5705325469123644e-09, "loss": 0.1711, "step": 1887 }, { "epoch": 0.9841662865706653, "grad_norm": 0.9930751786965978, "learning_rate": 3.3439415819369604e-09, "loss": 0.1839, "step": 1888 }, { "epoch": 0.9846875610868574, "grad_norm": 0.9311441519535106, "learning_rate": 3.124773311867779e-09, "loss": 0.1733, "step": 1889 }, { "epoch": 0.9852088356030495, "grad_norm": 0.9076890202701972, "learning_rate": 2.9130283883044285e-09, "loss": 0.1617, "step": 1890 }, { "epoch": 0.9857301101192415, "grad_norm": 0.95940450023715, "learning_rate": 2.70870744077556e-09, "loss": 0.1814, "step": 1891 }, { "epoch": 0.9862513846354336, "grad_norm": 0.9828810600577166, "learning_rate": 2.5118110767388682e-09, "loss": 0.1815, "step": 1892 }, { "epoch": 0.9867726591516257, "grad_norm": 0.9701859016924141, "learning_rate": 2.322339881577762e-09, "loss": 0.1867, "step": 1893 }, { "epoch": 0.9872939336678178, "grad_norm": 0.9454338957561875, "learning_rate": 2.140294418600808e-09, "loss": 0.1683, "step": 1894 }, { "epoch": 0.9878152081840099, "grad_norm": 0.9976385720375264, "learning_rate": 1.965675229038677e-09, "loss": 0.1847, "step": 1895 }, { "epoch": 0.988336482700202, "grad_norm": 0.9459984251155135, "learning_rate": 1.7984828320444236e-09, "loss": 0.1759, "step": 1896 }, { "epoch": 0.9888577572163941, "grad_norm": 0.9943708549205489, "learning_rate": 1.6387177246893205e-09, "loss": 0.1794, "step": 1897 }, { "epoch": 0.9893790317325861, "grad_norm": 0.955276488511367, "learning_rate": 1.486380381964525e-09, "loss": 0.1804, "step": 1898 }, { "epoch": 0.9899003062487782, "grad_norm": 1.0166893589029946, "learning_rate": 1.3414712567769161e-09, "loss": 0.1828, "step": 1899 }, { "epoch": 0.9904215807649703, "grad_norm": 0.9808551871654355, "learning_rate": 1.2039907799490935e-09, "loss": 0.1801, "step": 1900 }, { "epoch": 0.9909428552811624, "grad_norm": 0.9382284653572882, "learning_rate": 1.0739393602185454e-09, "loss": 0.1755, "step": 1901 }, { "epoch": 0.9914641297973545, "grad_norm": 0.9763362834740261, "learning_rate": 9.513173842348732e-10, "loss": 0.1699, "step": 1902 }, { "epoch": 0.9919854043135466, "grad_norm": 1.0171291150299013, "learning_rate": 8.361252165597911e-10, "loss": 0.2021, "step": 1903 }, { "epoch": 0.9925066788297388, "grad_norm": 0.9744374859882523, "learning_rate": 7.283631996662933e-10, "loss": 0.1709, "step": 1904 }, { "epoch": 0.9930279533459307, "grad_norm": 0.9653344994919608, "learning_rate": 6.28031653936434e-10, "loss": 0.1874, "step": 1905 }, { "epoch": 0.9935492278621229, "grad_norm": 0.9621859639605892, "learning_rate": 5.351308776613273e-10, "loss": 0.1696, "step": 1906 }, { "epoch": 0.994070502378315, "grad_norm": 0.9895447054675127, "learning_rate": 4.49661147040592e-10, "loss": 0.1886, "step": 1907 }, { "epoch": 0.9945917768945071, "grad_norm": 0.956049965794608, "learning_rate": 3.7162271617985357e-10, "loss": 0.1804, "step": 1908 }, { "epoch": 0.9951130514106992, "grad_norm": 0.9678203013495367, "learning_rate": 3.0101581709185424e-10, "loss": 0.1739, "step": 1909 }, { "epoch": 0.9956343259268913, "grad_norm": 0.9580407642873671, "learning_rate": 2.3784065969451043e-10, "loss": 0.1751, "step": 1910 }, { "epoch": 0.9961556004430834, "grad_norm": 0.9948205703446217, "learning_rate": 1.8209743181146766e-10, "loss": 0.1773, "step": 1911 }, { "epoch": 0.9966768749592755, "grad_norm": 1.0286522179292514, "learning_rate": 1.3378629917015772e-10, "loss": 0.1898, "step": 1912 }, { "epoch": 0.9971981494754675, "grad_norm": 1.0318534355598075, "learning_rate": 9.290740540207621e-11, "loss": 0.2015, "step": 1913 }, { "epoch": 0.9977194239916596, "grad_norm": 1.0006269657703835, "learning_rate": 5.94608720427825e-11, "loss": 0.1774, "step": 1914 }, { "epoch": 0.9982406985078517, "grad_norm": 0.9509156275592338, "learning_rate": 3.344679853023447e-11, "loss": 0.1849, "step": 1915 }, { "epoch": 0.9987619730240438, "grad_norm": 0.9960321151226665, "learning_rate": 1.4865262205898678e-11, "loss": 0.1845, "step": 1916 }, { "epoch": 0.9992832475402359, "grad_norm": 0.9897232776542689, "learning_rate": 3.716318313640166e-12, "loss": 0.1807, "step": 1917 }, { "epoch": 0.999804522056428, "grad_norm": 0.9846363278734621, "learning_rate": 0.0, "loss": 0.1743, "step": 1918 }, { "epoch": 0.999804522056428, "step": 1918, "total_flos": 331183228846080.0, "train_loss": 0.20316935279353193, "train_runtime": 20098.2432, "train_samples_per_second": 6.108, "train_steps_per_second": 0.095 } ], "logging_steps": 1, "max_steps": 1918, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 331183228846080.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }