| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 4971, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.006035913686434284, |
| "grad_norm": 13.465597639885102, |
| "learning_rate": 1.8072289156626505e-07, |
| "loss": 0.8887, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.012071827372868568, |
| "grad_norm": 12.414937705631793, |
| "learning_rate": 3.8152610441767073e-07, |
| "loss": 0.8871, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01810774105930285, |
| "grad_norm": 5.604563580675332, |
| "learning_rate": 5.823293172690764e-07, |
| "loss": 0.7943, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.024143654745737136, |
| "grad_norm": 3.082127771205323, |
| "learning_rate": 7.83132530120482e-07, |
| "loss": 0.6989, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03017956843217142, |
| "grad_norm": 1.9253363533227204, |
| "learning_rate": 9.839357429718876e-07, |
| "loss": 0.6283, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0362154821186057, |
| "grad_norm": 1.2352792533570607, |
| "learning_rate": 1.1847389558232934e-06, |
| "loss": 0.5916, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04225139580503999, |
| "grad_norm": 0.8094703225757798, |
| "learning_rate": 1.385542168674699e-06, |
| "loss": 0.5623, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04828730949147427, |
| "grad_norm": 0.7924082712954621, |
| "learning_rate": 1.5863453815261046e-06, |
| "loss": 0.536, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05432322317790855, |
| "grad_norm": 0.7765422489934142, |
| "learning_rate": 1.7871485943775102e-06, |
| "loss": 0.5246, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.06035913686434284, |
| "grad_norm": 1.2024335532490196, |
| "learning_rate": 1.987951807228916e-06, |
| "loss": 0.5112, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06639505055077713, |
| "grad_norm": 0.7361271470838762, |
| "learning_rate": 2.1887550200803216e-06, |
| "loss": 0.4973, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0724309642372114, |
| "grad_norm": 0.9636947285799289, |
| "learning_rate": 2.389558232931727e-06, |
| "loss": 0.4926, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07846687792364569, |
| "grad_norm": 0.834221678860187, |
| "learning_rate": 2.590361445783133e-06, |
| "loss": 0.4868, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08450279161007998, |
| "grad_norm": 0.721459089158014, |
| "learning_rate": 2.791164658634538e-06, |
| "loss": 0.4836, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09053870529651425, |
| "grad_norm": 0.7680662169711512, |
| "learning_rate": 2.991967871485944e-06, |
| "loss": 0.4759, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09657461898294854, |
| "grad_norm": 0.8074470103289187, |
| "learning_rate": 3.1927710843373494e-06, |
| "loss": 0.4753, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10261053266938283, |
| "grad_norm": 0.7821569953929599, |
| "learning_rate": 3.393574297188755e-06, |
| "loss": 0.4689, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.1086464463558171, |
| "grad_norm": 0.8046561770221946, |
| "learning_rate": 3.5943775100401606e-06, |
| "loss": 0.4678, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1146823600422514, |
| "grad_norm": 0.8787311236716008, |
| "learning_rate": 3.7951807228915664e-06, |
| "loss": 0.463, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.12071827372868568, |
| "grad_norm": 0.8810490177348705, |
| "learning_rate": 3.995983935742972e-06, |
| "loss": 0.4601, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12675418741511996, |
| "grad_norm": 0.8889957032229883, |
| "learning_rate": 4.196787148594378e-06, |
| "loss": 0.4589, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.13279010110155426, |
| "grad_norm": 1.0047774511651133, |
| "learning_rate": 4.397590361445783e-06, |
| "loss": 0.4533, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.13882601478798853, |
| "grad_norm": 0.8382898002966074, |
| "learning_rate": 4.598393574297189e-06, |
| "loss": 0.4541, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1448619284744228, |
| "grad_norm": 0.8867952107395614, |
| "learning_rate": 4.799196787148594e-06, |
| "loss": 0.4488, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1508978421608571, |
| "grad_norm": 0.8306941422038123, |
| "learning_rate": 5e-06, |
| "loss": 0.4521, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15693375584729138, |
| "grad_norm": 0.8874942497893604, |
| "learning_rate": 5.200803212851407e-06, |
| "loss": 0.4505, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16296966953372566, |
| "grad_norm": 0.8911658824764517, |
| "learning_rate": 5.401606425702812e-06, |
| "loss": 0.446, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.16900558322015996, |
| "grad_norm": 0.8852855388263275, |
| "learning_rate": 5.602409638554217e-06, |
| "loss": 0.4452, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.17504149690659423, |
| "grad_norm": 0.9910318655855725, |
| "learning_rate": 5.803212851405623e-06, |
| "loss": 0.4413, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.1810774105930285, |
| "grad_norm": 1.0110708733608424, |
| "learning_rate": 6.004016064257029e-06, |
| "loss": 0.4397, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1871133242794628, |
| "grad_norm": 0.9003864963841174, |
| "learning_rate": 6.2048192771084344e-06, |
| "loss": 0.4414, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.19314923796589709, |
| "grad_norm": 0.906399226331659, |
| "learning_rate": 6.40562248995984e-06, |
| "loss": 0.4372, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.19918515165233136, |
| "grad_norm": 1.071695971731785, |
| "learning_rate": 6.606425702811245e-06, |
| "loss": 0.4381, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.20522106533876566, |
| "grad_norm": 0.9761062070856111, |
| "learning_rate": 6.8072289156626514e-06, |
| "loss": 0.4343, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.21125697902519994, |
| "grad_norm": 0.9721838815300707, |
| "learning_rate": 7.008032128514058e-06, |
| "loss": 0.4373, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.2172928927116342, |
| "grad_norm": 0.8909414468062403, |
| "learning_rate": 7.208835341365462e-06, |
| "loss": 0.4336, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2233288063980685, |
| "grad_norm": 0.964718436271309, |
| "learning_rate": 7.4096385542168684e-06, |
| "loss": 0.4348, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2293647200845028, |
| "grad_norm": 1.025409695885071, |
| "learning_rate": 7.610441767068274e-06, |
| "loss": 0.4326, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.23540063377093706, |
| "grad_norm": 0.9270275542948012, |
| "learning_rate": 7.81124497991968e-06, |
| "loss": 0.4324, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.24143654745737136, |
| "grad_norm": 0.9390965093376761, |
| "learning_rate": 8.012048192771085e-06, |
| "loss": 0.4302, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24747246114380564, |
| "grad_norm": 0.7707812298350031, |
| "learning_rate": 8.21285140562249e-06, |
| "loss": 0.4276, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2535083748302399, |
| "grad_norm": 0.8215921682895242, |
| "learning_rate": 8.413654618473896e-06, |
| "loss": 0.4274, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2595442885166742, |
| "grad_norm": 1.0290878620245738, |
| "learning_rate": 8.614457831325302e-06, |
| "loss": 0.427, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2655802022031085, |
| "grad_norm": 0.9009095092288704, |
| "learning_rate": 8.815261044176707e-06, |
| "loss": 0.4232, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.27161611588954276, |
| "grad_norm": 0.9646916353387767, |
| "learning_rate": 9.016064257028112e-06, |
| "loss": 0.4235, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.27765202957597707, |
| "grad_norm": 0.8009669905789347, |
| "learning_rate": 9.21686746987952e-06, |
| "loss": 0.4248, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.28368794326241137, |
| "grad_norm": 2.57971922495045, |
| "learning_rate": 9.417670682730925e-06, |
| "loss": 0.4246, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.2897238569488456, |
| "grad_norm": 0.9225235875464007, |
| "learning_rate": 9.61847389558233e-06, |
| "loss": 0.4256, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2957597706352799, |
| "grad_norm": 0.8937790567235143, |
| "learning_rate": 9.819277108433736e-06, |
| "loss": 0.4232, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.3017956843217142, |
| "grad_norm": 0.992661961364272, |
| "learning_rate": 9.99999876677608e-06, |
| "loss": 0.4236, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.30783159800814847, |
| "grad_norm": 1.1203639087859305, |
| "learning_rate": 9.999850780641762e-06, |
| "loss": 0.423, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.31386751169458277, |
| "grad_norm": 0.9504675074156581, |
| "learning_rate": 9.999456158087994e-06, |
| "loss": 0.4255, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.31990342538101707, |
| "grad_norm": 1.1526705071263037, |
| "learning_rate": 9.998814918581017e-06, |
| "loss": 0.4236, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.3259393390674513, |
| "grad_norm": 0.9400926329756719, |
| "learning_rate": 9.99792709375238e-06, |
| "loss": 0.4193, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3319752527538856, |
| "grad_norm": 0.761979605644821, |
| "learning_rate": 9.996792727397374e-06, |
| "loss": 0.4178, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3380111664403199, |
| "grad_norm": 0.7761858463434534, |
| "learning_rate": 9.995411875472882e-06, |
| "loss": 0.4172, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.34404708012675417, |
| "grad_norm": 0.8353265789234773, |
| "learning_rate": 9.993784606094612e-06, |
| "loss": 0.417, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.35008299381318847, |
| "grad_norm": 0.7921534241896437, |
| "learning_rate": 9.991910999533739e-06, |
| "loss": 0.4164, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.35611890749962277, |
| "grad_norm": 0.8368518529458858, |
| "learning_rate": 9.98979114821294e-06, |
| "loss": 0.4212, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.362154821186057, |
| "grad_norm": 0.8526689259731893, |
| "learning_rate": 9.98742515670185e-06, |
| "loss": 0.413, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3681907348724913, |
| "grad_norm": 0.8691355689423315, |
| "learning_rate": 9.98481314171188e-06, |
| "loss": 0.4147, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3742266485589256, |
| "grad_norm": 0.7413766525933784, |
| "learning_rate": 9.981955232090484e-06, |
| "loss": 0.4202, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.38026256224535987, |
| "grad_norm": 0.862826800304683, |
| "learning_rate": 9.978851568814789e-06, |
| "loss": 0.4144, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.38629847593179417, |
| "grad_norm": 0.852995884285724, |
| "learning_rate": 9.975502304984643e-06, |
| "loss": 0.4159, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3923343896182285, |
| "grad_norm": 0.8190268708459463, |
| "learning_rate": 9.971907605815065e-06, |
| "loss": 0.4133, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3983703033046627, |
| "grad_norm": 0.7826738241592833, |
| "learning_rate": 9.968067648628092e-06, |
| "loss": 0.417, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.404406216991097, |
| "grad_norm": 0.8234056482304477, |
| "learning_rate": 9.963982622844037e-06, |
| "loss": 0.4151, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.4104421306775313, |
| "grad_norm": 0.8389822495874198, |
| "learning_rate": 9.959652729972138e-06, |
| "loss": 0.4142, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.41647804436396557, |
| "grad_norm": 0.7530220222404655, |
| "learning_rate": 9.955078183600626e-06, |
| "loss": 0.4135, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.4225139580503999, |
| "grad_norm": 0.8094044727188283, |
| "learning_rate": 9.950259209386182e-06, |
| "loss": 0.4076, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4285498717368342, |
| "grad_norm": 0.7704390882655109, |
| "learning_rate": 9.945196045042812e-06, |
| "loss": 0.41, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.4345857854232684, |
| "grad_norm": 0.9003987196323937, |
| "learning_rate": 9.93988894033011e-06, |
| "loss": 0.4114, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4406216991097027, |
| "grad_norm": 0.8729571471009108, |
| "learning_rate": 9.934338157040953e-06, |
| "loss": 0.4128, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.446657612796137, |
| "grad_norm": 0.7801434856688376, |
| "learning_rate": 9.928543968988576e-06, |
| "loss": 0.4103, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4526935264825713, |
| "grad_norm": 0.9417689284475159, |
| "learning_rate": 9.922506661993067e-06, |
| "loss": 0.4086, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.4587294401690056, |
| "grad_norm": 0.8877368018323296, |
| "learning_rate": 9.91622653386727e-06, |
| "loss": 0.4139, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4647653538554399, |
| "grad_norm": 0.7960343939884429, |
| "learning_rate": 9.909703894402093e-06, |
| "loss": 0.4072, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4708012675418741, |
| "grad_norm": 0.7142525800658928, |
| "learning_rate": 9.90293906535123e-06, |
| "loss": 0.4069, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4768371812283084, |
| "grad_norm": 0.8168998091378754, |
| "learning_rate": 9.895932380415277e-06, |
| "loss": 0.4053, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.48287309491474273, |
| "grad_norm": 0.7851582099155968, |
| "learning_rate": 9.888684185225291e-06, |
| "loss": 0.4096, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.48890900860117703, |
| "grad_norm": 0.7313895363802666, |
| "learning_rate": 9.881194837325722e-06, |
| "loss": 0.4035, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4949449222876113, |
| "grad_norm": 0.801599057157289, |
| "learning_rate": 9.873464706156785e-06, |
| "loss": 0.4082, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5009808359740455, |
| "grad_norm": 0.7959824627607599, |
| "learning_rate": 9.865494173036238e-06, |
| "loss": 0.4086, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.5070167496604798, |
| "grad_norm": 0.7643194639900054, |
| "learning_rate": 9.857283631140563e-06, |
| "loss": 0.4097, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5130526633469141, |
| "grad_norm": 0.8141162481887632, |
| "learning_rate": 9.848833485485577e-06, |
| "loss": 0.4068, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5190885770333484, |
| "grad_norm": 0.7263606575446551, |
| "learning_rate": 9.840144152906455e-06, |
| "loss": 0.4052, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5251244907197827, |
| "grad_norm": 0.7326820835121685, |
| "learning_rate": 9.831216062037163e-06, |
| "loss": 0.403, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.531160404406217, |
| "grad_norm": 0.7722145618849807, |
| "learning_rate": 9.822049653289318e-06, |
| "loss": 0.4041, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5371963180926512, |
| "grad_norm": 0.7035970302521439, |
| "learning_rate": 9.81264537883046e-06, |
| "loss": 0.401, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5432322317790855, |
| "grad_norm": 0.6580207236042055, |
| "learning_rate": 9.803003702561753e-06, |
| "loss": 0.4057, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5492681454655198, |
| "grad_norm": 0.6960070468306416, |
| "learning_rate": 9.79312510009509e-06, |
| "loss": 0.4103, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5553040591519541, |
| "grad_norm": 0.7088936549744779, |
| "learning_rate": 9.783010058729644e-06, |
| "loss": 0.4024, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5613399728383884, |
| "grad_norm": 0.8173990374915286, |
| "learning_rate": 9.772659077427824e-06, |
| "loss": 0.3983, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5673758865248227, |
| "grad_norm": 0.7248588219467303, |
| "learning_rate": 9.762072666790658e-06, |
| "loss": 0.4042, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5734118002112569, |
| "grad_norm": 0.6953286894486166, |
| "learning_rate": 9.751251349032615e-06, |
| "loss": 0.4052, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5794477138976912, |
| "grad_norm": 0.6805775618542874, |
| "learning_rate": 9.74019565795584e-06, |
| "loss": 0.4028, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5854836275841255, |
| "grad_norm": 0.7073250522342893, |
| "learning_rate": 9.728906138923823e-06, |
| "loss": 0.4031, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5915195412705598, |
| "grad_norm": 0.8161486510568995, |
| "learning_rate": 9.71738334883449e-06, |
| "loss": 0.4012, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5975554549569941, |
| "grad_norm": 0.7478470587664012, |
| "learning_rate": 9.705627856092743e-06, |
| "loss": 0.4035, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.6035913686434284, |
| "grad_norm": 1.2181648223419725, |
| "learning_rate": 9.69364024058242e-06, |
| "loss": 0.3994, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6096272823298626, |
| "grad_norm": 0.724496170506016, |
| "learning_rate": 9.681421093637677e-06, |
| "loss": 0.4003, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6156631960162969, |
| "grad_norm": 0.7245373569956688, |
| "learning_rate": 9.668971018013835e-06, |
| "loss": 0.3993, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6216991097027312, |
| "grad_norm": 1.3707555561464966, |
| "learning_rate": 9.656290627857638e-06, |
| "loss": 0.4031, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6277350233891655, |
| "grad_norm": 0.8617205371794142, |
| "learning_rate": 9.643380548676957e-06, |
| "loss": 0.3989, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6337709370755998, |
| "grad_norm": 0.7218421707442351, |
| "learning_rate": 9.63024141730994e-06, |
| "loss": 0.4009, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.6398068507620341, |
| "grad_norm": 0.7919863849580143, |
| "learning_rate": 9.616873881893593e-06, |
| "loss": 0.402, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6458427644484683, |
| "grad_norm": 0.7643496416415103, |
| "learning_rate": 9.603278601831806e-06, |
| "loss": 0.3966, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6518786781349026, |
| "grad_norm": 0.8387350986976135, |
| "learning_rate": 9.58945624776284e-06, |
| "loss": 0.3974, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6579145918213369, |
| "grad_norm": 0.7195707742464319, |
| "learning_rate": 9.575407501526218e-06, |
| "loss": 0.4033, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6639505055077712, |
| "grad_norm": 0.8948583587192116, |
| "learning_rate": 9.561133056129122e-06, |
| "loss": 0.4005, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6699864191942055, |
| "grad_norm": 0.7784558611785358, |
| "learning_rate": 9.546633615712184e-06, |
| "loss": 0.3969, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6760223328806398, |
| "grad_norm": 0.7279188084081983, |
| "learning_rate": 9.531909895514766e-06, |
| "loss": 0.3968, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6820582465670741, |
| "grad_norm": 0.7707824454002812, |
| "learning_rate": 9.516962621839667e-06, |
| "loss": 0.3941, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6880941602535083, |
| "grad_norm": 0.7559246242676043, |
| "learning_rate": 9.501792532017304e-06, |
| "loss": 0.3935, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6941300739399426, |
| "grad_norm": 0.7670492895949397, |
| "learning_rate": 9.48640037436934e-06, |
| "loss": 0.3962, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.7001659876263769, |
| "grad_norm": 0.7574175499302432, |
| "learning_rate": 9.470786908171761e-06, |
| "loss": 0.396, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7062019013128112, |
| "grad_norm": 1.1364368407573255, |
| "learning_rate": 9.454952903617434e-06, |
| "loss": 0.3987, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7122378149992455, |
| "grad_norm": 0.6929517509246322, |
| "learning_rate": 9.438899141778105e-06, |
| "loss": 0.3959, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7182737286856798, |
| "grad_norm": 0.7239918001848392, |
| "learning_rate": 9.42262641456588e-06, |
| "loss": 0.3961, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.724309642372114, |
| "grad_norm": 0.7351627240649914, |
| "learning_rate": 9.406135524694146e-06, |
| "loss": 0.3946, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7303455560585483, |
| "grad_norm": 0.7178193311197739, |
| "learning_rate": 9.389427285637986e-06, |
| "loss": 0.3934, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7363814697449826, |
| "grad_norm": 0.7197436378060236, |
| "learning_rate": 9.372502521594052e-06, |
| "loss": 0.3951, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7424173834314169, |
| "grad_norm": 0.7020942866993558, |
| "learning_rate": 9.355362067439899e-06, |
| "loss": 0.3953, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.7484532971178512, |
| "grad_norm": 0.6493652144119091, |
| "learning_rate": 9.338006768692807e-06, |
| "loss": 0.3976, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7544892108042855, |
| "grad_norm": 0.7452091082245685, |
| "learning_rate": 9.320437481468077e-06, |
| "loss": 0.3947, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7605251244907197, |
| "grad_norm": 0.7211982596336295, |
| "learning_rate": 9.302655072436789e-06, |
| "loss": 0.3978, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.766561038177154, |
| "grad_norm": 0.8069527677411222, |
| "learning_rate": 9.284660418783064e-06, |
| "loss": 0.3961, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7725969518635883, |
| "grad_norm": 0.6964974366663241, |
| "learning_rate": 9.266454408160779e-06, |
| "loss": 0.395, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7786328655500226, |
| "grad_norm": 0.6951835215600591, |
| "learning_rate": 9.248037938649792e-06, |
| "loss": 0.3918, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.784668779236457, |
| "grad_norm": 0.7011033108204148, |
| "learning_rate": 9.229411918711637e-06, |
| "loss": 0.3911, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7907046929228913, |
| "grad_norm": 0.6699999752789259, |
| "learning_rate": 9.210577267144703e-06, |
| "loss": 0.3917, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7967406066093254, |
| "grad_norm": 0.7952469588442095, |
| "learning_rate": 9.191534913038926e-06, |
| "loss": 0.393, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8027765202957597, |
| "grad_norm": 0.7362949625214187, |
| "learning_rate": 9.172285795729945e-06, |
| "loss": 0.3916, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.808812433982194, |
| "grad_norm": 0.777349182077021, |
| "learning_rate": 9.152830864752773e-06, |
| "loss": 0.396, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8148483476686283, |
| "grad_norm": 0.6858011231159463, |
| "learning_rate": 9.133171079794952e-06, |
| "loss": 0.3949, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8208842613550626, |
| "grad_norm": 0.8252893789848457, |
| "learning_rate": 9.113307410649222e-06, |
| "loss": 0.3951, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.826920175041497, |
| "grad_norm": 0.742614174317752, |
| "learning_rate": 9.093240837165668e-06, |
| "loss": 0.3912, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8329560887279311, |
| "grad_norm": 0.6712408370389595, |
| "learning_rate": 9.072972349203401e-06, |
| "loss": 0.3938, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8389920024143654, |
| "grad_norm": 0.7390425813359819, |
| "learning_rate": 9.052502946581718e-06, |
| "loss": 0.3902, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8450279161007997, |
| "grad_norm": 0.9031901060003036, |
| "learning_rate": 9.031833639030789e-06, |
| "loss": 0.39, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.851063829787234, |
| "grad_norm": 0.8073830235615219, |
| "learning_rate": 9.010965446141842e-06, |
| "loss": 0.3907, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8570997434736684, |
| "grad_norm": 0.7197468777451328, |
| "learning_rate": 8.989899397316875e-06, |
| "loss": 0.3933, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8631356571601027, |
| "grad_norm": 0.7874409375571629, |
| "learning_rate": 8.96863653171787e-06, |
| "loss": 0.3941, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8691715708465368, |
| "grad_norm": 0.7047790860975574, |
| "learning_rate": 8.947177898215538e-06, |
| "loss": 0.3918, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8752074845329711, |
| "grad_norm": 0.6732410856766448, |
| "learning_rate": 8.925524555337575e-06, |
| "loss": 0.3948, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8812433982194054, |
| "grad_norm": 0.6379130166882847, |
| "learning_rate": 8.90367757121645e-06, |
| "loss": 0.392, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8872793119058398, |
| "grad_norm": 0.6453169279070088, |
| "learning_rate": 8.881638023536715e-06, |
| "loss": 0.3902, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.893315225592274, |
| "grad_norm": 0.8925532684482897, |
| "learning_rate": 8.859406999481839e-06, |
| "loss": 0.3897, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8993511392787084, |
| "grad_norm": 0.7321151042406583, |
| "learning_rate": 8.836985595680585e-06, |
| "loss": 0.3903, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.9053870529651425, |
| "grad_norm": 0.717542202485072, |
| "learning_rate": 8.81437491815291e-06, |
| "loss": 0.3907, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9114229666515768, |
| "grad_norm": 0.6899069830042462, |
| "learning_rate": 8.791576082255414e-06, |
| "loss": 0.3914, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9174588803380112, |
| "grad_norm": 0.7416902913208727, |
| "learning_rate": 8.768590212626305e-06, |
| "loss": 0.3914, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9234947940244455, |
| "grad_norm": 0.648187852127454, |
| "learning_rate": 8.745418443129944e-06, |
| "loss": 0.3878, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.9295307077108798, |
| "grad_norm": 0.6971446829374528, |
| "learning_rate": 8.722061916800892e-06, |
| "loss": 0.3889, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.9355666213973141, |
| "grad_norm": 0.6897656341763103, |
| "learning_rate": 8.698521785787543e-06, |
| "loss": 0.3916, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.9416025350837482, |
| "grad_norm": 0.6707821534631215, |
| "learning_rate": 8.674799211295272e-06, |
| "loss": 0.3872, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.9476384487701826, |
| "grad_norm": 0.7047440310341709, |
| "learning_rate": 8.650895363529172e-06, |
| "loss": 0.3893, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.9536743624566169, |
| "grad_norm": 0.7111300925227007, |
| "learning_rate": 8.626811421636318e-06, |
| "loss": 0.3899, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9597102761430512, |
| "grad_norm": 0.742242466940292, |
| "learning_rate": 8.602548573647603e-06, |
| "loss": 0.3933, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9657461898294855, |
| "grad_norm": 0.6405514647772552, |
| "learning_rate": 8.578108016419138e-06, |
| "loss": 0.3886, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9717821035159198, |
| "grad_norm": 0.6969067995610034, |
| "learning_rate": 8.553490955573207e-06, |
| "loss": 0.3875, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.9778180172023541, |
| "grad_norm": 0.6404080311189763, |
| "learning_rate": 8.528698605438801e-06, |
| "loss": 0.3915, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9838539308887883, |
| "grad_norm": 0.689314089106684, |
| "learning_rate": 8.50373218899171e-06, |
| "loss": 0.3897, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9898898445752226, |
| "grad_norm": 0.6238451440610306, |
| "learning_rate": 8.478592937794202e-06, |
| "loss": 0.3865, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9959257582616569, |
| "grad_norm": 0.6246538104726604, |
| "learning_rate": 8.453282091934262e-06, |
| "loss": 0.3891, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.0018107741059303, |
| "grad_norm": 0.6650133535244673, |
| "learning_rate": 8.427800899964438e-06, |
| "loss": 0.3775, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.0078466877923646, |
| "grad_norm": 0.7340465665361768, |
| "learning_rate": 8.402150618840229e-06, |
| "loss": 0.3658, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.013882601478799, |
| "grad_norm": 0.8803678131362109, |
| "learning_rate": 8.376332513858091e-06, |
| "loss": 0.3643, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.0199185151652332, |
| "grad_norm": 0.6784266807756097, |
| "learning_rate": 8.350347858593035e-06, |
| "loss": 0.3632, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.0259544288516673, |
| "grad_norm": 0.6757297253946429, |
| "learning_rate": 8.324197934835775e-06, |
| "loss": 0.3611, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.0319903425381016, |
| "grad_norm": 0.6937615226816463, |
| "learning_rate": 8.297884032529525e-06, |
| "loss": 0.3641, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.038026256224536, |
| "grad_norm": 0.6656265896882699, |
| "learning_rate": 8.271407449706347e-06, |
| "loss": 0.3634, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.0440621699109702, |
| "grad_norm": 0.6758693000716391, |
| "learning_rate": 8.244769492423144e-06, |
| "loss": 0.3651, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.0500980835974045, |
| "grad_norm": 0.7271602756269683, |
| "learning_rate": 8.217971474697205e-06, |
| "loss": 0.3655, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.0561339972838388, |
| "grad_norm": 0.7262048623607191, |
| "learning_rate": 8.191014718441413e-06, |
| "loss": 0.3646, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.0621699109702731, |
| "grad_norm": 0.7594858496478063, |
| "learning_rate": 8.163900553399022e-06, |
| "loss": 0.3683, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.0682058246567074, |
| "grad_norm": 0.6834326812737692, |
| "learning_rate": 8.13663031707806e-06, |
| "loss": 0.3657, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.0742417383431417, |
| "grad_norm": 0.829231127715137, |
| "learning_rate": 8.109205354685367e-06, |
| "loss": 0.3657, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.080277652029576, |
| "grad_norm": 0.7172584884654448, |
| "learning_rate": 8.081627019060223e-06, |
| "loss": 0.3612, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.0863135657160103, |
| "grad_norm": 0.700123283944604, |
| "learning_rate": 8.053896670607616e-06, |
| "loss": 0.3669, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.0923494794024446, |
| "grad_norm": 0.6802763184360072, |
| "learning_rate": 8.026015677231137e-06, |
| "loss": 0.36, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.0983853930888787, |
| "grad_norm": 0.6976972839342949, |
| "learning_rate": 7.997985414265513e-06, |
| "loss": 0.3645, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.104421306775313, |
| "grad_norm": 0.6892045690564895, |
| "learning_rate": 7.969807264408745e-06, |
| "loss": 0.3664, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.1104572204617473, |
| "grad_norm": 0.6606374628961976, |
| "learning_rate": 7.94148261765391e-06, |
| "loss": 0.3611, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.1164931341481816, |
| "grad_norm": 0.7063672325182395, |
| "learning_rate": 7.913012871220605e-06, |
| "loss": 0.3652, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.122529047834616, |
| "grad_norm": 0.6353061774622171, |
| "learning_rate": 7.884399429486e-06, |
| "loss": 0.3619, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.1285649615210502, |
| "grad_norm": 0.6646621743965846, |
| "learning_rate": 7.855643703915585e-06, |
| "loss": 0.3638, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.1346008752074845, |
| "grad_norm": 0.6379034557335701, |
| "learning_rate": 7.826747112993532e-06, |
| "loss": 0.3595, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.1406367888939188, |
| "grad_norm": 0.6995974469144366, |
| "learning_rate": 7.797711082152726e-06, |
| "loss": 0.3628, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.1466727025803531, |
| "grad_norm": 0.6564170955860726, |
| "learning_rate": 7.768537043704447e-06, |
| "loss": 0.3637, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.1527086162667874, |
| "grad_norm": 0.7572552114374352, |
| "learning_rate": 7.739226436767721e-06, |
| "loss": 0.362, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.1587445299532217, |
| "grad_norm": 0.7571612085211564, |
| "learning_rate": 7.709780707198328e-06, |
| "loss": 0.3638, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.164780443639656, |
| "grad_norm": 0.6792493024466744, |
| "learning_rate": 7.680201307517479e-06, |
| "loss": 0.3625, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.1708163573260904, |
| "grad_norm": 0.664259682779261, |
| "learning_rate": 7.650489696840164e-06, |
| "loss": 0.3646, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.1768522710125244, |
| "grad_norm": 0.6270149603322056, |
| "learning_rate": 7.6206473408031775e-06, |
| "loss": 0.3624, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.1828881846989587, |
| "grad_norm": 0.6383894085325998, |
| "learning_rate": 7.590675711492823e-06, |
| "loss": 0.3643, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.188924098385393, |
| "grad_norm": 0.6816453891866903, |
| "learning_rate": 7.56057628737229e-06, |
| "loss": 0.3637, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.1949600120718273, |
| "grad_norm": 0.7133078108250313, |
| "learning_rate": 7.530350553208726e-06, |
| "loss": 0.3585, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.2009959257582616, |
| "grad_norm": 0.6322767475179056, |
| "learning_rate": 7.500000000000001e-06, |
| "loss": 0.361, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.207031839444696, |
| "grad_norm": 0.6987380190815154, |
| "learning_rate": 7.469526124901149e-06, |
| "loss": 0.3623, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2130677531311302, |
| "grad_norm": 0.6219916214226197, |
| "learning_rate": 7.4389304311505195e-06, |
| "loss": 0.3637, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.2191036668175645, |
| "grad_norm": 0.6591583924033313, |
| "learning_rate": 7.408214427995628e-06, |
| "loss": 0.3644, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.2251395805039988, |
| "grad_norm": 0.7005471225701302, |
| "learning_rate": 7.3773796306187e-06, |
| "loss": 0.3595, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.2311754941904332, |
| "grad_norm": 0.6332845796820719, |
| "learning_rate": 7.346427560061931e-06, |
| "loss": 0.3652, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.2372114078768675, |
| "grad_norm": 0.6778224076333697, |
| "learning_rate": 7.315359743152464e-06, |
| "loss": 0.3606, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.2432473215633015, |
| "grad_norm": 0.6582665893949518, |
| "learning_rate": 7.284177712427056e-06, |
| "loss": 0.3599, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.2492832352497358, |
| "grad_norm": 0.6584059931101761, |
| "learning_rate": 7.252883006056495e-06, |
| "loss": 0.3622, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.2553191489361701, |
| "grad_norm": 0.6857700496450303, |
| "learning_rate": 7.221477167769716e-06, |
| "loss": 0.3633, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.2613550626226044, |
| "grad_norm": 0.6856644672766703, |
| "learning_rate": 7.189961746777657e-06, |
| "loss": 0.363, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.2673909763090387, |
| "grad_norm": 0.6857005736783666, |
| "learning_rate": 7.1583382976968295e-06, |
| "loss": 0.3618, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.273426889995473, |
| "grad_norm": 0.6166440607694041, |
| "learning_rate": 7.126608380472642e-06, |
| "loss": 0.3593, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.2794628036819073, |
| "grad_norm": 0.6673854300030073, |
| "learning_rate": 7.094773560302438e-06, |
| "loss": 0.3616, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.2854987173683416, |
| "grad_norm": 0.6261609808400934, |
| "learning_rate": 7.062835407558295e-06, |
| "loss": 0.3623, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.291534631054776, |
| "grad_norm": 0.6573770008704372, |
| "learning_rate": 7.030795497709559e-06, |
| "loss": 0.3616, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.2975705447412103, |
| "grad_norm": 0.63175357402283, |
| "learning_rate": 6.99865541124513e-06, |
| "loss": 0.363, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.3036064584276446, |
| "grad_norm": 0.7095581591416922, |
| "learning_rate": 6.9664167335954866e-06, |
| "loss": 0.3604, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.3096423721140789, |
| "grad_norm": 0.6211244267814455, |
| "learning_rate": 6.9340810550545004e-06, |
| "loss": 0.3584, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.3156782858005132, |
| "grad_norm": 0.6411383893721285, |
| "learning_rate": 6.901649970700966e-06, |
| "loss": 0.3616, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.3217141994869475, |
| "grad_norm": 0.6508872294411808, |
| "learning_rate": 6.869125080319934e-06, |
| "loss": 0.3626, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.3277501131733815, |
| "grad_norm": 0.6456129899609592, |
| "learning_rate": 6.836507988323785e-06, |
| "loss": 0.3612, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.3337860268598158, |
| "grad_norm": 0.6885055595324049, |
| "learning_rate": 6.803800303673096e-06, |
| "loss": 0.3588, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.3398219405462501, |
| "grad_norm": 0.6841559054058574, |
| "learning_rate": 6.77100363979726e-06, |
| "loss": 0.3608, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.3458578542326844, |
| "grad_norm": 0.7229876827512576, |
| "learning_rate": 6.738119614514913e-06, |
| "loss": 0.3655, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.3518937679191187, |
| "grad_norm": 0.6235312062043321, |
| "learning_rate": 6.705149849954116e-06, |
| "loss": 0.3607, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.357929681605553, |
| "grad_norm": 0.6372979896414575, |
| "learning_rate": 6.672095972472339e-06, |
| "loss": 0.3613, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.3639655952919874, |
| "grad_norm": 0.5943237749223176, |
| "learning_rate": 6.638959612576243e-06, |
| "loss": 0.3578, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.3700015089784217, |
| "grad_norm": 0.6331473442190148, |
| "learning_rate": 6.605742404841241e-06, |
| "loss": 0.3606, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.376037422664856, |
| "grad_norm": 0.6352200712052698, |
| "learning_rate": 6.572445987830869e-06, |
| "loss": 0.3602, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.38207333635129, |
| "grad_norm": 0.6315011206585134, |
| "learning_rate": 6.539072004015962e-06, |
| "loss": 0.3585, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.3881092500377243, |
| "grad_norm": 0.672467399271792, |
| "learning_rate": 6.505622099693624e-06, |
| "loss": 0.359, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.3941451637241586, |
| "grad_norm": 0.6540330679200106, |
| "learning_rate": 6.4720979249060245e-06, |
| "loss": 0.357, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.400181077410593, |
| "grad_norm": 0.6296334356002367, |
| "learning_rate": 6.438501133359006e-06, |
| "loss": 0.363, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.4062169910970272, |
| "grad_norm": 0.5755292937597596, |
| "learning_rate": 6.404833382340498e-06, |
| "loss": 0.3579, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.4122529047834615, |
| "grad_norm": 0.6273216809842853, |
| "learning_rate": 6.3710963326387845e-06, |
| "loss": 0.361, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.4182888184698959, |
| "grad_norm": 0.659504858020357, |
| "learning_rate": 6.337291648460554e-06, |
| "loss": 0.3648, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.4243247321563302, |
| "grad_norm": 0.646430703430766, |
| "learning_rate": 6.303420997348828e-06, |
| "loss": 0.3609, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.4303606458427645, |
| "grad_norm": 0.70677217944382, |
| "learning_rate": 6.269486050100692e-06, |
| "loss": 0.3583, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.4363965595291988, |
| "grad_norm": 0.6982928562021034, |
| "learning_rate": 6.2354884806848825e-06, |
| "loss": 0.3587, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.442432473215633, |
| "grad_norm": 0.635748838083391, |
| "learning_rate": 6.201429966159203e-06, |
| "loss": 0.3603, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.4484683869020674, |
| "grad_norm": 0.6591941857655591, |
| "learning_rate": 6.167312186587813e-06, |
| "loss": 0.3587, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4545043005885017, |
| "grad_norm": 0.6513018732706167, |
| "learning_rate": 6.133136824958334e-06, |
| "loss": 0.3583, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.460540214274936, |
| "grad_norm": 0.6895727383237782, |
| "learning_rate": 6.098905567098846e-06, |
| "loss": 0.3638, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.4665761279613703, |
| "grad_norm": 0.6281650394691185, |
| "learning_rate": 6.064620101594715e-06, |
| "loss": 0.3629, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.4726120416478046, |
| "grad_norm": 0.7324490252015554, |
| "learning_rate": 6.030282119705306e-06, |
| "loss": 0.3621, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.4786479553342387, |
| "grad_norm": 0.6803933740478001, |
| "learning_rate": 5.99589331528055e-06, |
| "loss": 0.3613, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.484683869020673, |
| "grad_norm": 0.6535344969186776, |
| "learning_rate": 5.961455384677393e-06, |
| "loss": 0.3588, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.4907197827071073, |
| "grad_norm": 0.6220530519094237, |
| "learning_rate": 5.92697002667611e-06, |
| "loss": 0.3614, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.4967556963935416, |
| "grad_norm": 0.5997735782443615, |
| "learning_rate": 5.892438942396515e-06, |
| "loss": 0.3562, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.5027916100799759, |
| "grad_norm": 0.5881600037112182, |
| "learning_rate": 5.857863835214041e-06, |
| "loss": 0.36, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.5088275237664102, |
| "grad_norm": 0.6301732957095514, |
| "learning_rate": 5.823246410675714e-06, |
| "loss": 0.3602, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.5148634374528445, |
| "grad_norm": 0.6369138058336548, |
| "learning_rate": 5.788588376416026e-06, |
| "loss": 0.3575, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.5208993511392785, |
| "grad_norm": 1.8916358390305654, |
| "learning_rate": 5.753891442072693e-06, |
| "loss": 0.3584, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.5269352648257128, |
| "grad_norm": 0.6400402583906231, |
| "learning_rate": 5.719157319202325e-06, |
| "loss": 0.3539, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.5329711785121471, |
| "grad_norm": 0.6223661041265537, |
| "learning_rate": 5.684387721195997e-06, |
| "loss": 0.3595, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.5390070921985815, |
| "grad_norm": 0.6649761362975228, |
| "learning_rate": 5.649584363194725e-06, |
| "loss": 0.36, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.5450430058850158, |
| "grad_norm": 0.5989851062495032, |
| "learning_rate": 5.6147489620048655e-06, |
| "loss": 0.3582, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.55107891957145, |
| "grad_norm": 0.6435791376898407, |
| "learning_rate": 5.579883236013429e-06, |
| "loss": 0.3559, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.5571148332578844, |
| "grad_norm": 0.5973586913854247, |
| "learning_rate": 5.544988905103304e-06, |
| "loss": 0.3581, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.5631507469443187, |
| "grad_norm": 0.6331916860819433, |
| "learning_rate": 5.510067690568429e-06, |
| "loss": 0.3573, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.569186660630753, |
| "grad_norm": 0.6000249694556851, |
| "learning_rate": 5.475121315028876e-06, |
| "loss": 0.3574, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.5752225743171873, |
| "grad_norm": 0.5919987411148389, |
| "learning_rate": 5.4401515023458805e-06, |
| "loss": 0.3622, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.5812584880036216, |
| "grad_norm": 0.6130160505042299, |
| "learning_rate": 5.4051599775368e-06, |
| "loss": 0.3585, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.5872944016900559, |
| "grad_norm": 0.6196465067482942, |
| "learning_rate": 5.370148466690026e-06, |
| "loss": 0.3524, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.5933303153764902, |
| "grad_norm": 0.6396523422153624, |
| "learning_rate": 5.335118696879836e-06, |
| "loss": 0.3584, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.5993662290629245, |
| "grad_norm": 0.6247037129381725, |
| "learning_rate": 5.3000723960812e-06, |
| "loss": 0.358, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.6054021427493588, |
| "grad_norm": 0.6296280096461855, |
| "learning_rate": 5.265011293084539e-06, |
| "loss": 0.3557, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.611438056435793, |
| "grad_norm": 0.6270649643037325, |
| "learning_rate": 5.2299371174104505e-06, |
| "loss": 0.3586, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.6174739701222274, |
| "grad_norm": 0.6724245016825049, |
| "learning_rate": 5.194851599224392e-06, |
| "loss": 0.3563, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.6235098838086617, |
| "grad_norm": 0.6246722692854128, |
| "learning_rate": 5.159756469251327e-06, |
| "loss": 0.3587, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.629545797495096, |
| "grad_norm": 0.5856892553580461, |
| "learning_rate": 5.1246534586903655e-06, |
| "loss": 0.3538, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.63558171118153, |
| "grad_norm": 0.6199649535926036, |
| "learning_rate": 5.089544299129349e-06, |
| "loss": 0.3552, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.6416176248679644, |
| "grad_norm": 0.6395106688159933, |
| "learning_rate": 5.054430722459442e-06, |
| "loss": 0.3575, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.6476535385543987, |
| "grad_norm": 0.6217763272730691, |
| "learning_rate": 5.019314460789708e-06, |
| "loss": 0.3568, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.653689452240833, |
| "grad_norm": 0.6159996290026578, |
| "learning_rate": 4.984197246361649e-06, |
| "loss": 0.3565, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.6597253659272673, |
| "grad_norm": 0.6021051813495957, |
| "learning_rate": 4.949080811463767e-06, |
| "loss": 0.3577, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.6657612796137016, |
| "grad_norm": 0.6102206368388114, |
| "learning_rate": 4.913966888346118e-06, |
| "loss": 0.3556, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.6717971933001357, |
| "grad_norm": 0.5968837038838994, |
| "learning_rate": 4.8788572091348435e-06, |
| "loss": 0.3581, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.67783310698657, |
| "grad_norm": 0.5981355700097328, |
| "learning_rate": 4.843753505746748e-06, |
| "loss": 0.358, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.6838690206730043, |
| "grad_norm": 0.6567740858768865, |
| "learning_rate": 4.8086575098038505e-06, |
| "loss": 0.3573, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.6899049343594386, |
| "grad_norm": 0.6773288375423023, |
| "learning_rate": 4.773570952547975e-06, |
| "loss": 0.3552, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.6959408480458729, |
| "grad_norm": 0.6202686068367487, |
| "learning_rate": 4.738495564755345e-06, |
| "loss": 0.3547, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.7019767617323072, |
| "grad_norm": 0.5595337919079114, |
| "learning_rate": 4.703433076651205e-06, |
| "loss": 0.353, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.7080126754187415, |
| "grad_norm": 0.6583890978208258, |
| "learning_rate": 4.668385217824482e-06, |
| "loss": 0.3583, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.7140485891051758, |
| "grad_norm": 0.5898922057879373, |
| "learning_rate": 4.633353717142448e-06, |
| "loss": 0.3524, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.72008450279161, |
| "grad_norm": 0.5938698503556435, |
| "learning_rate": 4.5983403026654625e-06, |
| "loss": 0.3554, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.7261204164780444, |
| "grad_norm": 0.632653867195755, |
| "learning_rate": 4.563346701561699e-06, |
| "loss": 0.3535, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.7321563301644787, |
| "grad_norm": 0.634481958151908, |
| "learning_rate": 4.528374640021975e-06, |
| "loss": 0.3548, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.738192243850913, |
| "grad_norm": 0.6554591212571549, |
| "learning_rate": 4.493425843174581e-06, |
| "loss": 0.3523, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.7442281575373473, |
| "grad_norm": 0.639030241328894, |
| "learning_rate": 4.4585020350001885e-06, |
| "loss": 0.3571, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.7502640712237816, |
| "grad_norm": 0.579081823243162, |
| "learning_rate": 4.423604938246815e-06, |
| "loss": 0.358, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.7562999849102159, |
| "grad_norm": 0.5786332593667859, |
| "learning_rate": 4.38873627434483e-06, |
| "loss": 0.3546, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.7623358985966502, |
| "grad_norm": 0.5844630643462843, |
| "learning_rate": 4.353897763322053e-06, |
| "loss": 0.3557, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.7683718122830845, |
| "grad_norm": 0.6362540824300466, |
| "learning_rate": 4.319091123718891e-06, |
| "loss": 0.3577, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.7744077259695188, |
| "grad_norm": 0.6152238906869951, |
| "learning_rate": 4.284318072503581e-06, |
| "loss": 0.3558, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.7804436396559529, |
| "grad_norm": 0.5871415463947245, |
| "learning_rate": 4.249580324987482e-06, |
| "loss": 0.3565, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.7864795533423872, |
| "grad_norm": 0.5894304003956816, |
| "learning_rate": 4.2148795947404664e-06, |
| "loss": 0.3548, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.7925154670288215, |
| "grad_norm": 0.5546376741165042, |
| "learning_rate": 4.180217593506394e-06, |
| "loss": 0.3545, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.7985513807152558, |
| "grad_norm": 0.5882950021870835, |
| "learning_rate": 4.1455960311186645e-06, |
| "loss": 0.3578, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.80458729440169, |
| "grad_norm": 0.6581353476419389, |
| "learning_rate": 4.111016615415887e-06, |
| "loss": 0.3545, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.8106232080881244, |
| "grad_norm": 0.728199708802779, |
| "learning_rate": 4.076481052157621e-06, |
| "loss": 0.3567, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.8166591217745585, |
| "grad_norm": 0.5836951966903218, |
| "learning_rate": 4.0419910449402385e-06, |
| "loss": 0.3541, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.8226950354609928, |
| "grad_norm": 0.583825208842142, |
| "learning_rate": 4.0075482951128965e-06, |
| "loss": 0.3557, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.828730949147427, |
| "grad_norm": 0.627394077298899, |
| "learning_rate": 3.973154501693597e-06, |
| "loss": 0.352, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.8347668628338614, |
| "grad_norm": 0.6500394437203815, |
| "learning_rate": 3.938811361285386e-06, |
| "loss": 0.3543, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.8408027765202957, |
| "grad_norm": 0.5787408936785984, |
| "learning_rate": 3.904520567992655e-06, |
| "loss": 0.3539, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.84683869020673, |
| "grad_norm": 0.6006488260082842, |
| "learning_rate": 3.870283813337587e-06, |
| "loss": 0.3534, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.8528746038931643, |
| "grad_norm": 0.6017706438925717, |
| "learning_rate": 3.836102786176697e-06, |
| "loss": 0.3533, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.8589105175795986, |
| "grad_norm": 0.6160731963284618, |
| "learning_rate": 3.8019791726175353e-06, |
| "loss": 0.3537, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.8649464312660329, |
| "grad_norm": 0.7394723530516694, |
| "learning_rate": 3.767914655935513e-06, |
| "loss": 0.3512, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.8709823449524672, |
| "grad_norm": 0.5969802619046902, |
| "learning_rate": 3.73391091649086e-06, |
| "loss": 0.3514, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.8770182586389015, |
| "grad_norm": 0.6434909203687009, |
| "learning_rate": 3.6999696316457468e-06, |
| "loss": 0.3525, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.8830541723253358, |
| "grad_norm": 0.6185839002292769, |
| "learning_rate": 3.6660924756815314e-06, |
| "loss": 0.3516, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.88909008601177, |
| "grad_norm": 0.5764246370880874, |
| "learning_rate": 3.63228111971618e-06, |
| "loss": 0.3543, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.8951259996982044, |
| "grad_norm": 0.5724269342695871, |
| "learning_rate": 3.5985372316218187e-06, |
| "loss": 0.3524, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.9011619133846387, |
| "grad_norm": 0.5893980753783277, |
| "learning_rate": 3.5648624759424723e-06, |
| "loss": 0.3487, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.907197827071073, |
| "grad_norm": 0.6385286384600478, |
| "learning_rate": 3.5312585138119503e-06, |
| "loss": 0.353, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.9132337407575073, |
| "grad_norm": 0.643587632906283, |
| "learning_rate": 3.4977270028719013e-06, |
| "loss": 0.3498, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.9192696544439416, |
| "grad_norm": 0.6189874783125575, |
| "learning_rate": 3.4642695971900506e-06, |
| "loss": 0.3542, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.925305568130376, |
| "grad_norm": 0.6320316722606764, |
| "learning_rate": 3.4308879471785986e-06, |
| "loss": 0.3523, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.93134148181681, |
| "grad_norm": 0.6715762862677156, |
| "learning_rate": 3.3975836995128176e-06, |
| "loss": 0.3505, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.9373773955032443, |
| "grad_norm": 0.5947951437286136, |
| "learning_rate": 3.3643584970498166e-06, |
| "loss": 0.356, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.9434133091896786, |
| "grad_norm": 0.5953138896683005, |
| "learning_rate": 3.3312139787474986e-06, |
| "loss": 0.3552, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.9494492228761129, |
| "grad_norm": 0.5696476474991146, |
| "learning_rate": 3.298151779583725e-06, |
| "loss": 0.3496, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.9554851365625472, |
| "grad_norm": 0.6131972032987533, |
| "learning_rate": 3.2651735304756505e-06, |
| "loss": 0.3536, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.9615210502489815, |
| "grad_norm": 0.6336317988993604, |
| "learning_rate": 3.2322808581992825e-06, |
| "loss": 0.3563, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.9675569639354156, |
| "grad_norm": 0.6341579320490388, |
| "learning_rate": 3.1994753853092284e-06, |
| "loss": 0.3482, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.9735928776218499, |
| "grad_norm": 0.5954681993221721, |
| "learning_rate": 3.166758730058653e-06, |
| "loss": 0.3518, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.9796287913082842, |
| "grad_norm": 0.5893599270303087, |
| "learning_rate": 3.134132506319467e-06, |
| "loss": 0.3536, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.9856647049947185, |
| "grad_norm": 0.5689301232875419, |
| "learning_rate": 3.101598323502698e-06, |
| "loss": 0.3537, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.9917006186811528, |
| "grad_norm": 0.6116819898452338, |
| "learning_rate": 3.0691577864791176e-06, |
| "loss": 0.3515, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.997736532367587, |
| "grad_norm": 0.5926997741101551, |
| "learning_rate": 3.036812495500058e-06, |
| "loss": 0.3504, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.0036215482118607, |
| "grad_norm": 0.5928785278377309, |
| "learning_rate": 3.0045640461184917e-06, |
| "loss": 0.339, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.009657461898295, |
| "grad_norm": 0.6039984062866832, |
| "learning_rate": 2.97241402911031e-06, |
| "loss": 0.3325, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.0156933755847293, |
| "grad_norm": 0.6671960610879556, |
| "learning_rate": 2.940364030395856e-06, |
| "loss": 0.3284, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.0217292892711636, |
| "grad_norm": 0.5808483500966948, |
| "learning_rate": 2.908415630961702e-06, |
| "loss": 0.3265, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.027765202957598, |
| "grad_norm": 0.6017580883286716, |
| "learning_rate": 2.876570406782645e-06, |
| "loss": 0.3296, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.033801116644032, |
| "grad_norm": 0.6067555273933171, |
| "learning_rate": 2.844829928743987e-06, |
| "loss": 0.3315, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.0398370303304665, |
| "grad_norm": 0.5774545226545359, |
| "learning_rate": 2.813195762564018e-06, |
| "loss": 0.3268, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.0458729440169003, |
| "grad_norm": 0.5888748284507602, |
| "learning_rate": 2.781669468716811e-06, |
| "loss": 0.3292, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.0519088577033346, |
| "grad_norm": 0.6137376399757654, |
| "learning_rate": 2.7502526023552227e-06, |
| "loss": 0.3258, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.057944771389769, |
| "grad_norm": 0.59390579398881, |
| "learning_rate": 2.718946713234185e-06, |
| "loss": 0.3295, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.0639806850762032, |
| "grad_norm": 0.6555105104152712, |
| "learning_rate": 2.6877533456342714e-06, |
| "loss": 0.3301, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.0700165987626375, |
| "grad_norm": 0.6048063575727766, |
| "learning_rate": 2.6566740382855005e-06, |
| "loss": 0.3289, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.076052512449072, |
| "grad_norm": 0.6014841818951663, |
| "learning_rate": 2.625710324291442e-06, |
| "loss": 0.3325, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.082088426135506, |
| "grad_norm": 0.6035697169885135, |
| "learning_rate": 2.5948637310535886e-06, |
| "loss": 0.3296, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.0881243398219405, |
| "grad_norm": 0.6112233467387164, |
| "learning_rate": 2.5641357801960186e-06, |
| "loss": 0.3278, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.0941602535083748, |
| "grad_norm": 0.5870217829586826, |
| "learning_rate": 2.5335279874903185e-06, |
| "loss": 0.3313, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.100196167194809, |
| "grad_norm": 0.5897131296840935, |
| "learning_rate": 2.503041862780827e-06, |
| "loss": 0.3296, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.1062320808812434, |
| "grad_norm": 0.5718259687035243, |
| "learning_rate": 2.47267890991016e-06, |
| "loss": 0.3281, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.1122679945676777, |
| "grad_norm": 0.5777856500315681, |
| "learning_rate": 2.4424406266450045e-06, |
| "loss": 0.3296, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.118303908254112, |
| "grad_norm": 0.6262457739159312, |
| "learning_rate": 2.412328504602264e-06, |
| "loss": 0.3336, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.1243398219405463, |
| "grad_norm": 0.589194023665236, |
| "learning_rate": 2.382344029175462e-06, |
| "loss": 0.3349, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.1303757356269806, |
| "grad_norm": 0.6140628916832596, |
| "learning_rate": 2.3524886794614653e-06, |
| "loss": 0.331, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.136411649313415, |
| "grad_norm": 0.6028871935735021, |
| "learning_rate": 2.322763928187543e-06, |
| "loss": 0.3307, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.142447562999849, |
| "grad_norm": 0.5798390235554982, |
| "learning_rate": 2.293171241638698e-06, |
| "loss": 0.3298, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.1484834766862835, |
| "grad_norm": 0.5950496656474389, |
| "learning_rate": 2.263712079585345e-06, |
| "loss": 0.3305, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.154519390372718, |
| "grad_norm": 0.5926734664470145, |
| "learning_rate": 2.2343878952113012e-06, |
| "loss": 0.3276, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.160555304059152, |
| "grad_norm": 0.5877698580097848, |
| "learning_rate": 2.2052001350421096e-06, |
| "loss": 0.3268, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.1665912177455864, |
| "grad_norm": 0.5888247000199527, |
| "learning_rate": 2.1761502388736655e-06, |
| "loss": 0.3327, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.1726271314320207, |
| "grad_norm": 0.5807991121980183, |
| "learning_rate": 2.14723963970121e-06, |
| "loss": 0.3315, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.178663045118455, |
| "grad_norm": 0.5763459777490838, |
| "learning_rate": 2.118469763648643e-06, |
| "loss": 0.3278, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.1846989588048893, |
| "grad_norm": 0.5588744726618396, |
| "learning_rate": 2.0898420298981537e-06, |
| "loss": 0.3296, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.1907348724913236, |
| "grad_norm": 0.6040859182215225, |
| "learning_rate": 2.061357850620243e-06, |
| "loss": 0.3279, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.1967707861777575, |
| "grad_norm": 0.6083091005217864, |
| "learning_rate": 2.0330186309040394e-06, |
| "loss": 0.3298, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.2028066998641918, |
| "grad_norm": 0.568667447432841, |
| "learning_rate": 2.0048257686879997e-06, |
| "loss": 0.3286, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.208842613550626, |
| "grad_norm": 0.586169393672314, |
| "learning_rate": 1.9767806546909457e-06, |
| "loss": 0.3316, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.2148785272370604, |
| "grad_norm": 0.5855668928973393, |
| "learning_rate": 1.9488846723434646e-06, |
| "loss": 0.3262, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.2209144409234947, |
| "grad_norm": 0.5920501956876788, |
| "learning_rate": 1.921139197719664e-06, |
| "loss": 0.3298, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.226950354609929, |
| "grad_norm": 0.6343784219115092, |
| "learning_rate": 1.893545599469292e-06, |
| "loss": 0.3316, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.2329862682963633, |
| "grad_norm": 0.56167618088226, |
| "learning_rate": 1.86610523875023e-06, |
| "loss": 0.3288, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.2390221819827976, |
| "grad_norm": 0.5937195941687996, |
| "learning_rate": 1.8388194691613308e-06, |
| "loss": 0.3285, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.245058095669232, |
| "grad_norm": 0.6068056100462802, |
| "learning_rate": 1.811689636675672e-06, |
| "loss": 0.3295, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.251094009355666, |
| "grad_norm": 0.5998463693882512, |
| "learning_rate": 1.7847170795741414e-06, |
| "loss": 0.33, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.2571299230421005, |
| "grad_norm": 0.5707846476820784, |
| "learning_rate": 1.7579031283794234e-06, |
| "loss": 0.3324, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.2631658367285348, |
| "grad_norm": 0.6070101386107148, |
| "learning_rate": 1.7312491057903808e-06, |
| "loss": 0.3288, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.269201750414969, |
| "grad_norm": 0.5684370763425239, |
| "learning_rate": 1.7047563266167888e-06, |
| "loss": 0.3291, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.2752376641014034, |
| "grad_norm": 0.5367883177519198, |
| "learning_rate": 1.678426097714489e-06, |
| "loss": 0.3265, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.2812735777878377, |
| "grad_norm": 0.5853244396608877, |
| "learning_rate": 1.6522597179209187e-06, |
| "loss": 0.3259, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.287309491474272, |
| "grad_norm": 0.5641343283784108, |
| "learning_rate": 1.6262584779910472e-06, |
| "loss": 0.3286, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.2933454051607063, |
| "grad_norm": 0.5563209895809159, |
| "learning_rate": 1.600423660533692e-06, |
| "loss": 0.3281, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.2993813188471406, |
| "grad_norm": 0.5805361043294971, |
| "learning_rate": 1.5747565399482605e-06, |
| "loss": 0.3299, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.305417232533575, |
| "grad_norm": 0.5811803574606669, |
| "learning_rate": 1.5492583823618878e-06, |
| "loss": 0.3289, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.311453146220009, |
| "grad_norm": 0.6040233888147246, |
| "learning_rate": 1.523930445566963e-06, |
| "loss": 0.3308, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.3174890599064435, |
| "grad_norm": 0.6059976475921155, |
| "learning_rate": 1.4987739789591056e-06, |
| "loss": 0.3294, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.323524973592878, |
| "grad_norm": 0.5905386095910952, |
| "learning_rate": 1.4737902234755203e-06, |
| "loss": 0.3301, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.329560887279312, |
| "grad_norm": 0.5747067002149818, |
| "learning_rate": 1.448980411533782e-06, |
| "loss": 0.3278, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.335596800965746, |
| "grad_norm": 0.5732211405787891, |
| "learning_rate": 1.4243457669710564e-06, |
| "loss": 0.3245, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.3416327146521807, |
| "grad_norm": 0.6079651710560006, |
| "learning_rate": 1.3998875049837141e-06, |
| "loss": 0.3268, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.3476686283386146, |
| "grad_norm": 0.5783578941572416, |
| "learning_rate": 1.3756068320673938e-06, |
| "loss": 0.3283, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.353704542025049, |
| "grad_norm": 0.5532376575030373, |
| "learning_rate": 1.3515049459574847e-06, |
| "loss": 0.3254, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.359740455711483, |
| "grad_norm": 0.5467274114487632, |
| "learning_rate": 1.3275830355700519e-06, |
| "loss": 0.3257, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.3657763693979175, |
| "grad_norm": 0.5922264462167515, |
| "learning_rate": 1.3038422809431733e-06, |
| "loss": 0.3291, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.3718122830843518, |
| "grad_norm": 0.5807751637804499, |
| "learning_rate": 1.280283853178742e-06, |
| "loss": 0.3281, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.377848196770786, |
| "grad_norm": 0.5751202261036737, |
| "learning_rate": 1.256908914384698e-06, |
| "loss": 0.3321, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.3838841104572204, |
| "grad_norm": 0.5829573981972134, |
| "learning_rate": 1.233718617617689e-06, |
| "loss": 0.3303, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.3899200241436547, |
| "grad_norm": 0.5614143554083199, |
| "learning_rate": 1.2107141068262119e-06, |
| "loss": 0.3276, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.395955937830089, |
| "grad_norm": 0.5657826082869326, |
| "learning_rate": 1.1878965167941658e-06, |
| "loss": 0.3279, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.4019918515165233, |
| "grad_norm": 0.5583977788315128, |
| "learning_rate": 1.1652669730848837e-06, |
| "loss": 0.3259, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.4080277652029576, |
| "grad_norm": 0.5670227130617606, |
| "learning_rate": 1.1428265919856057e-06, |
| "loss": 0.3319, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.414063678889392, |
| "grad_norm": 0.5345020446470288, |
| "learning_rate": 1.1205764804524172e-06, |
| "loss": 0.3258, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.420099592575826, |
| "grad_norm": 0.5742530447532448, |
| "learning_rate": 1.0985177360556421e-06, |
| "loss": 0.3281, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.4261355062622605, |
| "grad_norm": 0.5681633515485598, |
| "learning_rate": 1.0766514469257006e-06, |
| "loss": 0.33, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.432171419948695, |
| "grad_norm": 0.5469547021834809, |
| "learning_rate": 1.0549786916994387e-06, |
| "loss": 0.3271, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.438207333635129, |
| "grad_norm": 0.5467836338693935, |
| "learning_rate": 1.0335005394669062e-06, |
| "loss": 0.3282, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.4442432473215634, |
| "grad_norm": 0.5496370736783344, |
| "learning_rate": 1.012218049718639e-06, |
| "loss": 0.3267, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.4502791610079977, |
| "grad_norm": 0.5532695447765059, |
| "learning_rate": 9.911322722933825e-07, |
| "loss": 0.3267, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.456315074694432, |
| "grad_norm": 0.5593061519759683, |
| "learning_rate": 9.702442473263035e-07, |
| "loss": 0.3261, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.4623509883808663, |
| "grad_norm": 0.5675718980431652, |
| "learning_rate": 9.495550051976937e-07, |
| "loss": 0.33, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.4683869020673006, |
| "grad_norm": 0.55036807877547, |
| "learning_rate": 9.290655664821296e-07, |
| "loss": 0.326, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.474422815753735, |
| "grad_norm": 0.6047393707132771, |
| "learning_rate": 9.087769418981352e-07, |
| "loss": 0.3294, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.480458729440169, |
| "grad_norm": 0.5377856224781872, |
| "learning_rate": 8.88690132258323e-07, |
| "loss": 0.3301, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.486494643126603, |
| "grad_norm": 0.5404023215833121, |
| "learning_rate": 8.688061284200266e-07, |
| "loss": 0.3308, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.492530556813038, |
| "grad_norm": 0.5429038964087051, |
| "learning_rate": 8.491259112364192e-07, |
| "loss": 0.3277, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.4985664704994717, |
| "grad_norm": 0.5556392061166345, |
| "learning_rate": 8.296504515081333e-07, |
| "loss": 0.328, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.5046023841859064, |
| "grad_norm": 0.5550852188468128, |
| "learning_rate": 8.103807099353733e-07, |
| "loss": 0.3303, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.5106382978723403, |
| "grad_norm": 0.5683960534884703, |
| "learning_rate": 7.913176370705166e-07, |
| "loss": 0.3303, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.5166742115587746, |
| "grad_norm": 0.5647058376594801, |
| "learning_rate": 7.724621732712373e-07, |
| "loss": 0.3281, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.522710125245209, |
| "grad_norm": 0.5396463872633352, |
| "learning_rate": 7.538152486541078e-07, |
| "loss": 0.3224, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.528746038931643, |
| "grad_norm": 0.5769965957501234, |
| "learning_rate": 7.353777830487247e-07, |
| "loss": 0.3298, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.5347819526180775, |
| "grad_norm": 0.5617546845646423, |
| "learning_rate": 7.171506859523298e-07, |
| "loss": 0.3284, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.540817866304512, |
| "grad_norm": 0.5370456459767287, |
| "learning_rate": 6.991348564849504e-07, |
| "loss": 0.3272, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.546853779990946, |
| "grad_norm": 0.5449920129863155, |
| "learning_rate": 6.813311833450426e-07, |
| "loss": 0.3244, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.5528896936773804, |
| "grad_norm": 0.5814796250543772, |
| "learning_rate": 6.637405447656542e-07, |
| "loss": 0.3286, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.5589256073638147, |
| "grad_norm": 0.5802300234417045, |
| "learning_rate": 6.463638084711088e-07, |
| "loss": 0.3303, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.564961521050249, |
| "grad_norm": 0.5682016106324166, |
| "learning_rate": 6.29201831634188e-07, |
| "loss": 0.3275, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.5709974347366833, |
| "grad_norm": 0.628799960343276, |
| "learning_rate": 6.122554608338605e-07, |
| "loss": 0.3278, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.5770333484231176, |
| "grad_norm": 0.5261749879449605, |
| "learning_rate": 5.955255320135195e-07, |
| "loss": 0.3287, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.583069262109552, |
| "grad_norm": 0.5365103226953842, |
| "learning_rate": 5.790128704397424e-07, |
| "loss": 0.3242, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.589105175795986, |
| "grad_norm": 0.5482210552849281, |
| "learning_rate": 5.627182906615825e-07, |
| "loss": 0.3254, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.5951410894824205, |
| "grad_norm": 0.5270093070193902, |
| "learning_rate": 5.466425964703914e-07, |
| "loss": 0.3268, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.601177003168855, |
| "grad_norm": 0.5351843851712077, |
| "learning_rate": 5.307865808601664e-07, |
| "loss": 0.3267, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.607212916855289, |
| "grad_norm": 0.5551045883829538, |
| "learning_rate": 5.151510259884329e-07, |
| "loss": 0.3261, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.6132488305417234, |
| "grad_norm": 0.5716515174477422, |
| "learning_rate": 4.997367031376627e-07, |
| "loss": 0.3283, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.6192847442281577, |
| "grad_norm": 0.5484469831279773, |
| "learning_rate": 4.84544372677228e-07, |
| "loss": 0.3279, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.6253206579145916, |
| "grad_norm": 0.5627722024643765, |
| "learning_rate": 4.6957478402589076e-07, |
| "loss": 0.3285, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.6313565716010263, |
| "grad_norm": 0.54030007506572, |
| "learning_rate": 4.548286756148401e-07, |
| "loss": 0.328, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.63739248528746, |
| "grad_norm": 0.5688872966757411, |
| "learning_rate": 4.4030677485125906e-07, |
| "loss": 0.3291, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.643428398973895, |
| "grad_norm": 0.5611453338620043, |
| "learning_rate": 4.2600979808244627e-07, |
| "loss": 0.3267, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.649464312660329, |
| "grad_norm": 0.5591585705521456, |
| "learning_rate": 4.119384505604834e-07, |
| "loss": 0.3285, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.655500226346763, |
| "grad_norm": 0.5403567309346599, |
| "learning_rate": 3.980934264074393e-07, |
| "loss": 0.3234, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.6615361400331974, |
| "grad_norm": 0.5366841662024877, |
| "learning_rate": 3.8447540858113197e-07, |
| "loss": 0.3289, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.6675720537196317, |
| "grad_norm": 0.5505493242335168, |
| "learning_rate": 3.710850688414419e-07, |
| "loss": 0.329, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.673607967406066, |
| "grad_norm": 0.5572305600353893, |
| "learning_rate": 3.579230677171702e-07, |
| "loss": 0.326, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.6796438810925003, |
| "grad_norm": 0.5320801899819191, |
| "learning_rate": 3.4499005447346024e-07, |
| "loss": 0.3272, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.6856797947789346, |
| "grad_norm": 0.5621605698475473, |
| "learning_rate": 3.32286667079767e-07, |
| "loss": 0.3232, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.691715708465369, |
| "grad_norm": 0.5535800034831663, |
| "learning_rate": 3.1981353217838853e-07, |
| "loss": 0.3267, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.697751622151803, |
| "grad_norm": 0.5541989505631728, |
| "learning_rate": 3.0757126505355284e-07, |
| "loss": 0.3271, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.7037875358382375, |
| "grad_norm": 0.554309743511386, |
| "learning_rate": 2.9556046960106997e-07, |
| "loss": 0.3275, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.709823449524672, |
| "grad_norm": 0.5441084268121339, |
| "learning_rate": 2.837817382985375e-07, |
| "loss": 0.3265, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.715859363211106, |
| "grad_norm": 0.5661752729331364, |
| "learning_rate": 2.722356521761188e-07, |
| "loss": 0.3251, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.7218952768975404, |
| "grad_norm": 0.5284497315283775, |
| "learning_rate": 2.6092278078788004e-07, |
| "loss": 0.3249, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.7279311905839747, |
| "grad_norm": 0.52978683625873, |
| "learning_rate": 2.4984368218369305e-07, |
| "loss": 0.3282, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.733967104270409, |
| "grad_norm": 0.5435219044017648, |
| "learning_rate": 2.389989028817108e-07, |
| "loss": 0.3283, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.7400030179568433, |
| "grad_norm": 0.5516780362582209, |
| "learning_rate": 2.2838897784140612e-07, |
| "loss": 0.3274, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.7460389316432776, |
| "grad_norm": 0.546719555306795, |
| "learning_rate": 2.1801443043718285e-07, |
| "loss": 0.3298, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.752074845329712, |
| "grad_norm": 0.5563980632574993, |
| "learning_rate": 2.0787577243255807e-07, |
| "loss": 0.3267, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.758110759016146, |
| "grad_norm": 0.535638021015215, |
| "learning_rate": 1.9797350395492077e-07, |
| "loss": 0.3253, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.76414667270258, |
| "grad_norm": 0.5347609121819951, |
| "learning_rate": 1.8830811347085697e-07, |
| "loss": 0.3252, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.770182586389015, |
| "grad_norm": 0.5415863482391344, |
| "learning_rate": 1.788800777620542e-07, |
| "loss": 0.3276, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.7762185000754487, |
| "grad_norm": 0.5466212280219622, |
| "learning_rate": 1.6968986190178728e-07, |
| "loss": 0.326, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.7822544137618834, |
| "grad_norm": 0.5391843573715891, |
| "learning_rate": 1.60737919231973e-07, |
| "loss": 0.3265, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.7882903274483173, |
| "grad_norm": 0.5465887305789703, |
| "learning_rate": 1.5202469134080633e-07, |
| "loss": 0.3291, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.794326241134752, |
| "grad_norm": 0.5447449635613493, |
| "learning_rate": 1.4355060804098043e-07, |
| "loss": 0.3254, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.800362154821186, |
| "grad_norm": 0.5376834372862567, |
| "learning_rate": 1.3531608734848433e-07, |
| "loss": 0.3252, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.80639806850762, |
| "grad_norm": 0.5419447242645747, |
| "learning_rate": 1.273215354619789e-07, |
| "loss": 0.3277, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.8124339821940545, |
| "grad_norm": 0.521436211709283, |
| "learning_rate": 1.1956734674276492e-07, |
| "loss": 0.3267, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.818469895880489, |
| "grad_norm": 0.5443036316275357, |
| "learning_rate": 1.1205390369532553e-07, |
| "loss": 0.328, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.824505809566923, |
| "grad_norm": 0.5736771187575125, |
| "learning_rate": 1.0478157694846002e-07, |
| "loss": 0.3269, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.8305417232533574, |
| "grad_norm": 0.5533030963421177, |
| "learning_rate": 9.775072523700135e-08, |
| "loss": 0.3274, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.8365776369397917, |
| "grad_norm": 0.5297867847542854, |
| "learning_rate": 9.096169538411747e-08, |
| "loss": 0.3251, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.842613550626226, |
| "grad_norm": 0.5603590658940372, |
| "learning_rate": 8.441482228420505e-08, |
| "loss": 0.3261, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.8486494643126603, |
| "grad_norm": 0.5648155137748375, |
| "learning_rate": 7.81104288863721e-08, |
| "loss": 0.3238, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.8546853779990946, |
| "grad_norm": 0.5155233113764542, |
| "learning_rate": 7.204882617850129e-08, |
| "loss": 0.3284, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.860721291685529, |
| "grad_norm": 0.5283055469638852, |
| "learning_rate": 6.623031317191386e-08, |
| "loss": 0.3243, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.866757205371963, |
| "grad_norm": 0.5324757215458941, |
| "learning_rate": 6.065517688661926e-08, |
| "loss": 0.3266, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.8727931190583975, |
| "grad_norm": 0.5270694862009192, |
| "learning_rate": 5.532369233715418e-08, |
| "loss": 0.3263, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.878829032744832, |
| "grad_norm": 0.5184850936640313, |
| "learning_rate": 5.02361225190201e-08, |
| "loss": 0.325, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.884864946431266, |
| "grad_norm": 0.5522807685327075, |
| "learning_rate": 4.539271839570702e-08, |
| "loss": 0.3303, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.8909008601177004, |
| "grad_norm": 0.5833975533295399, |
| "learning_rate": 4.079371888631667e-08, |
| "loss": 0.3287, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.8969367738041347, |
| "grad_norm": 0.5435014494666157, |
| "learning_rate": 3.643935085377193e-08, |
| "loss": 0.3291, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.902972687490569, |
| "grad_norm": 0.5317951774661862, |
| "learning_rate": 3.232982909363247e-08, |
| "loss": 0.3302, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.9090086011770033, |
| "grad_norm": 0.5470417295465569, |
| "learning_rate": 2.8465356323494897e-08, |
| "loss": 0.3293, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.915044514863437, |
| "grad_norm": 0.5361189628769133, |
| "learning_rate": 2.4846123172992953e-08, |
| "loss": 0.3281, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.921080428549872, |
| "grad_norm": 0.54177713240335, |
| "learning_rate": 2.147230817439616e-08, |
| "loss": 0.326, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.927116342236306, |
| "grad_norm": 0.5323052126594137, |
| "learning_rate": 1.834407775380187e-08, |
| "loss": 0.3281, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.9331522559227405, |
| "grad_norm": 0.5359903004283559, |
| "learning_rate": 1.5461586222924596e-08, |
| "loss": 0.3261, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.9391881696091744, |
| "grad_norm": 0.5543427271655068, |
| "learning_rate": 1.2824975771486558e-08, |
| "loss": 0.3264, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.945224083295609, |
| "grad_norm": 0.5384618149718552, |
| "learning_rate": 1.0434376460201067e-08, |
| "loss": 0.3271, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.951259996982043, |
| "grad_norm": 0.5121547980752482, |
| "learning_rate": 8.289906214358767e-09, |
| "loss": 0.3252, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.9572959106684773, |
| "grad_norm": 0.5126843579972032, |
| "learning_rate": 6.391670818008955e-09, |
| "loss": 0.3255, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.9633318243549116, |
| "grad_norm": 0.5323524113852374, |
| "learning_rate": 4.7397639087432e-09, |
| "loss": 0.3267, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.969367738041346, |
| "grad_norm": 0.5324354968490875, |
| "learning_rate": 3.3342669730729303e-09, |
| "loss": 0.3255, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.97540365172778, |
| "grad_norm": 0.550825930999869, |
| "learning_rate": 2.1752493424148647e-09, |
| "loss": 0.328, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.9814395654142145, |
| "grad_norm": 0.5289245768111625, |
| "learning_rate": 1.2627681896670852e-09, |
| "loss": 0.3265, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.987475479100649, |
| "grad_norm": 0.5548931035545003, |
| "learning_rate": 5.968685263885165e-10, |
| "loss": 0.329, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.993511392787083, |
| "grad_norm": 0.5268141741402684, |
| "learning_rate": 1.7758320058236522e-10, |
| "loss": 0.3264, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.9995473064735174, |
| "grad_norm": 0.5394909098085136, |
| "learning_rate": 4.932895071863009e-12, |
| "loss": 0.3267, |
| "step": 4970 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 4971, |
| "total_flos": 3906508525600768.0, |
| "train_loss": 0.3714317911300974, |
| "train_runtime": 271631.3463, |
| "train_samples_per_second": 4.684, |
| "train_steps_per_second": 0.018 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4971, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3906508525600768.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|