{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 120.0,
  "eval_steps": 500,
  "global_step": 46440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12919896640826872,
      "grad_norm": 0.9917405843734741,
      "learning_rate": 0.0004994616709732988,
      "loss": 7.7308,
      "step": 50
    },
    {
      "epoch": 0.25839793281653745,
      "grad_norm": 0.6762784123420715,
      "learning_rate": 0.0004989233419465977,
      "loss": 6.9663,
      "step": 100
    },
    {
      "epoch": 0.3875968992248062,
      "grad_norm": 0.7483344078063965,
      "learning_rate": 0.0004983850129198967,
      "loss": 6.7476,
      "step": 150
    },
    {
      "epoch": 0.5167958656330749,
      "grad_norm": 0.6773372888565063,
      "learning_rate": 0.0004978466838931956,
      "loss": 6.568,
      "step": 200
    },
    {
      "epoch": 0.6459948320413437,
      "grad_norm": 0.7506688833236694,
      "learning_rate": 0.0004973083548664944,
      "loss": 6.4322,
      "step": 250
    },
    {
      "epoch": 0.7751937984496124,
      "grad_norm": 0.6851488351821899,
      "learning_rate": 0.0004967700258397933,
      "loss": 6.2898,
      "step": 300
    },
    {
      "epoch": 0.9043927648578811,
      "grad_norm": 0.6121354103088379,
      "learning_rate": 0.0004962316968130921,
      "loss": 6.2314,
      "step": 350
    },
    {
      "epoch": 1.0335917312661498,
      "grad_norm": 0.6324535608291626,
      "learning_rate": 0.000495693367786391,
      "loss": 6.1388,
      "step": 400
    },
    {
      "epoch": 1.1627906976744187,
      "grad_norm": 0.7013489007949829,
      "learning_rate": 0.00049515503875969,
      "loss": 6.0423,
      "step": 450
    },
    {
      "epoch": 1.2919896640826873,
      "grad_norm": 0.709388792514801,
      "learning_rate": 0.0004946167097329889,
      "loss": 6.0031,
      "step": 500
    },
    {
      "epoch": 1.421188630490956,
      "grad_norm": 0.6075559854507446,
      "learning_rate": 0.0004940783807062877,
      "loss": 5.9879,
      "step": 550
    },
    {
      "epoch": 1.550387596899225,
      "grad_norm": 0.7773743271827698,
      "learning_rate": 0.0004935400516795866,
      "loss": 5.9289,
      "step": 600
    },
    {
      "epoch": 1.6795865633074936,
      "grad_norm": 0.6793815493583679,
      "learning_rate": 0.0004930017226528854,
      "loss": 5.913,
      "step": 650
    },
    {
      "epoch": 1.8087855297157622,
      "grad_norm": 0.6257513761520386,
      "learning_rate": 0.0004924633936261843,
      "loss": 5.8669,
      "step": 700
    },
    {
      "epoch": 1.937984496124031,
      "grad_norm": 0.6651344299316406,
      "learning_rate": 0.0004919250645994833,
      "loss": 5.8144,
      "step": 750
    },
    {
      "epoch": 2.0671834625322996,
      "grad_norm": 0.7016419172286987,
      "learning_rate": 0.0004913867355727821,
      "loss": 5.723,
      "step": 800
    },
    {
      "epoch": 2.1963824289405687,
      "grad_norm": 0.6791759133338928,
      "learning_rate": 0.000490848406546081,
      "loss": 5.6373,
      "step": 850
    },
    {
      "epoch": 2.3255813953488373,
      "grad_norm": 0.5815697908401489,
      "learning_rate": 0.0004903100775193798,
      "loss": 5.6218,
      "step": 900
    },
    {
      "epoch": 2.454780361757106,
      "grad_norm": 0.688028872013092,
      "learning_rate": 0.0004897717484926787,
      "loss": 5.6122,
      "step": 950
    },
    {
      "epoch": 2.5839793281653747,
      "grad_norm": 0.7259660959243774,
      "learning_rate": 0.0004892334194659776,
      "loss": 5.6035,
      "step": 1000
    },
    {
      "epoch": 2.7131782945736433,
      "grad_norm": 0.6235421299934387,
      "learning_rate": 0.0004886950904392765,
      "loss": 5.6196,
      "step": 1050
    },
    {
      "epoch": 2.842377260981912,
      "grad_norm": 0.7457830905914307,
      "learning_rate": 0.00048815676141257536,
      "loss": 5.6065,
      "step": 1100
    },
    {
      "epoch": 2.971576227390181,
      "grad_norm": 0.6832508444786072,
      "learning_rate": 0.0004876184323858743,
      "loss": 5.6163,
      "step": 1150
    },
    {
      "epoch": 3.10077519379845,
      "grad_norm": 0.6513948440551758,
      "learning_rate": 0.00048708010335917314,
      "loss": 5.4044,
      "step": 1200
    },
    {
      "epoch": 3.2299741602067185,
      "grad_norm": 0.7611924409866333,
      "learning_rate": 0.00048654177433247206,
      "loss": 5.3747,
      "step": 1250
    },
    {
      "epoch": 3.359173126614987,
      "grad_norm": 0.7044627070426941,
      "learning_rate": 0.00048600344530577087,
      "loss": 5.4291,
      "step": 1300
    },
    {
      "epoch": 3.488372093023256,
      "grad_norm": 0.7278909683227539,
      "learning_rate": 0.00048546511627906973,
      "loss": 5.4147,
      "step": 1350
    },
    {
      "epoch": 3.6175710594315245,
      "grad_norm": 0.7509022951126099,
      "learning_rate": 0.00048492678725236865,
      "loss": 5.4163,
      "step": 1400
    },
    {
      "epoch": 3.746770025839793,
      "grad_norm": 0.6917663812637329,
      "learning_rate": 0.0004843884582256675,
      "loss": 5.3963,
      "step": 1450
    },
    {
      "epoch": 3.875968992248062,
      "grad_norm": 0.7984724044799805,
      "learning_rate": 0.00048385012919896644,
      "loss": 5.3657,
      "step": 1500
    },
    {
      "epoch": 4.0051679586563305,
      "grad_norm": 0.6966680884361267,
      "learning_rate": 0.0004833118001722653,
      "loss": 5.3756,
      "step": 1550
    },
    {
      "epoch": 4.134366925064599,
      "grad_norm": 0.8046667575836182,
      "learning_rate": 0.00048277347114556417,
      "loss": 5.1746,
      "step": 1600
    },
    {
      "epoch": 4.263565891472869,
      "grad_norm": 0.7066221833229065,
      "learning_rate": 0.00048223514211886303,
      "loss": 5.2074,
      "step": 1650
    },
    {
      "epoch": 4.392764857881137,
      "grad_norm": 0.895158052444458,
      "learning_rate": 0.00048169681309216195,
      "loss": 5.2184,
      "step": 1700
    },
    {
      "epoch": 4.521963824289406,
      "grad_norm": 0.8543861508369446,
      "learning_rate": 0.0004811584840654608,
      "loss": 5.2353,
      "step": 1750
    },
    {
      "epoch": 4.651162790697675,
      "grad_norm": 0.7139029502868652,
      "learning_rate": 0.00048062015503875973,
      "loss": 5.2094,
      "step": 1800
    },
    {
      "epoch": 4.780361757105943,
      "grad_norm": 0.7855068445205688,
      "learning_rate": 0.0004800818260120586,
      "loss": 5.2364,
      "step": 1850
    },
    {
      "epoch": 4.909560723514212,
      "grad_norm": 0.6991411447525024,
      "learning_rate": 0.00047954349698535746,
      "loss": 5.2289,
      "step": 1900
    },
    {
      "epoch": 5.038759689922481,
      "grad_norm": 0.7080933451652527,
      "learning_rate": 0.0004790051679586563,
      "loss": 5.1262,
      "step": 1950
    },
    {
      "epoch": 5.167958656330749,
      "grad_norm": 0.7555293440818787,
      "learning_rate": 0.0004784668389319552,
      "loss": 4.9888,
      "step": 2000
    },
    {
      "epoch": 5.297157622739018,
      "grad_norm": 0.7009134888648987,
      "learning_rate": 0.0004779285099052541,
      "loss": 5.0354,
      "step": 2050
    },
    {
      "epoch": 5.426356589147287,
      "grad_norm": 0.7513852715492249,
      "learning_rate": 0.000477390180878553,
      "loss": 5.0477,
      "step": 2100
    },
    {
      "epoch": 5.555555555555555,
      "grad_norm": 0.7191789150238037,
      "learning_rate": 0.0004768518518518519,
      "loss": 5.0678,
      "step": 2150
    },
    {
      "epoch": 5.684754521963824,
      "grad_norm": 0.7183817625045776,
      "learning_rate": 0.0004763135228251507,
      "loss": 5.0628,
      "step": 2200
    },
    {
      "epoch": 5.813953488372093,
      "grad_norm": 0.6737208962440491,
      "learning_rate": 0.0004757751937984496,
      "loss": 5.0434,
      "step": 2250
    },
    {
      "epoch": 5.943152454780362,
      "grad_norm": 0.7439269423484802,
      "learning_rate": 0.0004752368647717485,
      "loss": 5.0775,
      "step": 2300
    },
    {
      "epoch": 6.072351421188631,
      "grad_norm": 0.7350853085517883,
      "learning_rate": 0.0004746985357450474,
      "loss": 4.9399,
      "step": 2350
    },
    {
      "epoch": 6.2015503875969,
      "grad_norm": 0.8478549718856812,
      "learning_rate": 0.00047416020671834627,
      "loss": 4.8426,
      "step": 2400
    },
    {
      "epoch": 6.330749354005168,
      "grad_norm": 0.7773024439811707,
      "learning_rate": 0.0004736218776916452,
      "loss": 4.8553,
      "step": 2450
    },
    {
      "epoch": 6.459948320413437,
      "grad_norm": 0.7779753804206848,
      "learning_rate": 0.000473083548664944,
      "loss": 4.9105,
      "step": 2500
    },
    {
      "epoch": 6.589147286821706,
      "grad_norm": 0.7040427923202515,
      "learning_rate": 0.0004725452196382429,
      "loss": 4.9099,
      "step": 2550
    },
    {
      "epoch": 6.718346253229974,
      "grad_norm": 0.7401306629180908,
      "learning_rate": 0.0004720068906115418,
      "loss": 4.9181,
      "step": 2600
    },
    {
      "epoch": 6.847545219638243,
      "grad_norm": 0.7807409167289734,
      "learning_rate": 0.00047146856158484065,
      "loss": 4.9113,
      "step": 2650
    },
    {
      "epoch": 6.976744186046512,
      "grad_norm": 0.6973236203193665,
      "learning_rate": 0.00047093023255813957,
      "loss": 4.9278,
      "step": 2700
    },
    {
      "epoch": 7.10594315245478,
      "grad_norm": 0.725831151008606,
      "learning_rate": 0.0004703919035314384,
      "loss": 4.7418,
      "step": 2750
    },
    {
      "epoch": 7.235142118863049,
      "grad_norm": 0.7228904366493225,
      "learning_rate": 0.0004698535745047373,
      "loss": 4.7114,
      "step": 2800
    },
    {
      "epoch": 7.364341085271318,
      "grad_norm": 0.8759543895721436,
      "learning_rate": 0.00046931524547803616,
      "loss": 4.7837,
      "step": 2850
    },
    {
      "epoch": 7.493540051679586,
      "grad_norm": 0.7154684066772461,
      "learning_rate": 0.0004687769164513351,
      "loss": 4.7535,
      "step": 2900
    },
    {
      "epoch": 7.622739018087855,
      "grad_norm": 0.7702780365943909,
      "learning_rate": 0.00046823858742463394,
      "loss": 4.7394,
      "step": 2950
    },
    {
      "epoch": 7.751937984496124,
      "grad_norm": 0.7203558683395386,
      "learning_rate": 0.00046770025839793286,
      "loss": 4.7651,
      "step": 3000
    },
    {
      "epoch": 7.881136950904393,
      "grad_norm": 0.7156451940536499,
      "learning_rate": 0.0004671619293712317,
      "loss": 4.7152,
      "step": 3050
    },
    {
      "epoch": 8.010335917312661,
      "grad_norm": 0.7143818736076355,
      "learning_rate": 0.0004666236003445306,
      "loss": 4.7618,
      "step": 3100
    },
    {
      "epoch": 8.13953488372093,
      "grad_norm": 0.7013837695121765,
      "learning_rate": 0.00046608527131782946,
      "loss": 4.5375,
      "step": 3150
    },
    {
      "epoch": 8.268733850129198,
      "grad_norm": 0.774198055267334,
      "learning_rate": 0.0004655469422911284,
      "loss": 4.5631,
      "step": 3200
    },
    {
      "epoch": 8.397932816537468,
      "grad_norm": 0.7553951740264893,
      "learning_rate": 0.00046500861326442724,
      "loss": 4.5684,
      "step": 3250
    },
    {
      "epoch": 8.527131782945737,
      "grad_norm": 0.8197945356369019,
      "learning_rate": 0.0004644702842377261,
      "loss": 4.5987,
      "step": 3300
    },
    {
      "epoch": 8.656330749354005,
      "grad_norm": 0.7249457836151123,
      "learning_rate": 0.00046393195521102497,
      "loss": 4.5985,
      "step": 3350
    },
    {
      "epoch": 8.785529715762275,
      "grad_norm": 0.6863015294075012,
      "learning_rate": 0.00046339362618432383,
      "loss": 4.6153,
      "step": 3400
    },
    {
      "epoch": 8.914728682170542,
      "grad_norm": 0.8114822506904602,
      "learning_rate": 0.00046285529715762275,
      "loss": 4.6149,
      "step": 3450
    },
    {
      "epoch": 9.043927648578812,
      "grad_norm": 0.7665704488754272,
      "learning_rate": 0.0004623169681309216,
      "loss": 4.5479,
      "step": 3500
    },
    {
      "epoch": 9.17312661498708,
      "grad_norm": 0.7534597516059875,
      "learning_rate": 0.00046177863910422054,
      "loss": 4.3759,
      "step": 3550
    },
    {
      "epoch": 9.30232558139535,
      "grad_norm": 0.7461442351341248,
      "learning_rate": 0.0004612403100775194,
      "loss": 4.4284,
      "step": 3600
    },
    {
      "epoch": 9.431524547803617,
      "grad_norm": 0.7281939387321472,
      "learning_rate": 0.00046070198105081827,
      "loss": 4.4657,
      "step": 3650
    },
    {
      "epoch": 9.560723514211887,
      "grad_norm": 0.782497227191925,
      "learning_rate": 0.00046016365202411713,
      "loss": 4.4393,
      "step": 3700
    },
    {
      "epoch": 9.689922480620154,
      "grad_norm": 0.7056346535682678,
      "learning_rate": 0.00045962532299741605,
      "loss": 4.4573,
      "step": 3750
    },
    {
      "epoch": 9.819121447028424,
      "grad_norm": 0.8219366669654846,
      "learning_rate": 0.0004590869939707149,
      "loss": 4.4681,
      "step": 3800
    },
    {
      "epoch": 9.948320413436692,
      "grad_norm": 0.7478976249694824,
      "learning_rate": 0.00045854866494401383,
      "loss": 4.4559,
      "step": 3850
    },
    {
      "epoch": 10.077519379844961,
      "grad_norm": 0.7888408303260803,
      "learning_rate": 0.00045801033591731264,
      "loss": 4.3124,
      "step": 3900
    },
    {
      "epoch": 10.20671834625323,
      "grad_norm": 0.7403283715248108,
      "learning_rate": 0.0004574720068906115,
      "loss": 4.2445,
      "step": 3950
    },
    {
      "epoch": 10.335917312661499,
      "grad_norm": 0.7731178402900696,
      "learning_rate": 0.0004569336778639104,
      "loss": 4.2635,
      "step": 4000
    },
    {
      "epoch": 10.465116279069768,
      "grad_norm": 0.7026334404945374,
      "learning_rate": 0.0004563953488372093,
      "loss": 4.2916,
      "step": 4050
    },
    {
      "epoch": 10.594315245478036,
      "grad_norm": 0.7333751916885376,
      "learning_rate": 0.0004558570198105082,
      "loss": 4.278,
      "step": 4100
    },
    {
      "epoch": 10.723514211886306,
      "grad_norm": 0.954624593257904,
      "learning_rate": 0.0004553186907838071,
      "loss": 4.2896,
      "step": 4150
    },
    {
      "epoch": 10.852713178294573,
      "grad_norm": 0.710111141204834,
      "learning_rate": 0.00045478036175710594,
      "loss": 4.2906,
      "step": 4200
    },
    {
      "epoch": 10.981912144702843,
      "grad_norm": 0.7165260314941406,
      "learning_rate": 0.0004542420327304048,
      "loss": 4.3577,
      "step": 4250
    },
    {
      "epoch": 11.11111111111111,
      "grad_norm": 0.7047768831253052,
      "learning_rate": 0.0004537037037037037,
      "loss": 4.0813,
      "step": 4300
    },
    {
      "epoch": 11.24031007751938,
      "grad_norm": 0.733347475528717,
      "learning_rate": 0.0004531653746770026,
      "loss": 4.0638,
      "step": 4350
    },
    {
      "epoch": 11.369509043927648,
      "grad_norm": 0.7578668594360352,
      "learning_rate": 0.0004526270456503015,
      "loss": 4.1124,
      "step": 4400
    },
    {
      "epoch": 11.498708010335918,
      "grad_norm": 0.7754192352294922,
      "learning_rate": 0.00045208871662360037,
      "loss": 4.1428,
      "step": 4450
    },
    {
      "epoch": 11.627906976744185,
      "grad_norm": 0.7117652893066406,
      "learning_rate": 0.00045155038759689924,
      "loss": 4.1332,
      "step": 4500
    },
    {
      "epoch": 11.757105943152455,
      "grad_norm": 0.7095139026641846,
      "learning_rate": 0.0004510120585701981,
      "loss": 4.1521,
      "step": 4550
    },
    {
      "epoch": 11.886304909560723,
      "grad_norm": 0.7314981818199158,
      "learning_rate": 0.00045047372954349697,
      "loss": 4.1494,
      "step": 4600
    },
    {
      "epoch": 12.015503875968992,
      "grad_norm": 0.7254127264022827,
      "learning_rate": 0.0004499354005167959,
      "loss": 4.1582,
      "step": 4650
    },
    {
      "epoch": 12.144702842377262,
      "grad_norm": 0.7222834229469299,
      "learning_rate": 0.00044939707149009475,
      "loss": 3.887,
      "step": 4700
    },
    {
      "epoch": 12.27390180878553,
      "grad_norm": 0.7748265862464905,
      "learning_rate": 0.00044885874246339367,
      "loss": 3.9115,
      "step": 4750
    },
    {
      "epoch": 12.4031007751938,
      "grad_norm": 0.7006997466087341,
      "learning_rate": 0.0004483204134366925,
      "loss": 3.9553,
      "step": 4800
    },
    {
      "epoch": 12.532299741602067,
      "grad_norm": 0.7475093007087708,
      "learning_rate": 0.0004477820844099914,
      "loss": 3.9835,
      "step": 4850
    },
    {
      "epoch": 12.661498708010337,
      "grad_norm": 0.7333974242210388,
      "learning_rate": 0.00044724375538329026,
      "loss": 3.9859,
      "step": 4900
    },
    {
      "epoch": 12.790697674418604,
      "grad_norm": 0.7468938231468201,
      "learning_rate": 0.0004467054263565892,
      "loss": 3.9911,
      "step": 4950
    },
    {
      "epoch": 12.919896640826874,
      "grad_norm": 0.710841178894043,
      "learning_rate": 0.00044616709732988804,
      "loss": 4.0315,
      "step": 5000
    },
    {
      "epoch": 13.049095607235142,
      "grad_norm": 0.7264716029167175,
      "learning_rate": 0.00044562876830318696,
      "loss": 3.9268,
      "step": 5050
    },
    {
      "epoch": 13.178294573643411,
      "grad_norm": 0.730278491973877,
      "learning_rate": 0.0004450904392764858,
      "loss": 3.7096,
      "step": 5100
    },
    {
      "epoch": 13.307493540051679,
      "grad_norm": 0.7709214687347412,
      "learning_rate": 0.0004445521102497847,
      "loss": 3.7462,
      "step": 5150
    },
    {
      "epoch": 13.436692506459949,
      "grad_norm": 0.737932026386261,
      "learning_rate": 0.00044401378122308356,
      "loss": 3.8213,
      "step": 5200
    },
    {
      "epoch": 13.565891472868216,
      "grad_norm": 0.7939437627792358,
      "learning_rate": 0.0004434754521963824,
      "loss": 3.8186,
      "step": 5250
    },
    {
      "epoch": 13.695090439276486,
      "grad_norm": 0.743143618106842,
      "learning_rate": 0.00044293712316968134,
      "loss": 3.848,
      "step": 5300
    },
    {
      "epoch": 13.824289405684755,
      "grad_norm": 0.7637361288070679,
      "learning_rate": 0.00044239879414298015,
      "loss": 3.8735,
      "step": 5350
    },
    {
      "epoch": 13.953488372093023,
      "grad_norm": 0.7697912454605103,
      "learning_rate": 0.00044186046511627907,
      "loss": 3.8798,
      "step": 5400
    },
    {
      "epoch": 14.082687338501293,
      "grad_norm": 0.7684032917022705,
      "learning_rate": 0.00044132213608957793,
      "loss": 3.6829,
      "step": 5450
    },
    {
      "epoch": 14.21188630490956,
      "grad_norm": 0.7472913861274719,
      "learning_rate": 0.00044078380706287685,
      "loss": 3.5535,
      "step": 5500
    },
    {
      "epoch": 14.34108527131783,
      "grad_norm": 0.7673872113227844,
      "learning_rate": 0.0004402454780361757,
      "loss": 3.6403,
      "step": 5550
    },
    {
      "epoch": 14.470284237726098,
      "grad_norm": 0.7946407198905945,
      "learning_rate": 0.00043970714900947464,
      "loss": 3.6268,
      "step": 5600
    },
    {
      "epoch": 14.599483204134367,
      "grad_norm": 0.8375792503356934,
      "learning_rate": 0.00043916881998277345,
      "loss": 3.682,
      "step": 5650
    },
    {
      "epoch": 14.728682170542635,
      "grad_norm": 0.7884207367897034,
      "learning_rate": 0.00043863049095607237,
      "loss": 3.7157,
      "step": 5700
    },
    {
      "epoch": 14.857881136950905,
      "grad_norm": 0.7757668495178223,
      "learning_rate": 0.00043809216192937123,
      "loss": 3.7187,
      "step": 5750
    },
    {
      "epoch": 14.987080103359173,
      "grad_norm": 0.8297176957130432,
      "learning_rate": 0.00043755383290267015,
      "loss": 3.7363,
      "step": 5800
    },
    {
      "epoch": 15.116279069767442,
      "grad_norm": 0.8003735542297363,
      "learning_rate": 0.000437015503875969,
      "loss": 3.4399,
      "step": 5850
    },
    {
      "epoch": 15.24547803617571,
      "grad_norm": 0.7709876894950867,
      "learning_rate": 0.0004364771748492679,
      "loss": 3.4298,
      "step": 5900
    },
    {
      "epoch": 15.37467700258398,
      "grad_norm": 0.7903456091880798,
      "learning_rate": 0.00043593884582256674,
      "loss": 3.4718,
      "step": 5950
    },
    {
      "epoch": 15.503875968992247,
      "grad_norm": 0.811981201171875,
      "learning_rate": 0.0004354005167958656,
      "loss": 3.4655,
      "step": 6000
    },
    {
      "epoch": 15.633074935400517,
      "grad_norm": 0.7823837995529175,
      "learning_rate": 0.0004348621877691645,
      "loss": 3.5308,
      "step": 6050
    },
    {
      "epoch": 15.762273901808786,
      "grad_norm": 0.7559253573417664,
      "learning_rate": 0.0004343238587424634,
      "loss": 3.532,
      "step": 6100
    },
    {
      "epoch": 15.891472868217054,
      "grad_norm": 0.7838237285614014,
      "learning_rate": 0.0004337855297157623,
      "loss": 3.5759,
      "step": 6150
    },
    {
      "epoch": 16.020671834625322,
      "grad_norm": 0.7657240033149719,
      "learning_rate": 0.0004332472006890612,
      "loss": 3.5372,
      "step": 6200
    },
    {
      "epoch": 16.149870801033593,
      "grad_norm": 0.759943425655365,
      "learning_rate": 0.00043270887166236004,
      "loss": 3.2469,
      "step": 6250
    },
    {
      "epoch": 16.27906976744186,
      "grad_norm": 0.8399419784545898,
      "learning_rate": 0.0004321705426356589,
      "loss": 3.2606,
      "step": 6300
    },
    {
      "epoch": 16.40826873385013,
      "grad_norm": 0.8121830821037292,
      "learning_rate": 0.0004316322136089578,
      "loss": 3.3145,
      "step": 6350
    },
    {
      "epoch": 16.537467700258397,
      "grad_norm": 0.8004789352416992,
      "learning_rate": 0.0004310938845822567,
      "loss": 3.3681,
      "step": 6400
    },
    {
      "epoch": 16.666666666666668,
      "grad_norm": 0.8482992053031921,
      "learning_rate": 0.0004305555555555556,
      "loss": 3.388,
      "step": 6450
    },
    {
      "epoch": 16.795865633074936,
      "grad_norm": 0.8081602454185486,
      "learning_rate": 0.0004300172265288544,
      "loss": 3.3743,
      "step": 6500
    },
    {
      "epoch": 16.925064599483203,
      "grad_norm": 0.8198291063308716,
      "learning_rate": 0.0004294788975021533,
      "loss": 3.4198,
      "step": 6550
    },
    {
      "epoch": 17.05426356589147,
      "grad_norm": 0.8351156711578369,
      "learning_rate": 0.0004289405684754522,
      "loss": 3.2457,
      "step": 6600
    },
    {
      "epoch": 17.183462532299743,
      "grad_norm": 0.89192795753479,
      "learning_rate": 0.00042840223944875107,
      "loss": 3.0653,
      "step": 6650
    },
    {
      "epoch": 17.31266149870801,
      "grad_norm": 0.8027949929237366,
      "learning_rate": 0.00042786391042205,
      "loss": 3.1194,
      "step": 6700
    },
    {
      "epoch": 17.441860465116278,
      "grad_norm": 0.8474370241165161,
      "learning_rate": 0.00042732558139534885,
      "loss": 3.1529,
      "step": 6750
    },
    {
      "epoch": 17.57105943152455,
      "grad_norm": 0.8289262652397156,
      "learning_rate": 0.0004267872523686477,
      "loss": 3.1873,
      "step": 6800
    },
    {
      "epoch": 17.700258397932817,
      "grad_norm": 0.8512954711914062,
      "learning_rate": 0.0004262489233419466,
      "loss": 3.2142,
      "step": 6850
    },
    {
      "epoch": 17.829457364341085,
      "grad_norm": 0.8560154438018799,
      "learning_rate": 0.0004257105943152455,
      "loss": 3.2422,
      "step": 6900
    },
    {
      "epoch": 17.958656330749353,
      "grad_norm": 0.8370221853256226,
      "learning_rate": 0.00042517226528854436,
      "loss": 3.2616,
      "step": 6950
    },
    {
      "epoch": 18.087855297157624,
      "grad_norm": 0.8279748558998108,
      "learning_rate": 0.0004246339362618433,
      "loss": 3.0243,
      "step": 7000
    },
    {
      "epoch": 18.217054263565892,
      "grad_norm": 0.8575722575187683,
      "learning_rate": 0.00042409560723514214,
      "loss": 2.9463,
      "step": 7050
    },
    {
      "epoch": 18.34625322997416,
      "grad_norm": 0.8492497205734253,
      "learning_rate": 0.000423557278208441,
      "loss": 2.9768,
      "step": 7100
    },
    {
      "epoch": 18.475452196382427,
      "grad_norm": 0.8386108875274658,
      "learning_rate": 0.0004230189491817399,
      "loss": 3.0188,
      "step": 7150
    },
    {
      "epoch": 18.6046511627907,
      "grad_norm": 0.8196629881858826,
      "learning_rate": 0.00042248062015503874,
      "loss": 3.0129,
      "step": 7200
    },
    {
      "epoch": 18.733850129198967,
      "grad_norm": 0.8722365498542786,
      "learning_rate": 0.00042194229112833766,
      "loss": 3.0563,
      "step": 7250
    },
    {
      "epoch": 18.863049095607234,
      "grad_norm": 0.8680191040039062,
      "learning_rate": 0.0004214039621016365,
      "loss": 3.1033,
      "step": 7300
    },
    {
      "epoch": 18.992248062015506,
      "grad_norm": 0.8340879678726196,
      "learning_rate": 0.00042086563307493544,
      "loss": 3.1014,
      "step": 7350
    },
    {
      "epoch": 19.121447028423773,
      "grad_norm": 0.8735944032669067,
      "learning_rate": 0.00042032730404823425,
      "loss": 2.7535,
      "step": 7400
    },
    {
      "epoch": 19.25064599483204,
      "grad_norm": 0.8746029138565063,
      "learning_rate": 0.00041978897502153317,
      "loss": 2.778,
      "step": 7450
    },
    {
      "epoch": 19.37984496124031,
      "grad_norm": 0.8950029015541077,
      "learning_rate": 0.00041925064599483203,
      "loss": 2.8154,
      "step": 7500
    },
    {
      "epoch": 19.50904392764858,
      "grad_norm": 0.8750012516975403,
      "learning_rate": 0.00041871231696813095,
      "loss": 2.8509,
      "step": 7550
    },
    {
      "epoch": 19.638242894056848,
      "grad_norm": 0.8834818601608276,
      "learning_rate": 0.0004181739879414298,
      "loss": 2.8861,
      "step": 7600
    },
    {
      "epoch": 19.767441860465116,
      "grad_norm": 0.8611243963241577,
      "learning_rate": 0.00041763565891472874,
      "loss": 2.9299,
      "step": 7650
    },
    {
      "epoch": 19.896640826873384,
      "grad_norm": 0.8894075751304626,
      "learning_rate": 0.00041709732988802755,
      "loss": 2.9413,
      "step": 7700
    },
    {
      "epoch": 20.025839793281655,
      "grad_norm": 0.8464634418487549,
      "learning_rate": 0.00041655900086132647,
      "loss": 2.9067,
      "step": 7750
    },
    {
      "epoch": 20.155038759689923,
      "grad_norm": 0.8762670159339905,
      "learning_rate": 0.00041602067183462533,
      "loss": 2.5834,
      "step": 7800
    },
    {
      "epoch": 20.28423772609819,
      "grad_norm": 0.8752980828285217,
      "learning_rate": 0.0004154823428079242,
      "loss": 2.6456,
      "step": 7850
    },
    {
      "epoch": 20.41343669250646,
      "grad_norm": 0.9237301349639893,
      "learning_rate": 0.0004149440137812231,
      "loss": 2.6859,
      "step": 7900
    },
    {
      "epoch": 20.54263565891473,
      "grad_norm": 0.8540187478065491,
      "learning_rate": 0.0004144056847545219,
      "loss": 2.6872,
      "step": 7950
    },
    {
      "epoch": 20.671834625322997,
      "grad_norm": 0.9119529724121094,
      "learning_rate": 0.00041386735572782084,
      "loss": 2.747,
      "step": 8000
    },
    {
      "epoch": 20.801033591731265,
      "grad_norm": 0.8775665163993835,
      "learning_rate": 0.0004133290267011197,
      "loss": 2.7498,
      "step": 8050
    },
    {
      "epoch": 20.930232558139537,
      "grad_norm": 0.8916148543357849,
      "learning_rate": 0.00041279069767441863,
      "loss": 2.7927,
      "step": 8100
    },
    {
      "epoch": 21.059431524547804,
      "grad_norm": 0.8950030207633972,
      "learning_rate": 0.0004122523686477175,
      "loss": 2.6083,
      "step": 8150
    },
    {
      "epoch": 21.188630490956072,
      "grad_norm": 0.9209582805633545,
      "learning_rate": 0.0004117140396210164,
      "loss": 2.4359,
      "step": 8200
    },
    {
      "epoch": 21.31782945736434,
      "grad_norm": 0.9141038656234741,
      "learning_rate": 0.0004111757105943152,
      "loss": 2.4899,
      "step": 8250
    },
    {
      "epoch": 21.44702842377261,
      "grad_norm": 0.966147243976593,
      "learning_rate": 0.00041063738156761414,
      "loss": 2.5223,
      "step": 8300
    },
    {
      "epoch": 21.57622739018088,
      "grad_norm": 0.9487552046775818,
      "learning_rate": 0.000410099052540913,
      "loss": 2.5519,
      "step": 8350
    },
    {
      "epoch": 21.705426356589147,
      "grad_norm": 0.9473231434822083,
      "learning_rate": 0.0004095607235142119,
      "loss": 2.5879,
      "step": 8400
    },
    {
      "epoch": 21.834625322997415,
      "grad_norm": 0.9128333926200867,
      "learning_rate": 0.0004090223944875108,
      "loss": 2.6198,
      "step": 8450
    },
    {
      "epoch": 21.963824289405686,
      "grad_norm": 0.950503408908844,
      "learning_rate": 0.00040848406546080965,
      "loss": 2.6171,
      "step": 8500
    },
    {
      "epoch": 22.093023255813954,
      "grad_norm": 0.9006721377372742,
      "learning_rate": 0.0004079457364341085,
      "loss": 2.4196,
      "step": 8550
    },
    {
      "epoch": 22.22222222222222,
      "grad_norm": 1.0064070224761963,
      "learning_rate": 0.0004074074074074074,
      "loss": 2.3131,
      "step": 8600
    },
    {
      "epoch": 22.35142118863049,
      "grad_norm": 0.9683255553245544,
      "learning_rate": 0.0004068690783807063,
      "loss": 2.3318,
      "step": 8650
    },
    {
      "epoch": 22.48062015503876,
      "grad_norm": 0.9993629455566406,
      "learning_rate": 0.00040633074935400517,
      "loss": 2.3724,
      "step": 8700
    },
    {
      "epoch": 22.60981912144703,
      "grad_norm": 0.9402567744255066,
      "learning_rate": 0.0004057924203273041,
      "loss": 2.4148,
      "step": 8750
    },
    {
      "epoch": 22.739018087855296,
      "grad_norm": 0.9771119952201843,
      "learning_rate": 0.00040525409130060295,
      "loss": 2.419,
      "step": 8800
    },
    {
      "epoch": 22.868217054263567,
      "grad_norm": 0.935016393661499,
      "learning_rate": 0.0004047157622739018,
      "loss": 2.4596,
      "step": 8850
    },
    {
      "epoch": 22.997416020671835,
      "grad_norm": 0.9506202936172485,
      "learning_rate": 0.0004041774332472007,
      "loss": 2.4919,
      "step": 8900
    },
    {
      "epoch": 23.126614987080103,
      "grad_norm": 1.0655943155288696,
      "learning_rate": 0.0004036391042204996,
      "loss": 2.1427,
      "step": 8950
    },
    {
      "epoch": 23.25581395348837,
      "grad_norm": 0.973122239112854,
      "learning_rate": 0.00040310077519379846,
      "loss": 2.1678,
      "step": 9000
    },
    {
      "epoch": 23.385012919896642,
      "grad_norm": 0.9337523579597473,
      "learning_rate": 0.0004025624461670974,
      "loss": 2.2083,
      "step": 9050
    },
    {
      "epoch": 23.51421188630491,
      "grad_norm": 0.9414676427841187,
      "learning_rate": 0.0004020241171403962,
      "loss": 2.2472,
      "step": 9100
    },
    {
      "epoch": 23.643410852713178,
      "grad_norm": 0.975627601146698,
      "learning_rate": 0.00040148578811369506,
      "loss": 2.2592,
      "step": 9150
    },
    {
      "epoch": 23.772609819121445,
      "grad_norm": 0.9661776423454285,
      "learning_rate": 0.000400947459086994,
      "loss": 2.2872,
      "step": 9200
    },
    {
      "epoch": 23.901808785529717,
      "grad_norm": 1.0357248783111572,
      "learning_rate": 0.00040040913006029284,
      "loss": 2.2914,
      "step": 9250
    },
    {
      "epoch": 24.031007751937985,
      "grad_norm": 0.8984593749046326,
      "learning_rate": 0.00039987080103359176,
      "loss": 2.2445,
      "step": 9300
    },
    {
      "epoch": 24.160206718346252,
      "grad_norm": 1.0363667011260986,
      "learning_rate": 0.0003993324720068906,
      "loss": 1.9718,
      "step": 9350
    },
    {
      "epoch": 24.289405684754524,
      "grad_norm": 1.0693833827972412,
      "learning_rate": 0.0003987941429801895,
      "loss": 2.0401,
      "step": 9400
    },
    {
      "epoch": 24.41860465116279,
      "grad_norm": 0.9846957921981812,
      "learning_rate": 0.00039825581395348835,
      "loss": 2.0873,
      "step": 9450
    },
    {
      "epoch": 24.54780361757106,
      "grad_norm": 0.9677785634994507,
      "learning_rate": 0.00039771748492678727,
      "loss": 2.0879,
      "step": 9500
    },
    {
      "epoch": 24.677002583979327,
      "grad_norm": 0.9678288102149963,
      "learning_rate": 0.00039717915590008613,
      "loss": 2.1276,
      "step": 9550
    },
    {
      "epoch": 24.8062015503876,
      "grad_norm": 1.0275673866271973,
      "learning_rate": 0.00039664082687338505,
      "loss": 2.1485,
      "step": 9600
    },
    {
      "epoch": 24.935400516795866,
      "grad_norm": 0.9752850532531738,
      "learning_rate": 0.0003961024978466839,
      "loss": 2.1584,
      "step": 9650
    },
    {
      "epoch": 25.064599483204134,
      "grad_norm": 0.9534931182861328,
      "learning_rate": 0.0003955641688199828,
      "loss": 1.9799,
      "step": 9700
    },
    {
      "epoch": 25.1937984496124,
      "grad_norm": 1.006929636001587,
      "learning_rate": 0.00039502583979328165,
      "loss": 1.8713,
      "step": 9750
    },
    {
      "epoch": 25.322997416020673,
      "grad_norm": 1.0046697854995728,
      "learning_rate": 0.0003944875107665805,
      "loss": 1.8964,
      "step": 9800
    },
    {
      "epoch": 25.45219638242894,
      "grad_norm": 1.023473858833313,
      "learning_rate": 0.00039394918173987943,
      "loss": 1.9473,
      "step": 9850
    },
    {
      "epoch": 25.58139534883721,
      "grad_norm": 0.9878107309341431,
      "learning_rate": 0.0003934108527131783,
      "loss": 1.9539,
      "step": 9900
    },
    {
      "epoch": 25.710594315245476,
      "grad_norm": 1.029515027999878,
      "learning_rate": 0.0003928725236864772,
      "loss": 1.9703,
      "step": 9950
    },
    {
      "epoch": 25.839793281653748,
      "grad_norm": 1.054065227508545,
      "learning_rate": 0.000392334194659776,
      "loss": 2.0098,
      "step": 10000
    },
    {
      "epoch": 25.968992248062015,
      "grad_norm": 1.0248093605041504,
      "learning_rate": 0.00039179586563307494,
      "loss": 2.0293,
      "step": 10050
    },
    {
      "epoch": 26.098191214470283,
      "grad_norm": 1.0006974935531616,
      "learning_rate": 0.0003912575366063738,
      "loss": 1.7516,
      "step": 10100
    },
    {
      "epoch": 26.227390180878555,
      "grad_norm": 1.0021758079528809,
      "learning_rate": 0.00039071920757967273,
      "loss": 1.7316,
      "step": 10150
    },
    {
      "epoch": 26.356589147286822,
      "grad_norm": 1.0138041973114014,
      "learning_rate": 0.0003901808785529716,
      "loss": 1.7655,
      "step": 10200
    },
    {
      "epoch": 26.48578811369509,
      "grad_norm": 0.9640183448791504,
      "learning_rate": 0.0003896425495262705,
      "loss": 1.8073,
      "step": 10250
    },
    {
      "epoch": 26.614987080103358,
      "grad_norm": 0.9878391623497009,
      "learning_rate": 0.0003891042204995693,
      "loss": 1.8213,
      "step": 10300
    },
    {
      "epoch": 26.74418604651163,
      "grad_norm": 1.0443445444107056,
      "learning_rate": 0.00038856589147286824,
      "loss": 1.8545,
      "step": 10350
    },
    {
      "epoch": 26.873385012919897,
      "grad_norm": 1.0249011516571045,
      "learning_rate": 0.0003880275624461671,
      "loss": 1.8678,
      "step": 10400
    },
    {
      "epoch": 27.002583979328165,
      "grad_norm": 0.9969984292984009,
      "learning_rate": 0.00038748923341946597,
      "loss": 1.9046,
      "step": 10450
    },
    {
      "epoch": 27.131782945736433,
      "grad_norm": 0.980108380317688,
      "learning_rate": 0.0003869509043927649,
      "loss": 1.5799,
      "step": 10500
    },
    {
      "epoch": 27.260981912144704,
      "grad_norm": 1.0120890140533447,
      "learning_rate": 0.0003864125753660637,
      "loss": 1.5999,
      "step": 10550
    },
    {
      "epoch": 27.39018087855297,
      "grad_norm": 1.0249812602996826,
      "learning_rate": 0.0003858742463393626,
      "loss": 1.6372,
      "step": 10600
    },
    {
      "epoch": 27.51937984496124,
      "grad_norm": 1.0876713991165161,
      "learning_rate": 0.0003853359173126615,
      "loss": 1.6599,
      "step": 10650
    },
    {
      "epoch": 27.64857881136951,
      "grad_norm": 1.0191476345062256,
      "learning_rate": 0.0003847975882859604,
      "loss": 1.7051,
      "step": 10700
    },
    {
      "epoch": 27.77777777777778,
      "grad_norm": 1.0177712440490723,
      "learning_rate": 0.00038425925925925927,
      "loss": 1.7253,
      "step": 10750
    },
    {
      "epoch": 27.906976744186046,
      "grad_norm": 1.0449353456497192,
      "learning_rate": 0.0003837209302325582,
      "loss": 1.7649,
      "step": 10800
    },
    {
      "epoch": 28.036175710594314,
      "grad_norm": 1.053076148033142,
      "learning_rate": 0.000383182601205857,
      "loss": 1.6766,
      "step": 10850
    },
    {
      "epoch": 28.165374677002585,
      "grad_norm": 1.04450261592865,
      "learning_rate": 0.0003826442721791559,
      "loss": 1.4666,
      "step": 10900
    },
    {
      "epoch": 28.294573643410853,
      "grad_norm": 1.116848349571228,
      "learning_rate": 0.0003821059431524548,
      "loss": 1.5029,
      "step": 10950
    },
    {
      "epoch": 28.42377260981912,
      "grad_norm": 1.022246241569519,
      "learning_rate": 0.0003815676141257537,
      "loss": 1.5182,
      "step": 11000
    },
    {
      "epoch": 28.55297157622739,
      "grad_norm": 1.0176646709442139,
      "learning_rate": 0.00038102928509905256,
      "loss": 1.555,
      "step": 11050
    },
    {
      "epoch": 28.68217054263566,
      "grad_norm": 1.1232167482376099,
      "learning_rate": 0.0003804909560723514,
      "loss": 1.5896,
      "step": 11100
    },
    {
      "epoch": 28.811369509043928,
      "grad_norm": 1.0402328968048096,
      "learning_rate": 0.0003799526270456503,
      "loss": 1.5899,
      "step": 11150
    },
    {
      "epoch": 28.940568475452196,
      "grad_norm": 1.0869791507720947,
      "learning_rate": 0.00037941429801894916,
      "loss": 1.6253,
      "step": 11200
    },
    {
      "epoch": 29.069767441860463,
      "grad_norm": 1.037343144416809,
      "learning_rate": 0.0003788759689922481,
      "loss": 1.458,
      "step": 11250
    },
    {
      "epoch": 29.198966408268735,
      "grad_norm": 1.0311415195465088,
      "learning_rate": 0.00037833763996554694,
      "loss": 1.3556,
      "step": 11300
    },
    {
      "epoch": 29.328165374677003,
      "grad_norm": 1.0798285007476807,
      "learning_rate": 0.00037779931093884586,
      "loss": 1.3922,
      "step": 11350
    },
    {
      "epoch": 29.45736434108527,
      "grad_norm": 1.1110342741012573,
      "learning_rate": 0.0003772609819121447,
      "loss": 1.4293,
      "step": 11400
    },
    {
      "epoch": 29.58656330749354,
      "grad_norm": 1.0339034795761108,
      "learning_rate": 0.0003767226528854436,
      "loss": 1.4486,
      "step": 11450
    },
    {
      "epoch": 29.71576227390181,
      "grad_norm": 1.07316255569458,
      "learning_rate": 0.00037618432385874245,
      "loss": 1.4563,
      "step": 11500
    },
    {
      "epoch": 29.844961240310077,
      "grad_norm": 1.0457143783569336,
      "learning_rate": 0.00037564599483204137,
      "loss": 1.4887,
      "step": 11550
    },
    {
      "epoch": 29.974160206718345,
      "grad_norm": 1.0909912586212158,
      "learning_rate": 0.00037510766580534023,
      "loss": 1.5086,
      "step": 11600
    },
    {
      "epoch": 30.103359173126616,
      "grad_norm": 1.043745756149292,
      "learning_rate": 0.00037456933677863915,
      "loss": 1.2855,
      "step": 11650
    },
    {
      "epoch": 30.232558139534884,
      "grad_norm": 1.0904996395111084,
      "learning_rate": 0.00037403100775193796,
      "loss": 1.2689,
      "step": 11700
    },
    {
      "epoch": 30.361757105943152,
      "grad_norm": 1.0589019060134888,
      "learning_rate": 0.00037349267872523683,
      "loss": 1.2895,
      "step": 11750
    },
    {
      "epoch": 30.49095607235142,
      "grad_norm": 1.1011033058166504,
      "learning_rate": 0.00037295434969853575,
      "loss": 1.3106,
      "step": 11800
    },
    {
      "epoch": 30.62015503875969,
      "grad_norm": 1.0708906650543213,
      "learning_rate": 0.0003724160206718346,
      "loss": 1.3393,
      "step": 11850
    },
    {
      "epoch": 30.74935400516796,
      "grad_norm": 1.1151784658432007,
      "learning_rate": 0.00037187769164513353,
      "loss": 1.3697,
      "step": 11900
    },
    {
      "epoch": 30.878552971576227,
      "grad_norm": 1.1110284328460693,
      "learning_rate": 0.0003713393626184324,
      "loss": 1.3772,
      "step": 11950
    },
    {
      "epoch": 31.007751937984494,
      "grad_norm": 1.0338791608810425,
      "learning_rate": 0.00037080103359173126,
      "loss": 1.3897,
      "step": 12000
    },
    {
      "epoch": 31.136950904392766,
      "grad_norm": 0.9945932626724243,
      "learning_rate": 0.0003702627045650301,
      "loss": 1.128,
      "step": 12050
    },
    {
      "epoch": 31.266149870801033,
      "grad_norm": 1.0979617834091187,
      "learning_rate": 0.00036972437553832904,
      "loss": 1.181,
      "step": 12100
    },
    {
      "epoch": 31.3953488372093,
      "grad_norm": 1.09369695186615,
      "learning_rate": 0.0003691860465116279,
      "loss": 1.1983,
      "step": 12150
    },
    {
      "epoch": 31.524547803617573,
      "grad_norm": 0.9991064667701721,
      "learning_rate": 0.00036864771748492683,
      "loss": 1.2173,
      "step": 12200
    },
    {
      "epoch": 31.65374677002584,
      "grad_norm": 1.1208692789077759,
      "learning_rate": 0.0003681093884582257,
      "loss": 1.2472,
      "step": 12250
    },
    {
      "epoch": 31.782945736434108,
      "grad_norm": 1.1178430318832397,
      "learning_rate": 0.00036757105943152456,
      "loss": 1.2743,
      "step": 12300
    },
    {
      "epoch": 31.912144702842376,
      "grad_norm": 1.1133732795715332,
      "learning_rate": 0.0003670327304048234,
      "loss": 1.3026,
      "step": 12350
    },
    {
      "epoch": 32.041343669250644,
      "grad_norm": 1.0029962062835693,
      "learning_rate": 0.0003664944013781223,
      "loss": 1.2224,
      "step": 12400
    },
    {
      "epoch": 32.17054263565891,
      "grad_norm": 1.0903340578079224,
      "learning_rate": 0.0003659560723514212,
      "loss": 1.0604,
      "step": 12450
    },
    {
      "epoch": 32.299741602067186,
      "grad_norm": 1.0614738464355469,
      "learning_rate": 0.00036541774332472007,
      "loss": 1.0914,
      "step": 12500
    },
    {
      "epoch": 32.428940568475454,
      "grad_norm": 1.164192795753479,
      "learning_rate": 0.000364879414298019,
      "loss": 1.1175,
      "step": 12550
    },
    {
      "epoch": 32.55813953488372,
      "grad_norm": 1.0716255903244019,
      "learning_rate": 0.0003643410852713178,
      "loss": 1.1399,
      "step": 12600
    },
    {
      "epoch": 32.68733850129199,
      "grad_norm": 1.0433281660079956,
      "learning_rate": 0.0003638027562446167,
      "loss": 1.1566,
      "step": 12650
    },
    {
      "epoch": 32.81653746770026,
      "grad_norm": 1.1375789642333984,
      "learning_rate": 0.0003632644272179156,
      "loss": 1.1776,
      "step": 12700
    },
    {
      "epoch": 32.945736434108525,
      "grad_norm": 1.0836702585220337,
      "learning_rate": 0.0003627260981912145,
      "loss": 1.2104,
      "step": 12750
    },
    {
      "epoch": 33.07493540051679,
      "grad_norm": 1.0600357055664062,
      "learning_rate": 0.00036218776916451337,
      "loss": 1.0668,
      "step": 12800
    },
    {
      "epoch": 33.20413436692507,
      "grad_norm": 1.119808554649353,
      "learning_rate": 0.0003616494401378123,
      "loss": 0.9916,
      "step": 12850
    },
    {
      "epoch": 33.333333333333336,
      "grad_norm": 1.0667747259140015,
      "learning_rate": 0.0003611111111111111,
      "loss": 1.0156,
      "step": 12900
    },
    {
      "epoch": 33.4625322997416,
      "grad_norm": 1.0929239988327026,
      "learning_rate": 0.00036057278208441,
      "loss": 1.0444,
      "step": 12950
    },
    {
      "epoch": 33.59173126614987,
      "grad_norm": 1.110893726348877,
      "learning_rate": 0.0003600344530577089,
      "loss": 1.0593,
      "step": 13000
    },
    {
      "epoch": 33.72093023255814,
      "grad_norm": 1.0616086721420288,
      "learning_rate": 0.00035949612403100774,
      "loss": 1.0855,
      "step": 13050
    },
    {
      "epoch": 33.85012919896641,
      "grad_norm": 1.0666298866271973,
      "learning_rate": 0.00035895779500430666,
      "loss": 1.1038,
      "step": 13100
    },
    {
      "epoch": 33.979328165374675,
      "grad_norm": 1.1630655527114868,
      "learning_rate": 0.00035841946597760547,
      "loss": 1.1173,
      "step": 13150
    },
    {
      "epoch": 34.10852713178294,
      "grad_norm": 1.0808757543563843,
      "learning_rate": 0.0003578811369509044,
      "loss": 0.9347,
      "step": 13200
    },
    {
      "epoch": 34.23772609819122,
      "grad_norm": 1.0300551652908325,
      "learning_rate": 0.00035734280792420326,
      "loss": 0.9218,
      "step": 13250
    },
    {
      "epoch": 34.366925064599485,
      "grad_norm": 1.031818151473999,
      "learning_rate": 0.0003568044788975022,
      "loss": 0.9459,
      "step": 13300
    },
    {
      "epoch": 34.49612403100775,
      "grad_norm": 1.0583913326263428,
      "learning_rate": 0.00035626614987080104,
      "loss": 0.9711,
      "step": 13350
    },
    {
      "epoch": 34.62532299741602,
      "grad_norm": 1.1159652471542358,
      "learning_rate": 0.00035572782084409996,
      "loss": 0.988,
      "step": 13400
    },
    {
      "epoch": 34.75452196382429,
      "grad_norm": 1.1470566987991333,
      "learning_rate": 0.00035518949181739877,
      "loss": 1.017,
      "step": 13450
    },
    {
      "epoch": 34.883720930232556,
      "grad_norm": 1.1096407175064087,
      "learning_rate": 0.0003546511627906977,
      "loss": 1.0332,
      "step": 13500
    },
    {
      "epoch": 35.012919896640824,
      "grad_norm": 1.0105173587799072,
      "learning_rate": 0.00035411283376399655,
      "loss": 1.0242,
      "step": 13550
    },
    {
      "epoch": 35.1421188630491,
      "grad_norm": 1.023962378501892,
      "learning_rate": 0.00035357450473729547,
      "loss": 0.8372,
      "step": 13600
    },
    {
      "epoch": 35.27131782945737,
      "grad_norm": 1.0276005268096924,
      "learning_rate": 0.00035303617571059433,
      "loss": 0.8636,
      "step": 13650
    },
    {
      "epoch": 35.400516795865634,
      "grad_norm": 1.0073238611221313,
      "learning_rate": 0.0003524978466838932,
      "loss": 0.8824,
      "step": 13700
    },
    {
      "epoch": 35.5297157622739,
      "grad_norm": 1.036906123161316,
      "learning_rate": 0.00035195951765719206,
      "loss": 0.9058,
      "step": 13750
    },
    {
      "epoch": 35.65891472868217,
      "grad_norm": 1.165481686592102,
      "learning_rate": 0.00035142118863049093,
      "loss": 0.9267,
      "step": 13800
    },
    {
      "epoch": 35.78811369509044,
      "grad_norm": 1.095076084136963,
      "learning_rate": 0.00035088285960378985,
      "loss": 0.9407,
      "step": 13850
    },
    {
      "epoch": 35.917312661498705,
      "grad_norm": 1.1574336290359497,
      "learning_rate": 0.0003503445305770887,
      "loss": 0.9571,
      "step": 13900
    },
    {
      "epoch": 36.04651162790697,
      "grad_norm": 0.9809584021568298,
      "learning_rate": 0.00034980620155038763,
      "loss": 0.9009,
      "step": 13950
    },
    {
      "epoch": 36.17571059431525,
      "grad_norm": 1.1092181205749512,
      "learning_rate": 0.0003492678725236865,
      "loss": 0.7827,
      "step": 14000
    },
    {
      "epoch": 36.304909560723516,
      "grad_norm": 1.0440077781677246,
      "learning_rate": 0.00034872954349698536,
      "loss": 0.8052,
      "step": 14050
    },
    {
      "epoch": 36.434108527131784,
      "grad_norm": 1.0873245000839233,
      "learning_rate": 0.0003481912144702842,
      "loss": 0.826,
      "step": 14100
    },
    {
      "epoch": 36.56330749354005,
      "grad_norm": 1.070630669593811,
      "learning_rate": 0.00034765288544358314,
      "loss": 0.8569,
      "step": 14150
    },
    {
      "epoch": 36.69250645994832,
      "grad_norm": 1.1440433263778687,
      "learning_rate": 0.000347114556416882,
      "loss": 0.8654,
      "step": 14200
    },
    {
      "epoch": 36.82170542635659,
      "grad_norm": 1.0592854022979736,
      "learning_rate": 0.00034657622739018093,
      "loss": 0.8762,
      "step": 14250
    },
    {
      "epoch": 36.950904392764855,
      "grad_norm": 1.0903996229171753,
      "learning_rate": 0.00034603789836347974,
      "loss": 0.8899,
      "step": 14300
    },
    {
      "epoch": 37.08010335917313,
      "grad_norm": 1.048769474029541,
      "learning_rate": 0.0003454995693367786,
      "loss": 0.7938,
      "step": 14350
    },
    {
      "epoch": 37.2093023255814,
      "grad_norm": 1.055869698524475,
      "learning_rate": 0.0003449612403100775,
      "loss": 0.7354,
      "step": 14400
    },
    {
      "epoch": 37.338501291989665,
      "grad_norm": 1.1306650638580322,
      "learning_rate": 0.0003444229112833764,
      "loss": 0.7577,
      "step": 14450
    },
    {
      "epoch": 37.46770025839793,
      "grad_norm": 1.0616788864135742,
      "learning_rate": 0.0003438845822566753,
      "loss": 0.7708,
      "step": 14500
    },
    {
      "epoch": 37.5968992248062,
      "grad_norm": 1.0879144668579102,
      "learning_rate": 0.00034334625322997417,
      "loss": 0.7847,
      "step": 14550
    },
    {
      "epoch": 37.72609819121447,
      "grad_norm": 1.0476908683776855,
      "learning_rate": 0.00034280792420327303,
      "loss": 0.8129,
      "step": 14600
    },
    {
      "epoch": 37.855297157622736,
      "grad_norm": 1.0760301351547241,
      "learning_rate": 0.0003422695951765719,
      "loss": 0.8227,
      "step": 14650
    },
    {
      "epoch": 37.98449612403101,
      "grad_norm": 1.0563594102859497,
      "learning_rate": 0.0003417312661498708,
      "loss": 0.8335,
      "step": 14700
    },
    {
      "epoch": 38.11369509043928,
      "grad_norm": 0.9487522840499878,
      "learning_rate": 0.0003411929371231697,
      "loss": 0.6946,
      "step": 14750
    },
    {
      "epoch": 38.24289405684755,
      "grad_norm": 1.0453423261642456,
      "learning_rate": 0.0003406546080964686,
      "loss": 0.6954,
      "step": 14800
    },
    {
      "epoch": 38.372093023255815,
      "grad_norm": 1.0472131967544556,
      "learning_rate": 0.00034011627906976747,
      "loss": 0.7018,
      "step": 14850
    },
    {
      "epoch": 38.50129198966408,
      "grad_norm": 1.1571449041366577,
      "learning_rate": 0.00033957795004306633,
      "loss": 0.7238,
      "step": 14900
    },
    {
      "epoch": 38.63049095607235,
      "grad_norm": 1.0307157039642334,
      "learning_rate": 0.0003390396210163652,
      "loss": 0.7421,
      "step": 14950
    },
    {
      "epoch": 38.75968992248062,
      "grad_norm": 1.0821259021759033,
      "learning_rate": 0.0003385012919896641,
      "loss": 0.7637,
      "step": 15000
    },
    {
      "epoch": 38.888888888888886,
      "grad_norm": 1.1054902076721191,
      "learning_rate": 0.000337962962962963,
      "loss": 0.7695,
      "step": 15050
    },
    {
      "epoch": 39.01808785529716,
      "grad_norm": 1.0535945892333984,
      "learning_rate": 0.00033742463393626184,
      "loss": 0.7626,
      "step": 15100
    },
    {
      "epoch": 39.14728682170543,
      "grad_norm": 1.0273267030715942,
      "learning_rate": 0.00033688630490956076,
      "loss": 0.6317,
      "step": 15150
    },
    {
      "epoch": 39.276485788113696,
      "grad_norm": 1.0202946662902832,
      "learning_rate": 0.00033634797588285957,
      "loss": 0.6479,
      "step": 15200
    },
    {
      "epoch": 39.405684754521964,
      "grad_norm": 1.0935359001159668,
      "learning_rate": 0.0003358096468561585,
      "loss": 0.6675,
      "step": 15250
    },
    {
      "epoch": 39.53488372093023,
      "grad_norm": 1.038771629333496,
      "learning_rate": 0.00033527131782945736,
      "loss": 0.6832,
      "step": 15300
    },
    {
      "epoch": 39.6640826873385,
      "grad_norm": 1.0775431394577026,
      "learning_rate": 0.0003347329888027563,
      "loss": 0.6996,
      "step": 15350
    },
    {
      "epoch": 39.79328165374677,
      "grad_norm": 1.1158571243286133,
      "learning_rate": 0.00033419465977605514,
      "loss": 0.7102,
      "step": 15400
    },
    {
      "epoch": 39.92248062015504,
      "grad_norm": 1.0991837978363037,
      "learning_rate": 0.00033365633074935406,
      "loss": 0.7237,
      "step": 15450
    },
    {
      "epoch": 40.05167958656331,
      "grad_norm": 0.9562063813209534,
      "learning_rate": 0.00033311800172265287,
      "loss": 0.6724,
      "step": 15500
    },
    {
      "epoch": 40.18087855297158,
      "grad_norm": 1.007157325744629,
      "learning_rate": 0.0003325796726959518,
      "loss": 0.5968,
      "step": 15550
    },
    {
      "epoch": 40.310077519379846,
      "grad_norm": 0.9869040250778198,
      "learning_rate": 0.00033204134366925065,
      "loss": 0.607,
      "step": 15600
    },
    {
      "epoch": 40.43927648578811,
      "grad_norm": 1.0570577383041382,
      "learning_rate": 0.00033150301464254957,
      "loss": 0.6317,
      "step": 15650
    },
    {
      "epoch": 40.56847545219638,
      "grad_norm": 1.0295755863189697,
      "learning_rate": 0.00033096468561584843,
      "loss": 0.6477,
      "step": 15700
    },
    {
      "epoch": 40.69767441860465,
      "grad_norm": 1.0514014959335327,
      "learning_rate": 0.00033042635658914725,
      "loss": 0.6615,
      "step": 15750
    },
    {
      "epoch": 40.82687338501292,
      "grad_norm": 1.095037817955017,
      "learning_rate": 0.00032988802756244616,
      "loss": 0.6638,
      "step": 15800
    },
    {
      "epoch": 40.95607235142119,
      "grad_norm": 1.0811198949813843,
      "learning_rate": 0.00032934969853574503,
      "loss": 0.677,
      "step": 15850
    },
    {
      "epoch": 41.08527131782946,
      "grad_norm": 0.940364420413971,
      "learning_rate": 0.00032881136950904395,
      "loss": 0.6022,
      "step": 15900
    },
    {
      "epoch": 41.21447028423773,
      "grad_norm": 0.9907973408699036,
      "learning_rate": 0.0003282730404823428,
      "loss": 0.5622,
      "step": 15950
    },
    {
      "epoch": 41.343669250645995,
      "grad_norm": 1.0549731254577637,
      "learning_rate": 0.00032773471145564173,
      "loss": 0.5885,
      "step": 16000
    },
    {
      "epoch": 41.47286821705426,
      "grad_norm": 1.0816057920455933,
      "learning_rate": 0.00032719638242894054,
      "loss": 0.5988,
      "step": 16050
    },
    {
      "epoch": 41.60206718346253,
      "grad_norm": 1.0438159704208374,
      "learning_rate": 0.00032665805340223946,
      "loss": 0.6099,
      "step": 16100
    },
    {
      "epoch": 41.7312661498708,
      "grad_norm": 1.0632882118225098,
      "learning_rate": 0.0003261197243755383,
      "loss": 0.6093,
      "step": 16150
    },
    {
      "epoch": 41.86046511627907,
      "grad_norm": 1.0382323265075684,
      "learning_rate": 0.00032558139534883724,
      "loss": 0.6215,
      "step": 16200
    },
    {
      "epoch": 41.98966408268734,
      "grad_norm": 1.0467637777328491,
      "learning_rate": 0.0003250430663221361,
      "loss": 0.6362,
      "step": 16250
    },
    {
      "epoch": 42.11886304909561,
      "grad_norm": 0.9871970415115356,
      "learning_rate": 0.00032450473729543503,
      "loss": 0.5333,
      "step": 16300
    },
    {
      "epoch": 42.248062015503876,
      "grad_norm": 0.9626291394233704,
      "learning_rate": 0.00032396640826873384,
      "loss": 0.5385,
      "step": 16350
    },
    {
      "epoch": 42.377260981912144,
      "grad_norm": 0.9124513864517212,
      "learning_rate": 0.0003234280792420327,
      "loss": 0.5475,
      "step": 16400
    },
    {
      "epoch": 42.50645994832041,
      "grad_norm": 0.9666666388511658,
      "learning_rate": 0.0003228897502153316,
      "loss": 0.5568,
      "step": 16450
    },
    {
      "epoch": 42.63565891472868,
      "grad_norm": 0.9857789278030396,
      "learning_rate": 0.0003223514211886305,
      "loss": 0.571,
      "step": 16500
    },
    {
      "epoch": 42.76485788113695,
      "grad_norm": 1.0844330787658691,
      "learning_rate": 0.0003218130921619294,
      "loss": 0.5801,
      "step": 16550
    },
    {
      "epoch": 42.89405684754522,
      "grad_norm": 1.061064600944519,
      "learning_rate": 0.00032127476313522827,
      "loss": 0.5942,
      "step": 16600
    },
    {
      "epoch": 43.02325581395349,
      "grad_norm": 0.9507066607475281,
      "learning_rate": 0.00032073643410852713,
      "loss": 0.5839,
      "step": 16650
    },
    {
      "epoch": 43.15245478036176,
      "grad_norm": 0.9956624507904053,
      "learning_rate": 0.000320198105081826,
      "loss": 0.4911,
      "step": 16700
    },
    {
      "epoch": 43.281653746770026,
      "grad_norm": 0.9833168387413025,
      "learning_rate": 0.0003196597760551249,
      "loss": 0.5085,
      "step": 16750
    },
    {
      "epoch": 43.41085271317829,
      "grad_norm": 0.9695848226547241,
      "learning_rate": 0.0003191214470284238,
      "loss": 0.519,
      "step": 16800
    },
    {
      "epoch": 43.54005167958656,
      "grad_norm": 1.0127440690994263,
      "learning_rate": 0.0003185831180017227,
      "loss": 0.5317,
      "step": 16850
    },
    {
      "epoch": 43.66925064599483,
      "grad_norm": 1.0743576288223267,
      "learning_rate": 0.0003180447889750215,
      "loss": 0.5349,
      "step": 16900
    },
    {
      "epoch": 43.798449612403104,
      "grad_norm": 1.0073367357254028,
      "learning_rate": 0.00031750645994832043,
      "loss": 0.5471,
      "step": 16950
    },
    {
      "epoch": 43.92764857881137,
      "grad_norm": 1.042517900466919,
      "learning_rate": 0.0003169681309216193,
      "loss": 0.5533,
      "step": 17000
    },
    {
      "epoch": 44.05684754521964,
      "grad_norm": 1.0032352209091187,
      "learning_rate": 0.00031642980189491816,
      "loss": 0.5236,
      "step": 17050
    },
    {
      "epoch": 44.18604651162791,
      "grad_norm": 0.9403014183044434,
      "learning_rate": 0.0003158914728682171,
      "loss": 0.4656,
      "step": 17100
    },
    {
      "epoch": 44.315245478036175,
      "grad_norm": 1.0409326553344727,
      "learning_rate": 0.00031535314384151594,
      "loss": 0.4797,
      "step": 17150
    },
    {
      "epoch": 44.44444444444444,
      "grad_norm": 0.9520995020866394,
      "learning_rate": 0.0003148148148148148,
      "loss": 0.4828,
      "step": 17200
    },
    {
      "epoch": 44.57364341085271,
      "grad_norm": 1.0025560855865479,
      "learning_rate": 0.00031427648578811367,
      "loss": 0.493,
      "step": 17250
    },
    {
      "epoch": 44.70284237726098,
      "grad_norm": 1.0239673852920532,
      "learning_rate": 0.0003137381567614126,
      "loss": 0.5099,
      "step": 17300
    },
    {
      "epoch": 44.83204134366925,
      "grad_norm": 1.0196808576583862,
      "learning_rate": 0.00031319982773471146,
      "loss": 0.5193,
      "step": 17350
    },
    {
      "epoch": 44.96124031007752,
      "grad_norm": 1.0267741680145264,
      "learning_rate": 0.0003126614987080104,
      "loss": 0.5272,
      "step": 17400
    },
    {
      "epoch": 45.09043927648579,
      "grad_norm": 0.9719401001930237,
      "learning_rate": 0.00031212316968130924,
      "loss": 0.4622,
      "step": 17450
    },
    {
      "epoch": 45.21963824289406,
      "grad_norm": 0.9207221269607544,
      "learning_rate": 0.0003115848406546081,
      "loss": 0.4358,
      "step": 17500
    },
    {
      "epoch": 45.348837209302324,
      "grad_norm": 0.9340962171554565,
      "learning_rate": 0.00031104651162790697,
      "loss": 0.449,
      "step": 17550
    },
    {
      "epoch": 45.47803617571059,
      "grad_norm": 1.0955092906951904,
      "learning_rate": 0.0003105081826012059,
      "loss": 0.4615,
      "step": 17600
    },
    {
      "epoch": 45.60723514211886,
      "grad_norm": 0.9808328151702881,
      "learning_rate": 0.00030996985357450475,
      "loss": 0.4728,
      "step": 17650
    },
    {
      "epoch": 45.736434108527135,
      "grad_norm": 1.0074886083602905,
      "learning_rate": 0.0003094315245478036,
      "loss": 0.4742,
      "step": 17700
    },
    {
      "epoch": 45.8656330749354,
      "grad_norm": 0.9679105281829834,
      "learning_rate": 0.00030889319552110254,
      "loss": 0.4865,
      "step": 17750
    },
    {
      "epoch": 45.99483204134367,
      "grad_norm": 0.9656231999397278,
      "learning_rate": 0.00030835486649440135,
      "loss": 0.4992,
      "step": 17800
    },
    {
      "epoch": 46.12403100775194,
      "grad_norm": 0.9613224864006042,
      "learning_rate": 0.00030781653746770026,
      "loss": 0.414,
      "step": 17850
    },
    {
      "epoch": 46.253229974160206,
      "grad_norm": 0.9829956889152527,
      "learning_rate": 0.00030727820844099913,
      "loss": 0.4162,
      "step": 17900
    },
    {
      "epoch": 46.382428940568474,
      "grad_norm": 0.9279460310935974,
      "learning_rate": 0.00030673987941429805,
      "loss": 0.4236,
      "step": 17950
    },
    {
      "epoch": 46.51162790697674,
      "grad_norm": 0.9363090395927429,
      "learning_rate": 0.0003062015503875969,
      "loss": 0.434,
      "step": 18000
    },
    {
      "epoch": 46.64082687338501,
      "grad_norm": 0.9822865724563599,
      "learning_rate": 0.00030566322136089583,
      "loss": 0.4373,
      "step": 18050
    },
    {
      "epoch": 46.770025839793284,
      "grad_norm": 1.0166221857070923,
      "learning_rate": 0.00030512489233419464,
      "loss": 0.4562,
      "step": 18100
    },
    {
      "epoch": 46.89922480620155,
      "grad_norm": 0.9347100257873535,
      "learning_rate": 0.00030458656330749356,
      "loss": 0.465,
      "step": 18150
    },
    {
      "epoch": 47.02842377260982,
      "grad_norm": 0.8109915852546692,
      "learning_rate": 0.0003040482342807924,
      "loss": 0.4521,
      "step": 18200
    },
    {
      "epoch": 47.15762273901809,
      "grad_norm": 0.9520167708396912,
      "learning_rate": 0.00030350990525409134,
      "loss": 0.386,
      "step": 18250
    },
    {
      "epoch": 47.286821705426355,
      "grad_norm": 0.9405495524406433,
      "learning_rate": 0.0003029715762273902,
      "loss": 0.3886,
      "step": 18300
    },
    {
      "epoch": 47.41602067183462,
      "grad_norm": 0.9056929349899292,
      "learning_rate": 0.000302433247200689,
      "loss": 0.4071,
      "step": 18350
    },
    {
      "epoch": 47.54521963824289,
      "grad_norm": 0.9537134766578674,
      "learning_rate": 0.00030189491817398794,
      "loss": 0.4155,
      "step": 18400
    },
    {
      "epoch": 47.674418604651166,
      "grad_norm": 0.9422418475151062,
      "learning_rate": 0.0003013565891472868,
      "loss": 0.4202,
      "step": 18450
    },
    {
      "epoch": 47.80361757105943,
      "grad_norm": 0.9643380641937256,
      "learning_rate": 0.0003008182601205857,
      "loss": 0.4313,
      "step": 18500
    },
    {
      "epoch": 47.9328165374677,
      "grad_norm": 1.0232657194137573,
      "learning_rate": 0.0003002799310938846,
      "loss": 0.4445,
      "step": 18550
    },
    {
      "epoch": 48.06201550387597,
      "grad_norm": 0.9518341422080994,
      "learning_rate": 0.0002997416020671835,
      "loss": 0.4093,
      "step": 18600
    },
    {
      "epoch": 48.19121447028424,
      "grad_norm": 0.9106420874595642,
      "learning_rate": 0.0002992032730404823,
      "loss": 0.3714,
      "step": 18650
    },
    {
      "epoch": 48.320413436692505,
      "grad_norm": 0.9175841808319092,
      "learning_rate": 0.00029866494401378123,
      "loss": 0.3749,
      "step": 18700
    },
    {
      "epoch": 48.44961240310077,
      "grad_norm": 0.9215375185012817,
      "learning_rate": 0.0002981266149870801,
      "loss": 0.3866,
      "step": 18750
    },
    {
      "epoch": 48.57881136950905,
      "grad_norm": 0.9632646441459656,
      "learning_rate": 0.000297588285960379,
      "loss": 0.3901,
      "step": 18800
    },
    {
      "epoch": 48.708010335917315,
      "grad_norm": 0.9362136721611023,
      "learning_rate": 0.0002970499569336779,
      "loss": 0.4,
      "step": 18850
    },
    {
      "epoch": 48.83720930232558,
      "grad_norm": 0.9412587285041809,
      "learning_rate": 0.0002965116279069768,
      "loss": 0.399,
      "step": 18900
    },
    {
      "epoch": 48.96640826873385,
      "grad_norm": 0.9412651658058167,
      "learning_rate": 0.0002959732988802756,
      "loss": 0.4182,
      "step": 18950
    },
    {
      "epoch": 49.09560723514212,
      "grad_norm": 0.8998754024505615,
      "learning_rate": 0.0002954349698535745,
      "loss": 0.3657,
      "step": 19000
    },
    {
      "epoch": 49.224806201550386,
      "grad_norm": 0.8922932744026184,
      "learning_rate": 0.0002948966408268734,
      "loss": 0.3552,
      "step": 19050
    },
    {
      "epoch": 49.354005167958654,
      "grad_norm": 0.9045536518096924,
      "learning_rate": 0.00029435831180017226,
      "loss": 0.3547,
      "step": 19100
    },
    {
      "epoch": 49.48320413436692,
      "grad_norm": 0.9243225455284119,
      "learning_rate": 0.0002938199827734712,
      "loss": 0.3687,
      "step": 19150
    },
    {
      "epoch": 49.6124031007752,
      "grad_norm": 0.8876057267189026,
      "learning_rate": 0.00029328165374677004,
      "loss": 0.3735,
      "step": 19200
    },
    {
      "epoch": 49.741602067183464,
      "grad_norm": 0.9181823134422302,
      "learning_rate": 0.0002927433247200689,
      "loss": 0.3755,
      "step": 19250
    },
    {
      "epoch": 49.87080103359173,
      "grad_norm": 0.9324612021446228,
      "learning_rate": 0.00029220499569336777,
      "loss": 0.3873,
      "step": 19300
    },
    {
      "epoch": 50.0,
      "grad_norm": 1.5176563262939453,
      "learning_rate": 0.0002916666666666667,
      "loss": 0.3907,
      "step": 19350
    },
    {
      "epoch": 50.12919896640827,
      "grad_norm": 0.8430280089378357,
      "learning_rate": 0.00029112833763996556,
      "loss": 0.3246,
      "step": 19400
    },
    {
      "epoch": 50.258397932816536,
      "grad_norm": 0.8588867783546448,
      "learning_rate": 0.0002905900086132645,
      "loss": 0.3316,
      "step": 19450
    },
    {
      "epoch": 50.3875968992248,
      "grad_norm": 0.9141284227371216,
      "learning_rate": 0.0002900516795865633,
      "loss": 0.3443,
      "step": 19500
    },
    {
      "epoch": 50.51679586563308,
      "grad_norm": 0.9618168473243713,
      "learning_rate": 0.0002895133505598622,
      "loss": 0.3455,
      "step": 19550
    },
    {
      "epoch": 50.645994832041346,
      "grad_norm": 0.8685697317123413,
      "learning_rate": 0.00028897502153316107,
      "loss": 0.3546,
      "step": 19600
    },
    {
      "epoch": 50.775193798449614,
      "grad_norm": 0.9281331896781921,
      "learning_rate": 0.00028843669250645993,
      "loss": 0.3594,
      "step": 19650
    },
    {
      "epoch": 50.90439276485788,
      "grad_norm": 0.9775708317756653,
      "learning_rate": 0.00028789836347975885,
      "loss": 0.3667,
      "step": 19700
    },
    {
      "epoch": 51.03359173126615,
      "grad_norm": 0.9008082151412964,
      "learning_rate": 0.0002873600344530577,
      "loss": 0.3559,
      "step": 19750
    },
    {
      "epoch": 51.16279069767442,
      "grad_norm": 0.8123588562011719,
      "learning_rate": 0.0002868217054263566,
      "loss": 0.3118,
      "step": 19800
    },
    {
      "epoch": 51.291989664082685,
      "grad_norm": 0.9002694487571716,
      "learning_rate": 0.00028628337639965545,
      "loss": 0.3194,
      "step": 19850
    },
    {
      "epoch": 51.42118863049095,
      "grad_norm": 0.8755623698234558,
      "learning_rate": 0.00028574504737295436,
      "loss": 0.3225,
      "step": 19900
    },
    {
      "epoch": 51.55038759689923,
      "grad_norm": 0.867507815361023,
      "learning_rate": 0.00028520671834625323,
      "loss": 0.3288,
      "step": 19950
    },
    {
      "epoch": 51.679586563307495,
      "grad_norm": 0.8991467356681824,
      "learning_rate": 0.00028466838931955215,
      "loss": 0.3378,
      "step": 20000
    },
    {
      "epoch": 51.80878552971576,
      "grad_norm": 0.923663854598999,
      "learning_rate": 0.000284130060292851,
      "loss": 0.3434,
      "step": 20050
    },
    {
      "epoch": 51.93798449612403,
      "grad_norm": 0.9283482432365417,
      "learning_rate": 0.0002835917312661499,
      "loss": 0.3513,
      "step": 20100
    },
    {
      "epoch": 52.0671834625323,
      "grad_norm": 0.7974118590354919,
      "learning_rate": 0.00028305340223944874,
      "loss": 0.3281,
      "step": 20150
    },
    {
      "epoch": 52.19638242894057,
      "grad_norm": 0.8336727023124695,
      "learning_rate": 0.00028251507321274766,
      "loss": 0.2985,
      "step": 20200
    },
    {
      "epoch": 52.325581395348834,
      "grad_norm": 0.8598774671554565,
      "learning_rate": 0.0002819767441860465,
      "loss": 0.3056,
      "step": 20250
    },
    {
      "epoch": 52.45478036175711,
      "grad_norm": 0.8309545516967773,
      "learning_rate": 0.0002814384151593454,
      "loss": 0.3144,
      "step": 20300
    },
    {
      "epoch": 52.58397932816538,
      "grad_norm": 0.8507665991783142,
      "learning_rate": 0.0002809000861326443,
      "loss": 0.3159,
      "step": 20350
    },
    {
      "epoch": 52.713178294573645,
      "grad_norm": 0.8766252398490906,
      "learning_rate": 0.0002803617571059431,
      "loss": 0.3186,
      "step": 20400
    },
    {
      "epoch": 52.84237726098191,
      "grad_norm": 0.8954920172691345,
      "learning_rate": 0.00027982342807924204,
      "loss": 0.3289,
      "step": 20450
    },
    {
      "epoch": 52.97157622739018,
      "grad_norm": 0.8454989790916443,
      "learning_rate": 0.0002792850990525409,
      "loss": 0.3362,
      "step": 20500
    },
    {
      "epoch": 53.10077519379845,
      "grad_norm": 0.8068667650222778,
      "learning_rate": 0.0002787467700258398,
      "loss": 0.2997,
      "step": 20550
    },
    {
      "epoch": 53.229974160206716,
      "grad_norm": 0.8570376038551331,
      "learning_rate": 0.0002782084409991387,
      "loss": 0.2916,
      "step": 20600
    },
    {
      "epoch": 53.359173126614984,
      "grad_norm": 0.8744022846221924,
      "learning_rate": 0.0002776701119724376,
      "loss": 0.29,
      "step": 20650
    },
    {
      "epoch": 53.48837209302326,
      "grad_norm": 0.8130773305892944,
      "learning_rate": 0.0002771317829457364,
      "loss": 0.2945,
      "step": 20700
    },
    {
      "epoch": 53.617571059431526,
      "grad_norm": 0.8203384280204773,
      "learning_rate": 0.00027659345391903533,
      "loss": 0.2987,
      "step": 20750
    },
    {
      "epoch": 53.746770025839794,
      "grad_norm": 0.8505781888961792,
      "learning_rate": 0.0002760551248923342,
      "loss": 0.3057,
      "step": 20800
    },
    {
      "epoch": 53.87596899224806,
      "grad_norm": 0.9071487188339233,
      "learning_rate": 0.0002755167958656331,
      "loss": 0.3141,
      "step": 20850
    },
    {
      "epoch": 54.00516795865633,
      "grad_norm": 0.8062961101531982,
      "learning_rate": 0.000274978466838932,
      "loss": 0.3199,
      "step": 20900
    },
    {
      "epoch": 54.1343669250646,
      "grad_norm": 0.8124945759773254,
      "learning_rate": 0.0002744401378122308,
      "loss": 0.2698,
      "step": 20950
    },
    {
      "epoch": 54.263565891472865,
      "grad_norm": 0.7962179780006409,
      "learning_rate": 0.0002739018087855297,
      "loss": 0.2721,
      "step": 21000
    },
    {
      "epoch": 54.39276485788114,
      "grad_norm": 0.8087983727455139,
      "learning_rate": 0.0002733634797588286,
      "loss": 0.2794,
      "step": 21050
    },
    {
      "epoch": 54.52196382428941,
      "grad_norm": 0.8471127152442932,
      "learning_rate": 0.0002728251507321275,
      "loss": 0.2775,
      "step": 21100
    },
    {
      "epoch": 54.651162790697676,
      "grad_norm": 0.8004376888275146,
      "learning_rate": 0.00027228682170542636,
      "loss": 0.288,
      "step": 21150
    },
    {
      "epoch": 54.78036175710594,
      "grad_norm": 0.835817813873291,
      "learning_rate": 0.0002717484926787253,
      "loss": 0.2988,
      "step": 21200
    },
    {
      "epoch": 54.90956072351421,
      "grad_norm": 0.8239279985427856,
      "learning_rate": 0.0002712101636520241,
      "loss": 0.2997,
      "step": 21250
    },
    {
      "epoch": 55.03875968992248,
      "grad_norm": 0.7811757326126099,
      "learning_rate": 0.000270671834625323,
      "loss": 0.2918,
      "step": 21300
    },
    {
      "epoch": 55.16795865633075,
      "grad_norm": 0.8200927376747131,
      "learning_rate": 0.00027013350559862187,
      "loss": 0.2597,
      "step": 21350
    },
    {
      "epoch": 55.29715762273902,
      "grad_norm": 0.8561047315597534,
      "learning_rate": 0.0002695951765719208,
      "loss": 0.2674,
      "step": 21400
    },
    {
      "epoch": 55.42635658914729,
      "grad_norm": 0.8218419551849365,
      "learning_rate": 0.00026905684754521966,
      "loss": 0.2667,
      "step": 21450
    },
    {
      "epoch": 55.55555555555556,
      "grad_norm": 0.7961956858634949,
      "learning_rate": 0.0002685185185185186,
      "loss": 0.2708,
      "step": 21500
    },
    {
      "epoch": 55.684754521963825,
      "grad_norm": 0.8763186931610107,
      "learning_rate": 0.0002679801894918174,
      "loss": 0.278,
      "step": 21550
    },
    {
      "epoch": 55.81395348837209,
      "grad_norm": 0.831415057182312,
      "learning_rate": 0.00026744186046511625,
      "loss": 0.2836,
      "step": 21600
    },
    {
      "epoch": 55.94315245478036,
      "grad_norm": 0.9739905595779419,
      "learning_rate": 0.00026690353143841517,
      "loss": 0.2871,
      "step": 21650
    },
    {
      "epoch": 56.07235142118863,
      "grad_norm": 0.8093137145042419,
      "learning_rate": 0.00026636520241171403,
      "loss": 0.2699,
      "step": 21700
    },
    {
      "epoch": 56.201550387596896,
      "grad_norm": 0.8015968203544617,
      "learning_rate": 0.00026582687338501295,
      "loss": 0.2509,
      "step": 21750
    },
    {
      "epoch": 56.33074935400517,
      "grad_norm": 0.7668253183364868,
      "learning_rate": 0.0002652885443583118,
      "loss": 0.2518,
      "step": 21800
    },
    {
      "epoch": 56.45994832041344,
      "grad_norm": 0.8243709802627563,
      "learning_rate": 0.0002647502153316107,
      "loss": 0.2555,
      "step": 21850
    },
    {
      "epoch": 56.58914728682171,
      "grad_norm": 0.7767107486724854,
      "learning_rate": 0.00026421188630490955,
      "loss": 0.2628,
      "step": 21900
    },
    {
      "epoch": 56.718346253229974,
      "grad_norm": 0.8016783595085144,
      "learning_rate": 0.00026367355727820846,
      "loss": 0.2663,
      "step": 21950
    },
    {
      "epoch": 56.84754521963824,
      "grad_norm": 0.8889478445053101,
      "learning_rate": 0.00026313522825150733,
      "loss": 0.2716,
      "step": 22000
    },
    {
      "epoch": 56.97674418604651,
      "grad_norm": 0.8272899985313416,
      "learning_rate": 0.00026259689922480625,
      "loss": 0.2792,
      "step": 22050
    },
    {
      "epoch": 57.10594315245478,
      "grad_norm": 0.7909784913063049,
      "learning_rate": 0.00026205857019810506,
      "loss": 0.2449,
      "step": 22100
    },
    {
      "epoch": 57.23514211886305,
      "grad_norm": 0.7938480973243713,
      "learning_rate": 0.000261520241171404,
      "loss": 0.2389,
      "step": 22150
    },
    {
      "epoch": 57.36434108527132,
      "grad_norm": 0.7748268842697144,
      "learning_rate": 0.00026098191214470284,
      "loss": 0.2412,
      "step": 22200
    },
    {
      "epoch": 57.49354005167959,
      "grad_norm": 0.7988633513450623,
      "learning_rate": 0.0002604435831180017,
      "loss": 0.2498,
      "step": 22250
    },
    {
      "epoch": 57.622739018087856,
      "grad_norm": 0.8084873557090759,
      "learning_rate": 0.0002599052540913006,
      "loss": 0.2498,
      "step": 22300
    },
    {
      "epoch": 57.751937984496124,
      "grad_norm": 0.7798044085502625,
      "learning_rate": 0.0002593669250645995,
      "loss": 0.2576,
      "step": 22350
    },
    {
      "epoch": 57.88113695090439,
      "grad_norm": 0.7969653010368347,
      "learning_rate": 0.00025882859603789835,
      "loss": 0.2576,
      "step": 22400
    },
    {
      "epoch": 58.01033591731266,
      "grad_norm": 0.738374650478363,
      "learning_rate": 0.0002582902670111972,
      "loss": 0.2621,
      "step": 22450
    },
    {
      "epoch": 58.13953488372093,
      "grad_norm": 0.7380902171134949,
      "learning_rate": 0.00025775193798449614,
      "loss": 0.2287,
      "step": 22500
    },
    {
      "epoch": 58.2687338501292,
      "grad_norm": 0.7839064002037048,
      "learning_rate": 0.000257213608957795,
      "loss": 0.2297,
      "step": 22550
    },
    {
      "epoch": 58.39793281653747,
      "grad_norm": 0.7798992395401001,
      "learning_rate": 0.0002566752799310939,
      "loss": 0.2285,
      "step": 22600
    },
    {
      "epoch": 58.52713178294574,
      "grad_norm": 0.8027578592300415,
      "learning_rate": 0.0002561369509043928,
      "loss": 0.2338,
      "step": 22650
    },
    {
      "epoch": 58.656330749354005,
      "grad_norm": 0.8049168586730957,
      "learning_rate": 0.00025559862187769165,
      "loss": 0.2425,
      "step": 22700
    },
    {
      "epoch": 58.78552971576227,
      "grad_norm": 0.7948811054229736,
      "learning_rate": 0.0002550602928509905,
      "loss": 0.2458,
      "step": 22750
    },
    {
      "epoch": 58.91472868217054,
      "grad_norm": 0.7933169007301331,
      "learning_rate": 0.00025452196382428943,
      "loss": 0.2506,
      "step": 22800
    },
    {
      "epoch": 59.04392764857881,
      "grad_norm": 0.8338164687156677,
      "learning_rate": 0.0002539836347975883,
      "loss": 0.2428,
      "step": 22850
    },
    {
      "epoch": 59.17312661498708,
      "grad_norm": 0.7746559977531433,
      "learning_rate": 0.00025344530577088716,
      "loss": 0.2206,
      "step": 22900
    },
    {
      "epoch": 59.30232558139535,
      "grad_norm": 0.7659971714019775,
      "learning_rate": 0.0002529069767441861,
      "loss": 0.2225,
      "step": 22950
    },
    {
      "epoch": 59.43152454780362,
      "grad_norm": 0.7558844685554504,
      "learning_rate": 0.0002523686477174849,
      "loss": 0.2211,
      "step": 23000
    },
    {
      "epoch": 59.56072351421189,
      "grad_norm": 0.7602396607398987,
      "learning_rate": 0.0002518303186907838,
      "loss": 0.2257,
      "step": 23050
    },
    {
      "epoch": 59.689922480620154,
      "grad_norm": 0.7975172400474548,
      "learning_rate": 0.0002512919896640827,
      "loss": 0.2311,
      "step": 23100
    },
    {
      "epoch": 59.81912144702842,
      "grad_norm": 0.748289167881012,
      "learning_rate": 0.0002507536606373816,
      "loss": 0.2327,
      "step": 23150
    },
    {
      "epoch": 59.94832041343669,
      "grad_norm": 0.7845473289489746,
      "learning_rate": 0.00025021533161068046,
      "loss": 0.2361,
      "step": 23200
    },
    {
      "epoch": 60.07751937984496,
      "grad_norm": 0.7066329717636108,
      "learning_rate": 0.0002496770025839793,
      "loss": 0.2208,
      "step": 23250
    },
    {
      "epoch": 60.20671834625323,
      "grad_norm": 0.7178154587745667,
      "learning_rate": 0.0002491386735572782,
      "loss": 0.206,
      "step": 23300
    },
    {
      "epoch": 60.3359173126615,
      "grad_norm": 0.7235272526741028,
      "learning_rate": 0.0002486003445305771,
      "loss": 0.2134,
      "step": 23350
    },
    {
      "epoch": 60.46511627906977,
      "grad_norm": 0.7305527925491333,
      "learning_rate": 0.00024806201550387597,
      "loss": 0.2179,
      "step": 23400
    },
    {
      "epoch": 60.594315245478036,
      "grad_norm": 0.7882553339004517,
      "learning_rate": 0.00024752368647717484,
      "loss": 0.2162,
      "step": 23450
    },
    {
      "epoch": 60.723514211886304,
      "grad_norm": 0.7648959755897522,
      "learning_rate": 0.00024698535745047376,
      "loss": 0.2205,
      "step": 23500
    },
    {
      "epoch": 60.85271317829457,
      "grad_norm": 0.815764844417572,
      "learning_rate": 0.0002464470284237726,
      "loss": 0.2245,
      "step": 23550
    },
    {
      "epoch": 60.98191214470284,
      "grad_norm": 0.7537245750427246,
      "learning_rate": 0.0002459086993970715,
      "loss": 0.2276,
      "step": 23600
    },
    {
      "epoch": 61.111111111111114,
      "grad_norm": 0.7311267256736755,
      "learning_rate": 0.0002453703703703704,
      "loss": 0.205,
      "step": 23650
    },
    {
      "epoch": 61.24031007751938,
      "grad_norm": 0.7263907790184021,
      "learning_rate": 0.00024483204134366927,
      "loss": 0.1989,
      "step": 23700
    },
    {
      "epoch": 61.36950904392765,
      "grad_norm": 0.7816259860992432,
      "learning_rate": 0.00024429371231696813,
      "loss": 0.2035,
      "step": 23750
    },
    {
      "epoch": 61.49870801033592,
      "grad_norm": 0.7522289752960205,
      "learning_rate": 0.00024375538329026702,
      "loss": 0.2057,
      "step": 23800
    },
    {
      "epoch": 61.627906976744185,
      "grad_norm": 0.7341068983078003,
      "learning_rate": 0.0002432170542635659,
      "loss": 0.2111,
      "step": 23850
    },
    {
      "epoch": 61.75710594315245,
      "grad_norm": 0.7298462390899658,
      "learning_rate": 0.00024267872523686475,
      "loss": 0.212,
      "step": 23900
    },
    {
      "epoch": 61.88630490956072,
      "grad_norm": 0.7333064079284668,
      "learning_rate": 0.00024214039621016365,
      "loss": 0.2159,
      "step": 23950
    },
    {
      "epoch": 62.01550387596899,
      "grad_norm": 0.7273525595664978,
      "learning_rate": 0.00024160206718346254,
      "loss": 0.2215,
      "step": 24000
    },
    {
      "epoch": 62.144702842377264,
      "grad_norm": 0.6884973645210266,
      "learning_rate": 0.0002410637381567614,
      "loss": 0.1927,
      "step": 24050
    },
    {
      "epoch": 62.27390180878553,
      "grad_norm": 0.714361846446991,
      "learning_rate": 0.0002405254091300603,
      "loss": 0.19,
      "step": 24100
    },
    {
      "epoch": 62.4031007751938,
      "grad_norm": 0.724731981754303,
      "learning_rate": 0.00023998708010335919,
      "loss": 0.1936,
      "step": 24150
    },
    {
      "epoch": 62.53229974160207,
      "grad_norm": 0.7415257096290588,
      "learning_rate": 0.00023944875107665805,
      "loss": 0.2025,
      "step": 24200
    },
    {
      "epoch": 62.661498708010335,
      "grad_norm": 0.7367540597915649,
      "learning_rate": 0.00023891042204995694,
      "loss": 0.1998,
      "step": 24250
    },
    {
      "epoch": 62.7906976744186,
      "grad_norm": 0.758507490158081,
      "learning_rate": 0.00023837209302325583,
      "loss": 0.2022,
      "step": 24300
    },
    {
      "epoch": 62.91989664082687,
      "grad_norm": 0.7706499695777893,
      "learning_rate": 0.0002378337639965547,
      "loss": 0.21,
      "step": 24350
    },
    {
      "epoch": 63.049095607235145,
      "grad_norm": 0.6982904672622681,
      "learning_rate": 0.0002372954349698536,
      "loss": 0.2028,
      "step": 24400
    },
    {
      "epoch": 63.17829457364341,
      "grad_norm": 0.7011119723320007,
      "learning_rate": 0.00023675710594315248,
      "loss": 0.1856,
      "step": 24450
    },
    {
      "epoch": 63.30749354005168,
      "grad_norm": 0.732163667678833,
      "learning_rate": 0.00023621877691645132,
      "loss": 0.185,
      "step": 24500
    },
    {
      "epoch": 63.43669250645995,
      "grad_norm": 0.6784104108810425,
      "learning_rate": 0.0002356804478897502,
      "loss": 0.1885,
      "step": 24550
    },
    {
      "epoch": 63.565891472868216,
      "grad_norm": 0.7086354494094849,
      "learning_rate": 0.0002351421188630491,
      "loss": 0.192,
      "step": 24600
    },
    {
      "epoch": 63.695090439276484,
      "grad_norm": 0.7011638879776001,
      "learning_rate": 0.00023460378983634797,
      "loss": 0.1935,
      "step": 24650
    },
    {
      "epoch": 63.82428940568475,
      "grad_norm": 0.7638087272644043,
      "learning_rate": 0.00023406546080964686,
      "loss": 0.1991,
      "step": 24700
    },
    {
      "epoch": 63.95348837209303,
      "grad_norm": 0.7378659844398499,
      "learning_rate": 0.00023352713178294575,
      "loss": 0.2011,
      "step": 24750
    },
    {
      "epoch": 64.08268733850129,
      "grad_norm": 0.6781746745109558,
      "learning_rate": 0.00023298880275624462,
      "loss": 0.1856,
      "step": 24800
    },
    {
      "epoch": 64.21188630490956,
      "grad_norm": 0.7497926950454712,
      "learning_rate": 0.0002324504737295435,
      "loss": 0.1763,
      "step": 24850
    },
    {
      "epoch": 64.34108527131782,
      "grad_norm": 0.7481130957603455,
      "learning_rate": 0.0002319121447028424,
      "loss": 0.1816,
      "step": 24900
    },
    {
      "epoch": 64.4702842377261,
      "grad_norm": 0.7092560529708862,
      "learning_rate": 0.00023137381567614126,
      "loss": 0.1816,
      "step": 24950
    },
    {
      "epoch": 64.59948320413437,
      "grad_norm": 0.6845375895500183,
      "learning_rate": 0.00023083548664944015,
      "loss": 0.1834,
      "step": 25000
    },
    {
      "epoch": 64.72868217054264,
      "grad_norm": 0.7465250492095947,
      "learning_rate": 0.00023029715762273905,
      "loss": 0.1881,
      "step": 25050
    },
    {
      "epoch": 64.85788113695091,
      "grad_norm": 0.6931895613670349,
      "learning_rate": 0.0002297588285960379,
      "loss": 0.1908,
      "step": 25100
    },
    {
      "epoch": 64.98708010335918,
      "grad_norm": 0.7207470536231995,
      "learning_rate": 0.00022922049956933678,
      "loss": 0.1924,
      "step": 25150
    },
    {
      "epoch": 65.11627906976744,
      "grad_norm": 0.6782975792884827,
      "learning_rate": 0.00022868217054263564,
      "loss": 0.1693,
      "step": 25200
    },
    {
      "epoch": 65.24547803617571,
      "grad_norm": 0.7212117910385132,
      "learning_rate": 0.00022814384151593453,
      "loss": 0.1727,
      "step": 25250
    },
    {
      "epoch": 65.37467700258398,
      "grad_norm": 0.7126914858818054,
      "learning_rate": 0.00022760551248923342,
      "loss": 0.1721,
      "step": 25300
    },
    {
      "epoch": 65.50387596899225,
      "grad_norm": 0.7192057371139526,
      "learning_rate": 0.0002270671834625323,
      "loss": 0.1746,
      "step": 25350
    },
    {
      "epoch": 65.63307493540051,
      "grad_norm": 0.6786862015724182,
      "learning_rate": 0.00022652885443583118,
      "loss": 0.1784,
      "step": 25400
    },
    {
      "epoch": 65.76227390180878,
      "grad_norm": 0.6805238723754883,
      "learning_rate": 0.00022599052540913007,
      "loss": 0.18,
      "step": 25450
    },
    {
      "epoch": 65.89147286821705,
      "grad_norm": 0.7692633867263794,
      "learning_rate": 0.00022545219638242894,
      "loss": 0.1846,
      "step": 25500
    },
    {
      "epoch": 66.02067183462532,
      "grad_norm": 0.6935442686080933,
      "learning_rate": 0.00022491386735572783,
      "loss": 0.1813,
      "step": 25550
    },
    {
      "epoch": 66.14987080103359,
      "grad_norm": 0.7003860473632812,
      "learning_rate": 0.00022437553832902672,
      "loss": 0.1613,
      "step": 25600
    },
    {
      "epoch": 66.27906976744185,
      "grad_norm": 0.7504058480262756,
      "learning_rate": 0.00022383720930232558,
      "loss": 0.1623,
      "step": 25650
    },
    {
      "epoch": 66.40826873385014,
      "grad_norm": 0.6752199530601501,
      "learning_rate": 0.00022329888027562448,
      "loss": 0.166,
      "step": 25700
    },
    {
      "epoch": 66.5374677002584,
      "grad_norm": 0.6579027771949768,
      "learning_rate": 0.00022276055124892337,
      "loss": 0.1699,
      "step": 25750
    },
    {
      "epoch": 66.66666666666667,
      "grad_norm": 0.6829591989517212,
      "learning_rate": 0.0002222222222222222,
      "loss": 0.1732,
      "step": 25800
    },
    {
      "epoch": 66.79586563307494,
      "grad_norm": 0.6764296889305115,
      "learning_rate": 0.0002216838931955211,
      "loss": 0.1727,
      "step": 25850
    },
    {
      "epoch": 66.9250645994832,
      "grad_norm": 0.7083727717399597,
      "learning_rate": 0.00022114556416882,
      "loss": 0.1772,
      "step": 25900
    },
    {
      "epoch": 67.05426356589147,
      "grad_norm": 0.619489848613739,
      "learning_rate": 0.00022060723514211885,
      "loss": 0.1693,
      "step": 25950
    },
    {
      "epoch": 67.18346253229974,
      "grad_norm": 0.6532468199729919,
      "learning_rate": 0.00022006890611541775,
      "loss": 0.1555,
      "step": 26000
    },
    {
      "epoch": 67.31266149870801,
      "grad_norm": 0.6415907144546509,
      "learning_rate": 0.00021953057708871664,
      "loss": 0.1622,
      "step": 26050
    },
    {
      "epoch": 67.44186046511628,
      "grad_norm": 0.7382169365882874,
      "learning_rate": 0.0002189922480620155,
      "loss": 0.1586,
      "step": 26100
    },
    {
      "epoch": 67.57105943152455,
      "grad_norm": 0.6522275805473328,
      "learning_rate": 0.0002184539190353144,
      "loss": 0.164,
      "step": 26150
    },
    {
      "epoch": 67.70025839793281,
      "grad_norm": 0.6581871509552002,
      "learning_rate": 0.00021791559000861329,
      "loss": 0.1618,
      "step": 26200
    },
    {
      "epoch": 67.82945736434108,
      "grad_norm": 0.6378141045570374,
      "learning_rate": 0.00021737726098191215,
      "loss": 0.1678,
      "step": 26250
    },
    {
      "epoch": 67.95865633074935,
      "grad_norm": 0.6920313835144043,
      "learning_rate": 0.00021683893195521104,
      "loss": 0.1705,
      "step": 26300
    },
    {
      "epoch": 68.08785529715762,
      "grad_norm": 0.6803005933761597,
      "learning_rate": 0.00021630060292850993,
      "loss": 0.156,
      "step": 26350
    },
    {
      "epoch": 68.21705426356588,
      "grad_norm": 0.670342206954956,
      "learning_rate": 0.0002157622739018088,
      "loss": 0.1517,
      "step": 26400
    },
    {
      "epoch": 68.34625322997417,
      "grad_norm": 0.6676268577575684,
      "learning_rate": 0.00021522394487510766,
      "loss": 0.1539,
      "step": 26450
    },
    {
      "epoch": 68.47545219638243,
      "grad_norm": 0.7096660137176514,
      "learning_rate": 0.00021468561584840653,
      "loss": 0.1537,
      "step": 26500
    },
    {
      "epoch": 68.6046511627907,
      "grad_norm": 0.6787480711936951,
      "learning_rate": 0.00021414728682170542,
      "loss": 0.1571,
      "step": 26550
    },
    {
      "epoch": 68.73385012919897,
      "grad_norm": 0.6823030710220337,
      "learning_rate": 0.0002136089577950043,
      "loss": 0.1621,
      "step": 26600
    },
    {
      "epoch": 68.86304909560724,
      "grad_norm": 0.7846039533615112,
      "learning_rate": 0.00021307062876830318,
      "loss": 0.163,
      "step": 26650
    },
    {
      "epoch": 68.9922480620155,
      "grad_norm": 0.7133269309997559,
      "learning_rate": 0.00021253229974160207,
      "loss": 0.1628,
      "step": 26700
    },
    {
      "epoch": 69.12144702842377,
      "grad_norm": 0.6047943830490112,
      "learning_rate": 0.00021199397071490096,
      "loss": 0.1463,
      "step": 26750
    },
    {
      "epoch": 69.25064599483204,
      "grad_norm": 0.6206647753715515,
      "learning_rate": 0.00021145564168819982,
      "loss": 0.1442,
      "step": 26800
    },
    {
      "epoch": 69.37984496124031,
      "grad_norm": 0.5883957147598267,
      "learning_rate": 0.00021091731266149872,
      "loss": 0.1462,
      "step": 26850
    },
    {
      "epoch": 69.50904392764858,
      "grad_norm": 0.6892290711402893,
      "learning_rate": 0.0002103789836347976,
      "loss": 0.1491,
      "step": 26900
    },
    {
      "epoch": 69.63824289405684,
      "grad_norm": 0.6859474778175354,
      "learning_rate": 0.00020984065460809647,
      "loss": 0.1498,
      "step": 26950
    },
    {
      "epoch": 69.76744186046511,
      "grad_norm": 0.6671847701072693,
      "learning_rate": 0.00020930232558139536,
      "loss": 0.1525,
      "step": 27000
    },
    {
      "epoch": 69.89664082687338,
      "grad_norm": 0.6607747077941895,
      "learning_rate": 0.00020876399655469426,
      "loss": 0.1577,
      "step": 27050
    },
    {
      "epoch": 70.02583979328165,
      "grad_norm": 0.6253509521484375,
      "learning_rate": 0.0002082256675279931,
      "loss": 0.1529,
      "step": 27100
    },
    {
      "epoch": 70.15503875968992,
      "grad_norm": 0.6366680264472961,
      "learning_rate": 0.00020768733850129198,
      "loss": 0.1407,
      "step": 27150
    },
    {
      "epoch": 70.2842377260982,
      "grad_norm": 0.6100383996963501,
      "learning_rate": 0.00020714900947459088,
      "loss": 0.1428,
      "step": 27200
    },
    {
      "epoch": 70.41343669250647,
      "grad_norm": 0.6114051342010498,
      "learning_rate": 0.00020661068044788974,
      "loss": 0.1419,
      "step": 27250
    },
    {
      "epoch": 70.54263565891473,
      "grad_norm": 0.6189208030700684,
      "learning_rate": 0.00020607235142118863,
      "loss": 0.1425,
      "step": 27300
    },
    {
      "epoch": 70.671834625323,
      "grad_norm": 0.6425594091415405,
      "learning_rate": 0.00020553402239448752,
      "loss": 0.145,
      "step": 27350
    },
    {
      "epoch": 70.80103359173127,
      "grad_norm": 0.633064866065979,
      "learning_rate": 0.0002049956933677864,
      "loss": 0.1501,
      "step": 27400
    },
    {
      "epoch": 70.93023255813954,
      "grad_norm": 0.6031843423843384,
      "learning_rate": 0.00020445736434108528,
      "loss": 0.1477,
      "step": 27450
    },
    {
      "epoch": 71.0594315245478,
      "grad_norm": 0.6118212342262268,
      "learning_rate": 0.00020391903531438417,
      "loss": 0.1458,
      "step": 27500
    },
    {
      "epoch": 71.18863049095607,
      "grad_norm": 0.5871561765670776,
      "learning_rate": 0.00020338070628768304,
      "loss": 0.1352,
      "step": 27550
    },
    {
      "epoch": 71.31782945736434,
      "grad_norm": 0.6446227431297302,
      "learning_rate": 0.00020284237726098193,
      "loss": 0.1344,
      "step": 27600
    },
    {
      "epoch": 71.44702842377261,
      "grad_norm": 0.6453047394752502,
      "learning_rate": 0.00020230404823428082,
      "loss": 0.1355,
      "step": 27650
    },
    {
      "epoch": 71.57622739018088,
      "grad_norm": 0.6546571254730225,
      "learning_rate": 0.00020176571920757968,
      "loss": 0.1393,
      "step": 27700
    },
    {
      "epoch": 71.70542635658914,
      "grad_norm": 0.6440081596374512,
      "learning_rate": 0.00020122739018087855,
      "loss": 0.1385,
      "step": 27750
    },
    {
      "epoch": 71.83462532299741,
      "grad_norm": 0.6724774241447449,
      "learning_rate": 0.00020068906115417741,
      "loss": 0.1436,
      "step": 27800
    },
    {
      "epoch": 71.96382428940568,
      "grad_norm": 0.6848536133766174,
      "learning_rate": 0.0002001507321274763,
      "loss": 0.1434,
      "step": 27850
    },
    {
      "epoch": 72.09302325581395,
      "grad_norm": 0.617206335067749,
      "learning_rate": 0.0001996124031007752,
      "loss": 0.1333,
      "step": 27900
    },
    {
      "epoch": 72.22222222222223,
      "grad_norm": 0.599015474319458,
      "learning_rate": 0.00019907407407407406,
      "loss": 0.1298,
      "step": 27950
    },
    {
      "epoch": 72.3514211886305,
      "grad_norm": 0.6199496388435364,
      "learning_rate": 0.00019853574504737295,
      "loss": 0.1331,
      "step": 28000
    },
    {
      "epoch": 72.48062015503876,
      "grad_norm": 0.5886519551277161,
      "learning_rate": 0.00019799741602067185,
      "loss": 0.1322,
      "step": 28050
    },
    {
      "epoch": 72.60981912144703,
      "grad_norm": 0.6261482834815979,
      "learning_rate": 0.0001974590869939707,
      "loss": 0.1327,
      "step": 28100
    },
    {
      "epoch": 72.7390180878553,
      "grad_norm": 0.5919719934463501,
      "learning_rate": 0.0001969207579672696,
      "loss": 0.1369,
      "step": 28150
    },
    {
      "epoch": 72.86821705426357,
      "grad_norm": 0.618654191493988,
      "learning_rate": 0.0001963824289405685,
      "loss": 0.1371,
      "step": 28200
    },
    {
      "epoch": 72.99741602067184,
      "grad_norm": 0.6660842299461365,
      "learning_rate": 0.00019584409991386736,
      "loss": 0.1395,
      "step": 28250
    },
    {
      "epoch": 73.1266149870801,
      "grad_norm": 0.6433922648429871,
      "learning_rate": 0.00019530577088716625,
      "loss": 0.1277,
      "step": 28300
    },
    {
      "epoch": 73.25581395348837,
      "grad_norm": 0.6188713312149048,
      "learning_rate": 0.00019476744186046514,
      "loss": 0.1244,
      "step": 28350
    },
    {
      "epoch": 73.38501291989664,
      "grad_norm": 0.6060723662376404,
      "learning_rate": 0.00019422911283376398,
      "loss": 0.124,
      "step": 28400
    },
    {
      "epoch": 73.5142118863049,
      "grad_norm": 0.6420648694038391,
      "learning_rate": 0.00019369078380706287,
      "loss": 0.1289,
      "step": 28450
    },
    {
      "epoch": 73.64341085271317,
      "grad_norm": 0.6359677910804749,
      "learning_rate": 0.00019315245478036176,
      "loss": 0.1288,
      "step": 28500
    },
    {
      "epoch": 73.77260981912144,
      "grad_norm": 0.5918629169464111,
      "learning_rate": 0.00019261412575366063,
      "loss": 0.1298,
      "step": 28550
    },
    {
      "epoch": 73.90180878552971,
      "grad_norm": 0.6620613932609558,
      "learning_rate": 0.00019207579672695952,
      "loss": 0.1358,
      "step": 28600
    },
    {
      "epoch": 74.03100775193798,
      "grad_norm": 0.6626524925231934,
      "learning_rate": 0.0001915374677002584,
      "loss": 0.1297,
      "step": 28650
    },
    {
      "epoch": 74.16020671834626,
      "grad_norm": 0.5902724862098694,
      "learning_rate": 0.00019099913867355728,
      "loss": 0.121,
      "step": 28700
    },
    {
      "epoch": 74.28940568475453,
      "grad_norm": 0.6111683249473572,
      "learning_rate": 0.00019046080964685617,
      "loss": 0.1206,
      "step": 28750
    },
    {
      "epoch": 74.4186046511628,
      "grad_norm": 0.5692412257194519,
      "learning_rate": 0.00018992248062015506,
      "loss": 0.119,
      "step": 28800
    },
    {
      "epoch": 74.54780361757106,
      "grad_norm": 0.6336855292320251,
      "learning_rate": 0.00018938415159345392,
      "loss": 0.1197,
      "step": 28850
    },
    {
      "epoch": 74.67700258397933,
      "grad_norm": 0.6091431379318237,
      "learning_rate": 0.00018884582256675282,
      "loss": 0.1237,
      "step": 28900
    },
    {
      "epoch": 74.8062015503876,
      "grad_norm": 0.6267944574356079,
      "learning_rate": 0.0001883074935400517,
      "loss": 0.1231,
      "step": 28950
    },
    {
      "epoch": 74.93540051679587,
      "grad_norm": 0.639306366443634,
      "learning_rate": 0.00018776916451335057,
      "loss": 0.1279,
      "step": 29000
    },
    {
      "epoch": 75.06459948320413,
      "grad_norm": 0.5728051066398621,
      "learning_rate": 0.00018723083548664944,
      "loss": 0.1212,
      "step": 29050
    },
    {
      "epoch": 75.1937984496124,
      "grad_norm": 0.6017086505889893,
      "learning_rate": 0.0001866925064599483,
      "loss": 0.1139,
      "step": 29100
    },
    {
      "epoch": 75.32299741602067,
      "grad_norm": 0.5609886050224304,
      "learning_rate": 0.0001861541774332472,
      "loss": 0.1152,
      "step": 29150
    },
    {
      "epoch": 75.45219638242894,
      "grad_norm": 0.5853701233863831,
      "learning_rate": 0.00018561584840654608,
      "loss": 0.1154,
      "step": 29200
    },
    {
      "epoch": 75.5813953488372,
      "grad_norm": 0.6056568026542664,
      "learning_rate": 0.00018507751937984495,
      "loss": 0.1166,
      "step": 29250
    },
    {
      "epoch": 75.71059431524547,
      "grad_norm": 0.5850881338119507,
      "learning_rate": 0.00018453919035314384,
      "loss": 0.1186,
      "step": 29300
    },
    {
      "epoch": 75.83979328165374,
      "grad_norm": 0.5566284656524658,
      "learning_rate": 0.00018400086132644273,
      "loss": 0.1195,
      "step": 29350
    },
    {
      "epoch": 75.96899224806202,
      "grad_norm": 0.6375340223312378,
      "learning_rate": 0.0001834625322997416,
      "loss": 0.1211,
      "step": 29400
    },
    {
      "epoch": 76.09819121447029,
      "grad_norm": 0.5630626678466797,
      "learning_rate": 0.0001829242032730405,
      "loss": 0.1144,
      "step": 29450
    },
    {
      "epoch": 76.22739018087856,
      "grad_norm": 0.5619855523109436,
      "learning_rate": 0.00018238587424633938,
      "loss": 0.1117,
      "step": 29500
    },
    {
      "epoch": 76.35658914728683,
      "grad_norm": 0.6064419150352478,
      "learning_rate": 0.00018184754521963825,
      "loss": 0.112,
      "step": 29550
    },
    {
      "epoch": 76.4857881136951,
      "grad_norm": 0.6273238062858582,
      "learning_rate": 0.00018130921619293714,
      "loss": 0.1154,
      "step": 29600
    },
    {
      "epoch": 76.61498708010336,
      "grad_norm": 0.5801479816436768,
      "learning_rate": 0.00018077088716623603,
      "loss": 0.1124,
      "step": 29650
    },
    {
      "epoch": 76.74418604651163,
      "grad_norm": 0.578396201133728,
      "learning_rate": 0.00018023255813953487,
      "loss": 0.1144,
      "step": 29700
    },
    {
      "epoch": 76.8733850129199,
      "grad_norm": 0.6003342866897583,
      "learning_rate": 0.00017969422911283376,
      "loss": 0.1158,
      "step": 29750
    },
    {
      "epoch": 77.00258397932816,
      "grad_norm": 0.5574929118156433,
      "learning_rate": 0.00017915590008613265,
      "loss": 0.1174,
      "step": 29800
    },
    {
      "epoch": 77.13178294573643,
      "grad_norm": 0.6395805478096008,
      "learning_rate": 0.00017861757105943151,
      "loss": 0.1059,
      "step": 29850
    },
    {
      "epoch": 77.2609819121447,
      "grad_norm": 0.5699200630187988,
      "learning_rate": 0.0001780792420327304,
      "loss": 0.1066,
      "step": 29900
    },
    {
      "epoch": 77.39018087855297,
      "grad_norm": 0.5334694385528564,
      "learning_rate": 0.0001775409130060293,
      "loss": 0.1081,
      "step": 29950
    },
    {
      "epoch": 77.51937984496124,
      "grad_norm": 0.5451510548591614,
      "learning_rate": 0.00017700258397932816,
      "loss": 0.1083,
      "step": 30000
    },
    {
      "epoch": 77.6485788113695,
      "grad_norm": 0.5868716835975647,
      "learning_rate": 0.00017646425495262705,
      "loss": 0.1085,
      "step": 30050
    },
    {
      "epoch": 77.77777777777777,
      "grad_norm": 0.662187397480011,
      "learning_rate": 0.00017592592592592595,
      "loss": 0.1101,
      "step": 30100
    },
    {
      "epoch": 77.90697674418605,
      "grad_norm": 0.5743542313575745,
      "learning_rate": 0.0001753875968992248,
      "loss": 0.1105,
      "step": 30150
    },
    {
      "epoch": 78.03617571059432,
      "grad_norm": 0.5407698750495911,
      "learning_rate": 0.0001748492678725237,
      "loss": 0.1083,
      "step": 30200
    },
    {
      "epoch": 78.16537467700259,
      "grad_norm": 0.5431357026100159,
      "learning_rate": 0.0001743109388458226,
      "loss": 0.1034,
      "step": 30250
    },
    {
      "epoch": 78.29457364341086,
      "grad_norm": 0.5634459853172302,
      "learning_rate": 0.00017377260981912146,
      "loss": 0.1018,
      "step": 30300
    },
    {
      "epoch": 78.42377260981912,
      "grad_norm": 0.5569723844528198,
      "learning_rate": 0.00017323428079242032,
      "loss": 0.1054,
      "step": 30350
    },
    {
      "epoch": 78.55297157622739,
      "grad_norm": 0.542418897151947,
      "learning_rate": 0.0001726959517657192,
      "loss": 0.1048,
      "step": 30400
    },
    {
      "epoch": 78.68217054263566,
      "grad_norm": 0.5264263153076172,
      "learning_rate": 0.00017215762273901808,
      "loss": 0.1054,
      "step": 30450
    },
    {
      "epoch": 78.81136950904393,
      "grad_norm": 0.56622713804245,
      "learning_rate": 0.00017161929371231697,
      "loss": 0.1059,
      "step": 30500
    },
    {
      "epoch": 78.9405684754522,
      "grad_norm": 0.588805615901947,
      "learning_rate": 0.00017108096468561584,
      "loss": 0.1097,
      "step": 30550
    },
    {
      "epoch": 79.06976744186046,
      "grad_norm": 0.5106987953186035,
      "learning_rate": 0.00017054263565891473,
      "loss": 0.1013,
      "step": 30600
    },
    {
      "epoch": 79.19896640826873,
      "grad_norm": 0.5502251982688904,
      "learning_rate": 0.00017000430663221362,
      "loss": 0.0998,
      "step": 30650
    },
    {
      "epoch": 79.328165374677,
      "grad_norm": 0.5388398170471191,
      "learning_rate": 0.00016946597760551248,
      "loss": 0.0984,
      "step": 30700
    },
    {
      "epoch": 79.45736434108527,
      "grad_norm": 0.5261983275413513,
      "learning_rate": 0.00016892764857881138,
      "loss": 0.1016,
      "step": 30750
    },
    {
      "epoch": 79.58656330749353,
      "grad_norm": 0.5288842916488647,
      "learning_rate": 0.00016838931955211027,
      "loss": 0.1017,
      "step": 30800
    },
    {
      "epoch": 79.7157622739018,
      "grad_norm": 0.5714542865753174,
      "learning_rate": 0.00016785099052540913,
      "loss": 0.1022,
      "step": 30850
    },
    {
      "epoch": 79.84496124031008,
      "grad_norm": 0.5694677829742432,
      "learning_rate": 0.00016731266149870802,
      "loss": 0.1032,
      "step": 30900
    },
    {
      "epoch": 79.97416020671835,
      "grad_norm": 0.5570291876792908,
      "learning_rate": 0.00016677433247200692,
      "loss": 0.1049,
      "step": 30950
    },
    {
      "epoch": 80.10335917312662,
      "grad_norm": 0.545073390007019,
      "learning_rate": 0.00016623600344530575,
      "loss": 0.0965,
      "step": 31000
    },
    {
      "epoch": 80.23255813953489,
      "grad_norm": 0.5710629820823669,
      "learning_rate": 0.00016569767441860464,
      "loss": 0.0938,
      "step": 31050
    },
    {
      "epoch": 80.36175710594316,
      "grad_norm": 0.5597730278968811,
      "learning_rate": 0.00016515934539190354,
      "loss": 0.0947,
      "step": 31100
    },
    {
      "epoch": 80.49095607235142,
      "grad_norm": 0.5450734496116638,
      "learning_rate": 0.0001646210163652024,
      "loss": 0.0981,
      "step": 31150
    },
    {
      "epoch": 80.62015503875969,
      "grad_norm": 0.5754163861274719,
      "learning_rate": 0.0001640826873385013,
      "loss": 0.0978,
      "step": 31200
    },
    {
      "epoch": 80.74935400516796,
      "grad_norm": 0.5538095831871033,
      "learning_rate": 0.00016354435831180018,
      "loss": 0.0986,
      "step": 31250
    },
    {
      "epoch": 80.87855297157623,
      "grad_norm": 0.5806236267089844,
      "learning_rate": 0.00016300602928509905,
      "loss": 0.1018,
      "step": 31300
    },
    {
      "epoch": 81.0077519379845,
      "grad_norm": 0.5255786180496216,
      "learning_rate": 0.00016246770025839794,
      "loss": 0.1007,
      "step": 31350
    },
    {
      "epoch": 81.13695090439276,
      "grad_norm": 0.5184102654457092,
      "learning_rate": 0.00016192937123169683,
      "loss": 0.0908,
      "step": 31400
    },
    {
      "epoch": 81.26614987080103,
      "grad_norm": 0.49886295199394226,
      "learning_rate": 0.0001613910422049957,
      "loss": 0.0915,
      "step": 31450
    },
    {
      "epoch": 81.3953488372093,
      "grad_norm": 0.5606934428215027,
      "learning_rate": 0.0001608527131782946,
      "loss": 0.0937,
      "step": 31500
    },
    {
      "epoch": 81.52454780361757,
      "grad_norm": 0.5387184023857117,
      "learning_rate": 0.00016031438415159348,
      "loss": 0.0942,
      "step": 31550
    },
    {
      "epoch": 81.65374677002583,
      "grad_norm": 0.5116854310035706,
      "learning_rate": 0.00015977605512489235,
      "loss": 0.0931,
      "step": 31600
    },
    {
      "epoch": 81.78294573643412,
      "grad_norm": 0.5231042504310608,
      "learning_rate": 0.0001592377260981912,
      "loss": 0.095,
      "step": 31650
    },
    {
      "epoch": 81.91214470284238,
      "grad_norm": 0.5129603147506714,
      "learning_rate": 0.00015869939707149007,
      "loss": 0.0944,
      "step": 31700
    },
    {
      "epoch": 82.04134366925065,
      "grad_norm": 0.5245692729949951,
      "learning_rate": 0.00015816106804478897,
      "loss": 0.0949,
      "step": 31750
    },
    {
      "epoch": 82.17054263565892,
      "grad_norm": 0.5415670275688171,
      "learning_rate": 0.00015762273901808786,
      "loss": 0.0883,
      "step": 31800
    },
    {
      "epoch": 82.29974160206719,
      "grad_norm": 0.5072740316390991,
      "learning_rate": 0.00015708440999138672,
      "loss": 0.0879,
      "step": 31850
    },
    {
      "epoch": 82.42894056847545,
      "grad_norm": 0.5251570343971252,
      "learning_rate": 0.00015654608096468561,
      "loss": 0.0887,
      "step": 31900
    },
    {
      "epoch": 82.55813953488372,
      "grad_norm": 0.4759777784347534,
      "learning_rate": 0.0001560077519379845,
      "loss": 0.089,
      "step": 31950
    },
    {
      "epoch": 82.68733850129199,
      "grad_norm": 0.5312922596931458,
      "learning_rate": 0.00015546942291128337,
      "loss": 0.089,
      "step": 32000
    },
    {
      "epoch": 82.81653746770026,
      "grad_norm": 0.49206650257110596,
      "learning_rate": 0.00015493109388458226,
      "loss": 0.0903,
      "step": 32050
    },
    {
      "epoch": 82.94573643410853,
      "grad_norm": 0.5158073902130127,
      "learning_rate": 0.00015439276485788115,
      "loss": 0.0938,
      "step": 32100
    },
    {
      "epoch": 83.0749354005168,
      "grad_norm": 0.5415765643119812,
      "learning_rate": 0.00015385443583118002,
      "loss": 0.0877,
      "step": 32150
    },
    {
      "epoch": 83.20413436692506,
      "grad_norm": 0.5216255784034729,
      "learning_rate": 0.0001533161068044789,
      "loss": 0.0833,
      "step": 32200
    },
    {
      "epoch": 83.33333333333333,
      "grad_norm": 0.5101693868637085,
      "learning_rate": 0.0001527777777777778,
      "loss": 0.0821,
      "step": 32250
    },
    {
      "epoch": 83.4625322997416,
      "grad_norm": 0.5045164823532104,
      "learning_rate": 0.00015223944875107664,
      "loss": 0.0841,
      "step": 32300
    },
    {
      "epoch": 83.59173126614986,
      "grad_norm": 0.5058096051216125,
      "learning_rate": 0.00015170111972437553,
      "loss": 0.086,
      "step": 32350
    },
    {
      "epoch": 83.72093023255815,
      "grad_norm": 0.5525325536727905,
      "learning_rate": 0.00015116279069767442,
      "loss": 0.0892,
      "step": 32400
    },
    {
      "epoch": 83.85012919896641,
      "grad_norm": 0.5055533051490784,
      "learning_rate": 0.0001506244616709733,
      "loss": 0.0877,
      "step": 32450
    },
    {
      "epoch": 83.97932816537468,
      "grad_norm": 0.5254034996032715,
      "learning_rate": 0.00015008613264427218,
      "loss": 0.0868,
      "step": 32500
    },
    {
      "epoch": 84.10852713178295,
      "grad_norm": 0.5174809694290161,
      "learning_rate": 0.00014954780361757107,
      "loss": 0.0836,
      "step": 32550
    },
    {
      "epoch": 84.23772609819122,
      "grad_norm": 0.5142568349838257,
      "learning_rate": 0.00014900947459086994,
      "loss": 0.0801,
      "step": 32600
    },
    {
      "epoch": 84.36692506459949,
      "grad_norm": 0.5007867217063904,
      "learning_rate": 0.00014847114556416883,
      "loss": 0.0818,
      "step": 32650
    },
    {
      "epoch": 84.49612403100775,
      "grad_norm": 0.5386996865272522,
      "learning_rate": 0.00014793281653746772,
      "loss": 0.0833,
      "step": 32700
    },
    {
      "epoch": 84.62532299741602,
      "grad_norm": 0.5061959624290466,
      "learning_rate": 0.00014739448751076658,
      "loss": 0.0819,
      "step": 32750
    },
    {
      "epoch": 84.75452196382429,
      "grad_norm": 0.5171679854393005,
      "learning_rate": 0.00014685615848406548,
      "loss": 0.0841,
      "step": 32800
    },
    {
      "epoch": 84.88372093023256,
      "grad_norm": 0.50377357006073,
      "learning_rate": 0.00014631782945736437,
      "loss": 0.0839,
      "step": 32850
    },
    {
      "epoch": 85.01291989664082,
      "grad_norm": 0.5096918344497681,
      "learning_rate": 0.00014577950043066323,
      "loss": 0.0847,
      "step": 32900
    },
    {
      "epoch": 85.14211886304909,
      "grad_norm": 0.46912992000579834,
      "learning_rate": 0.0001452411714039621,
      "loss": 0.0777,
      "step": 32950
    },
    {
      "epoch": 85.27131782945736,
      "grad_norm": 0.4951119124889374,
      "learning_rate": 0.00014470284237726096,
      "loss": 0.0788,
      "step": 33000
    },
    {
      "epoch": 85.40051679586563,
      "grad_norm": 0.4560762345790863,
      "learning_rate": 0.00014416451335055985,
      "loss": 0.0787,
      "step": 33050
    },
    {
      "epoch": 85.5297157622739,
      "grad_norm": 0.5237986445426941,
      "learning_rate": 0.00014362618432385874,
      "loss": 0.079,
      "step": 33100
    },
    {
      "epoch": 85.65891472868218,
      "grad_norm": 0.4753262400627136,
      "learning_rate": 0.0001430878552971576,
      "loss": 0.0784,
      "step": 33150
    },
    {
      "epoch": 85.78811369509044,
      "grad_norm": 0.5306388735771179,
      "learning_rate": 0.0001425495262704565,
      "loss": 0.0797,
      "step": 33200
    },
    {
      "epoch": 85.91731266149871,
      "grad_norm": 0.4896502196788788,
      "learning_rate": 0.0001420111972437554,
      "loss": 0.0816,
      "step": 33250
    },
    {
      "epoch": 86.04651162790698,
      "grad_norm": 0.4777597486972809,
      "learning_rate": 0.00014147286821705426,
      "loss": 0.0789,
      "step": 33300
    },
    {
      "epoch": 86.17571059431525,
      "grad_norm": 0.47817400097846985,
      "learning_rate": 0.00014093453919035315,
      "loss": 0.0747,
      "step": 33350
    },
    {
      "epoch": 86.30490956072352,
      "grad_norm": 0.46865278482437134,
      "learning_rate": 0.00014039621016365204,
      "loss": 0.0744,
      "step": 33400
    },
    {
      "epoch": 86.43410852713178,
      "grad_norm": 0.492794007062912,
      "learning_rate": 0.0001398578811369509,
      "loss": 0.0738,
      "step": 33450
    },
    {
      "epoch": 86.56330749354005,
      "grad_norm": 0.5230957269668579,
      "learning_rate": 0.0001393195521102498,
      "loss": 0.0759,
      "step": 33500
    },
    {
      "epoch": 86.69250645994832,
      "grad_norm": 0.4782662093639374,
      "learning_rate": 0.0001387812230835487,
      "loss": 0.0755,
      "step": 33550
    },
    {
      "epoch": 86.82170542635659,
      "grad_norm": 0.44037720561027527,
      "learning_rate": 0.00013824289405684753,
      "loss": 0.0772,
      "step": 33600
    },
    {
      "epoch": 86.95090439276485,
      "grad_norm": 0.5181645750999451,
      "learning_rate": 0.00013770456503014642,
      "loss": 0.0773,
      "step": 33650
    },
    {
      "epoch": 87.08010335917312,
      "grad_norm": 0.48176249861717224,
      "learning_rate": 0.0001371662360034453,
      "loss": 0.0736,
      "step": 33700
    },
    {
      "epoch": 87.20930232558139,
      "grad_norm": 0.4969804883003235,
      "learning_rate": 0.00013662790697674417,
      "loss": 0.0727,
      "step": 33750
    },
    {
      "epoch": 87.33850129198966,
      "grad_norm": 0.47709715366363525,
      "learning_rate": 0.00013608957795004307,
      "loss": 0.0721,
      "step": 33800
    },
    {
      "epoch": 87.46770025839793,
      "grad_norm": 0.4579997658729553,
      "learning_rate": 0.00013555124892334196,
      "loss": 0.0742,
      "step": 33850
    },
    {
      "epoch": 87.59689922480621,
      "grad_norm": 0.4676733613014221,
      "learning_rate": 0.00013501291989664082,
      "loss": 0.073,
      "step": 33900
    },
    {
      "epoch": 87.72609819121448,
      "grad_norm": 0.4743864834308624,
      "learning_rate": 0.00013447459086993971,
      "loss": 0.0735,
      "step": 33950
    },
    {
      "epoch": 87.85529715762274,
      "grad_norm": 0.4351957440376282,
      "learning_rate": 0.0001339362618432386,
      "loss": 0.0726,
      "step": 34000
    },
    {
      "epoch": 87.98449612403101,
      "grad_norm": 0.49457040429115295,
      "learning_rate": 0.00013339793281653747,
      "loss": 0.0759,
      "step": 34050
    },
    {
      "epoch": 88.11369509043928,
      "grad_norm": 0.48343417048454285,
      "learning_rate": 0.00013285960378983636,
      "loss": 0.0698,
      "step": 34100
    },
    {
      "epoch": 88.24289405684755,
      "grad_norm": 0.47031763195991516,
      "learning_rate": 0.00013232127476313525,
      "loss": 0.0706,
      "step": 34150
    },
    {
      "epoch": 88.37209302325581,
      "grad_norm": 0.4494394361972809,
      "learning_rate": 0.00013178294573643412,
      "loss": 0.0684,
      "step": 34200
    },
    {
      "epoch": 88.50129198966408,
      "grad_norm": 0.4400874078273773,
      "learning_rate": 0.00013124461670973298,
      "loss": 0.0692,
      "step": 34250
    },
    {
      "epoch": 88.63049095607235,
      "grad_norm": 0.507520854473114,
      "learning_rate": 0.00013070628768303185,
      "loss": 0.0699,
      "step": 34300
    },
    {
      "epoch": 88.75968992248062,
      "grad_norm": 0.46303391456604004,
      "learning_rate": 0.00013016795865633074,
      "loss": 0.0706,
      "step": 34350
    },
    {
      "epoch": 88.88888888888889,
      "grad_norm": 0.4445197880268097,
      "learning_rate": 0.00012962962962962963,
      "loss": 0.0702,
      "step": 34400
    },
    {
      "epoch": 89.01808785529715,
      "grad_norm": 0.48853182792663574,
      "learning_rate": 0.0001290913006029285,
      "loss": 0.0701,
      "step": 34450
    },
    {
      "epoch": 89.14728682170542,
      "grad_norm": 0.385803759098053,
      "learning_rate": 0.0001285529715762274,
      "loss": 0.0667,
      "step": 34500
    },
    {
      "epoch": 89.27648578811369,
      "grad_norm": 0.5106987953186035,
      "learning_rate": 0.00012801464254952628,
      "loss": 0.0646,
      "step": 34550
    },
    {
      "epoch": 89.40568475452196,
      "grad_norm": 0.46638891100883484,
      "learning_rate": 0.00012747631352282514,
      "loss": 0.0674,
      "step": 34600
    },
    {
      "epoch": 89.53488372093024,
      "grad_norm": 0.467160701751709,
      "learning_rate": 0.00012693798449612404,
      "loss": 0.0666,
      "step": 34650
    },
    {
      "epoch": 89.6640826873385,
      "grad_norm": 0.475965291261673,
      "learning_rate": 0.00012639965546942293,
      "loss": 0.0677,
      "step": 34700
    },
    {
      "epoch": 89.79328165374677,
      "grad_norm": 0.4474709928035736,
      "learning_rate": 0.0001258613264427218,
      "loss": 0.0679,
      "step": 34750
    },
    {
      "epoch": 89.92248062015504,
      "grad_norm": 0.45818397402763367,
      "learning_rate": 0.00012532299741602068,
      "loss": 0.0689,
      "step": 34800
    },
    {
      "epoch": 90.05167958656331,
      "grad_norm": 0.45599740743637085,
      "learning_rate": 0.00012478466838931955,
      "loss": 0.066,
      "step": 34850
    },
    {
      "epoch": 90.18087855297158,
      "grad_norm": 0.4563447833061218,
      "learning_rate": 0.00012424633936261844,
      "loss": 0.0627,
      "step": 34900
    },
    {
      "epoch": 90.31007751937985,
      "grad_norm": 0.4174925982952118,
      "learning_rate": 0.0001237080103359173,
      "loss": 0.0632,
      "step": 34950
    },
    {
      "epoch": 90.43927648578811,
      "grad_norm": 0.4518079459667206,
      "learning_rate": 0.0001231696813092162,
      "loss": 0.0659,
      "step": 35000
    },
    {
      "epoch": 90.56847545219638,
      "grad_norm": 0.4764799177646637,
      "learning_rate": 0.0001226313522825151,
      "loss": 0.065,
      "step": 35050
    },
    {
      "epoch": 90.69767441860465,
      "grad_norm": 0.4654959440231323,
      "learning_rate": 0.00012209302325581395,
      "loss": 0.066,
      "step": 35100
    },
    {
      "epoch": 90.82687338501292,
      "grad_norm": 0.44473323225975037,
      "learning_rate": 0.00012155469422911283,
      "loss": 0.0652,
      "step": 35150
    },
    {
      "epoch": 90.95607235142118,
      "grad_norm": 0.460421085357666,
      "learning_rate": 0.00012101636520241171,
      "loss": 0.0669,
      "step": 35200
    },
    {
      "epoch": 91.08527131782945,
      "grad_norm": 0.4466608166694641,
      "learning_rate": 0.0001204780361757106,
      "loss": 0.0628,
      "step": 35250
    },
    {
      "epoch": 91.21447028423772,
      "grad_norm": 0.4198400378227234,
      "learning_rate": 0.00011993970714900948,
      "loss": 0.0618,
      "step": 35300
    },
    {
      "epoch": 91.343669250646,
      "grad_norm": 0.4136331081390381,
      "learning_rate": 0.00011940137812230836,
      "loss": 0.0615,
      "step": 35350
    },
    {
      "epoch": 91.47286821705427,
      "grad_norm": 0.46128422021865845,
      "learning_rate": 0.00011886304909560724,
      "loss": 0.0615,
      "step": 35400
    },
    {
      "epoch": 91.60206718346254,
      "grad_norm": 0.44016239047050476,
      "learning_rate": 0.00011832472006890611,
      "loss": 0.0623,
      "step": 35450
    },
    {
      "epoch": 91.7312661498708,
      "grad_norm": 0.4262442886829376,
      "learning_rate": 0.00011778639104220499,
      "loss": 0.0632,
      "step": 35500
    },
    {
      "epoch": 91.86046511627907,
      "grad_norm": 0.4556371569633484,
      "learning_rate": 0.00011724806201550388,
      "loss": 0.0627,
      "step": 35550
    },
    {
      "epoch": 91.98966408268734,
      "grad_norm": 0.46493425965309143,
      "learning_rate": 0.00011670973298880276,
      "loss": 0.0628,
      "step": 35600
    },
    {
      "epoch": 92.11886304909561,
      "grad_norm": 0.4092924892902374,
      "learning_rate": 0.00011617140396210164,
      "loss": 0.0583,
      "step": 35650
    },
    {
      "epoch": 92.24806201550388,
      "grad_norm": 0.42103105783462524,
      "learning_rate": 0.00011563307493540053,
      "loss": 0.0582,
      "step": 35700
    },
    {
      "epoch": 92.37726098191214,
      "grad_norm": 0.43579620122909546,
      "learning_rate": 0.0001150947459086994,
      "loss": 0.0585,
      "step": 35750
    },
    {
      "epoch": 92.50645994832041,
      "grad_norm": 0.46419718861579895,
      "learning_rate": 0.00011455641688199827,
      "loss": 0.0589,
      "step": 35800
    },
    {
      "epoch": 92.63565891472868,
      "grad_norm": 0.44076141715049744,
      "learning_rate": 0.00011401808785529715,
      "loss": 0.0602,
      "step": 35850
    },
    {
      "epoch": 92.76485788113695,
      "grad_norm": 0.4496707022190094,
      "learning_rate": 0.00011347975882859604,
      "loss": 0.0605,
      "step": 35900
    },
    {
      "epoch": 92.89405684754522,
      "grad_norm": 0.4114878177642822,
      "learning_rate": 0.00011294142980189492,
      "loss": 0.0594,
      "step": 35950
    },
    {
      "epoch": 93.02325581395348,
      "grad_norm": 0.4541579484939575,
      "learning_rate": 0.0001124031007751938,
      "loss": 0.0594,
      "step": 36000
    },
    {
      "epoch": 93.15245478036175,
      "grad_norm": 0.4383090138435364,
      "learning_rate": 0.00011186477174849268,
      "loss": 0.0559,
      "step": 36050
    },
    {
      "epoch": 93.28165374677003,
      "grad_norm": 0.41675901412963867,
      "learning_rate": 0.00011132644272179156,
      "loss": 0.0573,
      "step": 36100
    },
    {
      "epoch": 93.4108527131783,
      "grad_norm": 0.4273838698863983,
      "learning_rate": 0.00011078811369509044,
      "loss": 0.0561,
      "step": 36150
    },
    {
      "epoch": 93.54005167958657,
      "grad_norm": 0.4871039390563965,
      "learning_rate": 0.00011024978466838933,
      "loss": 0.057,
      "step": 36200
    },
    {
      "epoch": 93.66925064599484,
      "grad_norm": 0.4159230589866638,
      "learning_rate": 0.0001097114556416882,
      "loss": 0.0559,
      "step": 36250
    },
    {
      "epoch": 93.7984496124031,
      "grad_norm": 0.4573890268802643,
      "learning_rate": 0.00010917312661498708,
      "loss": 0.0567,
      "step": 36300
    },
    {
      "epoch": 93.92764857881137,
      "grad_norm": 0.42454472184181213,
      "learning_rate": 0.00010863479758828598,
      "loss": 0.0573,
      "step": 36350
    },
    {
      "epoch": 94.05684754521964,
      "grad_norm": 0.4321294128894806,
      "learning_rate": 0.00010809646856158484,
      "loss": 0.0548,
      "step": 36400
    },
    {
      "epoch": 94.18604651162791,
      "grad_norm": 0.4019274413585663,
      "learning_rate": 0.00010755813953488372,
      "loss": 0.0542,
      "step": 36450
    },
    {
      "epoch": 94.31524547803618,
      "grad_norm": 0.4189925491809845,
      "learning_rate": 0.0001070198105081826,
      "loss": 0.0535,
      "step": 36500
    },
    {
      "epoch": 94.44444444444444,
      "grad_norm": 0.41363525390625,
      "learning_rate": 0.00010648148148148149,
      "loss": 0.0534,
      "step": 36550
    },
    {
      "epoch": 94.57364341085271,
      "grad_norm": 0.4355786144733429,
      "learning_rate": 0.00010594315245478037,
      "loss": 0.0543,
      "step": 36600
    },
    {
      "epoch": 94.70284237726098,
      "grad_norm": 0.4254317879676819,
      "learning_rate": 0.00010540482342807924,
      "loss": 0.055,
      "step": 36650
    },
    {
      "epoch": 94.83204134366925,
      "grad_norm": 0.41139477491378784,
      "learning_rate": 0.00010486649440137812,
      "loss": 0.0562,
      "step": 36700
    },
    {
      "epoch": 94.96124031007751,
      "grad_norm": 0.4678131937980652,
      "learning_rate": 0.000104328165374677,
      "loss": 0.0562,
      "step": 36750
    },
    {
      "epoch": 95.09043927648578,
      "grad_norm": 0.39462369680404663,
      "learning_rate": 0.00010378983634797588,
      "loss": 0.0531,
      "step": 36800
    },
    {
      "epoch": 95.21963824289406,
      "grad_norm": 0.38998299837112427,
      "learning_rate": 0.00010325150732127477,
      "loss": 0.0513,
      "step": 36850
    },
    {
      "epoch": 95.34883720930233,
      "grad_norm": 0.4036840796470642,
      "learning_rate": 0.00010271317829457365,
      "loss": 0.0504,
      "step": 36900
    },
    {
      "epoch": 95.4780361757106,
      "grad_norm": 0.3964759111404419,
      "learning_rate": 0.00010217484926787253,
      "loss": 0.0531,
      "step": 36950
    },
    {
      "epoch": 95.60723514211887,
      "grad_norm": 0.40481048822402954,
      "learning_rate": 0.00010163652024117142,
      "loss": 0.0519,
      "step": 37000
    },
    {
      "epoch": 95.73643410852713,
      "grad_norm": 0.4211597740650177,
      "learning_rate": 0.00010109819121447028,
      "loss": 0.0519,
      "step": 37050
    },
    {
      "epoch": 95.8656330749354,
      "grad_norm": 0.3958792984485626,
      "learning_rate": 0.00010055986218776916,
      "loss": 0.0529,
      "step": 37100
    },
    {
      "epoch": 95.99483204134367,
      "grad_norm": 0.3794058859348297,
      "learning_rate": 0.00010002153316106804,
      "loss": 0.0528,
      "step": 37150
    },
    {
      "epoch": 96.12403100775194,
      "grad_norm": 0.3733406662940979,
      "learning_rate": 9.948320413436693e-05,
      "loss": 0.0497,
      "step": 37200
    },
    {
      "epoch": 96.2532299741602,
      "grad_norm": 0.4121762216091156,
      "learning_rate": 9.894487510766581e-05,
      "loss": 0.0496,
      "step": 37250
    },
    {
      "epoch": 96.38242894056847,
      "grad_norm": 0.42456814646720886,
      "learning_rate": 9.840654608096469e-05,
      "loss": 0.0506,
      "step": 37300
    },
    {
      "epoch": 96.51162790697674,
      "grad_norm": 0.37236857414245605,
      "learning_rate": 9.786821705426357e-05,
      "loss": 0.0501,
      "step": 37350
    },
    {
      "epoch": 96.64082687338501,
      "grad_norm": 0.43841931223869324,
      "learning_rate": 9.732988802756244e-05,
      "loss": 0.0505,
      "step": 37400
    },
    {
      "epoch": 96.77002583979328,
      "grad_norm": 0.42934465408325195,
      "learning_rate": 9.679155900086132e-05,
      "loss": 0.051,
      "step": 37450
    },
    {
      "epoch": 96.89922480620154,
      "grad_norm": 0.42682304978370667,
      "learning_rate": 9.625322997416021e-05,
      "loss": 0.0507,
      "step": 37500
    },
    {
      "epoch": 97.02842377260981,
      "grad_norm": 0.40529173612594604,
      "learning_rate": 9.571490094745909e-05,
      "loss": 0.0516,
      "step": 37550
    },
    {
      "epoch": 97.1576227390181,
      "grad_norm": 0.37240245938301086,
      "learning_rate": 9.517657192075797e-05,
      "loss": 0.0486,
      "step": 37600
    },
    {
      "epoch": 97.28682170542636,
      "grad_norm": 0.3948003649711609,
      "learning_rate": 9.463824289405686e-05,
      "loss": 0.0477,
      "step": 37650
    },
    {
      "epoch": 97.41602067183463,
      "grad_norm": 0.37918588519096375,
      "learning_rate": 9.409991386735573e-05,
      "loss": 0.0467,
      "step": 37700
    },
    {
      "epoch": 97.5452196382429,
      "grad_norm": 0.3936639130115509,
      "learning_rate": 9.35615848406546e-05,
      "loss": 0.0482,
      "step": 37750
    },
    {
      "epoch": 97.67441860465117,
      "grad_norm": 0.4552816152572632,
      "learning_rate": 9.302325581395348e-05,
      "loss": 0.0476,
      "step": 37800
    },
    {
      "epoch": 97.80361757105943,
      "grad_norm": 0.39734596014022827,
      "learning_rate": 9.248492678725237e-05,
      "loss": 0.0478,
      "step": 37850
    },
    {
      "epoch": 97.9328165374677,
      "grad_norm": 0.400199830532074,
      "learning_rate": 9.194659776055125e-05,
      "loss": 0.0487,
      "step": 37900
    },
    {
      "epoch": 98.06201550387597,
      "grad_norm": 0.40986114740371704,
      "learning_rate": 9.140826873385013e-05,
      "loss": 0.0473,
      "step": 37950
    },
    {
      "epoch": 98.19121447028424,
      "grad_norm": 0.37425094842910767,
      "learning_rate": 9.086993970714901e-05,
      "loss": 0.0454,
      "step": 38000
    },
    {
      "epoch": 98.3204134366925,
      "grad_norm": 0.4002920985221863,
      "learning_rate": 9.033161068044789e-05,
      "loss": 0.045,
      "step": 38050
    },
    {
      "epoch": 98.44961240310077,
      "grad_norm": 0.37533891201019287,
      "learning_rate": 8.979328165374677e-05,
      "loss": 0.0464,
      "step": 38100
    },
    {
      "epoch": 98.57881136950904,
      "grad_norm": 0.36428573727607727,
      "learning_rate": 8.925495262704566e-05,
      "loss": 0.046,
      "step": 38150
    },
    {
      "epoch": 98.70801033591731,
      "grad_norm": 0.4033423662185669,
      "learning_rate": 8.871662360034454e-05,
      "loss": 0.0464,
      "step": 38200
    },
    {
      "epoch": 98.83720930232558,
      "grad_norm": 0.3599696755409241,
      "learning_rate": 8.817829457364341e-05,
      "loss": 0.0463,
      "step": 38250
    },
    {
      "epoch": 98.96640826873384,
      "grad_norm": 0.4143561124801636,
      "learning_rate": 8.76399655469423e-05,
      "loss": 0.0465,
      "step": 38300
    },
    {
      "epoch": 99.09560723514213,
      "grad_norm": 0.40714794397354126,
      "learning_rate": 8.710163652024117e-05,
      "loss": 0.0471,
      "step": 38350
    },
    {
      "epoch": 99.2248062015504,
      "grad_norm": 0.3889954388141632,
      "learning_rate": 8.656330749354005e-05,
      "loss": 0.0433,
      "step": 38400
    },
    {
      "epoch": 99.35400516795866,
      "grad_norm": 0.38456812500953674,
      "learning_rate": 8.602497846683893e-05,
      "loss": 0.0432,
      "step": 38450
    },
    {
      "epoch": 99.48320413436693,
      "grad_norm": 0.32937514781951904,
      "learning_rate": 8.548664944013782e-05,
      "loss": 0.043,
      "step": 38500
    },
    {
      "epoch": 99.6124031007752,
      "grad_norm": 0.3830249309539795,
      "learning_rate": 8.49483204134367e-05,
      "loss": 0.0435,
      "step": 38550
    },
    {
      "epoch": 99.74160206718346,
      "grad_norm": 0.37751129269599915,
      "learning_rate": 8.440999138673557e-05,
      "loss": 0.0443,
      "step": 38600
    },
    {
      "epoch": 99.87080103359173,
      "grad_norm": 0.38694775104522705,
      "learning_rate": 8.387166236003445e-05,
      "loss": 0.0433,
      "step": 38650
    },
    {
      "epoch": 100.0,
      "grad_norm": 0.6568818092346191,
      "learning_rate": 8.333333333333333e-05,
      "loss": 0.0442,
      "step": 38700
    },
    {
      "epoch": 100.12919896640827,
      "grad_norm": 0.3570460379123688,
      "learning_rate": 8.279500430663221e-05,
      "loss": 0.0419,
      "step": 38750
    },
    {
      "epoch": 100.25839793281654,
      "grad_norm": 0.41265904903411865,
      "learning_rate": 8.22566752799311e-05,
      "loss": 0.0422,
      "step": 38800
    },
    {
      "epoch": 100.3875968992248,
      "grad_norm": 0.3816283941268921,
      "learning_rate": 8.171834625322998e-05,
      "loss": 0.041,
      "step": 38850
    },
    {
      "epoch": 100.51679586563307,
      "grad_norm": 0.3651896119117737,
      "learning_rate": 8.118001722652886e-05,
      "loss": 0.042,
      "step": 38900
    },
    {
      "epoch": 100.64599483204134,
      "grad_norm": 0.3702296316623688,
      "learning_rate": 8.064168819982775e-05,
      "loss": 0.0428,
      "step": 38950
    },
    {
      "epoch": 100.7751937984496,
      "grad_norm": 0.3538137972354889,
      "learning_rate": 8.010335917312661e-05,
      "loss": 0.0425,
      "step": 39000
    },
    {
      "epoch": 100.90439276485787,
      "grad_norm": 0.397035151720047,
      "learning_rate": 7.956503014642549e-05,
      "loss": 0.0426,
      "step": 39050
    },
    {
      "epoch": 101.03359173126616,
      "grad_norm": 0.35197508335113525,
      "learning_rate": 7.902670111972437e-05,
      "loss": 0.0407,
      "step": 39100
    },
    {
      "epoch": 101.16279069767442,
      "grad_norm": 0.33107805252075195,
      "learning_rate": 7.848837209302326e-05,
      "loss": 0.0414,
      "step": 39150
    },
    {
      "epoch": 101.29198966408269,
      "grad_norm": 0.349092036485672,
      "learning_rate": 7.795004306632214e-05,
      "loss": 0.0398,
      "step": 39200
    },
    {
      "epoch": 101.42118863049096,
      "grad_norm": 0.34708458185195923,
      "learning_rate": 7.741171403962102e-05,
      "loss": 0.0397,
      "step": 39250
    },
    {
      "epoch": 101.55038759689923,
      "grad_norm": 0.37818095088005066,
      "learning_rate": 7.68733850129199e-05,
      "loss": 0.0401,
      "step": 39300
    },
    {
      "epoch": 101.6795865633075,
      "grad_norm": 0.36844754219055176,
      "learning_rate": 7.633505598621877e-05,
      "loss": 0.0406,
      "step": 39350
    },
    {
      "epoch": 101.80878552971576,
      "grad_norm": 0.3928978741168976,
      "learning_rate": 7.579672695951765e-05,
      "loss": 0.0409,
      "step": 39400
    },
    {
      "epoch": 101.93798449612403,
      "grad_norm": 0.3787928521633148,
      "learning_rate": 7.525839793281654e-05,
      "loss": 0.0408,
      "step": 39450
    },
    {
      "epoch": 102.0671834625323,
      "grad_norm": 0.3467255234718323,
      "learning_rate": 7.472006890611542e-05,
      "loss": 0.0389,
      "step": 39500
    },
    {
      "epoch": 102.19638242894057,
      "grad_norm": 0.35923025012016296,
      "learning_rate": 7.41817398794143e-05,
      "loss": 0.039,
      "step": 39550
    },
    {
      "epoch": 102.32558139534883,
      "grad_norm": 0.37049078941345215,
      "learning_rate": 7.364341085271319e-05,
      "loss": 0.0382,
      "step": 39600
    },
    {
      "epoch": 102.4547803617571,
      "grad_norm": 0.3471563458442688,
      "learning_rate": 7.310508182601206e-05,
      "loss": 0.0384,
      "step": 39650
    },
    {
      "epoch": 102.58397932816537,
      "grad_norm": 0.38930314779281616,
      "learning_rate": 7.256675279931093e-05,
      "loss": 0.0382,
      "step": 39700
    },
    {
      "epoch": 102.71317829457364,
      "grad_norm": 0.32015353441238403,
      "learning_rate": 7.202842377260981e-05,
      "loss": 0.0384,
      "step": 39750
    },
    {
      "epoch": 102.8423772609819,
      "grad_norm": 0.3972156047821045,
      "learning_rate": 7.14900947459087e-05,
      "loss": 0.0395,
      "step": 39800
    },
    {
      "epoch": 102.97157622739019,
      "grad_norm": 0.37609419226646423,
      "learning_rate": 7.095176571920758e-05,
      "loss": 0.0385,
      "step": 39850
    },
    {
      "epoch": 103.10077519379846,
      "grad_norm": 0.3322031497955322,
      "learning_rate": 7.041343669250646e-05,
      "loss": 0.0367,
      "step": 39900
    },
    {
      "epoch": 103.22997416020672,
      "grad_norm": 0.35171768069267273,
      "learning_rate": 6.987510766580535e-05,
      "loss": 0.0362,
      "step": 39950
    },
    {
      "epoch": 103.35917312661499,
      "grad_norm": 0.3546689748764038,
      "learning_rate": 6.933677863910422e-05,
      "loss": 0.0359,
      "step": 40000
    },
    {
      "epoch": 103.48837209302326,
      "grad_norm": 0.3415464162826538,
      "learning_rate": 6.87984496124031e-05,
      "loss": 0.0366,
      "step": 40050
    },
    {
      "epoch": 103.61757105943153,
      "grad_norm": 0.3414008617401123,
      "learning_rate": 6.826012058570199e-05,
      "loss": 0.0382,
      "step": 40100
    },
    {
      "epoch": 103.7467700258398,
      "grad_norm": 0.36913546919822693,
      "learning_rate": 6.772179155900087e-05,
      "loss": 0.0372,
      "step": 40150
    },
    {
      "epoch": 103.87596899224806,
      "grad_norm": 0.34218931198120117,
      "learning_rate": 6.718346253229974e-05,
      "loss": 0.0365,
      "step": 40200
    },
    {
      "epoch": 104.00516795865633,
      "grad_norm": 0.37227755784988403,
      "learning_rate": 6.664513350559864e-05,
      "loss": 0.0374,
      "step": 40250
    },
    {
      "epoch": 104.1343669250646,
      "grad_norm": 0.326456755399704,
      "learning_rate": 6.61068044788975e-05,
      "loss": 0.0341,
      "step": 40300
    },
    {
      "epoch": 104.26356589147287,
      "grad_norm": 0.3372190296649933,
      "learning_rate": 6.556847545219638e-05,
      "loss": 0.0352,
      "step": 40350
    },
    {
      "epoch": 104.39276485788113,
      "grad_norm": 0.31879281997680664,
      "learning_rate": 6.503014642549526e-05,
      "loss": 0.0353,
      "step": 40400
    },
    {
      "epoch": 104.5219638242894,
      "grad_norm": 0.36283543705940247,
      "learning_rate": 6.449181739879415e-05,
      "loss": 0.0352,
      "step": 40450
    },
    {
      "epoch": 104.65116279069767,
      "grad_norm": 0.32433897256851196,
      "learning_rate": 6.395348837209303e-05,
      "loss": 0.0348,
      "step": 40500
    },
    {
      "epoch": 104.78036175710594,
      "grad_norm": 0.3450929522514343,
      "learning_rate": 6.34151593453919e-05,
      "loss": 0.0356,
      "step": 40550
    },
    {
      "epoch": 104.90956072351422,
      "grad_norm": 0.31949424743652344,
      "learning_rate": 6.28768303186908e-05,
      "loss": 0.0354,
      "step": 40600
    },
    {
      "epoch": 105.03875968992249,
      "grad_norm": 0.3567245900630951,
      "learning_rate": 6.233850129198966e-05,
      "loss": 0.0347,
      "step": 40650
    },
    {
      "epoch": 105.16795865633075,
      "grad_norm": 0.3256223499774933,
      "learning_rate": 6.180017226528854e-05,
      "loss": 0.0336,
      "step": 40700
    },
    {
      "epoch": 105.29715762273902,
      "grad_norm": 0.34892016649246216,
      "learning_rate": 6.126184323858743e-05,
      "loss": 0.0328,
      "step": 40750
    },
    {
      "epoch": 105.42635658914729,
      "grad_norm": 0.33009088039398193,
      "learning_rate": 6.072351421188631e-05,
      "loss": 0.0335,
      "step": 40800
    },
    {
      "epoch": 105.55555555555556,
      "grad_norm": 0.3117457926273346,
      "learning_rate": 6.018518518518518e-05,
      "loss": 0.0341,
      "step": 40850
    },
    {
      "epoch": 105.68475452196382,
      "grad_norm": 0.3454241156578064,
      "learning_rate": 5.9646856158484065e-05,
      "loss": 0.0335,
      "step": 40900
    },
    {
      "epoch": 105.81395348837209,
      "grad_norm": 0.3349403142929077,
      "learning_rate": 5.910852713178295e-05,
      "loss": 0.0334,
      "step": 40950
    },
    {
      "epoch": 105.94315245478036,
      "grad_norm": 0.3261469602584839,
      "learning_rate": 5.857019810508183e-05,
      "loss": 0.0332,
      "step": 41000
    },
    {
      "epoch": 106.07235142118863,
      "grad_norm": 0.29498082399368286,
      "learning_rate": 5.8031869078380706e-05,
      "loss": 0.0329,
      "step": 41050
    },
    {
      "epoch": 106.2015503875969,
      "grad_norm": 0.3454033434391022,
      "learning_rate": 5.749354005167959e-05,
      "loss": 0.0319,
      "step": 41100
    },
    {
      "epoch": 106.33074935400516,
      "grad_norm": 0.36111462116241455,
      "learning_rate": 5.695521102497847e-05,
      "loss": 0.0321,
      "step": 41150
    },
    {
      "epoch": 106.45994832041343,
      "grad_norm": 0.3629954755306244,
      "learning_rate": 5.641688199827735e-05,
      "loss": 0.032,
      "step": 41200
    },
    {
      "epoch": 106.5891472868217,
      "grad_norm": 0.3075568974018097,
      "learning_rate": 5.5878552971576226e-05,
      "loss": 0.0326,
      "step": 41250
    },
    {
      "epoch": 106.71834625322997,
      "grad_norm": 0.3288273811340332,
      "learning_rate": 5.534022394487511e-05,
      "loss": 0.0319,
      "step": 41300
    },
    {
      "epoch": 106.84754521963825,
      "grad_norm": 0.33936843276023865,
      "learning_rate": 5.480189491817399e-05,
      "loss": 0.0327,
      "step": 41350
    },
    {
      "epoch": 106.97674418604652,
      "grad_norm": 0.32175374031066895,
      "learning_rate": 5.426356589147287e-05,
      "loss": 0.0328,
      "step": 41400
    },
    {
      "epoch": 107.10594315245478,
      "grad_norm": 0.35394299030303955,
      "learning_rate": 5.372523686477175e-05,
      "loss": 0.0309,
      "step": 41450
    },
    {
      "epoch": 107.23514211886305,
      "grad_norm": 0.3082609176635742,
      "learning_rate": 5.3186907838070624e-05,
      "loss": 0.0305,
      "step": 41500
    },
    {
      "epoch": 107.36434108527132,
      "grad_norm": 0.33303478360176086,
      "learning_rate": 5.264857881136951e-05,
      "loss": 0.0309,
      "step": 41550
    },
    {
      "epoch": 107.49354005167959,
      "grad_norm": 0.3374268412590027,
      "learning_rate": 5.2110249784668393e-05,
      "loss": 0.0308,
      "step": 41600
    },
    {
      "epoch": 107.62273901808786,
      "grad_norm": 0.3220424950122833,
      "learning_rate": 5.157192075796727e-05,
      "loss": 0.0308,
      "step": 41650
    },
    {
      "epoch": 107.75193798449612,
      "grad_norm": 0.33315759897232056,
      "learning_rate": 5.103359173126615e-05,
      "loss": 0.0306,
      "step": 41700
    },
    {
      "epoch": 107.88113695090439,
      "grad_norm": 0.3156038820743561,
      "learning_rate": 5.0495262704565035e-05,
      "loss": 0.0313,
      "step": 41750
    },
    {
      "epoch": 108.01033591731266,
      "grad_norm": 0.2947760820388794,
      "learning_rate": 4.995693367786391e-05,
      "loss": 0.0312,
      "step": 41800
    },
    {
      "epoch": 108.13953488372093,
      "grad_norm": 0.32752299308776855,
      "learning_rate": 4.941860465116279e-05,
      "loss": 0.029,
      "step": 41850
    },
    {
      "epoch": 108.2687338501292,
      "grad_norm": 0.32843589782714844,
      "learning_rate": 4.888027562446167e-05,
      "loss": 0.0301,
      "step": 41900
    },
    {
      "epoch": 108.39793281653746,
      "grad_norm": 0.30464568734169006,
      "learning_rate": 4.8341946597760554e-05,
      "loss": 0.0297,
      "step": 41950
    },
    {
      "epoch": 108.52713178294573,
      "grad_norm": 0.31845325231552124,
      "learning_rate": 4.780361757105943e-05,
      "loss": 0.0294,
      "step": 42000
    },
    {
      "epoch": 108.656330749354,
      "grad_norm": 0.2565822899341583,
      "learning_rate": 4.726528854435831e-05,
      "loss": 0.0286,
      "step": 42050
    },
    {
      "epoch": 108.78552971576228,
      "grad_norm": 0.300998330116272,
      "learning_rate": 4.6726959517657196e-05,
      "loss": 0.03,
      "step": 42100
    },
    {
      "epoch": 108.91472868217055,
      "grad_norm": 0.27671414613723755,
      "learning_rate": 4.6188630490956074e-05,
      "loss": 0.0293,
      "step": 42150
    },
    {
      "epoch": 109.04392764857882,
      "grad_norm": 0.2971770167350769,
      "learning_rate": 4.565030146425495e-05,
      "loss": 0.029,
      "step": 42200
    },
    {
      "epoch": 109.17312661498708,
      "grad_norm": 0.28473857045173645,
      "learning_rate": 4.511197243755384e-05,
      "loss": 0.0276,
      "step": 42250
    },
    {
      "epoch": 109.30232558139535,
      "grad_norm": 0.3101308345794678,
      "learning_rate": 4.4573643410852715e-05,
      "loss": 0.0275,
      "step": 42300
    },
    {
      "epoch": 109.43152454780362,
      "grad_norm": 0.28792232275009155,
      "learning_rate": 4.403531438415159e-05,
      "loss": 0.0286,
      "step": 42350
    },
    {
      "epoch": 109.56072351421189,
      "grad_norm": 0.28611868619918823,
      "learning_rate": 4.349698535745048e-05,
      "loss": 0.0284,
      "step": 42400
    },
    {
      "epoch": 109.68992248062015,
      "grad_norm": 0.2752722501754761,
      "learning_rate": 4.2958656330749356e-05,
      "loss": 0.0275,
      "step": 42450
    },
    {
      "epoch": 109.81912144702842,
      "grad_norm": 0.28532662987709045,
      "learning_rate": 4.2420327304048234e-05,
      "loss": 0.0283,
      "step": 42500
    },
    {
      "epoch": 109.94832041343669,
      "grad_norm": 0.30644506216049194,
      "learning_rate": 4.188199827734711e-05,
      "loss": 0.0281,
      "step": 42550
    },
    {
      "epoch": 110.07751937984496,
      "grad_norm": 0.2961358428001404,
      "learning_rate": 4.1343669250646e-05,
      "loss": 0.0277,
      "step": 42600
    },
    {
      "epoch": 110.20671834625323,
      "grad_norm": 0.3172013461589813,
      "learning_rate": 4.0805340223944876e-05,
      "loss": 0.0267,
      "step": 42650
    },
    {
      "epoch": 110.3359173126615,
      "grad_norm": 0.2817389965057373,
      "learning_rate": 4.0267011197243754e-05,
      "loss": 0.0273,
      "step": 42700
    },
    {
      "epoch": 110.46511627906976,
      "grad_norm": 0.2854345440864563,
      "learning_rate": 3.972868217054264e-05,
      "loss": 0.0272,
      "step": 42750
    },
    {
      "epoch": 110.59431524547804,
      "grad_norm": 0.2611036002635956,
      "learning_rate": 3.919035314384152e-05,
      "loss": 0.0273,
      "step": 42800
    },
    {
      "epoch": 110.72351421188631,
      "grad_norm": 0.31307440996170044,
      "learning_rate": 3.8652024117140395e-05,
      "loss": 0.0274,
      "step": 42850
    },
    {
      "epoch": 110.85271317829458,
      "grad_norm": 0.2854072153568268,
      "learning_rate": 3.811369509043928e-05,
      "loss": 0.0267,
      "step": 42900
    },
    {
      "epoch": 110.98191214470285,
      "grad_norm": 0.30634745955467224,
      "learning_rate": 3.757536606373816e-05,
      "loss": 0.027,
      "step": 42950
    },
    {
      "epoch": 111.11111111111111,
      "grad_norm": 0.2771252989768982,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.0254,
      "step": 43000
    },
    {
      "epoch": 111.24031007751938,
      "grad_norm": 0.2962842583656311,
      "learning_rate": 3.649870801033592e-05,
      "loss": 0.0252,
      "step": 43050
    },
    {
      "epoch": 111.36950904392765,
      "grad_norm": 0.31489765644073486,
      "learning_rate": 3.59603789836348e-05,
      "loss": 0.0266,
      "step": 43100
    },
    {
      "epoch": 111.49870801033592,
      "grad_norm": 0.29601141810417175,
      "learning_rate": 3.542204995693368e-05,
      "loss": 0.0259,
      "step": 43150
    },
    {
      "epoch": 111.62790697674419,
      "grad_norm": 0.28086596727371216,
      "learning_rate": 3.4883720930232556e-05,
      "loss": 0.0259,
      "step": 43200
    },
    {
      "epoch": 111.75710594315245,
      "grad_norm": 0.2749045491218567,
      "learning_rate": 3.434539190353144e-05,
      "loss": 0.0257,
      "step": 43250
    },
    {
      "epoch": 111.88630490956072,
      "grad_norm": 0.2901517450809479,
      "learning_rate": 3.380706287683032e-05,
      "loss": 0.0265,
      "step": 43300
    },
    {
      "epoch": 112.01550387596899,
      "grad_norm": 0.29971635341644287,
      "learning_rate": 3.32687338501292e-05,
      "loss": 0.0253,
      "step": 43350
    },
    {
      "epoch": 112.14470284237726,
      "grad_norm": 0.24688522517681122,
      "learning_rate": 3.273040482342808e-05,
      "loss": 0.0246,
      "step": 43400
    },
    {
      "epoch": 112.27390180878552,
      "grad_norm": 0.2922942042350769,
      "learning_rate": 3.219207579672696e-05,
      "loss": 0.0253,
      "step": 43450
    },
    {
      "epoch": 112.40310077519379,
      "grad_norm": 0.293390154838562,
      "learning_rate": 3.165374677002584e-05,
      "loss": 0.0247,
      "step": 43500
    },
    {
      "epoch": 112.53229974160207,
      "grad_norm": 0.2890658676624298,
      "learning_rate": 3.1115417743324724e-05,
      "loss": 0.0248,
      "step": 43550
    },
    {
      "epoch": 112.66149870801034,
      "grad_norm": 0.2771155536174774,
      "learning_rate": 3.05770887166236e-05,
      "loss": 0.0248,
      "step": 43600
    },
    {
      "epoch": 112.79069767441861,
      "grad_norm": 0.26423656940460205,
      "learning_rate": 3.0038759689922483e-05,
      "loss": 0.0249,
      "step": 43650
    },
    {
      "epoch": 112.91989664082688,
      "grad_norm": 0.27324965596199036,
      "learning_rate": 2.950043066322136e-05,
      "loss": 0.0254,
      "step": 43700
    },
    {
      "epoch": 113.04909560723515,
      "grad_norm": 0.31072843074798584,
      "learning_rate": 2.896210163652024e-05,
      "loss": 0.0249,
      "step": 43750
    },
    {
      "epoch": 113.17829457364341,
      "grad_norm": 0.2592846751213074,
      "learning_rate": 2.842377260981912e-05,
      "loss": 0.0242,
      "step": 43800
    },
    {
      "epoch": 113.30749354005168,
      "grad_norm": 0.2808310389518738,
      "learning_rate": 2.7885443583118003e-05,
      "loss": 0.0239,
      "step": 43850
    },
    {
      "epoch": 113.43669250645995,
      "grad_norm": 0.2779451012611389,
      "learning_rate": 2.7347114556416884e-05,
      "loss": 0.0242,
      "step": 43900
    },
    {
      "epoch": 113.56589147286822,
      "grad_norm": 0.25948548316955566,
      "learning_rate": 2.6808785529715763e-05,
      "loss": 0.0241,
      "step": 43950
    },
    {
      "epoch": 113.69509043927648,
      "grad_norm": 0.2576157748699188,
      "learning_rate": 2.627045650301464e-05,
      "loss": 0.0241,
      "step": 44000
    },
    {
      "epoch": 113.82428940568475,
      "grad_norm": 0.27603548765182495,
      "learning_rate": 2.5732127476313526e-05,
      "loss": 0.0242,
      "step": 44050
    },
    {
      "epoch": 113.95348837209302,
      "grad_norm": 0.2617557942867279,
      "learning_rate": 2.5193798449612404e-05,
      "loss": 0.0237,
      "step": 44100
    },
    {
      "epoch": 114.08268733850129,
      "grad_norm": 0.2749338448047638,
      "learning_rate": 2.4655469422911282e-05,
      "loss": 0.0235,
      "step": 44150
    },
    {
      "epoch": 114.21188630490956,
      "grad_norm": 0.2552667558193207,
      "learning_rate": 2.4117140396210164e-05,
      "loss": 0.023,
      "step": 44200
    },
    {
      "epoch": 114.34108527131782,
      "grad_norm": 0.2878882884979248,
      "learning_rate": 2.3578811369509045e-05,
      "loss": 0.0228,
      "step": 44250
    },
    {
      "epoch": 114.4702842377261,
      "grad_norm": 0.24316275119781494,
      "learning_rate": 2.3040482342807927e-05,
      "loss": 0.0236,
      "step": 44300
    },
    {
      "epoch": 114.59948320413437,
      "grad_norm": 0.24825261533260345,
      "learning_rate": 2.2502153316106805e-05,
      "loss": 0.0226,
      "step": 44350
    },
    {
      "epoch": 114.72868217054264,
      "grad_norm": 0.2578867971897125,
      "learning_rate": 2.1963824289405683e-05,
      "loss": 0.0233,
      "step": 44400
    },
    {
      "epoch": 114.85788113695091,
      "grad_norm": 0.26079779863357544,
      "learning_rate": 2.1425495262704565e-05,
      "loss": 0.0227,
      "step": 44450
    },
    {
      "epoch": 114.98708010335918,
      "grad_norm": 0.312183678150177,
      "learning_rate": 2.0887166236003446e-05,
      "loss": 0.0233,
      "step": 44500
    },
    {
      "epoch": 115.11627906976744,
      "grad_norm": 0.25289422273635864,
      "learning_rate": 2.0348837209302328e-05,
      "loss": 0.0225,
      "step": 44550
    },
    {
      "epoch": 115.24547803617571,
      "grad_norm": 0.2389523983001709,
      "learning_rate": 1.9810508182601206e-05,
      "loss": 0.0221,
      "step": 44600
    },
    {
      "epoch": 115.37467700258398,
      "grad_norm": 0.2758323848247528,
      "learning_rate": 1.9272179155900084e-05,
      "loss": 0.0225,
      "step": 44650
    },
    {
      "epoch": 115.50387596899225,
      "grad_norm": 0.24797233939170837,
      "learning_rate": 1.873385012919897e-05,
      "loss": 0.0223,
      "step": 44700
    },
    {
      "epoch": 115.63307493540051,
      "grad_norm": 0.29036766290664673,
      "learning_rate": 1.8195521102497847e-05,
      "loss": 0.0228,
      "step": 44750
    },
    {
      "epoch": 115.76227390180878,
      "grad_norm": 0.25313904881477356,
      "learning_rate": 1.765719207579673e-05,
      "loss": 0.0219,
      "step": 44800
    },
    {
      "epoch": 115.89147286821705,
      "grad_norm": 0.27569347620010376,
      "learning_rate": 1.7118863049095607e-05,
      "loss": 0.0223,
      "step": 44850
    },
    {
      "epoch": 116.02067183462532,
      "grad_norm": 0.22613263130187988,
      "learning_rate": 1.658053402239449e-05,
      "loss": 0.0221,
      "step": 44900
    },
    {
      "epoch": 116.14987080103359,
      "grad_norm": 0.2258797585964203,
      "learning_rate": 1.604220499569337e-05,
      "loss": 0.0218,
      "step": 44950
    },
    {
      "epoch": 116.27906976744185,
      "grad_norm": 0.25496187806129456,
      "learning_rate": 1.5503875968992248e-05,
      "loss": 0.0212,
      "step": 45000
    },
    {
      "epoch": 116.40826873385014,
      "grad_norm": 0.31300124526023865,
      "learning_rate": 1.4965546942291128e-05,
      "loss": 0.021,
      "step": 45050
    },
    {
      "epoch": 116.5374677002584,
      "grad_norm": 0.24364866316318512,
      "learning_rate": 1.442721791559001e-05,
      "loss": 0.0212,
      "step": 45100
    },
    {
      "epoch": 116.66666666666667,
      "grad_norm": 0.2792830169200897,
      "learning_rate": 1.3888888888888888e-05,
      "loss": 0.0218,
      "step": 45150
    },
    {
      "epoch": 116.79586563307494,
      "grad_norm": 0.28561463952064514,
      "learning_rate": 1.335055986218777e-05,
      "loss": 0.0215,
      "step": 45200
    },
    {
      "epoch": 116.9250645994832,
      "grad_norm": 0.2517456114292145,
      "learning_rate": 1.281223083548665e-05,
      "loss": 0.0209,
      "step": 45250
    },
    {
      "epoch": 117.05426356589147,
      "grad_norm": 0.2366306334733963,
      "learning_rate": 1.2273901808785531e-05,
      "loss": 0.0214,
      "step": 45300
    },
    {
      "epoch": 117.18346253229974,
      "grad_norm": 0.2552024722099304,
      "learning_rate": 1.173557278208441e-05,
      "loss": 0.0213,
      "step": 45350
    },
    {
      "epoch": 117.31266149870801,
      "grad_norm": 0.2651830017566681,
      "learning_rate": 1.119724375538329e-05,
      "loss": 0.0212,
      "step": 45400
    },
    {
      "epoch": 117.44186046511628,
      "grad_norm": 0.24631120264530182,
      "learning_rate": 1.065891472868217e-05,
      "loss": 0.0204,
      "step": 45450
    },
    {
      "epoch": 117.57105943152455,
      "grad_norm": 0.26830899715423584,
      "learning_rate": 1.0120585701981052e-05,
      "loss": 0.0206,
      "step": 45500
    },
    {
      "epoch": 117.70025839793281,
      "grad_norm": 0.2557155191898346,
      "learning_rate": 9.582256675279932e-06,
      "loss": 0.0209,
      "step": 45550
    },
    {
      "epoch": 117.82945736434108,
      "grad_norm": 0.24410898983478546,
      "learning_rate": 9.04392764857881e-06,
      "loss": 0.0209,
      "step": 45600
    },
    {
      "epoch": 117.95865633074935,
      "grad_norm": 0.2705914080142975,
      "learning_rate": 8.505598621877692e-06,
      "loss": 0.0211,
      "step": 45650
    },
    {
      "epoch": 118.08785529715762,
      "grad_norm": 0.23315739631652832,
      "learning_rate": 7.967269595176572e-06,
      "loss": 0.0201,
      "step": 45700
    },
    {
      "epoch": 118.21705426356588,
      "grad_norm": 0.2561619281768799,
      "learning_rate": 7.428940568475452e-06,
      "loss": 0.02,
      "step": 45750
    },
    {
      "epoch": 118.34625322997417,
      "grad_norm": 0.2205018848180771,
      "learning_rate": 6.890611541774333e-06,
      "loss": 0.0206,
      "step": 45800
    },
    {
      "epoch": 118.47545219638243,
      "grad_norm": 0.27165380120277405,
      "learning_rate": 6.352282515073214e-06,
      "loss": 0.0201,
      "step": 45850
    },
    {
      "epoch": 118.6046511627907,
      "grad_norm": 0.2685433328151703,
      "learning_rate": 5.813953488372093e-06,
      "loss": 0.0206,
      "step": 45900
    },
    {
      "epoch": 118.73385012919897,
      "grad_norm": 0.2667860984802246,
      "learning_rate": 5.275624461670973e-06,
      "loss": 0.0202,
      "step": 45950
    },
    {
      "epoch": 118.86304909560724,
      "grad_norm": 0.24058909714221954,
      "learning_rate": 4.737295434969853e-06,
      "loss": 0.0199,
      "step": 46000
    },
    {
      "epoch": 118.9922480620155,
      "grad_norm": 0.25530824065208435,
      "learning_rate": 4.198966408268734e-06,
      "loss": 0.0196,
      "step": 46050
    },
    {
      "epoch": 119.12144702842377,
      "grad_norm": 0.24846842885017395,
      "learning_rate": 3.6606373815676143e-06,
      "loss": 0.02,
      "step": 46100
    },
    {
      "epoch": 119.25064599483204,
      "grad_norm": 0.21992355585098267,
      "learning_rate": 3.1223083548664946e-06,
      "loss": 0.0201,
      "step": 46150
    },
    {
      "epoch": 119.37984496124031,
      "grad_norm": 0.2541758418083191,
      "learning_rate": 2.583979328165375e-06,
      "loss": 0.0197,
      "step": 46200
    },
    {
      "epoch": 119.50904392764858,
      "grad_norm": 0.22993811964988708,
      "learning_rate": 2.045650301464255e-06,
      "loss": 0.0198,
      "step": 46250
    },
    {
      "epoch": 119.63824289405684,
      "grad_norm": 0.2497793585062027,
      "learning_rate": 1.5073212747631352e-06,
      "loss": 0.0195,
      "step": 46300
    },
    {
      "epoch": 119.76744186046511,
      "grad_norm": 0.23531974852085114,
      "learning_rate": 9.689922480620155e-07,
      "loss": 0.0198,
      "step": 46350
    },
    {
      "epoch": 119.89664082687338,
      "grad_norm": 0.25819331407546997,
      "learning_rate": 4.306632213608958e-07,
      "loss": 0.0203,
      "step": 46400
    },
    {
      "epoch": 120.0,
      "step": 46440,
      "total_flos": 1.4536198324224e+17,
      "train_loss": 1.0919239408427226,
      "train_runtime": 17989.9631,
      "train_samples_per_second": 15.462,
      "train_steps_per_second": 2.581
    }
  ],
  "logging_steps": 50,
  "max_steps": 46440,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 120,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4536198324224e+17,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}
