{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9961389961389964,
  "eval_steps": 500,
  "global_step": 873,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003432003432003432,
      "grad_norm": 5.184005260467529,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 3.0339,
      "step": 1
    },
    {
      "epoch": 0.006864006864006864,
      "grad_norm": 5.359204292297363,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 3.0694,
      "step": 2
    },
    {
      "epoch": 0.010296010296010296,
      "grad_norm": 5.702766418457031,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 3.0138,
      "step": 3
    },
    {
      "epoch": 0.013728013728013728,
      "grad_norm": 7.7562665939331055,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 3.0998,
      "step": 4
    },
    {
      "epoch": 0.01716001716001716,
      "grad_norm": 26.37129783630371,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 3.1265,
      "step": 5
    },
    {
      "epoch": 0.02059202059202059,
      "grad_norm": 5.379796504974365,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 2.8296,
      "step": 6
    },
    {
      "epoch": 0.024024024024024024,
      "grad_norm": 4.507719993591309,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 3.0961,
      "step": 7
    },
    {
      "epoch": 0.027456027456027456,
      "grad_norm": 3.67179536819458,
      "learning_rate": 2.962962962962963e-05,
      "loss": 2.5984,
      "step": 8
    },
    {
      "epoch": 0.03088803088803089,
      "grad_norm": 3.1504886150360107,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 2.3912,
      "step": 9
    },
    {
      "epoch": 0.03432003432003432,
      "grad_norm": 3.434729814529419,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 2.5704,
      "step": 10
    },
    {
      "epoch": 0.037752037752037754,
      "grad_norm": 3.0129332542419434,
      "learning_rate": 4.074074074074074e-05,
      "loss": 2.1721,
      "step": 11
    },
    {
      "epoch": 0.04118404118404118,
      "grad_norm": 3.3500425815582275,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 2.2606,
      "step": 12
    },
    {
      "epoch": 0.04461604461604462,
      "grad_norm": 5.141391277313232,
      "learning_rate": 4.814814814814815e-05,
      "loss": 2.3098,
      "step": 13
    },
    {
      "epoch": 0.04804804804804805,
      "grad_norm": 3.4497439861297607,
      "learning_rate": 5.185185185185185e-05,
      "loss": 2.3478,
      "step": 14
    },
    {
      "epoch": 0.05148005148005148,
      "grad_norm": 2.9150047302246094,
      "learning_rate": 5.555555555555556e-05,
      "loss": 2.35,
      "step": 15
    },
    {
      "epoch": 0.05491205491205491,
      "grad_norm": 2.9752209186553955,
      "learning_rate": 5.925925925925926e-05,
      "loss": 2.0891,
      "step": 16
    },
    {
      "epoch": 0.05834405834405834,
      "grad_norm": 3.183112382888794,
      "learning_rate": 6.296296296296296e-05,
      "loss": 2.1868,
      "step": 17
    },
    {
      "epoch": 0.06177606177606178,
      "grad_norm": 2.7361626625061035,
      "learning_rate": 6.666666666666667e-05,
      "loss": 1.8375,
      "step": 18
    },
    {
      "epoch": 0.06520806520806521,
      "grad_norm": 3.534162998199463,
      "learning_rate": 7.037037037037038e-05,
      "loss": 1.8235,
      "step": 19
    },
    {
      "epoch": 0.06864006864006864,
      "grad_norm": 3.5269033908843994,
      "learning_rate": 7.407407407407407e-05,
      "loss": 2.1037,
      "step": 20
    },
    {
      "epoch": 0.07207207207207207,
      "grad_norm": 2.9290904998779297,
      "learning_rate": 7.777777777777778e-05,
      "loss": 2.1099,
      "step": 21
    },
    {
      "epoch": 0.07550407550407551,
      "grad_norm": 3.102985382080078,
      "learning_rate": 8.148148148148148e-05,
      "loss": 2.1633,
      "step": 22
    },
    {
      "epoch": 0.07893607893607893,
      "grad_norm": 4.01931095123291,
      "learning_rate": 8.518518518518518e-05,
      "loss": 1.9983,
      "step": 23
    },
    {
      "epoch": 0.08236808236808237,
      "grad_norm": 3.163924217224121,
      "learning_rate": 8.888888888888889e-05,
      "loss": 2.1446,
      "step": 24
    },
    {
      "epoch": 0.0858000858000858,
      "grad_norm": 3.011898994445801,
      "learning_rate": 9.25925925925926e-05,
      "loss": 2.0235,
      "step": 25
    },
    {
      "epoch": 0.08923208923208924,
      "grad_norm": 2.3570194244384766,
      "learning_rate": 9.62962962962963e-05,
      "loss": 2.0706,
      "step": 26
    },
    {
      "epoch": 0.09266409266409266,
      "grad_norm": 2.7415566444396973,
      "learning_rate": 0.0001,
      "loss": 2.0419,
      "step": 27
    },
    {
      "epoch": 0.0960960960960961,
      "grad_norm": 2.199080228805542,
      "learning_rate": 9.999965525456118e-05,
      "loss": 1.9461,
      "step": 28
    },
    {
      "epoch": 0.09952809952809953,
      "grad_norm": 3.0650007724761963,
      "learning_rate": 9.999862102299873e-05,
      "loss": 1.8553,
      "step": 29
    },
    {
      "epoch": 0.10296010296010295,
      "grad_norm": 2.48889422416687,
      "learning_rate": 9.999689731957449e-05,
      "loss": 1.8751,
      "step": 30
    },
    {
      "epoch": 0.10639210639210639,
      "grad_norm": 3.5070440769195557,
      "learning_rate": 9.999448416805801e-05,
      "loss": 1.7736,
      "step": 31
    },
    {
      "epoch": 0.10982410982410983,
      "grad_norm": 3.273362636566162,
      "learning_rate": 9.999138160172624e-05,
      "loss": 1.9243,
      "step": 32
    },
    {
      "epoch": 0.11325611325611326,
      "grad_norm": 2.8825366497039795,
      "learning_rate": 9.998758966336295e-05,
      "loss": 1.6635,
      "step": 33
    },
    {
      "epoch": 0.11668811668811668,
      "grad_norm": 2.4181392192840576,
      "learning_rate": 9.998310840525834e-05,
      "loss": 1.9687,
      "step": 34
    },
    {
      "epoch": 0.12012012012012012,
      "grad_norm": 2.4263453483581543,
      "learning_rate": 9.99779378892081e-05,
      "loss": 1.9676,
      "step": 35
    },
    {
      "epoch": 0.12355212355212356,
      "grad_norm": 4.394459247589111,
      "learning_rate": 9.997207818651274e-05,
      "loss": 1.9818,
      "step": 36
    },
    {
      "epoch": 0.12698412698412698,
      "grad_norm": 2.0204310417175293,
      "learning_rate": 9.996552937797645e-05,
      "loss": 1.9847,
      "step": 37
    },
    {
      "epoch": 0.13041613041613043,
      "grad_norm": 2.9429335594177246,
      "learning_rate": 9.995829155390612e-05,
      "loss": 2.0861,
      "step": 38
    },
    {
      "epoch": 0.13384813384813385,
      "grad_norm": 2.190267324447632,
      "learning_rate": 9.995036481411004e-05,
      "loss": 1.9002,
      "step": 39
    },
    {
      "epoch": 0.13728013728013727,
      "grad_norm": 1.987072229385376,
      "learning_rate": 9.994174926789648e-05,
      "loss": 1.8249,
      "step": 40
    },
    {
      "epoch": 0.14071214071214072,
      "grad_norm": 1.8482897281646729,
      "learning_rate": 9.993244503407226e-05,
      "loss": 1.9031,
      "step": 41
    },
    {
      "epoch": 0.14414414414414414,
      "grad_norm": 3.756895065307617,
      "learning_rate": 9.992245224094109e-05,
      "loss": 1.9235,
      "step": 42
    },
    {
      "epoch": 0.14757614757614756,
      "grad_norm": 2.6553876399993896,
      "learning_rate": 9.991177102630174e-05,
      "loss": 2.05,
      "step": 43
    },
    {
      "epoch": 0.15100815100815101,
      "grad_norm": 2.1707394123077393,
      "learning_rate": 9.99004015374462e-05,
      "loss": 2.011,
      "step": 44
    },
    {
      "epoch": 0.15444015444015444,
      "grad_norm": 2.711578130722046,
      "learning_rate": 9.988834393115767e-05,
      "loss": 1.9139,
      "step": 45
    },
    {
      "epoch": 0.15787215787215786,
      "grad_norm": 3.846721649169922,
      "learning_rate": 9.987559837370832e-05,
      "loss": 1.9054,
      "step": 46
    },
    {
      "epoch": 0.1613041613041613,
      "grad_norm": 5.9825119972229,
      "learning_rate": 9.986216504085708e-05,
      "loss": 2.1934,
      "step": 47
    },
    {
      "epoch": 0.16473616473616473,
      "grad_norm": 2.6636195182800293,
      "learning_rate": 9.984804411784716e-05,
      "loss": 1.8764,
      "step": 48
    },
    {
      "epoch": 0.16816816816816818,
      "grad_norm": 2.8059933185577393,
      "learning_rate": 9.983323579940351e-05,
      "loss": 1.6272,
      "step": 49
    },
    {
      "epoch": 0.1716001716001716,
      "grad_norm": 2.020521640777588,
      "learning_rate": 9.981774028973012e-05,
      "loss": 1.8913,
      "step": 50
    },
    {
      "epoch": 0.17503217503217502,
      "grad_norm": 2.5621821880340576,
      "learning_rate": 9.980155780250727e-05,
      "loss": 1.8054,
      "step": 51
    },
    {
      "epoch": 0.17846417846417847,
      "grad_norm": 2.6888129711151123,
      "learning_rate": 9.97846885608885e-05,
      "loss": 1.9112,
      "step": 52
    },
    {
      "epoch": 0.1818961818961819,
      "grad_norm": 2.0055980682373047,
      "learning_rate": 9.976713279749755e-05,
      "loss": 1.6869,
      "step": 53
    },
    {
      "epoch": 0.18532818532818532,
      "grad_norm": 4.626190662384033,
      "learning_rate": 9.974889075442521e-05,
      "loss": 1.7754,
      "step": 54
    },
    {
      "epoch": 0.18876018876018877,
      "grad_norm": 2.160983085632324,
      "learning_rate": 9.972996268322594e-05,
      "loss": 1.9216,
      "step": 55
    },
    {
      "epoch": 0.1921921921921922,
      "grad_norm": 2.3721227645874023,
      "learning_rate": 9.971034884491436e-05,
      "loss": 1.6964,
      "step": 56
    },
    {
      "epoch": 0.1956241956241956,
      "grad_norm": 2.282869577407837,
      "learning_rate": 9.969004950996175e-05,
      "loss": 1.9376,
      "step": 57
    },
    {
      "epoch": 0.19905619905619906,
      "grad_norm": 2.425886631011963,
      "learning_rate": 9.96690649582922e-05,
      "loss": 1.7808,
      "step": 58
    },
    {
      "epoch": 0.20248820248820248,
      "grad_norm": 2.961613655090332,
      "learning_rate": 9.96473954792789e-05,
      "loss": 1.98,
      "step": 59
    },
    {
      "epoch": 0.2059202059202059,
      "grad_norm": 3.089876174926758,
      "learning_rate": 9.962504137173997e-05,
      "loss": 1.9192,
      "step": 60
    },
    {
      "epoch": 0.20935220935220936,
      "grad_norm": 3.2221014499664307,
      "learning_rate": 9.96020029439345e-05,
      "loss": 1.9114,
      "step": 61
    },
    {
      "epoch": 0.21278421278421278,
      "grad_norm": 2.7439064979553223,
      "learning_rate": 9.957828051355817e-05,
      "loss": 2.0451,
      "step": 62
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 2.3849947452545166,
      "learning_rate": 9.9553874407739e-05,
      "loss": 2.074,
      "step": 63
    },
    {
      "epoch": 0.21964821964821965,
      "grad_norm": 2.4099457263946533,
      "learning_rate": 9.952878496303274e-05,
      "loss": 1.6153,
      "step": 64
    },
    {
      "epoch": 0.22308022308022307,
      "grad_norm": 2.1160531044006348,
      "learning_rate": 9.950301252541824e-05,
      "loss": 1.7211,
      "step": 65
    },
    {
      "epoch": 0.22651222651222652,
      "grad_norm": 2.4706003665924072,
      "learning_rate": 9.947655745029271e-05,
      "loss": 2.209,
      "step": 66
    },
    {
      "epoch": 0.22994422994422994,
      "grad_norm": 2.7614855766296387,
      "learning_rate": 9.94494201024668e-05,
      "loss": 1.9789,
      "step": 67
    },
    {
      "epoch": 0.23337623337623337,
      "grad_norm": 2.266435146331787,
      "learning_rate": 9.942160085615962e-05,
      "loss": 1.7431,
      "step": 68
    },
    {
      "epoch": 0.23680823680823682,
      "grad_norm": 2.1997110843658447,
      "learning_rate": 9.939310009499348e-05,
      "loss": 1.7625,
      "step": 69
    },
    {
      "epoch": 0.24024024024024024,
      "grad_norm": 2.339843511581421,
      "learning_rate": 9.936391821198867e-05,
      "loss": 2.098,
      "step": 70
    },
    {
      "epoch": 0.24367224367224366,
      "grad_norm": 2.2072062492370605,
      "learning_rate": 9.933405560955804e-05,
      "loss": 1.7918,
      "step": 71
    },
    {
      "epoch": 0.2471042471042471,
      "grad_norm": 2.847227096557617,
      "learning_rate": 9.930351269950143e-05,
      "loss": 1.7764,
      "step": 72
    },
    {
      "epoch": 0.25053625053625056,
      "grad_norm": 2.221370220184326,
      "learning_rate": 9.9272289903e-05,
      "loss": 1.8666,
      "step": 73
    },
    {
      "epoch": 0.25396825396825395,
      "grad_norm": 2.041595697402954,
      "learning_rate": 9.924038765061042e-05,
      "loss": 1.8921,
      "step": 74
    },
    {
      "epoch": 0.2574002574002574,
      "grad_norm": 3.089862823486328,
      "learning_rate": 9.92078063822589e-05,
      "loss": 1.8018,
      "step": 75
    },
    {
      "epoch": 0.26083226083226085,
      "grad_norm": 2.2386691570281982,
      "learning_rate": 9.917454654723523e-05,
      "loss": 1.7405,
      "step": 76
    },
    {
      "epoch": 0.26426426426426425,
      "grad_norm": 2.6134860515594482,
      "learning_rate": 9.914060860418643e-05,
      "loss": 1.9033,
      "step": 77
    },
    {
      "epoch": 0.2676962676962677,
      "grad_norm": 2.118520498275757,
      "learning_rate": 9.910599302111057e-05,
      "loss": 1.8391,
      "step": 78
    },
    {
      "epoch": 0.27112827112827115,
      "grad_norm": 2.1773688793182373,
      "learning_rate": 9.907070027535021e-05,
      "loss": 2.0907,
      "step": 79
    },
    {
      "epoch": 0.27456027456027454,
      "grad_norm": 2.3599183559417725,
      "learning_rate": 9.903473085358588e-05,
      "loss": 1.6063,
      "step": 80
    },
    {
      "epoch": 0.277992277992278,
      "grad_norm": 2.6991305351257324,
      "learning_rate": 9.899808525182935e-05,
      "loss": 1.9409,
      "step": 81
    },
    {
      "epoch": 0.28142428142428144,
      "grad_norm": 2.201826572418213,
      "learning_rate": 9.896076397541676e-05,
      "loss": 1.9585,
      "step": 82
    },
    {
      "epoch": 0.28485628485628484,
      "grad_norm": 2.1751887798309326,
      "learning_rate": 9.892276753900174e-05,
      "loss": 2.0857,
      "step": 83
    },
    {
      "epoch": 0.2882882882882883,
      "grad_norm": 2.5154078006744385,
      "learning_rate": 9.888409646654817e-05,
      "loss": 1.8994,
      "step": 84
    },
    {
      "epoch": 0.29172029172029174,
      "grad_norm": 2.3449671268463135,
      "learning_rate": 9.884475129132312e-05,
      "loss": 1.7481,
      "step": 85
    },
    {
      "epoch": 0.29515229515229513,
      "grad_norm": 1.8997211456298828,
      "learning_rate": 9.880473255588936e-05,
      "loss": 1.863,
      "step": 86
    },
    {
      "epoch": 0.2985842985842986,
      "grad_norm": 2.9354772567749023,
      "learning_rate": 9.876404081209796e-05,
      "loss": 1.539,
      "step": 87
    },
    {
      "epoch": 0.30201630201630203,
      "grad_norm": 2.5494043827056885,
      "learning_rate": 9.872267662108064e-05,
      "loss": 1.9396,
      "step": 88
    },
    {
      "epoch": 0.3054483054483054,
      "grad_norm": 2.2717385292053223,
      "learning_rate": 9.868064055324204e-05,
      "loss": 1.9746,
      "step": 89
    },
    {
      "epoch": 0.3088803088803089,
      "grad_norm": 2.7317569255828857,
      "learning_rate": 9.863793318825186e-05,
      "loss": 1.8214,
      "step": 90
    },
    {
      "epoch": 0.3123123123123123,
      "grad_norm": 2.5960466861724854,
      "learning_rate": 9.85945551150369e-05,
      "loss": 1.8462,
      "step": 91
    },
    {
      "epoch": 0.3157443157443157,
      "grad_norm": 2.4519548416137695,
      "learning_rate": 9.855050693177286e-05,
      "loss": 1.9058,
      "step": 92
    },
    {
      "epoch": 0.31917631917631917,
      "grad_norm": 2.4833014011383057,
      "learning_rate": 9.850578924587614e-05,
      "loss": 1.7343,
      "step": 93
    },
    {
      "epoch": 0.3226083226083226,
      "grad_norm": 2.637434720993042,
      "learning_rate": 9.846040267399548e-05,
      "loss": 1.8474,
      "step": 94
    },
    {
      "epoch": 0.32604032604032607,
      "grad_norm": 2.984177827835083,
      "learning_rate": 9.841434784200342e-05,
      "loss": 1.9982,
      "step": 95
    },
    {
      "epoch": 0.32947232947232946,
      "grad_norm": 2.6004798412323,
      "learning_rate": 9.83676253849877e-05,
      "loss": 1.8097,
      "step": 96
    },
    {
      "epoch": 0.3329043329043329,
      "grad_norm": 2.4632151126861572,
      "learning_rate": 9.832023594724246e-05,
      "loss": 1.9271,
      "step": 97
    },
    {
      "epoch": 0.33633633633633636,
      "grad_norm": 3.0951144695281982,
      "learning_rate": 9.827218018225945e-05,
      "loss": 2.1339,
      "step": 98
    },
    {
      "epoch": 0.33976833976833976,
      "grad_norm": 2.6406679153442383,
      "learning_rate": 9.822345875271883e-05,
      "loss": 1.7577,
      "step": 99
    },
    {
      "epoch": 0.3432003432003432,
      "grad_norm": 3.040127754211426,
      "learning_rate": 9.817407233048028e-05,
      "loss": 1.7221,
      "step": 100
    },
    {
      "epoch": 0.34663234663234666,
      "grad_norm": 2.2173871994018555,
      "learning_rate": 9.812402159657353e-05,
      "loss": 1.8399,
      "step": 101
    },
    {
      "epoch": 0.35006435006435005,
      "grad_norm": 2.456836700439453,
      "learning_rate": 9.807330724118905e-05,
      "loss": 1.8107,
      "step": 102
    },
    {
      "epoch": 0.3534963534963535,
      "grad_norm": 2.338442802429199,
      "learning_rate": 9.802192996366857e-05,
      "loss": 1.6887,
      "step": 103
    },
    {
      "epoch": 0.35692835692835695,
      "grad_norm": 3.318669080734253,
      "learning_rate": 9.796989047249538e-05,
      "loss": 1.9335,
      "step": 104
    },
    {
      "epoch": 0.36036036036036034,
      "grad_norm": 5.614456653594971,
      "learning_rate": 9.791718948528458e-05,
      "loss": 2.1305,
      "step": 105
    },
    {
      "epoch": 0.3637923637923638,
      "grad_norm": 2.3048205375671387,
      "learning_rate": 9.786382772877313e-05,
      "loss": 1.8739,
      "step": 106
    },
    {
      "epoch": 0.36722436722436724,
      "grad_norm": 2.909367799758911,
      "learning_rate": 9.780980593880992e-05,
      "loss": 1.9081,
      "step": 107
    },
    {
      "epoch": 0.37065637065637064,
      "grad_norm": 2.2507643699645996,
      "learning_rate": 9.775512486034563e-05,
      "loss": 1.8024,
      "step": 108
    },
    {
      "epoch": 0.3740883740883741,
      "grad_norm": 2.9189658164978027,
      "learning_rate": 9.769978524742229e-05,
      "loss": 1.6697,
      "step": 109
    },
    {
      "epoch": 0.37752037752037754,
      "grad_norm": 2.4188058376312256,
      "learning_rate": 9.764378786316311e-05,
      "loss": 1.7351,
      "step": 110
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 2.837905168533325,
      "learning_rate": 9.758713347976179e-05,
      "loss": 1.8429,
      "step": 111
    },
    {
      "epoch": 0.3843843843843844,
      "grad_norm": 2.2551000118255615,
      "learning_rate": 9.752982287847193e-05,
      "loss": 2.0456,
      "step": 112
    },
    {
      "epoch": 0.38781638781638783,
      "grad_norm": 2.2400460243225098,
      "learning_rate": 9.747185684959626e-05,
      "loss": 1.8121,
      "step": 113
    },
    {
      "epoch": 0.3912483912483912,
      "grad_norm": 2.452930212020874,
      "learning_rate": 9.741323619247576e-05,
      "loss": 1.8838,
      "step": 114
    },
    {
      "epoch": 0.3946803946803947,
      "grad_norm": 2.1023879051208496,
      "learning_rate": 9.735396171547859e-05,
      "loss": 1.9713,
      "step": 115
    },
    {
      "epoch": 0.3981123981123981,
      "grad_norm": 2.449895143508911,
      "learning_rate": 9.7294034235989e-05,
      "loss": 1.9496,
      "step": 116
    },
    {
      "epoch": 0.4015444015444015,
      "grad_norm": 2.7928390502929688,
      "learning_rate": 9.723345458039594e-05,
      "loss": 1.802,
      "step": 117
    },
    {
      "epoch": 0.40497640497640497,
      "grad_norm": 2.20778489112854,
      "learning_rate": 9.717222358408187e-05,
      "loss": 1.6901,
      "step": 118
    },
    {
      "epoch": 0.4084084084084084,
      "grad_norm": 1.8358359336853027,
      "learning_rate": 9.711034209141101e-05,
      "loss": 1.9301,
      "step": 119
    },
    {
      "epoch": 0.4118404118404118,
      "grad_norm": 9.378925323486328,
      "learning_rate": 9.704781095571788e-05,
      "loss": 2.0087,
      "step": 120
    },
    {
      "epoch": 0.41527241527241526,
      "grad_norm": 2.1176841259002686,
      "learning_rate": 9.698463103929542e-05,
      "loss": 1.8024,
      "step": 121
    },
    {
      "epoch": 0.4187044187044187,
      "grad_norm": 3.427274465560913,
      "learning_rate": 9.692080321338317e-05,
      "loss": 1.9796,
      "step": 122
    },
    {
      "epoch": 0.42213642213642216,
      "grad_norm": 3.1508138179779053,
      "learning_rate": 9.685632835815518e-05,
      "loss": 1.8916,
      "step": 123
    },
    {
      "epoch": 0.42556842556842556,
      "grad_norm": 2.2403833866119385,
      "learning_rate": 9.679120736270795e-05,
      "loss": 1.8939,
      "step": 124
    },
    {
      "epoch": 0.429000429000429,
      "grad_norm": 2.0978407859802246,
      "learning_rate": 9.672544112504813e-05,
      "loss": 1.7539,
      "step": 125
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 3.302398681640625,
      "learning_rate": 9.665903055208014e-05,
      "loss": 1.769,
      "step": 126
    },
    {
      "epoch": 0.43586443586443585,
      "grad_norm": 1.6830466985702515,
      "learning_rate": 9.659197655959365e-05,
      "loss": 1.3232,
      "step": 127
    },
    {
      "epoch": 0.4392964392964393,
      "grad_norm": 2.4156012535095215,
      "learning_rate": 9.6524280072251e-05,
      "loss": 1.8686,
      "step": 128
    },
    {
      "epoch": 0.44272844272844275,
      "grad_norm": 2.547417163848877,
      "learning_rate": 9.645594202357439e-05,
      "loss": 1.9916,
      "step": 129
    },
    {
      "epoch": 0.44616044616044614,
      "grad_norm": 2.9648120403289795,
      "learning_rate": 9.638696335593304e-05,
      "loss": 1.9991,
      "step": 130
    },
    {
      "epoch": 0.4495924495924496,
      "grad_norm": 5.339567184448242,
      "learning_rate": 9.63173450205302e-05,
      "loss": 1.5526,
      "step": 131
    },
    {
      "epoch": 0.45302445302445304,
      "grad_norm": 4.767643451690674,
      "learning_rate": 9.624708797739001e-05,
      "loss": 1.6808,
      "step": 132
    },
    {
      "epoch": 0.45645645645645644,
      "grad_norm": 2.9505746364593506,
      "learning_rate": 9.617619319534428e-05,
      "loss": 1.9059,
      "step": 133
    },
    {
      "epoch": 0.4598884598884599,
      "grad_norm": 3.120511531829834,
      "learning_rate": 9.610466165201912e-05,
      "loss": 1.8282,
      "step": 134
    },
    {
      "epoch": 0.46332046332046334,
      "grad_norm": 1.9741610288619995,
      "learning_rate": 9.603249433382144e-05,
      "loss": 1.7602,
      "step": 135
    },
    {
      "epoch": 0.46675246675246673,
      "grad_norm": 2.086529493331909,
      "learning_rate": 9.595969223592542e-05,
      "loss": 1.672,
      "step": 136
    },
    {
      "epoch": 0.4701844701844702,
      "grad_norm": 2.10164475440979,
      "learning_rate": 9.58862563622587e-05,
      "loss": 1.594,
      "step": 137
    },
    {
      "epoch": 0.47361647361647363,
      "grad_norm": 2.18156099319458,
      "learning_rate": 9.581218772548859e-05,
      "loss": 1.5188,
      "step": 138
    },
    {
      "epoch": 0.477048477048477,
      "grad_norm": 3.0884039402008057,
      "learning_rate": 9.573748734700805e-05,
      "loss": 1.97,
      "step": 139
    },
    {
      "epoch": 0.4804804804804805,
      "grad_norm": 3.0098655223846436,
      "learning_rate": 9.566215625692167e-05,
      "loss": 1.8118,
      "step": 140
    },
    {
      "epoch": 0.4839124839124839,
      "grad_norm": 2.358550548553467,
      "learning_rate": 9.558619549403147e-05,
      "loss": 2.0236,
      "step": 141
    },
    {
      "epoch": 0.4873444873444873,
      "grad_norm": 2.390237331390381,
      "learning_rate": 9.55096061058225e-05,
      "loss": 1.699,
      "step": 142
    },
    {
      "epoch": 0.49077649077649077,
      "grad_norm": 2.140711545944214,
      "learning_rate": 9.543238914844843e-05,
      "loss": 1.8893,
      "step": 143
    },
    {
      "epoch": 0.4942084942084942,
      "grad_norm": 4.973402976989746,
      "learning_rate": 9.535454568671704e-05,
      "loss": 1.7963,
      "step": 144
    },
    {
      "epoch": 0.4976404976404976,
      "grad_norm": 2.0261099338531494,
      "learning_rate": 9.527607679407545e-05,
      "loss": 1.9501,
      "step": 145
    },
    {
      "epoch": 0.5010725010725011,
      "grad_norm": 3.0895743370056152,
      "learning_rate": 9.519698355259538e-05,
      "loss": 1.889,
      "step": 146
    },
    {
      "epoch": 0.5045045045045045,
      "grad_norm": 5.1937665939331055,
      "learning_rate": 9.511726705295817e-05,
      "loss": 1.8727,
      "step": 147
    },
    {
      "epoch": 0.5079365079365079,
      "grad_norm": 2.1922714710235596,
      "learning_rate": 9.503692839443987e-05,
      "loss": 1.7979,
      "step": 148
    },
    {
      "epoch": 0.5113685113685114,
      "grad_norm": 2.2177042961120605,
      "learning_rate": 9.495596868489588e-05,
      "loss": 1.8349,
      "step": 149
    },
    {
      "epoch": 0.5148005148005148,
      "grad_norm": 2.0511586666107178,
      "learning_rate": 9.48743890407458e-05,
      "loss": 1.9304,
      "step": 150
    },
    {
      "epoch": 0.5182325182325183,
      "grad_norm": 2.397942543029785,
      "learning_rate": 9.479219058695809e-05,
      "loss": 1.8319,
      "step": 151
    },
    {
      "epoch": 0.5216645216645217,
      "grad_norm": 2.496051788330078,
      "learning_rate": 9.470937445703441e-05,
      "loss": 1.6197,
      "step": 152
    },
    {
      "epoch": 0.525096525096525,
      "grad_norm": 2.660151481628418,
      "learning_rate": 9.462594179299406e-05,
      "loss": 1.7991,
      "step": 153
    },
    {
      "epoch": 0.5285285285285285,
      "grad_norm": 2.997344493865967,
      "learning_rate": 9.45418937453583e-05,
      "loss": 1.9411,
      "step": 154
    },
    {
      "epoch": 0.531960531960532,
      "grad_norm": 2.129572629928589,
      "learning_rate": 9.445723147313433e-05,
      "loss": 1.6132,
      "step": 155
    },
    {
      "epoch": 0.5353925353925354,
      "grad_norm": 3.1659598350524902,
      "learning_rate": 9.437195614379947e-05,
      "loss": 1.5485,
      "step": 156
    },
    {
      "epoch": 0.5388245388245388,
      "grad_norm": 2.9549195766448975,
      "learning_rate": 9.428606893328493e-05,
      "loss": 1.8277,
      "step": 157
    },
    {
      "epoch": 0.5422565422565423,
      "grad_norm": 2.9015045166015625,
      "learning_rate": 9.419957102595968e-05,
      "loss": 1.9793,
      "step": 158
    },
    {
      "epoch": 0.5456885456885456,
      "grad_norm": 2.1800100803375244,
      "learning_rate": 9.41124636146141e-05,
      "loss": 1.7755,
      "step": 159
    },
    {
      "epoch": 0.5491205491205491,
      "grad_norm": 2.4357590675354004,
      "learning_rate": 9.402474790044346e-05,
      "loss": 1.8315,
      "step": 160
    },
    {
      "epoch": 0.5525525525525525,
      "grad_norm": 3.1982839107513428,
      "learning_rate": 9.393642509303148e-05,
      "loss": 2.0192,
      "step": 161
    },
    {
      "epoch": 0.555984555984556,
      "grad_norm": 2.481682777404785,
      "learning_rate": 9.384749641033359e-05,
      "loss": 1.8774,
      "step": 162
    },
    {
      "epoch": 0.5594165594165594,
      "grad_norm": 2.147491455078125,
      "learning_rate": 9.375796307866002e-05,
      "loss": 1.7033,
      "step": 163
    },
    {
      "epoch": 0.5628485628485629,
      "grad_norm": 10.794540405273438,
      "learning_rate": 9.366782633265915e-05,
      "loss": 1.6162,
      "step": 164
    },
    {
      "epoch": 0.5662805662805663,
      "grad_norm": 2.4288411140441895,
      "learning_rate": 9.357708741530025e-05,
      "loss": 1.5756,
      "step": 165
    },
    {
      "epoch": 0.5697125697125697,
      "grad_norm": 2.1308836936950684,
      "learning_rate": 9.348574757785642e-05,
      "loss": 1.8761,
      "step": 166
    },
    {
      "epoch": 0.5731445731445731,
      "grad_norm": 2.852059841156006,
      "learning_rate": 9.339380807988734e-05,
      "loss": 1.7468,
      "step": 167
    },
    {
      "epoch": 0.5765765765765766,
      "grad_norm": 3.1816141605377197,
      "learning_rate": 9.330127018922194e-05,
      "loss": 1.9141,
      "step": 168
    },
    {
      "epoch": 0.58000858000858,
      "grad_norm": 4.14068078994751,
      "learning_rate": 9.320813518194084e-05,
      "loss": 1.9244,
      "step": 169
    },
    {
      "epoch": 0.5834405834405835,
      "grad_norm": 2.571298360824585,
      "learning_rate": 9.311440434235878e-05,
      "loss": 1.8695,
      "step": 170
    },
    {
      "epoch": 0.5868725868725869,
      "grad_norm": 2.5125808715820312,
      "learning_rate": 9.302007896300698e-05,
      "loss": 1.8694,
      "step": 171
    },
    {
      "epoch": 0.5903045903045903,
      "grad_norm": 5.100837230682373,
      "learning_rate": 9.292516034461516e-05,
      "loss": 1.8425,
      "step": 172
    },
    {
      "epoch": 0.5937365937365937,
      "grad_norm": 2.336090087890625,
      "learning_rate": 9.282964979609379e-05,
      "loss": 1.508,
      "step": 173
    },
    {
      "epoch": 0.5971685971685972,
      "grad_norm": 2.5418477058410645,
      "learning_rate": 9.273354863451589e-05,
      "loss": 1.7451,
      "step": 174
    },
    {
      "epoch": 0.6006006006006006,
      "grad_norm": 2.772040367126465,
      "learning_rate": 9.263685818509895e-05,
      "loss": 1.8512,
      "step": 175
    },
    {
      "epoch": 0.6040326040326041,
      "grad_norm": 2.528163194656372,
      "learning_rate": 9.253957978118663e-05,
      "loss": 1.7668,
      "step": 176
    },
    {
      "epoch": 0.6074646074646075,
      "grad_norm": 4.159308433532715,
      "learning_rate": 9.244171476423037e-05,
      "loss": 1.9209,
      "step": 177
    },
    {
      "epoch": 0.6108966108966108,
      "grad_norm": 3.254849433898926,
      "learning_rate": 9.234326448377088e-05,
      "loss": 1.8634,
      "step": 178
    },
    {
      "epoch": 0.6143286143286143,
      "grad_norm": 4.252657890319824,
      "learning_rate": 9.22442302974196e-05,
      "loss": 1.7258,
      "step": 179
    },
    {
      "epoch": 0.6177606177606177,
      "grad_norm": 2.296783208847046,
      "learning_rate": 9.214461357083985e-05,
      "loss": 1.5565,
      "step": 180
    },
    {
      "epoch": 0.6211926211926212,
      "grad_norm": 2.672006130218506,
      "learning_rate": 9.204441567772816e-05,
      "loss": 1.8712,
      "step": 181
    },
    {
      "epoch": 0.6246246246246246,
      "grad_norm": 2.5030322074890137,
      "learning_rate": 9.194363799979516e-05,
      "loss": 1.9006,
      "step": 182
    },
    {
      "epoch": 0.6280566280566281,
      "grad_norm": 2.64009428024292,
      "learning_rate": 9.184228192674667e-05,
      "loss": 2.0076,
      "step": 183
    },
    {
      "epoch": 0.6314886314886314,
      "grad_norm": 2.7804088592529297,
      "learning_rate": 9.17403488562644e-05,
      "loss": 1.7026,
      "step": 184
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 2.835517406463623,
      "learning_rate": 9.163784019398684e-05,
      "loss": 1.6777,
      "step": 185
    },
    {
      "epoch": 0.6383526383526383,
      "grad_norm": 3.8086190223693848,
      "learning_rate": 9.153475735348973e-05,
      "loss": 1.7653,
      "step": 186
    },
    {
      "epoch": 0.6417846417846418,
      "grad_norm": 2.2626163959503174,
      "learning_rate": 9.14311017562666e-05,
      "loss": 1.6602,
      "step": 187
    },
    {
      "epoch": 0.6452166452166452,
      "grad_norm": 6.700633525848389,
      "learning_rate": 9.13268748317093e-05,
      "loss": 1.7544,
      "step": 188
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 2.1161675453186035,
      "learning_rate": 9.122207801708802e-05,
      "loss": 1.6338,
      "step": 189
    },
    {
      "epoch": 0.6520806520806521,
      "grad_norm": 2.7198283672332764,
      "learning_rate": 9.111671275753175e-05,
      "loss": 1.6245,
      "step": 190
    },
    {
      "epoch": 0.6555126555126555,
      "grad_norm": 3.4131920337677,
      "learning_rate": 9.101078050600822e-05,
      "loss": 2.2857,
      "step": 191
    },
    {
      "epoch": 0.6589446589446589,
      "grad_norm": 2.328498601913452,
      "learning_rate": 9.09042827233038e-05,
      "loss": 1.6268,
      "step": 192
    },
    {
      "epoch": 0.6623766623766624,
      "grad_norm": 2.212374448776245,
      "learning_rate": 9.079722087800351e-05,
      "loss": 1.6387,
      "step": 193
    },
    {
      "epoch": 0.6658086658086658,
      "grad_norm": 2.673947334289551,
      "learning_rate": 9.068959644647068e-05,
      "loss": 1.6648,
      "step": 194
    },
    {
      "epoch": 0.6692406692406693,
      "grad_norm": 9.229345321655273,
      "learning_rate": 9.058141091282656e-05,
      "loss": 1.7526,
      "step": 195
    },
    {
      "epoch": 0.6726726726726727,
      "grad_norm": 2.4878323078155518,
      "learning_rate": 9.047266576892992e-05,
      "loss": 1.9867,
      "step": 196
    },
    {
      "epoch": 0.6761046761046761,
      "grad_norm": 2.2111189365386963,
      "learning_rate": 9.036336251435648e-05,
      "loss": 1.8327,
      "step": 197
    },
    {
      "epoch": 0.6795366795366795,
      "grad_norm": 4.265904903411865,
      "learning_rate": 9.025350265637815e-05,
      "loss": 1.7825,
      "step": 198
    },
    {
      "epoch": 0.682968682968683,
      "grad_norm": 2.7483103275299072,
      "learning_rate": 9.014308770994236e-05,
      "loss": 1.847,
      "step": 199
    },
    {
      "epoch": 0.6864006864006864,
      "grad_norm": 2.3553097248077393,
      "learning_rate": 9.003211919765103e-05,
      "loss": 1.8932,
      "step": 200
    },
    {
      "epoch": 0.6898326898326899,
      "grad_norm": 2.2095086574554443,
      "learning_rate": 8.992059864973972e-05,
      "loss": 1.708,
      "step": 201
    },
    {
      "epoch": 0.6932646932646933,
      "grad_norm": 1.9508806467056274,
      "learning_rate": 8.980852760405646e-05,
      "loss": 1.7148,
      "step": 202
    },
    {
      "epoch": 0.6966966966966966,
      "grad_norm": 8.628199577331543,
      "learning_rate": 8.96959076060405e-05,
      "loss": 1.8225,
      "step": 203
    },
    {
      "epoch": 0.7001287001287001,
      "grad_norm": 2.6558032035827637,
      "learning_rate": 8.958274020870107e-05,
      "loss": 1.7184,
      "step": 204
    },
    {
      "epoch": 0.7035607035607035,
      "grad_norm": 3.4817161560058594,
      "learning_rate": 8.946902697259594e-05,
      "loss": 1.7698,
      "step": 205
    },
    {
      "epoch": 0.706992706992707,
      "grad_norm": 2.168901205062866,
      "learning_rate": 8.935476946580987e-05,
      "loss": 1.9039,
      "step": 206
    },
    {
      "epoch": 0.7104247104247104,
      "grad_norm": 2.858168601989746,
      "learning_rate": 8.923996926393305e-05,
      "loss": 1.888,
      "step": 207
    },
    {
      "epoch": 0.7138567138567139,
      "grad_norm": 2.3486440181732178,
      "learning_rate": 8.912462795003932e-05,
      "loss": 1.4931,
      "step": 208
    },
    {
      "epoch": 0.7172887172887172,
      "grad_norm": 2.0562994480133057,
      "learning_rate": 8.900874711466435e-05,
      "loss": 1.7112,
      "step": 209
    },
    {
      "epoch": 0.7207207207207207,
      "grad_norm": 3.2924375534057617,
      "learning_rate": 8.889232835578371e-05,
      "loss": 1.5558,
      "step": 210
    },
    {
      "epoch": 0.7241527241527241,
      "grad_norm": 2.7133419513702393,
      "learning_rate": 8.877537327879087e-05,
      "loss": 1.6954,
      "step": 211
    },
    {
      "epoch": 0.7275847275847276,
      "grad_norm": 5.117989540100098,
      "learning_rate": 8.865788349647496e-05,
      "loss": 1.6909,
      "step": 212
    },
    {
      "epoch": 0.731016731016731,
      "grad_norm": 2.2281339168548584,
      "learning_rate": 8.853986062899868e-05,
      "loss": 2.063,
      "step": 213
    },
    {
      "epoch": 0.7344487344487345,
      "grad_norm": 2.1984591484069824,
      "learning_rate": 8.842130630387582e-05,
      "loss": 1.8716,
      "step": 214
    },
    {
      "epoch": 0.7378807378807379,
      "grad_norm": 2.3471291065216064,
      "learning_rate": 8.83022221559489e-05,
      "loss": 1.7861,
      "step": 215
    },
    {
      "epoch": 0.7413127413127413,
      "grad_norm": 2.002487897872925,
      "learning_rate": 8.818260982736661e-05,
      "loss": 1.8193,
      "step": 216
    },
    {
      "epoch": 0.7447447447447447,
      "grad_norm": 2.1225132942199707,
      "learning_rate": 8.80624709675611e-05,
      "loss": 1.774,
      "step": 217
    },
    {
      "epoch": 0.7481767481767482,
      "grad_norm": 2.483532190322876,
      "learning_rate": 8.794180723322536e-05,
      "loss": 1.746,
      "step": 218
    },
    {
      "epoch": 0.7516087516087516,
      "grad_norm": 1.921994686126709,
      "learning_rate": 8.782062028829028e-05,
      "loss": 1.7584,
      "step": 219
    },
    {
      "epoch": 0.7550407550407551,
      "grad_norm": 7.183517932891846,
      "learning_rate": 8.769891180390167e-05,
      "loss": 1.7469,
      "step": 220
    },
    {
      "epoch": 0.7584727584727585,
      "grad_norm": 2.710439920425415,
      "learning_rate": 8.757668345839738e-05,
      "loss": 1.6966,
      "step": 221
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 3.2819042205810547,
      "learning_rate": 8.745393693728395e-05,
      "loss": 1.699,
      "step": 222
    },
    {
      "epoch": 0.7653367653367653,
      "grad_norm": 2.411834478378296,
      "learning_rate": 8.733067393321355e-05,
      "loss": 1.9387,
      "step": 223
    },
    {
      "epoch": 0.7687687687687688,
      "grad_norm": 2.950380325317383,
      "learning_rate": 8.720689614596049e-05,
      "loss": 1.758,
      "step": 224
    },
    {
      "epoch": 0.7722007722007722,
      "grad_norm": 2.9294228553771973,
      "learning_rate": 8.708260528239788e-05,
      "loss": 1.7489,
      "step": 225
    },
    {
      "epoch": 0.7756327756327757,
      "grad_norm": 2.321061134338379,
      "learning_rate": 8.695780305647405e-05,
      "loss": 1.7914,
      "step": 226
    },
    {
      "epoch": 0.7790647790647791,
      "grad_norm": 2.7354109287261963,
      "learning_rate": 8.683249118918894e-05,
      "loss": 2.0925,
      "step": 227
    },
    {
      "epoch": 0.7824967824967825,
      "grad_norm": 2.8662607669830322,
      "learning_rate": 8.670667140857032e-05,
      "loss": 1.8967,
      "step": 228
    },
    {
      "epoch": 0.7859287859287859,
      "grad_norm": 3.0616633892059326,
      "learning_rate": 8.658034544965003e-05,
      "loss": 1.7685,
      "step": 229
    },
    {
      "epoch": 0.7893607893607893,
      "grad_norm": 2.5298497676849365,
      "learning_rate": 8.645351505443997e-05,
      "loss": 2.117,
      "step": 230
    },
    {
      "epoch": 0.7927927927927928,
      "grad_norm": 2.8021018505096436,
      "learning_rate": 8.632618197190816e-05,
      "loss": 1.6865,
      "step": 231
    },
    {
      "epoch": 0.7962247962247962,
      "grad_norm": 6.0556511878967285,
      "learning_rate": 8.619834795795458e-05,
      "loss": 1.5834,
      "step": 232
    },
    {
      "epoch": 0.7996567996567997,
      "grad_norm": 5.122035503387451,
      "learning_rate": 8.607001477538696e-05,
      "loss": 1.9057,
      "step": 233
    },
    {
      "epoch": 0.803088803088803,
      "grad_norm": 2.636565685272217,
      "learning_rate": 8.594118419389647e-05,
      "loss": 1.74,
      "step": 234
    },
    {
      "epoch": 0.8065208065208065,
      "grad_norm": 2.3524961471557617,
      "learning_rate": 8.581185799003333e-05,
      "loss": 1.8237,
      "step": 235
    },
    {
      "epoch": 0.8099528099528099,
      "grad_norm": 2.5847346782684326,
      "learning_rate": 8.568203794718229e-05,
      "loss": 1.8233,
      "step": 236
    },
    {
      "epoch": 0.8133848133848134,
      "grad_norm": 2.6022896766662598,
      "learning_rate": 8.555172585553805e-05,
      "loss": 1.842,
      "step": 237
    },
    {
      "epoch": 0.8168168168168168,
      "grad_norm": 2.00921368598938,
      "learning_rate": 8.542092351208058e-05,
      "loss": 1.5684,
      "step": 238
    },
    {
      "epoch": 0.8202488202488203,
      "grad_norm": 2.348506212234497,
      "learning_rate": 8.528963272055035e-05,
      "loss": 2.035,
      "step": 239
    },
    {
      "epoch": 0.8236808236808236,
      "grad_norm": 2.172614336013794,
      "learning_rate": 8.515785529142339e-05,
      "loss": 1.7046,
      "step": 240
    },
    {
      "epoch": 0.8271128271128271,
      "grad_norm": 2.242969036102295,
      "learning_rate": 8.502559304188644e-05,
      "loss": 1.7894,
      "step": 241
    },
    {
      "epoch": 0.8305448305448305,
      "grad_norm": 2.3041832447052,
      "learning_rate": 8.489284779581178e-05,
      "loss": 1.7618,
      "step": 242
    },
    {
      "epoch": 0.833976833976834,
      "grad_norm": 2.7007389068603516,
      "learning_rate": 8.475962138373213e-05,
      "loss": 1.8749,
      "step": 243
    },
    {
      "epoch": 0.8374088374088374,
      "grad_norm": 2.5166304111480713,
      "learning_rate": 8.462591564281539e-05,
      "loss": 1.7654,
      "step": 244
    },
    {
      "epoch": 0.8408408408408409,
      "grad_norm": 3.481743574142456,
      "learning_rate": 8.449173241683934e-05,
      "loss": 1.6174,
      "step": 245
    },
    {
      "epoch": 0.8442728442728443,
      "grad_norm": 4.370404243469238,
      "learning_rate": 8.43570735561662e-05,
      "loss": 1.8078,
      "step": 246
    },
    {
      "epoch": 0.8477048477048477,
      "grad_norm": 1.8009841442108154,
      "learning_rate": 8.422194091771709e-05,
      "loss": 2.0142,
      "step": 247
    },
    {
      "epoch": 0.8511368511368511,
      "grad_norm": 6.226415157318115,
      "learning_rate": 8.408633636494643e-05,
      "loss": 1.7789,
      "step": 248
    },
    {
      "epoch": 0.8545688545688546,
      "grad_norm": 2.1044347286224365,
      "learning_rate": 8.395026176781627e-05,
      "loss": 1.609,
      "step": 249
    },
    {
      "epoch": 0.858000858000858,
      "grad_norm": 2.2350592613220215,
      "learning_rate": 8.381371900277045e-05,
      "loss": 1.5779,
      "step": 250
    },
    {
      "epoch": 0.8614328614328615,
      "grad_norm": 2.8647027015686035,
      "learning_rate": 8.367670995270882e-05,
      "loss": 1.6507,
      "step": 251
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 4.939163684844971,
      "learning_rate": 8.353923650696118e-05,
      "loss": 1.748,
      "step": 252
    },
    {
      "epoch": 0.8682968682968683,
      "grad_norm": 2.062880754470825,
      "learning_rate": 8.340130056126125e-05,
      "loss": 1.5184,
      "step": 253
    },
    {
      "epoch": 0.8717288717288717,
      "grad_norm": 2.096306324005127,
      "learning_rate": 8.326290401772056e-05,
      "loss": 1.8033,
      "step": 254
    },
    {
      "epoch": 0.8751608751608752,
      "grad_norm": 2.528865098953247,
      "learning_rate": 8.312404878480222e-05,
      "loss": 1.7344,
      "step": 255
    },
    {
      "epoch": 0.8785928785928786,
      "grad_norm": 2.1523735523223877,
      "learning_rate": 8.298473677729452e-05,
      "loss": 1.9331,
      "step": 256
    },
    {
      "epoch": 0.882024882024882,
      "grad_norm": 3.4486896991729736,
      "learning_rate": 8.284496991628465e-05,
      "loss": 1.6207,
      "step": 257
    },
    {
      "epoch": 0.8854568854568855,
      "grad_norm": 4.5695881843566895,
      "learning_rate": 8.270475012913211e-05,
      "loss": 1.8614,
      "step": 258
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 5.191723346710205,
      "learning_rate": 8.25640793494422e-05,
      "loss": 1.8196,
      "step": 259
    },
    {
      "epoch": 0.8923208923208923,
      "grad_norm": 2.0518269538879395,
      "learning_rate": 8.242295951703929e-05,
      "loss": 1.9145,
      "step": 260
    },
    {
      "epoch": 0.8957528957528957,
      "grad_norm": 2.5167107582092285,
      "learning_rate": 8.228139257794012e-05,
      "loss": 1.94,
      "step": 261
    },
    {
      "epoch": 0.8991848991848992,
      "grad_norm": 2.6135048866271973,
      "learning_rate": 8.213938048432697e-05,
      "loss": 1.7861,
      "step": 262
    },
    {
      "epoch": 0.9026169026169026,
      "grad_norm": 2.6990857124328613,
      "learning_rate": 8.199692519452069e-05,
      "loss": 1.9417,
      "step": 263
    },
    {
      "epoch": 0.9060489060489061,
      "grad_norm": 1.760377049446106,
      "learning_rate": 8.185402867295373e-05,
      "loss": 1.8393,
      "step": 264
    },
    {
      "epoch": 0.9094809094809094,
      "grad_norm": 1.9401514530181885,
      "learning_rate": 8.171069289014306e-05,
      "loss": 1.7374,
      "step": 265
    },
    {
      "epoch": 0.9129129129129129,
      "grad_norm": 2.020557165145874,
      "learning_rate": 8.156691982266298e-05,
      "loss": 1.621,
      "step": 266
    },
    {
      "epoch": 0.9163449163449163,
      "grad_norm": 2.3324530124664307,
      "learning_rate": 8.142271145311783e-05,
      "loss": 1.5926,
      "step": 267
    },
    {
      "epoch": 0.9197769197769198,
      "grad_norm": 1.5700269937515259,
      "learning_rate": 8.127806977011475e-05,
      "loss": 1.6872,
      "step": 268
    },
    {
      "epoch": 0.9232089232089232,
      "grad_norm": 3.170302391052246,
      "learning_rate": 8.113299676823614e-05,
      "loss": 1.6807,
      "step": 269
    },
    {
      "epoch": 0.9266409266409267,
      "grad_norm": 8.72167682647705,
      "learning_rate": 8.098749444801224e-05,
      "loss": 1.9134,
      "step": 270
    },
    {
      "epoch": 0.9300729300729301,
      "grad_norm": 2.907050132751465,
      "learning_rate": 8.084156481589349e-05,
      "loss": 1.8967,
      "step": 271
    },
    {
      "epoch": 0.9335049335049335,
      "grad_norm": 2.757725238800049,
      "learning_rate": 8.069520988422292e-05,
      "loss": 1.881,
      "step": 272
    },
    {
      "epoch": 0.9369369369369369,
      "grad_norm": 2.3894410133361816,
      "learning_rate": 8.054843167120827e-05,
      "loss": 1.7762,
      "step": 273
    },
    {
      "epoch": 0.9403689403689404,
      "grad_norm": 1.7541773319244385,
      "learning_rate": 8.040123220089437e-05,
      "loss": 1.4438,
      "step": 274
    },
    {
      "epoch": 0.9438009438009438,
      "grad_norm": 1.8482638597488403,
      "learning_rate": 8.025361350313506e-05,
      "loss": 1.5979,
      "step": 275
    },
    {
      "epoch": 0.9472329472329473,
      "grad_norm": 3.970987319946289,
      "learning_rate": 8.010557761356523e-05,
      "loss": 1.4536,
      "step": 276
    },
    {
      "epoch": 0.9506649506649507,
      "grad_norm": 3.014207363128662,
      "learning_rate": 7.99571265735728e-05,
      "loss": 2.0115,
      "step": 277
    },
    {
      "epoch": 0.954096954096954,
      "grad_norm": 2.0984294414520264,
      "learning_rate": 7.980826243027051e-05,
      "loss": 1.7534,
      "step": 278
    },
    {
      "epoch": 0.9575289575289575,
      "grad_norm": 2.135024070739746,
      "learning_rate": 7.965898723646776e-05,
      "loss": 1.9935,
      "step": 279
    },
    {
      "epoch": 0.960960960960961,
      "grad_norm": 2.4189157485961914,
      "learning_rate": 7.950930305064224e-05,
      "loss": 1.7623,
      "step": 280
    },
    {
      "epoch": 0.9643929643929644,
      "grad_norm": 2.0982775688171387,
      "learning_rate": 7.935921193691153e-05,
      "loss": 1.6372,
      "step": 281
    },
    {
      "epoch": 0.9678249678249679,
      "grad_norm": 4.051909923553467,
      "learning_rate": 7.920871596500472e-05,
      "loss": 1.734,
      "step": 282
    },
    {
      "epoch": 0.9712569712569713,
      "grad_norm": 2.6742334365844727,
      "learning_rate": 7.905781721023384e-05,
      "loss": 1.7797,
      "step": 283
    },
    {
      "epoch": 0.9746889746889746,
      "grad_norm": 2.6985645294189453,
      "learning_rate": 7.890651775346512e-05,
      "loss": 1.6586,
      "step": 284
    },
    {
      "epoch": 0.9781209781209781,
      "grad_norm": 1.903810739517212,
      "learning_rate": 7.875481968109052e-05,
      "loss": 1.9216,
      "step": 285
    },
    {
      "epoch": 0.9815529815529815,
      "grad_norm": 2.0464494228363037,
      "learning_rate": 7.860272508499876e-05,
      "loss": 1.6745,
      "step": 286
    },
    {
      "epoch": 0.984984984984985,
      "grad_norm": 1.7176523208618164,
      "learning_rate": 7.845023606254658e-05,
      "loss": 1.7076,
      "step": 287
    },
    {
      "epoch": 0.9884169884169884,
      "grad_norm": 2.3394172191619873,
      "learning_rate": 7.829735471652978e-05,
      "loss": 2.1384,
      "step": 288
    },
    {
      "epoch": 0.9918489918489919,
      "grad_norm": 2.015266180038452,
      "learning_rate": 7.814408315515418e-05,
      "loss": 1.606,
      "step": 289
    },
    {
      "epoch": 0.9952809952809952,
      "grad_norm": 2.388758897781372,
      "learning_rate": 7.799042349200671e-05,
      "loss": 1.9451,
      "step": 290
    },
    {
      "epoch": 0.9987129987129987,
      "grad_norm": 1.8600746393203735,
      "learning_rate": 7.783637784602609e-05,
      "loss": 1.8282,
      "step": 291
    },
    {
      "epoch": 1.0021450021450022,
      "grad_norm": 1.583534598350525,
      "learning_rate": 7.768194834147362e-05,
      "loss": 1.4212,
      "step": 292
    },
    {
      "epoch": 1.0055770055770055,
      "grad_norm": 1.9959355592727661,
      "learning_rate": 7.752713710790404e-05,
      "loss": 1.2946,
      "step": 293
    },
    {
      "epoch": 1.009009009009009,
      "grad_norm": 2.62949538230896,
      "learning_rate": 7.7371946280136e-05,
      "loss": 1.7998,
      "step": 294
    },
    {
      "epoch": 1.0124410124410124,
      "grad_norm": 2.431450128555298,
      "learning_rate": 7.721637799822269e-05,
      "loss": 1.5646,
      "step": 295
    },
    {
      "epoch": 1.0158730158730158,
      "grad_norm": 1.7983845472335815,
      "learning_rate": 7.706043440742234e-05,
      "loss": 1.4601,
      "step": 296
    },
    {
      "epoch": 1.0193050193050193,
      "grad_norm": 2.2960171699523926,
      "learning_rate": 7.690411765816864e-05,
      "loss": 1.4027,
      "step": 297
    },
    {
      "epoch": 1.0227370227370227,
      "grad_norm": 2.6544857025146484,
      "learning_rate": 7.674742990604101e-05,
      "loss": 1.3949,
      "step": 298
    },
    {
      "epoch": 1.0261690261690262,
      "grad_norm": 2.6421749591827393,
      "learning_rate": 7.659037331173497e-05,
      "loss": 1.6299,
      "step": 299
    },
    {
      "epoch": 1.0296010296010296,
      "grad_norm": 2.059410333633423,
      "learning_rate": 7.643295004103232e-05,
      "loss": 1.5578,
      "step": 300
    },
    {
      "epoch": 1.033033033033033,
      "grad_norm": 2.357328414916992,
      "learning_rate": 7.627516226477122e-05,
      "loss": 1.6844,
      "step": 301
    },
    {
      "epoch": 1.0364650364650365,
      "grad_norm": 2.1102635860443115,
      "learning_rate": 7.611701215881634e-05,
      "loss": 1.5805,
      "step": 302
    },
    {
      "epoch": 1.03989703989704,
      "grad_norm": 3.6646649837493896,
      "learning_rate": 7.595850190402876e-05,
      "loss": 1.5425,
      "step": 303
    },
    {
      "epoch": 1.0433290433290434,
      "grad_norm": 2.2950656414031982,
      "learning_rate": 7.579963368623601e-05,
      "loss": 1.6959,
      "step": 304
    },
    {
      "epoch": 1.0467610467610469,
      "grad_norm": 5.265210151672363,
      "learning_rate": 7.564040969620178e-05,
      "loss": 1.6165,
      "step": 305
    },
    {
      "epoch": 1.05019305019305,
      "grad_norm": 2.438647508621216,
      "learning_rate": 7.548083212959588e-05,
      "loss": 1.5532,
      "step": 306
    },
    {
      "epoch": 1.0536250536250535,
      "grad_norm": 2.18306827545166,
      "learning_rate": 7.532090318696382e-05,
      "loss": 1.6798,
      "step": 307
    },
    {
      "epoch": 1.057057057057057,
      "grad_norm": 2.5863749980926514,
      "learning_rate": 7.516062507369654e-05,
      "loss": 1.6429,
      "step": 308
    },
    {
      "epoch": 1.0604890604890604,
      "grad_norm": 6.74139404296875,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.8963,
      "step": 309
    },
    {
      "epoch": 1.063921063921064,
      "grad_norm": 2.420839548110962,
      "learning_rate": 7.483903018086465e-05,
      "loss": 1.6864,
      "step": 310
    },
    {
      "epoch": 1.0673530673530673,
      "grad_norm": 2.9578287601470947,
      "learning_rate": 7.467771783603492e-05,
      "loss": 1.722,
      "step": 311
    },
    {
      "epoch": 1.0707850707850708,
      "grad_norm": 2.322808265686035,
      "learning_rate": 7.451606518997862e-05,
      "loss": 1.569,
      "step": 312
    },
    {
      "epoch": 1.0742170742170742,
      "grad_norm": 6.6969709396362305,
      "learning_rate": 7.435407447185623e-05,
      "loss": 1.424,
      "step": 313
    },
    {
      "epoch": 1.0776490776490777,
      "grad_norm": 4.087669372558594,
      "learning_rate": 7.419174791549022e-05,
      "loss": 1.6839,
      "step": 314
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 2.3197500705718994,
      "learning_rate": 7.402908775933419e-05,
      "loss": 1.4053,
      "step": 315
    },
    {
      "epoch": 1.0845130845130846,
      "grad_norm": 3.6255760192871094,
      "learning_rate": 7.386609624644201e-05,
      "loss": 1.5687,
      "step": 316
    },
    {
      "epoch": 1.087945087945088,
      "grad_norm": 2.651404619216919,
      "learning_rate": 7.370277562443688e-05,
      "loss": 1.5241,
      "step": 317
    },
    {
      "epoch": 1.0913770913770913,
      "grad_norm": 2.5847132205963135,
      "learning_rate": 7.353912814548042e-05,
      "loss": 1.9545,
      "step": 318
    },
    {
      "epoch": 1.0948090948090947,
      "grad_norm": 3.0099332332611084,
      "learning_rate": 7.337515606624148e-05,
      "loss": 1.7549,
      "step": 319
    },
    {
      "epoch": 1.0982410982410982,
      "grad_norm": 3.1133766174316406,
      "learning_rate": 7.321086164786513e-05,
      "loss": 1.5875,
      "step": 320
    },
    {
      "epoch": 1.1016731016731016,
      "grad_norm": 3.0638010501861572,
      "learning_rate": 7.304624715594139e-05,
      "loss": 1.5743,
      "step": 321
    },
    {
      "epoch": 1.105105105105105,
      "grad_norm": 1.9236012697219849,
      "learning_rate": 7.288131486047414e-05,
      "loss": 1.5209,
      "step": 322
    },
    {
      "epoch": 1.1085371085371085,
      "grad_norm": 2.70574951171875,
      "learning_rate": 7.27160670358496e-05,
      "loss": 1.6866,
      "step": 323
    },
    {
      "epoch": 1.111969111969112,
      "grad_norm": 2.5942938327789307,
      "learning_rate": 7.255050596080509e-05,
      "loss": 1.5188,
      "step": 324
    },
    {
      "epoch": 1.1154011154011154,
      "grad_norm": 6.315788745880127,
      "learning_rate": 7.238463391839769e-05,
      "loss": 1.5032,
      "step": 325
    },
    {
      "epoch": 1.1188331188331189,
      "grad_norm": 2.6975343227386475,
      "learning_rate": 7.221845319597258e-05,
      "loss": 1.6924,
      "step": 326
    },
    {
      "epoch": 1.1222651222651223,
      "grad_norm": 3.0455782413482666,
      "learning_rate": 7.205196608513159e-05,
      "loss": 1.5321,
      "step": 327
    },
    {
      "epoch": 1.1256971256971258,
      "grad_norm": 2.102262496948242,
      "learning_rate": 7.18851748817016e-05,
      "loss": 1.2692,
      "step": 328
    },
    {
      "epoch": 1.1291291291291292,
      "grad_norm": 2.070570468902588,
      "learning_rate": 7.171808188570291e-05,
      "loss": 1.6117,
      "step": 329
    },
    {
      "epoch": 1.1325611325611327,
      "grad_norm": 4.169351577758789,
      "learning_rate": 7.155068940131741e-05,
      "loss": 1.5667,
      "step": 330
    },
    {
      "epoch": 1.135993135993136,
      "grad_norm": 3.117227792739868,
      "learning_rate": 7.138299973685694e-05,
      "loss": 1.3962,
      "step": 331
    },
    {
      "epoch": 1.1394251394251393,
      "grad_norm": 2.2660415172576904,
      "learning_rate": 7.121501520473137e-05,
      "loss": 1.6048,
      "step": 332
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 2.9432554244995117,
      "learning_rate": 7.104673812141675e-05,
      "loss": 1.5757,
      "step": 333
    },
    {
      "epoch": 1.1462891462891462,
      "grad_norm": 2.6431076526641846,
      "learning_rate": 7.087817080742338e-05,
      "loss": 1.6614,
      "step": 334
    },
    {
      "epoch": 1.1497211497211497,
      "grad_norm": 4.8314714431762695,
      "learning_rate": 7.070931558726372e-05,
      "loss": 1.5387,
      "step": 335
    },
    {
      "epoch": 1.1531531531531531,
      "grad_norm": 3.9536054134368896,
      "learning_rate": 7.054017478942047e-05,
      "loss": 1.7762,
      "step": 336
    },
    {
      "epoch": 1.1565851565851566,
      "grad_norm": 4.137585163116455,
      "learning_rate": 7.03707507463144e-05,
      "loss": 1.5941,
      "step": 337
    },
    {
      "epoch": 1.16001716001716,
      "grad_norm": 2.785764455795288,
      "learning_rate": 7.020104579427213e-05,
      "loss": 1.8104,
      "step": 338
    },
    {
      "epoch": 1.1634491634491635,
      "grad_norm": 2.21512508392334,
      "learning_rate": 7.003106227349399e-05,
      "loss": 1.5065,
      "step": 339
    },
    {
      "epoch": 1.166881166881167,
      "grad_norm": 2.6862733364105225,
      "learning_rate": 6.986080252802171e-05,
      "loss": 1.6227,
      "step": 340
    },
    {
      "epoch": 1.1703131703131704,
      "grad_norm": 6.402591705322266,
      "learning_rate": 6.969026890570611e-05,
      "loss": 1.6624,
      "step": 341
    },
    {
      "epoch": 1.1737451737451738,
      "grad_norm": 1.7947615385055542,
      "learning_rate": 6.951946375817474e-05,
      "loss": 1.5338,
      "step": 342
    },
    {
      "epoch": 1.1771771771771773,
      "grad_norm": 2.200287103652954,
      "learning_rate": 6.934838944079943e-05,
      "loss": 1.5133,
      "step": 343
    },
    {
      "epoch": 1.1806091806091805,
      "grad_norm": 2.283151865005493,
      "learning_rate": 6.917704831266381e-05,
      "loss": 1.4876,
      "step": 344
    },
    {
      "epoch": 1.184041184041184,
      "grad_norm": 2.5204415321350098,
      "learning_rate": 6.900544273653074e-05,
      "loss": 1.5456,
      "step": 345
    },
    {
      "epoch": 1.1874731874731874,
      "grad_norm": 2.615139961242676,
      "learning_rate": 6.883357507880985e-05,
      "loss": 1.6792,
      "step": 346
    },
    {
      "epoch": 1.1909051909051909,
      "grad_norm": 4.858724117279053,
      "learning_rate": 6.866144770952474e-05,
      "loss": 1.6486,
      "step": 347
    },
    {
      "epoch": 1.1943371943371943,
      "grad_norm": 2.408298969268799,
      "learning_rate": 6.848906300228046e-05,
      "loss": 1.588,
      "step": 348
    },
    {
      "epoch": 1.1977691977691978,
      "grad_norm": 3.1886606216430664,
      "learning_rate": 6.831642333423067e-05,
      "loss": 1.4274,
      "step": 349
    },
    {
      "epoch": 1.2012012012012012,
      "grad_norm": 2.370371103286743,
      "learning_rate": 6.814353108604488e-05,
      "loss": 1.6522,
      "step": 350
    },
    {
      "epoch": 1.2046332046332047,
      "grad_norm": 3.1015868186950684,
      "learning_rate": 6.797038864187564e-05,
      "loss": 1.6606,
      "step": 351
    },
    {
      "epoch": 1.2080652080652081,
      "grad_norm": 2.486515998840332,
      "learning_rate": 6.77969983893257e-05,
      "loss": 1.4555,
      "step": 352
    },
    {
      "epoch": 1.2114972114972116,
      "grad_norm": 4.0898542404174805,
      "learning_rate": 6.762336271941498e-05,
      "loss": 1.4092,
      "step": 353
    },
    {
      "epoch": 1.214929214929215,
      "grad_norm": 2.9915337562561035,
      "learning_rate": 6.744948402654771e-05,
      "loss": 1.8173,
      "step": 354
    },
    {
      "epoch": 1.2183612183612182,
      "grad_norm": 3.557752847671509,
      "learning_rate": 6.727536470847931e-05,
      "loss": 1.8509,
      "step": 355
    },
    {
      "epoch": 1.221793221793222,
      "grad_norm": 2.575721502304077,
      "learning_rate": 6.710100716628344e-05,
      "loss": 1.5043,
      "step": 356
    },
    {
      "epoch": 1.2252252252252251,
      "grad_norm": 3.771576166152954,
      "learning_rate": 6.692641380431879e-05,
      "loss": 1.4471,
      "step": 357
    },
    {
      "epoch": 1.2286572286572286,
      "grad_norm": 3.7303056716918945,
      "learning_rate": 6.675158703019594e-05,
      "loss": 1.5909,
      "step": 358
    },
    {
      "epoch": 1.232089232089232,
      "grad_norm": 2.8177194595336914,
      "learning_rate": 6.657652925474423e-05,
      "loss": 1.5792,
      "step": 359
    },
    {
      "epoch": 1.2355212355212355,
      "grad_norm": 3.9052274227142334,
      "learning_rate": 6.640124289197845e-05,
      "loss": 1.623,
      "step": 360
    },
    {
      "epoch": 1.238953238953239,
      "grad_norm": 2.6038625240325928,
      "learning_rate": 6.622573035906557e-05,
      "loss": 1.4613,
      "step": 361
    },
    {
      "epoch": 1.2423852423852424,
      "grad_norm": 3.152754545211792,
      "learning_rate": 6.604999407629137e-05,
      "loss": 1.487,
      "step": 362
    },
    {
      "epoch": 1.2458172458172458,
      "grad_norm": 2.4888927936553955,
      "learning_rate": 6.587403646702714e-05,
      "loss": 1.5112,
      "step": 363
    },
    {
      "epoch": 1.2492492492492493,
      "grad_norm": 3.3498973846435547,
      "learning_rate": 6.569785995769619e-05,
      "loss": 1.5928,
      "step": 364
    },
    {
      "epoch": 1.2526812526812527,
      "grad_norm": 2.9565393924713135,
      "learning_rate": 6.552146697774049e-05,
      "loss": 1.7839,
      "step": 365
    },
    {
      "epoch": 1.2561132561132562,
      "grad_norm": 2.848174571990967,
      "learning_rate": 6.5344859959587e-05,
      "loss": 1.6326,
      "step": 366
    },
    {
      "epoch": 1.2595452595452596,
      "grad_norm": 2.7913596630096436,
      "learning_rate": 6.516804133861429e-05,
      "loss": 1.5104,
      "step": 367
    },
    {
      "epoch": 1.2629772629772629,
      "grad_norm": 2.659428358078003,
      "learning_rate": 6.49910135531189e-05,
      "loss": 1.758,
      "step": 368
    },
    {
      "epoch": 1.2664092664092665,
      "grad_norm": 7.941112995147705,
      "learning_rate": 6.481377904428171e-05,
      "loss": 1.4422,
      "step": 369
    },
    {
      "epoch": 1.2698412698412698,
      "grad_norm": 4.364922046661377,
      "learning_rate": 6.463634025613422e-05,
      "loss": 1.5459,
      "step": 370
    },
    {
      "epoch": 1.2732732732732732,
      "grad_norm": 2.149338960647583,
      "learning_rate": 6.445869963552496e-05,
      "loss": 1.724,
      "step": 371
    },
    {
      "epoch": 1.2767052767052767,
      "grad_norm": 2.581162691116333,
      "learning_rate": 6.428085963208566e-05,
      "loss": 1.5953,
      "step": 372
    },
    {
      "epoch": 1.2801372801372801,
      "grad_norm": 4.6043381690979,
      "learning_rate": 6.410282269819756e-05,
      "loss": 1.5684,
      "step": 373
    },
    {
      "epoch": 1.2835692835692836,
      "grad_norm": 2.2293617725372314,
      "learning_rate": 6.392459128895747e-05,
      "loss": 1.4449,
      "step": 374
    },
    {
      "epoch": 1.287001287001287,
      "grad_norm": 2.732926607131958,
      "learning_rate": 6.374616786214402e-05,
      "loss": 1.5415,
      "step": 375
    },
    {
      "epoch": 1.2904332904332905,
      "grad_norm": 2.5129523277282715,
      "learning_rate": 6.356755487818371e-05,
      "loss": 1.4591,
      "step": 376
    },
    {
      "epoch": 1.293865293865294,
      "grad_norm": 2.4862000942230225,
      "learning_rate": 6.338875480011698e-05,
      "loss": 1.6485,
      "step": 377
    },
    {
      "epoch": 1.2972972972972974,
      "grad_norm": 3.033751964569092,
      "learning_rate": 6.320977009356431e-05,
      "loss": 1.5986,
      "step": 378
    },
    {
      "epoch": 1.3007293007293006,
      "grad_norm": 4.5888495445251465,
      "learning_rate": 6.303060322669215e-05,
      "loss": 1.5021,
      "step": 379
    },
    {
      "epoch": 1.3041613041613043,
      "grad_norm": 2.429192543029785,
      "learning_rate": 6.285125667017886e-05,
      "loss": 1.6152,
      "step": 380
    },
    {
      "epoch": 1.3075933075933075,
      "grad_norm": 7.091180801391602,
      "learning_rate": 6.26717328971808e-05,
      "loss": 1.4411,
      "step": 381
    },
    {
      "epoch": 1.311025311025311,
      "grad_norm": 2.807136058807373,
      "learning_rate": 6.249203438329799e-05,
      "loss": 1.5097,
      "step": 382
    },
    {
      "epoch": 1.3144573144573144,
      "grad_norm": 2.2933549880981445,
      "learning_rate": 6.23121636065402e-05,
      "loss": 1.4201,
      "step": 383
    },
    {
      "epoch": 1.3178893178893178,
      "grad_norm": 2.7558300495147705,
      "learning_rate": 6.213212304729259e-05,
      "loss": 1.5992,
      "step": 384
    },
    {
      "epoch": 1.3213213213213213,
      "grad_norm": 4.076783180236816,
      "learning_rate": 6.195191518828163e-05,
      "loss": 1.4535,
      "step": 385
    },
    {
      "epoch": 1.3247533247533247,
      "grad_norm": 3.4067318439483643,
      "learning_rate": 6.177154251454082e-05,
      "loss": 1.4231,
      "step": 386
    },
    {
      "epoch": 1.3281853281853282,
      "grad_norm": 2.713345766067505,
      "learning_rate": 6.159100751337642e-05,
      "loss": 1.609,
      "step": 387
    },
    {
      "epoch": 1.3316173316173316,
      "grad_norm": 7.186104774475098,
      "learning_rate": 6.141031267433316e-05,
      "loss": 1.6651,
      "step": 388
    },
    {
      "epoch": 1.335049335049335,
      "grad_norm": 2.461686372756958,
      "learning_rate": 6.122946048915991e-05,
      "loss": 1.6094,
      "step": 389
    },
    {
      "epoch": 1.3384813384813385,
      "grad_norm": 2.3088924884796143,
      "learning_rate": 6.10484534517753e-05,
      "loss": 1.6525,
      "step": 390
    },
    {
      "epoch": 1.341913341913342,
      "grad_norm": 2.723689317703247,
      "learning_rate": 6.086729405823335e-05,
      "loss": 1.5337,
      "step": 391
    },
    {
      "epoch": 1.3453453453453452,
      "grad_norm": 2.900031089782715,
      "learning_rate": 6.068598480668906e-05,
      "loss": 1.4429,
      "step": 392
    },
    {
      "epoch": 1.348777348777349,
      "grad_norm": 2.979921340942383,
      "learning_rate": 6.0504528197363894e-05,
      "loss": 1.7144,
      "step": 393
    },
    {
      "epoch": 1.3522093522093521,
      "grad_norm": 5.341972351074219,
      "learning_rate": 6.032292673251143e-05,
      "loss": 1.3489,
      "step": 394
    },
    {
      "epoch": 1.3556413556413556,
      "grad_norm": 2.208777904510498,
      "learning_rate": 6.014118291638271e-05,
      "loss": 1.5446,
      "step": 395
    },
    {
      "epoch": 1.359073359073359,
      "grad_norm": 2.7157530784606934,
      "learning_rate": 5.99592992551918e-05,
      "loss": 1.4669,
      "step": 396
    },
    {
      "epoch": 1.3625053625053625,
      "grad_norm": 2.8407084941864014,
      "learning_rate": 5.977727825708123e-05,
      "loss": 1.4224,
      "step": 397
    },
    {
      "epoch": 1.365937365937366,
      "grad_norm": 5.027857780456543,
      "learning_rate": 5.959512243208732e-05,
      "loss": 1.7307,
      "step": 398
    },
    {
      "epoch": 1.3693693693693694,
      "grad_norm": 2.730710506439209,
      "learning_rate": 5.9412834292105676e-05,
      "loss": 1.5422,
      "step": 399
    },
    {
      "epoch": 1.3728013728013728,
      "grad_norm": 2.9895334243774414,
      "learning_rate": 5.92304163508565e-05,
      "loss": 1.6901,
      "step": 400
    },
    {
      "epoch": 1.3762333762333763,
      "grad_norm": 3.7523319721221924,
      "learning_rate": 5.9047871123849907e-05,
      "loss": 1.7742,
      "step": 401
    },
    {
      "epoch": 1.3796653796653797,
      "grad_norm": 2.4346377849578857,
      "learning_rate": 5.886520112835128e-05,
      "loss": 1.7416,
      "step": 402
    },
    {
      "epoch": 1.3830973830973832,
      "grad_norm": 2.3179407119750977,
      "learning_rate": 5.868240888334653e-05,
      "loss": 1.4677,
      "step": 403
    },
    {
      "epoch": 1.3865293865293866,
      "grad_norm": 2.831315279006958,
      "learning_rate": 5.849949690950735e-05,
      "loss": 1.567,
      "step": 404
    },
    {
      "epoch": 1.3899613899613898,
      "grad_norm": 2.7766289710998535,
      "learning_rate": 5.831646772915651e-05,
      "loss": 1.752,
      "step": 405
    },
    {
      "epoch": 1.3933933933933935,
      "grad_norm": 2.0719926357269287,
      "learning_rate": 5.8133323866233e-05,
      "loss": 1.5228,
      "step": 406
    },
    {
      "epoch": 1.3968253968253967,
      "grad_norm": 2.6451454162597656,
      "learning_rate": 5.7950067846257284e-05,
      "loss": 1.575,
      "step": 407
    },
    {
      "epoch": 1.4002574002574002,
      "grad_norm": 2.1423537731170654,
      "learning_rate": 5.776670219629643e-05,
      "loss": 1.481,
      "step": 408
    },
    {
      "epoch": 1.4036894036894036,
      "grad_norm": 2.1066346168518066,
      "learning_rate": 5.75832294449293e-05,
      "loss": 1.5382,
      "step": 409
    },
    {
      "epoch": 1.407121407121407,
      "grad_norm": 2.0059642791748047,
      "learning_rate": 5.7399652122211676e-05,
      "loss": 1.3148,
      "step": 410
    },
    {
      "epoch": 1.4105534105534105,
      "grad_norm": 2.6886708736419678,
      "learning_rate": 5.721597275964133e-05,
      "loss": 1.6746,
      "step": 411
    },
    {
      "epoch": 1.413985413985414,
      "grad_norm": 2.4721059799194336,
      "learning_rate": 5.7032193890123174e-05,
      "loss": 1.4931,
      "step": 412
    },
    {
      "epoch": 1.4174174174174174,
      "grad_norm": 2.3078601360321045,
      "learning_rate": 5.684831804793427e-05,
      "loss": 1.5499,
      "step": 413
    },
    {
      "epoch": 1.420849420849421,
      "grad_norm": 3.18930721282959,
      "learning_rate": 5.666434776868895e-05,
      "loss": 1.5252,
      "step": 414
    },
    {
      "epoch": 1.4242814242814243,
      "grad_norm": 2.861396551132202,
      "learning_rate": 5.6480285589303794e-05,
      "loss": 1.4565,
      "step": 415
    },
    {
      "epoch": 1.4277134277134278,
      "grad_norm": 2.4543356895446777,
      "learning_rate": 5.629613404796267e-05,
      "loss": 1.5102,
      "step": 416
    },
    {
      "epoch": 1.4311454311454312,
      "grad_norm": 2.4268035888671875,
      "learning_rate": 5.6111895684081725e-05,
      "loss": 1.3716,
      "step": 417
    },
    {
      "epoch": 1.4345774345774345,
      "grad_norm": 2.2431933879852295,
      "learning_rate": 5.5927573038274406e-05,
      "loss": 1.4674,
      "step": 418
    },
    {
      "epoch": 1.4380094380094381,
      "grad_norm": 3.415754795074463,
      "learning_rate": 5.574316865231637e-05,
      "loss": 1.5415,
      "step": 419
    },
    {
      "epoch": 1.4414414414414414,
      "grad_norm": 2.748523712158203,
      "learning_rate": 5.555868506911044e-05,
      "loss": 1.594,
      "step": 420
    },
    {
      "epoch": 1.4448734448734448,
      "grad_norm": 2.713268995285034,
      "learning_rate": 5.537412483265156e-05,
      "loss": 1.6102,
      "step": 421
    },
    {
      "epoch": 1.4483054483054483,
      "grad_norm": 3.1691229343414307,
      "learning_rate": 5.518949048799176e-05,
      "loss": 1.7603,
      "step": 422
    },
    {
      "epoch": 1.4517374517374517,
      "grad_norm": 8.484031677246094,
      "learning_rate": 5.5004784581204927e-05,
      "loss": 1.6131,
      "step": 423
    },
    {
      "epoch": 1.4551694551694552,
      "grad_norm": 3.916632652282715,
      "learning_rate": 5.4820009659351823e-05,
      "loss": 1.5769,
      "step": 424
    },
    {
      "epoch": 1.4586014586014586,
      "grad_norm": 2.80100679397583,
      "learning_rate": 5.4635168270444915e-05,
      "loss": 1.5268,
      "step": 425
    },
    {
      "epoch": 1.462033462033462,
      "grad_norm": 3.006824493408203,
      "learning_rate": 5.445026296341325e-05,
      "loss": 1.46,
      "step": 426
    },
    {
      "epoch": 1.4654654654654655,
      "grad_norm": 3.5886383056640625,
      "learning_rate": 5.426529628806724e-05,
      "loss": 1.426,
      "step": 427
    },
    {
      "epoch": 1.468897468897469,
      "grad_norm": 2.1659512519836426,
      "learning_rate": 5.4080270795063616e-05,
      "loss": 1.4031,
      "step": 428
    },
    {
      "epoch": 1.4723294723294722,
      "grad_norm": 2.527465343475342,
      "learning_rate": 5.389518903587017e-05,
      "loss": 1.4704,
      "step": 429
    },
    {
      "epoch": 1.4757614757614759,
      "grad_norm": 2.6389286518096924,
      "learning_rate": 5.371005356273058e-05,
      "loss": 1.6118,
      "step": 430
    },
    {
      "epoch": 1.479193479193479,
      "grad_norm": 2.9786925315856934,
      "learning_rate": 5.352486692862926e-05,
      "loss": 1.7449,
      "step": 431
    },
    {
      "epoch": 1.4826254826254825,
      "grad_norm": 3.0405077934265137,
      "learning_rate": 5.3339631687256084e-05,
      "loss": 1.4651,
      "step": 432
    },
    {
      "epoch": 1.486057486057486,
      "grad_norm": 3.3209829330444336,
      "learning_rate": 5.315435039297124e-05,
      "loss": 1.5134,
      "step": 433
    },
    {
      "epoch": 1.4894894894894894,
      "grad_norm": 3.043468475341797,
      "learning_rate": 5.2969025600770006e-05,
      "loss": 1.5785,
      "step": 434
    },
    {
      "epoch": 1.492921492921493,
      "grad_norm": 2.710390329360962,
      "learning_rate": 5.2783659866247424e-05,
      "loss": 1.7819,
      "step": 435
    },
    {
      "epoch": 1.4963534963534963,
      "grad_norm": 3.2145400047302246,
      "learning_rate": 5.2598255745563155e-05,
      "loss": 1.6852,
      "step": 436
    },
    {
      "epoch": 1.4997854997854998,
      "grad_norm": 2.9676852226257324,
      "learning_rate": 5.241281579540619e-05,
      "loss": 1.6221,
      "step": 437
    },
    {
      "epoch": 1.5032175032175032,
      "grad_norm": 2.4987123012542725,
      "learning_rate": 5.2227342572959623e-05,
      "loss": 1.4295,
      "step": 438
    },
    {
      "epoch": 1.5066495066495067,
      "grad_norm": 2.309537172317505,
      "learning_rate": 5.2041838635865334e-05,
      "loss": 1.4446,
      "step": 439
    },
    {
      "epoch": 1.51008151008151,
      "grad_norm": 2.6226561069488525,
      "learning_rate": 5.18563065421888e-05,
      "loss": 1.6364,
      "step": 440
    },
    {
      "epoch": 1.5135135135135136,
      "grad_norm": 2.649700164794922,
      "learning_rate": 5.167074885038373e-05,
      "loss": 1.5526,
      "step": 441
    },
    {
      "epoch": 1.5169455169455168,
      "grad_norm": 2.3845086097717285,
      "learning_rate": 5.1485168119256844e-05,
      "loss": 1.3813,
      "step": 442
    },
    {
      "epoch": 1.5203775203775205,
      "grad_norm": 2.8573849201202393,
      "learning_rate": 5.129956690793255e-05,
      "loss": 1.5912,
      "step": 443
    },
    {
      "epoch": 1.5238095238095237,
      "grad_norm": 3.13277268409729,
      "learning_rate": 5.11139477758177e-05,
      "loss": 1.4192,
      "step": 444
    },
    {
      "epoch": 1.5272415272415274,
      "grad_norm": 2.6009128093719482,
      "learning_rate": 5.0928313282566255e-05,
      "loss": 1.76,
      "step": 445
    },
    {
      "epoch": 1.5306735306735306,
      "grad_norm": 3.9756667613983154,
      "learning_rate": 5.074266598804401e-05,
      "loss": 1.4431,
      "step": 446
    },
    {
      "epoch": 1.534105534105534,
      "grad_norm": 2.4864137172698975,
      "learning_rate": 5.055700845229327e-05,
      "loss": 1.4912,
      "step": 447
    },
    {
      "epoch": 1.5375375375375375,
      "grad_norm": 2.888032913208008,
      "learning_rate": 5.0371343235497624e-05,
      "loss": 1.6204,
      "step": 448
    },
    {
      "epoch": 1.540969540969541,
      "grad_norm": 10.124591827392578,
      "learning_rate": 5.018567289794651e-05,
      "loss": 1.5822,
      "step": 449
    },
    {
      "epoch": 1.5444015444015444,
      "grad_norm": 2.4046237468719482,
      "learning_rate": 5e-05,
      "loss": 1.4953,
      "step": 450
    },
    {
      "epoch": 1.5478335478335479,
      "grad_norm": 3.203213930130005,
      "learning_rate": 4.9814327102053507e-05,
      "loss": 1.5365,
      "step": 451
    },
    {
      "epoch": 1.5512655512655513,
      "grad_norm": 2.624288320541382,
      "learning_rate": 4.962865676450239e-05,
      "loss": 1.7619,
      "step": 452
    },
    {
      "epoch": 1.5546975546975546,
      "grad_norm": 2.5629420280456543,
      "learning_rate": 4.944299154770673e-05,
      "loss": 1.6015,
      "step": 453
    },
    {
      "epoch": 1.5581295581295582,
      "grad_norm": 2.410686731338501,
      "learning_rate": 4.9257334011956005e-05,
      "loss": 1.5704,
      "step": 454
    },
    {
      "epoch": 1.5615615615615615,
      "grad_norm": 2.7196202278137207,
      "learning_rate": 4.907168671743377e-05,
      "loss": 1.4139,
      "step": 455
    },
    {
      "epoch": 1.5649935649935651,
      "grad_norm": 3.039167642593384,
      "learning_rate": 4.8886052224182314e-05,
      "loss": 1.5981,
      "step": 456
    },
    {
      "epoch": 1.5684255684255684,
      "grad_norm": 2.5808839797973633,
      "learning_rate": 4.8700433092067476e-05,
      "loss": 1.4699,
      "step": 457
    },
    {
      "epoch": 1.571857571857572,
      "grad_norm": 2.9232373237609863,
      "learning_rate": 4.8514831880743175e-05,
      "loss": 1.6958,
      "step": 458
    },
    {
      "epoch": 1.5752895752895753,
      "grad_norm": 2.748061180114746,
      "learning_rate": 4.832925114961629e-05,
      "loss": 1.4212,
      "step": 459
    },
    {
      "epoch": 1.5787215787215787,
      "grad_norm": 2.6016733646392822,
      "learning_rate": 4.81436934578112e-05,
      "loss": 1.4815,
      "step": 460
    },
    {
      "epoch": 1.5821535821535821,
      "grad_norm": 2.9114651679992676,
      "learning_rate": 4.795816136413467e-05,
      "loss": 1.5563,
      "step": 461
    },
    {
      "epoch": 1.5855855855855856,
      "grad_norm": 2.745410919189453,
      "learning_rate": 4.777265742704039e-05,
      "loss": 1.6027,
      "step": 462
    },
    {
      "epoch": 1.589017589017589,
      "grad_norm": 2.276564836502075,
      "learning_rate": 4.758718420459383e-05,
      "loss": 1.4591,
      "step": 463
    },
    {
      "epoch": 1.5924495924495925,
      "grad_norm": 2.4882259368896484,
      "learning_rate": 4.7401744254436864e-05,
      "loss": 1.5881,
      "step": 464
    },
    {
      "epoch": 1.595881595881596,
      "grad_norm": 3.190080165863037,
      "learning_rate": 4.72163401337526e-05,
      "loss": 1.6737,
      "step": 465
    },
    {
      "epoch": 1.5993135993135992,
      "grad_norm": 2.3443238735198975,
      "learning_rate": 4.703097439923e-05,
      "loss": 1.4517,
      "step": 466
    },
    {
      "epoch": 1.6027456027456028,
      "grad_norm": 2.8179473876953125,
      "learning_rate": 4.684564960702877e-05,
      "loss": 1.4667,
      "step": 467
    },
    {
      "epoch": 1.606177606177606,
      "grad_norm": 2.715737819671631,
      "learning_rate": 4.666036831274392e-05,
      "loss": 1.6098,
      "step": 468
    },
    {
      "epoch": 1.6096096096096097,
      "grad_norm": 3.3109145164489746,
      "learning_rate": 4.647513307137076e-05,
      "loss": 1.6845,
      "step": 469
    },
    {
      "epoch": 1.613041613041613,
      "grad_norm": 2.448624849319458,
      "learning_rate": 4.6289946437269413e-05,
      "loss": 1.4842,
      "step": 470
    },
    {
      "epoch": 1.6164736164736164,
      "grad_norm": 2.686997413635254,
      "learning_rate": 4.610481096412984e-05,
      "loss": 1.5478,
      "step": 471
    },
    {
      "epoch": 1.6199056199056199,
      "grad_norm": 2.3995554447174072,
      "learning_rate": 4.591972920493638e-05,
      "loss": 1.6882,
      "step": 472
    },
    {
      "epoch": 1.6233376233376233,
      "grad_norm": 2.322045087814331,
      "learning_rate": 4.573470371193277e-05,
      "loss": 1.4495,
      "step": 473
    },
    {
      "epoch": 1.6267696267696268,
      "grad_norm": 2.94111704826355,
      "learning_rate": 4.554973703658676e-05,
      "loss": 1.6924,
      "step": 474
    },
    {
      "epoch": 1.6302016302016302,
      "grad_norm": 3.2255635261535645,
      "learning_rate": 4.5364831729555096e-05,
      "loss": 1.7349,
      "step": 475
    },
    {
      "epoch": 1.6336336336336337,
      "grad_norm": 2.6030948162078857,
      "learning_rate": 4.517999034064818e-05,
      "loss": 1.5316,
      "step": 476
    },
    {
      "epoch": 1.637065637065637,
      "grad_norm": 3.085925579071045,
      "learning_rate": 4.4995215418795085e-05,
      "loss": 1.62,
      "step": 477
    },
    {
      "epoch": 1.6404976404976406,
      "grad_norm": 2.6004717350006104,
      "learning_rate": 4.481050951200824e-05,
      "loss": 1.5708,
      "step": 478
    },
    {
      "epoch": 1.6439296439296438,
      "grad_norm": 2.6294591426849365,
      "learning_rate": 4.462587516734844e-05,
      "loss": 1.7194,
      "step": 479
    },
    {
      "epoch": 1.6473616473616475,
      "grad_norm": 2.7559385299682617,
      "learning_rate": 4.4441314930889564e-05,
      "loss": 1.6297,
      "step": 480
    },
    {
      "epoch": 1.6507936507936507,
      "grad_norm": 2.4827189445495605,
      "learning_rate": 4.4256831347683644e-05,
      "loss": 1.3492,
      "step": 481
    },
    {
      "epoch": 1.6542256542256544,
      "grad_norm": 2.629169464111328,
      "learning_rate": 4.407242696172559e-05,
      "loss": 1.6157,
      "step": 482
    },
    {
      "epoch": 1.6576576576576576,
      "grad_norm": 2.694340229034424,
      "learning_rate": 4.388810431591829e-05,
      "loss": 1.7438,
      "step": 483
    },
    {
      "epoch": 1.661089661089661,
      "grad_norm": 3.476625919342041,
      "learning_rate": 4.3703865952037356e-05,
      "loss": 1.5321,
      "step": 484
    },
    {
      "epoch": 1.6645216645216645,
      "grad_norm": 2.3876941204071045,
      "learning_rate": 4.3519714410696224e-05,
      "loss": 1.494,
      "step": 485
    },
    {
      "epoch": 1.667953667953668,
      "grad_norm": 2.34399676322937,
      "learning_rate": 4.333565223131107e-05,
      "loss": 1.4659,
      "step": 486
    },
    {
      "epoch": 1.6713856713856714,
      "grad_norm": 2.271775007247925,
      "learning_rate": 4.3151681952065734e-05,
      "loss": 1.6405,
      "step": 487
    },
    {
      "epoch": 1.6748176748176749,
      "grad_norm": 2.4160709381103516,
      "learning_rate": 4.296780610987685e-05,
      "loss": 1.4532,
      "step": 488
    },
    {
      "epoch": 1.6782496782496783,
      "grad_norm": 2.317929744720459,
      "learning_rate": 4.278402724035867e-05,
      "loss": 1.4455,
      "step": 489
    },
    {
      "epoch": 1.6816816816816815,
      "grad_norm": 3.378218173980713,
      "learning_rate": 4.2600347877788335e-05,
      "loss": 1.7272,
      "step": 490
    },
    {
      "epoch": 1.6851136851136852,
      "grad_norm": 2.5305070877075195,
      "learning_rate": 4.241677055507071e-05,
      "loss": 1.4878,
      "step": 491
    },
    {
      "epoch": 1.6885456885456884,
      "grad_norm": 2.1931586265563965,
      "learning_rate": 4.2233297803703586e-05,
      "loss": 1.4962,
      "step": 492
    },
    {
      "epoch": 1.691977691977692,
      "grad_norm": 3.561326026916504,
      "learning_rate": 4.204993215374273e-05,
      "loss": 1.6254,
      "step": 493
    },
    {
      "epoch": 1.6954096954096953,
      "grad_norm": 2.3371071815490723,
      "learning_rate": 4.186667613376702e-05,
      "loss": 1.6459,
      "step": 494
    },
    {
      "epoch": 1.698841698841699,
      "grad_norm": 3.9716603755950928,
      "learning_rate": 4.1683532270843504e-05,
      "loss": 1.6232,
      "step": 495
    },
    {
      "epoch": 1.7022737022737022,
      "grad_norm": 3.9168498516082764,
      "learning_rate": 4.150050309049267e-05,
      "loss": 1.5633,
      "step": 496
    },
    {
      "epoch": 1.7057057057057057,
      "grad_norm": 2.6538310050964355,
      "learning_rate": 4.131759111665349e-05,
      "loss": 1.464,
      "step": 497
    },
    {
      "epoch": 1.7091377091377091,
      "grad_norm": 3.6975090503692627,
      "learning_rate": 4.1134798871648733e-05,
      "loss": 1.7677,
      "step": 498
    },
    {
      "epoch": 1.7125697125697126,
      "grad_norm": 2.2970147132873535,
      "learning_rate": 4.0952128876150105e-05,
      "loss": 1.7045,
      "step": 499
    },
    {
      "epoch": 1.716001716001716,
      "grad_norm": 2.0167083740234375,
      "learning_rate": 4.0769583649143517e-05,
      "loss": 1.4864,
      "step": 500
    },
    {
      "epoch": 1.7194337194337195,
      "grad_norm": 2.450317144393921,
      "learning_rate": 4.058716570789433e-05,
      "loss": 1.3855,
      "step": 501
    },
    {
      "epoch": 1.722865722865723,
      "grad_norm": 2.4551656246185303,
      "learning_rate": 4.0404877567912695e-05,
      "loss": 1.6079,
      "step": 502
    },
    {
      "epoch": 1.7262977262977262,
      "grad_norm": 2.198312520980835,
      "learning_rate": 4.0222721742918776e-05,
      "loss": 1.487,
      "step": 503
    },
    {
      "epoch": 1.7297297297297298,
      "grad_norm": 2.5198142528533936,
      "learning_rate": 4.0040700744808204e-05,
      "loss": 1.5442,
      "step": 504
    },
    {
      "epoch": 1.733161733161733,
      "grad_norm": 2.639026165008545,
      "learning_rate": 3.9858817083617295e-05,
      "loss": 1.4701,
      "step": 505
    },
    {
      "epoch": 1.7365937365937367,
      "grad_norm": 2.2345986366271973,
      "learning_rate": 3.967707326748857e-05,
      "loss": 1.7051,
      "step": 506
    },
    {
      "epoch": 1.74002574002574,
      "grad_norm": 2.33160400390625,
      "learning_rate": 3.94954718026361e-05,
      "loss": 1.6322,
      "step": 507
    },
    {
      "epoch": 1.7434577434577434,
      "grad_norm": 2.6456820964813232,
      "learning_rate": 3.9314015193310955e-05,
      "loss": 1.5093,
      "step": 508
    },
    {
      "epoch": 1.7468897468897469,
      "grad_norm": 2.1802875995635986,
      "learning_rate": 3.913270594176664e-05,
      "loss": 1.5141,
      "step": 509
    },
    {
      "epoch": 1.7503217503217503,
      "grad_norm": 2.604865312576294,
      "learning_rate": 3.895154654822471e-05,
      "loss": 1.3396,
      "step": 510
    },
    {
      "epoch": 1.7537537537537538,
      "grad_norm": 2.0998575687408447,
      "learning_rate": 3.877053951084009e-05,
      "loss": 1.4238,
      "step": 511
    },
    {
      "epoch": 1.7571857571857572,
      "grad_norm": 3.5698776245117188,
      "learning_rate": 3.8589687325666853e-05,
      "loss": 1.7831,
      "step": 512
    },
    {
      "epoch": 1.7606177606177607,
      "grad_norm": 2.367262840270996,
      "learning_rate": 3.840899248662358e-05,
      "loss": 1.4916,
      "step": 513
    },
    {
      "epoch": 1.764049764049764,
      "grad_norm": 2.3220672607421875,
      "learning_rate": 3.822845748545919e-05,
      "loss": 1.4795,
      "step": 514
    },
    {
      "epoch": 1.7674817674817676,
      "grad_norm": 2.1096725463867188,
      "learning_rate": 3.804808481171838e-05,
      "loss": 1.572,
      "step": 515
    },
    {
      "epoch": 1.7709137709137708,
      "grad_norm": 2.3278422355651855,
      "learning_rate": 3.786787695270743e-05,
      "loss": 1.5269,
      "step": 516
    },
    {
      "epoch": 1.7743457743457745,
      "grad_norm": 2.749607563018799,
      "learning_rate": 3.768783639345982e-05,
      "loss": 1.5354,
      "step": 517
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 2.3141472339630127,
      "learning_rate": 3.7507965616702014e-05,
      "loss": 1.4091,
      "step": 518
    },
    {
      "epoch": 1.7812097812097814,
      "grad_norm": 2.2596139907836914,
      "learning_rate": 3.7328267102819225e-05,
      "loss": 1.453,
      "step": 519
    },
    {
      "epoch": 1.7846417846417846,
      "grad_norm": 2.355376720428467,
      "learning_rate": 3.714874332982115e-05,
      "loss": 1.6055,
      "step": 520
    },
    {
      "epoch": 1.788073788073788,
      "grad_norm": 2.4396281242370605,
      "learning_rate": 3.6969396773307885e-05,
      "loss": 1.4993,
      "step": 521
    },
    {
      "epoch": 1.7915057915057915,
      "grad_norm": 2.781219482421875,
      "learning_rate": 3.6790229906435705e-05,
      "loss": 1.5511,
      "step": 522
    },
    {
      "epoch": 1.794937794937795,
      "grad_norm": 2.634636878967285,
      "learning_rate": 3.661124519988304e-05,
      "loss": 1.5711,
      "step": 523
    },
    {
      "epoch": 1.7983697983697984,
      "grad_norm": 2.621788263320923,
      "learning_rate": 3.6432445121816304e-05,
      "loss": 1.4636,
      "step": 524
    },
    {
      "epoch": 1.8018018018018018,
      "grad_norm": 2.2490668296813965,
      "learning_rate": 3.6253832137856e-05,
      "loss": 1.4483,
      "step": 525
    },
    {
      "epoch": 1.8052338052338053,
      "grad_norm": 2.349517345428467,
      "learning_rate": 3.607540871104254e-05,
      "loss": 1.5784,
      "step": 526
    },
    {
      "epoch": 1.8086658086658085,
      "grad_norm": 2.67471981048584,
      "learning_rate": 3.589717730180246e-05,
      "loss": 1.704,
      "step": 527
    },
    {
      "epoch": 1.8120978120978122,
      "grad_norm": 2.7578306198120117,
      "learning_rate": 3.571914036791435e-05,
      "loss": 1.5665,
      "step": 528
    },
    {
      "epoch": 1.8155298155298154,
      "grad_norm": 2.493817090988159,
      "learning_rate": 3.554130036447506e-05,
      "loss": 1.4958,
      "step": 529
    },
    {
      "epoch": 1.818961818961819,
      "grad_norm": 2.3423521518707275,
      "learning_rate": 3.5363659743865794e-05,
      "loss": 1.6415,
      "step": 530
    },
    {
      "epoch": 1.8223938223938223,
      "grad_norm": 2.399747610092163,
      "learning_rate": 3.5186220955718306e-05,
      "loss": 1.548,
      "step": 531
    },
    {
      "epoch": 1.825825825825826,
      "grad_norm": 2.18708872795105,
      "learning_rate": 3.500898644688109e-05,
      "loss": 1.598,
      "step": 532
    },
    {
      "epoch": 1.8292578292578292,
      "grad_norm": 2.9461519718170166,
      "learning_rate": 3.4831958661385714e-05,
      "loss": 1.3514,
      "step": 533
    },
    {
      "epoch": 1.8326898326898327,
      "grad_norm": 2.9919519424438477,
      "learning_rate": 3.465514004041301e-05,
      "loss": 1.6886,
      "step": 534
    },
    {
      "epoch": 1.836121836121836,
      "grad_norm": 2.705054521560669,
      "learning_rate": 3.447853302225953e-05,
      "loss": 1.4899,
      "step": 535
    },
    {
      "epoch": 1.8395538395538396,
      "grad_norm": 2.7716612815856934,
      "learning_rate": 3.430214004230381e-05,
      "loss": 1.545,
      "step": 536
    },
    {
      "epoch": 1.842985842985843,
      "grad_norm": 3.493701457977295,
      "learning_rate": 3.4125963532972873e-05,
      "loss": 1.46,
      "step": 537
    },
    {
      "epoch": 1.8464178464178465,
      "grad_norm": 3.150582790374756,
      "learning_rate": 3.395000592370864e-05,
      "loss": 1.6785,
      "step": 538
    },
    {
      "epoch": 1.84984984984985,
      "grad_norm": 2.5565614700317383,
      "learning_rate": 3.377426964093445e-05,
      "loss": 1.3472,
      "step": 539
    },
    {
      "epoch": 1.8532818532818531,
      "grad_norm": 2.538355588912964,
      "learning_rate": 3.3598757108021546e-05,
      "loss": 1.6195,
      "step": 540
    },
    {
      "epoch": 1.8567138567138568,
      "grad_norm": 2.803187370300293,
      "learning_rate": 3.342347074525578e-05,
      "loss": 1.5813,
      "step": 541
    },
    {
      "epoch": 1.86014586014586,
      "grad_norm": 3.432896614074707,
      "learning_rate": 3.324841296980407e-05,
      "loss": 1.4468,
      "step": 542
    },
    {
      "epoch": 1.8635778635778637,
      "grad_norm": 2.3690922260284424,
      "learning_rate": 3.307358619568123e-05,
      "loss": 1.5372,
      "step": 543
    },
    {
      "epoch": 1.867009867009867,
      "grad_norm": 3.1075501441955566,
      "learning_rate": 3.289899283371657e-05,
      "loss": 1.5844,
      "step": 544
    },
    {
      "epoch": 1.8704418704418706,
      "grad_norm": 2.5869109630584717,
      "learning_rate": 3.27246352915207e-05,
      "loss": 1.5576,
      "step": 545
    },
    {
      "epoch": 1.8738738738738738,
      "grad_norm": 3.3243567943573,
      "learning_rate": 3.25505159734523e-05,
      "loss": 1.7545,
      "step": 546
    },
    {
      "epoch": 1.8773058773058773,
      "grad_norm": 2.211977005004883,
      "learning_rate": 3.237663728058502e-05,
      "loss": 1.5055,
      "step": 547
    },
    {
      "epoch": 1.8807378807378807,
      "grad_norm": 2.8710718154907227,
      "learning_rate": 3.220300161067432e-05,
      "loss": 1.6161,
      "step": 548
    },
    {
      "epoch": 1.8841698841698842,
      "grad_norm": 3.1551785469055176,
      "learning_rate": 3.202961135812437e-05,
      "loss": 1.4996,
      "step": 549
    },
    {
      "epoch": 1.8876018876018876,
      "grad_norm": 2.7353293895721436,
      "learning_rate": 3.185646891395514e-05,
      "loss": 1.4806,
      "step": 550
    },
    {
      "epoch": 1.891033891033891,
      "grad_norm": 2.7863030433654785,
      "learning_rate": 3.1683576665769344e-05,
      "loss": 1.4403,
      "step": 551
    },
    {
      "epoch": 1.8944658944658945,
      "grad_norm": 2.3312976360321045,
      "learning_rate": 3.1510936997719555e-05,
      "loss": 1.3643,
      "step": 552
    },
    {
      "epoch": 1.8978978978978978,
      "grad_norm": 2.453322410583496,
      "learning_rate": 3.1338552290475266e-05,
      "loss": 1.5053,
      "step": 553
    },
    {
      "epoch": 1.9013299013299014,
      "grad_norm": 2.392380475997925,
      "learning_rate": 3.116642492119017e-05,
      "loss": 1.5286,
      "step": 554
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 3.274395227432251,
      "learning_rate": 3.0994557263469265e-05,
      "loss": 1.4766,
      "step": 555
    },
    {
      "epoch": 1.9081939081939083,
      "grad_norm": 2.433957576751709,
      "learning_rate": 3.0822951687336214e-05,
      "loss": 1.6439,
      "step": 556
    },
    {
      "epoch": 1.9116259116259116,
      "grad_norm": 3.1075353622436523,
      "learning_rate": 3.065161055920057e-05,
      "loss": 1.5093,
      "step": 557
    },
    {
      "epoch": 1.915057915057915,
      "grad_norm": 2.2694778442382812,
      "learning_rate": 3.0480536241825263e-05,
      "loss": 1.4463,
      "step": 558
    },
    {
      "epoch": 1.9184899184899185,
      "grad_norm": 2.4638431072235107,
      "learning_rate": 3.0309731094293897e-05,
      "loss": 1.5163,
      "step": 559
    },
    {
      "epoch": 1.921921921921922,
      "grad_norm": 2.64046573638916,
      "learning_rate": 3.013919747197832e-05,
      "loss": 1.5562,
      "step": 560
    },
    {
      "epoch": 1.9253539253539254,
      "grad_norm": 2.456101179122925,
      "learning_rate": 2.996893772650602e-05,
      "loss": 1.5963,
      "step": 561
    },
    {
      "epoch": 1.9287859287859288,
      "grad_norm": 2.387228012084961,
      "learning_rate": 2.9798954205727885e-05,
      "loss": 1.3789,
      "step": 562
    },
    {
      "epoch": 1.9322179322179323,
      "grad_norm": 2.3570961952209473,
      "learning_rate": 2.9629249253685597e-05,
      "loss": 1.5827,
      "step": 563
    },
    {
      "epoch": 1.9356499356499357,
      "grad_norm": 2.116554021835327,
      "learning_rate": 2.9459825210579533e-05,
      "loss": 1.3608,
      "step": 564
    },
    {
      "epoch": 1.9390819390819392,
      "grad_norm": 2.3534789085388184,
      "learning_rate": 2.9290684412736292e-05,
      "loss": 1.5278,
      "step": 565
    },
    {
      "epoch": 1.9425139425139424,
      "grad_norm": 2.568575859069824,
      "learning_rate": 2.9121829192576643e-05,
      "loss": 1.5542,
      "step": 566
    },
    {
      "epoch": 1.945945945945946,
      "grad_norm": 2.402428388595581,
      "learning_rate": 2.895326187858326e-05,
      "loss": 1.4213,
      "step": 567
    },
    {
      "epoch": 1.9493779493779493,
      "grad_norm": 2.928065061569214,
      "learning_rate": 2.8784984795268642e-05,
      "loss": 1.6243,
      "step": 568
    },
    {
      "epoch": 1.952809952809953,
      "grad_norm": 2.1884844303131104,
      "learning_rate": 2.8617000263143078e-05,
      "loss": 1.6569,
      "step": 569
    },
    {
      "epoch": 1.9562419562419562,
      "grad_norm": 2.4743812084198,
      "learning_rate": 2.844931059868261e-05,
      "loss": 1.498,
      "step": 570
    },
    {
      "epoch": 1.9596739596739596,
      "grad_norm": 2.2158451080322266,
      "learning_rate": 2.828191811429709e-05,
      "loss": 1.2799,
      "step": 571
    },
    {
      "epoch": 1.963105963105963,
      "grad_norm": 2.5304431915283203,
      "learning_rate": 2.8114825118298416e-05,
      "loss": 1.4273,
      "step": 572
    },
    {
      "epoch": 1.9665379665379665,
      "grad_norm": 2.717284917831421,
      "learning_rate": 2.7948033914868415e-05,
      "loss": 1.7815,
      "step": 573
    },
    {
      "epoch": 1.96996996996997,
      "grad_norm": 3.1559271812438965,
      "learning_rate": 2.778154680402745e-05,
      "loss": 1.5246,
      "step": 574
    },
    {
      "epoch": 1.9734019734019734,
      "grad_norm": 4.472389221191406,
      "learning_rate": 2.7615366081602307e-05,
      "loss": 1.5053,
      "step": 575
    },
    {
      "epoch": 1.9768339768339769,
      "grad_norm": 2.4243619441986084,
      "learning_rate": 2.74494940391949e-05,
      "loss": 1.4879,
      "step": 576
    },
    {
      "epoch": 1.98026598026598,
      "grad_norm": 2.9998128414154053,
      "learning_rate": 2.7283932964150416e-05,
      "loss": 1.6159,
      "step": 577
    },
    {
      "epoch": 1.9836979836979838,
      "grad_norm": 2.6797726154327393,
      "learning_rate": 2.7118685139525866e-05,
      "loss": 1.5764,
      "step": 578
    },
    {
      "epoch": 1.987129987129987,
      "grad_norm": 2.6300737857818604,
      "learning_rate": 2.69537528440586e-05,
      "loss": 1.477,
      "step": 579
    },
    {
      "epoch": 1.9905619905619907,
      "grad_norm": 2.305650234222412,
      "learning_rate": 2.6789138352134884e-05,
      "loss": 1.4657,
      "step": 580
    },
    {
      "epoch": 1.993993993993994,
      "grad_norm": 2.707665205001831,
      "learning_rate": 2.6624843933758547e-05,
      "loss": 1.6608,
      "step": 581
    },
    {
      "epoch": 1.9974259974259976,
      "grad_norm": 2.695685863494873,
      "learning_rate": 2.6460871854519594e-05,
      "loss": 1.6347,
      "step": 582
    },
    {
      "epoch": 2.000858000858001,
      "grad_norm": 2.261622190475464,
      "learning_rate": 2.629722437556312e-05,
      "loss": 1.6215,
      "step": 583
    },
    {
      "epoch": 2.0042900042900045,
      "grad_norm": 1.9403581619262695,
      "learning_rate": 2.613390375355801e-05,
      "loss": 1.1664,
      "step": 584
    },
    {
      "epoch": 2.0077220077220077,
      "grad_norm": 2.76426100730896,
      "learning_rate": 2.5970912240665813e-05,
      "loss": 1.3421,
      "step": 585
    },
    {
      "epoch": 2.011154011154011,
      "grad_norm": 2.0548441410064697,
      "learning_rate": 2.5808252084509782e-05,
      "loss": 1.4097,
      "step": 586
    },
    {
      "epoch": 2.0145860145860146,
      "grad_norm": 1.929901361465454,
      "learning_rate": 2.564592552814378e-05,
      "loss": 1.2254,
      "step": 587
    },
    {
      "epoch": 2.018018018018018,
      "grad_norm": 2.4297282695770264,
      "learning_rate": 2.5483934810021397e-05,
      "loss": 1.391,
      "step": 588
    },
    {
      "epoch": 2.0214500214500215,
      "grad_norm": 1.720894694328308,
      "learning_rate": 2.5322282163965095e-05,
      "loss": 1.1722,
      "step": 589
    },
    {
      "epoch": 2.0248820248820247,
      "grad_norm": 2.1252617835998535,
      "learning_rate": 2.5160969819135365e-05,
      "loss": 1.2743,
      "step": 590
    },
    {
      "epoch": 2.0283140283140284,
      "grad_norm": 2.995781898498535,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.2497,
      "step": 591
    },
    {
      "epoch": 2.0317460317460316,
      "grad_norm": 2.0593655109405518,
      "learning_rate": 2.483937492630345e-05,
      "loss": 1.3154,
      "step": 592
    },
    {
      "epoch": 2.0351780351780353,
      "grad_norm": 2.212315559387207,
      "learning_rate": 2.46790968130362e-05,
      "loss": 1.4474,
      "step": 593
    },
    {
      "epoch": 2.0386100386100385,
      "grad_norm": 2.5200812816619873,
      "learning_rate": 2.4519167870404125e-05,
      "loss": 1.2021,
      "step": 594
    },
    {
      "epoch": 2.042042042042042,
      "grad_norm": 4.33349609375,
      "learning_rate": 2.4359590303798242e-05,
      "loss": 1.4978,
      "step": 595
    },
    {
      "epoch": 2.0454740454740454,
      "grad_norm": 2.3467800617218018,
      "learning_rate": 2.4200366313764e-05,
      "loss": 1.2416,
      "step": 596
    },
    {
      "epoch": 2.048906048906049,
      "grad_norm": 3.3223676681518555,
      "learning_rate": 2.4041498095971253e-05,
      "loss": 1.1576,
      "step": 597
    },
    {
      "epoch": 2.0523380523380523,
      "grad_norm": 2.173525094985962,
      "learning_rate": 2.3882987841183655e-05,
      "loss": 1.002,
      "step": 598
    },
    {
      "epoch": 2.0557700557700556,
      "grad_norm": 2.5221400260925293,
      "learning_rate": 2.372483773522877e-05,
      "loss": 1.1598,
      "step": 599
    },
    {
      "epoch": 2.0592020592020592,
      "grad_norm": 2.604292154312134,
      "learning_rate": 2.356704995896768e-05,
      "loss": 1.3011,
      "step": 600
    },
    {
      "epoch": 2.0626340626340625,
      "grad_norm": 2.4862782955169678,
      "learning_rate": 2.340962668826503e-05,
      "loss": 1.1556,
      "step": 601
    },
    {
      "epoch": 2.066066066066066,
      "grad_norm": 3.2816693782806396,
      "learning_rate": 2.3252570093958996e-05,
      "loss": 1.3003,
      "step": 602
    },
    {
      "epoch": 2.0694980694980694,
      "grad_norm": 2.946709394454956,
      "learning_rate": 2.3095882341831372e-05,
      "loss": 1.3948,
      "step": 603
    },
    {
      "epoch": 2.072930072930073,
      "grad_norm": 2.743342638015747,
      "learning_rate": 2.293956559257766e-05,
      "loss": 1.4356,
      "step": 604
    },
    {
      "epoch": 2.0763620763620763,
      "grad_norm": 2.7446043491363525,
      "learning_rate": 2.2783622001777322e-05,
      "loss": 1.3927,
      "step": 605
    },
    {
      "epoch": 2.07979407979408,
      "grad_norm": 3.4391164779663086,
      "learning_rate": 2.2628053719864017e-05,
      "loss": 1.3702,
      "step": 606
    },
    {
      "epoch": 2.083226083226083,
      "grad_norm": 3.438328266143799,
      "learning_rate": 2.247286289209597e-05,
      "loss": 1.2728,
      "step": 607
    },
    {
      "epoch": 2.086658086658087,
      "grad_norm": 2.148148536682129,
      "learning_rate": 2.2318051658526366e-05,
      "loss": 1.1654,
      "step": 608
    },
    {
      "epoch": 2.09009009009009,
      "grad_norm": 2.971231698989868,
      "learning_rate": 2.216362215397393e-05,
      "loss": 1.531,
      "step": 609
    },
    {
      "epoch": 2.0935220935220937,
      "grad_norm": 2.488325834274292,
      "learning_rate": 2.2009576507993273e-05,
      "loss": 1.3522,
      "step": 610
    },
    {
      "epoch": 2.096954096954097,
      "grad_norm": 2.6689023971557617,
      "learning_rate": 2.1855916844845826e-05,
      "loss": 1.3698,
      "step": 611
    },
    {
      "epoch": 2.1003861003861,
      "grad_norm": 3.472546100616455,
      "learning_rate": 2.1702645283470236e-05,
      "loss": 1.3896,
      "step": 612
    },
    {
      "epoch": 2.103818103818104,
      "grad_norm": 3.190436363220215,
      "learning_rate": 2.1549763937453444e-05,
      "loss": 1.3711,
      "step": 613
    },
    {
      "epoch": 2.107250107250107,
      "grad_norm": 3.068131446838379,
      "learning_rate": 2.1397274915001254e-05,
      "loss": 1.3248,
      "step": 614
    },
    {
      "epoch": 2.1106821106821108,
      "grad_norm": 4.186660289764404,
      "learning_rate": 2.124518031890948e-05,
      "loss": 1.1867,
      "step": 615
    },
    {
      "epoch": 2.114114114114114,
      "grad_norm": 3.130547285079956,
      "learning_rate": 2.1093482246534897e-05,
      "loss": 1.3844,
      "step": 616
    },
    {
      "epoch": 2.1175461175461177,
      "grad_norm": 3.2508246898651123,
      "learning_rate": 2.0942182789766173e-05,
      "loss": 1.3108,
      "step": 617
    },
    {
      "epoch": 2.120978120978121,
      "grad_norm": 2.9003689289093018,
      "learning_rate": 2.0791284034995297e-05,
      "loss": 1.1505,
      "step": 618
    },
    {
      "epoch": 2.1244101244101246,
      "grad_norm": 3.289644479751587,
      "learning_rate": 2.064078806308848e-05,
      "loss": 1.1593,
      "step": 619
    },
    {
      "epoch": 2.127842127842128,
      "grad_norm": 2.749830484390259,
      "learning_rate": 2.0490696949357772e-05,
      "loss": 1.3387,
      "step": 620
    },
    {
      "epoch": 2.1312741312741315,
      "grad_norm": 2.7221736907958984,
      "learning_rate": 2.0341012763532243e-05,
      "loss": 1.1916,
      "step": 621
    },
    {
      "epoch": 2.1347061347061347,
      "grad_norm": 3.048496961593628,
      "learning_rate": 2.019173756972949e-05,
      "loss": 1.2646,
      "step": 622
    },
    {
      "epoch": 2.138138138138138,
      "grad_norm": 2.152845621109009,
      "learning_rate": 2.004287342642721e-05,
      "loss": 1.0518,
      "step": 623
    },
    {
      "epoch": 2.1415701415701416,
      "grad_norm": 3.7284510135650635,
      "learning_rate": 1.989442238643478e-05,
      "loss": 1.401,
      "step": 624
    },
    {
      "epoch": 2.145002145002145,
      "grad_norm": 2.8814234733581543,
      "learning_rate": 1.974638649686495e-05,
      "loss": 1.4121,
      "step": 625
    },
    {
      "epoch": 2.1484341484341485,
      "grad_norm": 3.2562880516052246,
      "learning_rate": 1.9598767799105637e-05,
      "loss": 1.4873,
      "step": 626
    },
    {
      "epoch": 2.1518661518661517,
      "grad_norm": 3.0776944160461426,
      "learning_rate": 1.945156832879174e-05,
      "loss": 1.1613,
      "step": 627
    },
    {
      "epoch": 2.1552981552981554,
      "grad_norm": 4.29723596572876,
      "learning_rate": 1.930479011577711e-05,
      "loss": 1.5044,
      "step": 628
    },
    {
      "epoch": 2.1587301587301586,
      "grad_norm": 3.1823811531066895,
      "learning_rate": 1.91584351841065e-05,
      "loss": 1.2431,
      "step": 629
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 2.8258872032165527,
      "learning_rate": 1.9012505551987765e-05,
      "loss": 1.3046,
      "step": 630
    },
    {
      "epoch": 2.1655941655941655,
      "grad_norm": 2.8293778896331787,
      "learning_rate": 1.8867003231763846e-05,
      "loss": 1.2929,
      "step": 631
    },
    {
      "epoch": 2.169026169026169,
      "grad_norm": 2.8796939849853516,
      "learning_rate": 1.872193022988526e-05,
      "loss": 1.3169,
      "step": 632
    },
    {
      "epoch": 2.1724581724581724,
      "grad_norm": 2.819246292114258,
      "learning_rate": 1.8577288546882167e-05,
      "loss": 1.4859,
      "step": 633
    },
    {
      "epoch": 2.175890175890176,
      "grad_norm": 3.07414174079895,
      "learning_rate": 1.843308017733704e-05,
      "loss": 1.3845,
      "step": 634
    },
    {
      "epoch": 2.1793221793221793,
      "grad_norm": 2.8706564903259277,
      "learning_rate": 1.828930710985694e-05,
      "loss": 1.3006,
      "step": 635
    },
    {
      "epoch": 2.1827541827541825,
      "grad_norm": 2.9581117630004883,
      "learning_rate": 1.8145971327046275e-05,
      "loss": 1.1662,
      "step": 636
    },
    {
      "epoch": 2.186186186186186,
      "grad_norm": 3.104335308074951,
      "learning_rate": 1.8003074805479313e-05,
      "loss": 1.1869,
      "step": 637
    },
    {
      "epoch": 2.1896181896181894,
      "grad_norm": 3.1789534091949463,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 1.2706,
      "step": 638
    },
    {
      "epoch": 2.193050193050193,
      "grad_norm": 5.0912699699401855,
      "learning_rate": 1.771860742205988e-05,
      "loss": 1.3081,
      "step": 639
    },
    {
      "epoch": 2.1964821964821963,
      "grad_norm": 4.382167339324951,
      "learning_rate": 1.757704048296072e-05,
      "loss": 1.313,
      "step": 640
    },
    {
      "epoch": 2.1999141999142,
      "grad_norm": 2.8877246379852295,
      "learning_rate": 1.743592065055781e-05,
      "loss": 1.3977,
      "step": 641
    },
    {
      "epoch": 2.2033462033462032,
      "grad_norm": 5.241063117980957,
      "learning_rate": 1.7295249870867896e-05,
      "loss": 1.3177,
      "step": 642
    },
    {
      "epoch": 2.206778206778207,
      "grad_norm": 4.070691108703613,
      "learning_rate": 1.7155030083715363e-05,
      "loss": 1.1586,
      "step": 643
    },
    {
      "epoch": 2.21021021021021,
      "grad_norm": 2.962562322616577,
      "learning_rate": 1.7015263222705492e-05,
      "loss": 1.154,
      "step": 644
    },
    {
      "epoch": 2.213642213642214,
      "grad_norm": 2.4141042232513428,
      "learning_rate": 1.687595121519778e-05,
      "loss": 1.1507,
      "step": 645
    },
    {
      "epoch": 2.217074217074217,
      "grad_norm": 2.5266990661621094,
      "learning_rate": 1.6737095982279442e-05,
      "loss": 1.2606,
      "step": 646
    },
    {
      "epoch": 2.2205062205062207,
      "grad_norm": 2.891242265701294,
      "learning_rate": 1.659869943873876e-05,
      "loss": 1.5428,
      "step": 647
    },
    {
      "epoch": 2.223938223938224,
      "grad_norm": 3.0429062843322754,
      "learning_rate": 1.646076349303884e-05,
      "loss": 1.2839,
      "step": 648
    },
    {
      "epoch": 2.227370227370227,
      "grad_norm": 2.748051643371582,
      "learning_rate": 1.6323290047291194e-05,
      "loss": 1.2583,
      "step": 649
    },
    {
      "epoch": 2.230802230802231,
      "grad_norm": 2.7809901237487793,
      "learning_rate": 1.6186280997229568e-05,
      "loss": 1.1341,
      "step": 650
    },
    {
      "epoch": 2.234234234234234,
      "grad_norm": 2.8791439533233643,
      "learning_rate": 1.604973823218376e-05,
      "loss": 1.0834,
      "step": 651
    },
    {
      "epoch": 2.2376662376662377,
      "grad_norm": 2.87526798248291,
      "learning_rate": 1.5913663635053576e-05,
      "loss": 1.2611,
      "step": 652
    },
    {
      "epoch": 2.241098241098241,
      "grad_norm": 3.5860228538513184,
      "learning_rate": 1.577805908228293e-05,
      "loss": 1.3473,
      "step": 653
    },
    {
      "epoch": 2.2445302445302446,
      "grad_norm": 2.8543691635131836,
      "learning_rate": 1.56429264438338e-05,
      "loss": 1.2812,
      "step": 654
    },
    {
      "epoch": 2.247962247962248,
      "grad_norm": 2.7079670429229736,
      "learning_rate": 1.5508267583160678e-05,
      "loss": 1.187,
      "step": 655
    },
    {
      "epoch": 2.2513942513942515,
      "grad_norm": 3.1646766662597656,
      "learning_rate": 1.537408435718462e-05,
      "loss": 1.2818,
      "step": 656
    },
    {
      "epoch": 2.2548262548262548,
      "grad_norm": 3.3990767002105713,
      "learning_rate": 1.5240378616267886e-05,
      "loss": 1.2516,
      "step": 657
    },
    {
      "epoch": 2.2582582582582584,
      "grad_norm": 6.715524196624756,
      "learning_rate": 1.510715220418823e-05,
      "loss": 1.3896,
      "step": 658
    },
    {
      "epoch": 2.2616902616902617,
      "grad_norm": 3.031691312789917,
      "learning_rate": 1.4974406958113558e-05,
      "loss": 1.3069,
      "step": 659
    },
    {
      "epoch": 2.2651222651222653,
      "grad_norm": 4.0388898849487305,
      "learning_rate": 1.4842144708576605e-05,
      "loss": 1.3475,
      "step": 660
    },
    {
      "epoch": 2.2685542685542686,
      "grad_norm": 5.848029613494873,
      "learning_rate": 1.4710367279449661e-05,
      "loss": 1.1229,
      "step": 661
    },
    {
      "epoch": 2.271986271986272,
      "grad_norm": 2.5390665531158447,
      "learning_rate": 1.457907648791943e-05,
      "loss": 1.126,
      "step": 662
    },
    {
      "epoch": 2.2754182754182755,
      "grad_norm": 2.9947872161865234,
      "learning_rate": 1.4448274144461965e-05,
      "loss": 1.1699,
      "step": 663
    },
    {
      "epoch": 2.2788502788502787,
      "grad_norm": 3.521867036819458,
      "learning_rate": 1.4317962052817729e-05,
      "loss": 1.4222,
      "step": 664
    },
    {
      "epoch": 2.2822822822822824,
      "grad_norm": 2.886566400527954,
      "learning_rate": 1.4188142009966686e-05,
      "loss": 1.3893,
      "step": 665
    },
    {
      "epoch": 2.2857142857142856,
      "grad_norm": 3.0759239196777344,
      "learning_rate": 1.4058815806103542e-05,
      "loss": 1.2475,
      "step": 666
    },
    {
      "epoch": 2.2891462891462893,
      "grad_norm": 2.843618869781494,
      "learning_rate": 1.3929985224613052e-05,
      "loss": 1.3288,
      "step": 667
    },
    {
      "epoch": 2.2925782925782925,
      "grad_norm": 3.095327854156494,
      "learning_rate": 1.3801652042045415e-05,
      "loss": 1.3092,
      "step": 668
    },
    {
      "epoch": 2.296010296010296,
      "grad_norm": 3.736011505126953,
      "learning_rate": 1.367381802809185e-05,
      "loss": 1.361,
      "step": 669
    },
    {
      "epoch": 2.2994422994422994,
      "grad_norm": 2.8946597576141357,
      "learning_rate": 1.3546484945560029e-05,
      "loss": 1.352,
      "step": 670
    },
    {
      "epoch": 2.302874302874303,
      "grad_norm": 3.0385186672210693,
      "learning_rate": 1.3419654550349987e-05,
      "loss": 1.2009,
      "step": 671
    },
    {
      "epoch": 2.3063063063063063,
      "grad_norm": 2.8469150066375732,
      "learning_rate": 1.3293328591429671e-05,
      "loss": 1.4194,
      "step": 672
    },
    {
      "epoch": 2.30973830973831,
      "grad_norm": 3.0281765460968018,
      "learning_rate": 1.3167508810811058e-05,
      "loss": 1.258,
      "step": 673
    },
    {
      "epoch": 2.313170313170313,
      "grad_norm": 4.175450325012207,
      "learning_rate": 1.3042196943525942e-05,
      "loss": 1.402,
      "step": 674
    },
    {
      "epoch": 2.3166023166023164,
      "grad_norm": 4.884052753448486,
      "learning_rate": 1.2917394717602121e-05,
      "loss": 1.1605,
      "step": 675
    },
    {
      "epoch": 2.32003432003432,
      "grad_norm": 2.7955501079559326,
      "learning_rate": 1.2793103854039517e-05,
      "loss": 1.2486,
      "step": 676
    },
    {
      "epoch": 2.3234663234663233,
      "grad_norm": 3.6322569847106934,
      "learning_rate": 1.2669326066786458e-05,
      "loss": 1.446,
      "step": 677
    },
    {
      "epoch": 2.326898326898327,
      "grad_norm": 3.1716806888580322,
      "learning_rate": 1.2546063062716068e-05,
      "loss": 1.326,
      "step": 678
    },
    {
      "epoch": 2.33033033033033,
      "grad_norm": 3.2151339054107666,
      "learning_rate": 1.2423316541602632e-05,
      "loss": 1.3672,
      "step": 679
    },
    {
      "epoch": 2.333762333762334,
      "grad_norm": 3.2190120220184326,
      "learning_rate": 1.2301088196098331e-05,
      "loss": 1.4021,
      "step": 680
    },
    {
      "epoch": 2.337194337194337,
      "grad_norm": 3.8785288333892822,
      "learning_rate": 1.2179379711709737e-05,
      "loss": 1.2928,
      "step": 681
    },
    {
      "epoch": 2.340626340626341,
      "grad_norm": 2.998224973678589,
      "learning_rate": 1.205819276677464e-05,
      "loss": 1.3873,
      "step": 682
    },
    {
      "epoch": 2.344058344058344,
      "grad_norm": 2.761502504348755,
      "learning_rate": 1.1937529032438904e-05,
      "loss": 1.2385,
      "step": 683
    },
    {
      "epoch": 2.3474903474903477,
      "grad_norm": 4.165109634399414,
      "learning_rate": 1.1817390172633403e-05,
      "loss": 1.3697,
      "step": 684
    },
    {
      "epoch": 2.350922350922351,
      "grad_norm": 4.064977645874023,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 1.3093,
      "step": 685
    },
    {
      "epoch": 2.3543543543543546,
      "grad_norm": 7.382278919219971,
      "learning_rate": 1.1578693696124193e-05,
      "loss": 1.3846,
      "step": 686
    },
    {
      "epoch": 2.357786357786358,
      "grad_norm": 2.9292287826538086,
      "learning_rate": 1.1460139371001338e-05,
      "loss": 1.4431,
      "step": 687
    },
    {
      "epoch": 2.361218361218361,
      "grad_norm": 2.807321548461914,
      "learning_rate": 1.1342116503525058e-05,
      "loss": 1.3175,
      "step": 688
    },
    {
      "epoch": 2.3646503646503647,
      "grad_norm": 2.836714267730713,
      "learning_rate": 1.122462672120914e-05,
      "loss": 1.1857,
      "step": 689
    },
    {
      "epoch": 2.368082368082368,
      "grad_norm": 3.241259813308716,
      "learning_rate": 1.1107671644216305e-05,
      "loss": 1.2465,
      "step": 690
    },
    {
      "epoch": 2.3715143715143716,
      "grad_norm": 4.03470516204834,
      "learning_rate": 1.0991252885335651e-05,
      "loss": 1.3373,
      "step": 691
    },
    {
      "epoch": 2.374946374946375,
      "grad_norm": 2.5123445987701416,
      "learning_rate": 1.0875372049960698e-05,
      "loss": 1.2685,
      "step": 692
    },
    {
      "epoch": 2.3783783783783785,
      "grad_norm": 2.8289108276367188,
      "learning_rate": 1.0760030736066951e-05,
      "loss": 1.165,
      "step": 693
    },
    {
      "epoch": 2.3818103818103817,
      "grad_norm": 2.3550925254821777,
      "learning_rate": 1.0645230534190149e-05,
      "loss": 1.2858,
      "step": 694
    },
    {
      "epoch": 2.3852423852423854,
      "grad_norm": 2.4928946495056152,
      "learning_rate": 1.0530973027404073e-05,
      "loss": 1.2969,
      "step": 695
    },
    {
      "epoch": 2.3886743886743886,
      "grad_norm": 5.102880477905273,
      "learning_rate": 1.0417259791298939e-05,
      "loss": 1.0439,
      "step": 696
    },
    {
      "epoch": 2.392106392106392,
      "grad_norm": 2.656374454498291,
      "learning_rate": 1.0304092393959513e-05,
      "loss": 1.2089,
      "step": 697
    },
    {
      "epoch": 2.3955383955383955,
      "grad_norm": 3.400047779083252,
      "learning_rate": 1.0191472395943553e-05,
      "loss": 1.2017,
      "step": 698
    },
    {
      "epoch": 2.398970398970399,
      "grad_norm": 3.0024609565734863,
      "learning_rate": 1.0079401350260287e-05,
      "loss": 1.4256,
      "step": 699
    },
    {
      "epoch": 2.4024024024024024,
      "grad_norm": 3.190159559249878,
      "learning_rate": 9.967880802348988e-06,
      "loss": 1.2819,
      "step": 700
    },
    {
      "epoch": 2.4058344058344057,
      "grad_norm": 3.003763198852539,
      "learning_rate": 9.856912290057668e-06,
      "loss": 1.2841,
      "step": 701
    },
    {
      "epoch": 2.4092664092664093,
      "grad_norm": 3.1304690837860107,
      "learning_rate": 9.746497343621857e-06,
      "loss": 1.4319,
      "step": 702
    },
    {
      "epoch": 2.4126984126984126,
      "grad_norm": 3.155585527420044,
      "learning_rate": 9.63663748564353e-06,
      "loss": 1.22,
      "step": 703
    },
    {
      "epoch": 2.4161304161304162,
      "grad_norm": 2.915252685546875,
      "learning_rate": 9.527334231070084e-06,
      "loss": 1.3022,
      "step": 704
    },
    {
      "epoch": 2.4195624195624195,
      "grad_norm": 3.370206356048584,
      "learning_rate": 9.41858908717344e-06,
      "loss": 1.1125,
      "step": 705
    },
    {
      "epoch": 2.422994422994423,
      "grad_norm": 2.580293655395508,
      "learning_rate": 9.310403553529334e-06,
      "loss": 1.304,
      "step": 706
    },
    {
      "epoch": 2.4264264264264264,
      "grad_norm": 3.0237159729003906,
      "learning_rate": 9.20277912199648e-06,
      "loss": 1.2766,
      "step": 707
    },
    {
      "epoch": 2.42985842985843,
      "grad_norm": 2.811258316040039,
      "learning_rate": 9.095717276696214e-06,
      "loss": 1.3722,
      "step": 708
    },
    {
      "epoch": 2.4332904332904333,
      "grad_norm": 3.5717251300811768,
      "learning_rate": 8.98921949399179e-06,
      "loss": 1.2132,
      "step": 709
    },
    {
      "epoch": 2.4367224367224365,
      "grad_norm": 3.7978532314300537,
      "learning_rate": 8.883287242468241e-06,
      "loss": 1.3355,
      "step": 710
    },
    {
      "epoch": 2.44015444015444,
      "grad_norm": 4.116402626037598,
      "learning_rate": 8.777921982911996e-06,
      "loss": 1.2279,
      "step": 711
    },
    {
      "epoch": 2.443586443586444,
      "grad_norm": 2.967719316482544,
      "learning_rate": 8.673125168290713e-06,
      "loss": 1.3426,
      "step": 712
    },
    {
      "epoch": 2.447018447018447,
      "grad_norm": 2.9534823894500732,
      "learning_rate": 8.568898243733398e-06,
      "loss": 1.2851,
      "step": 713
    },
    {
      "epoch": 2.4504504504504503,
      "grad_norm": 2.687187671661377,
      "learning_rate": 8.46524264651028e-06,
      "loss": 1.2575,
      "step": 714
    },
    {
      "epoch": 2.453882453882454,
      "grad_norm": 2.4711790084838867,
      "learning_rate": 8.362159806013175e-06,
      "loss": 1.2759,
      "step": 715
    },
    {
      "epoch": 2.457314457314457,
      "grad_norm": 2.731041431427002,
      "learning_rate": 8.259651143735603e-06,
      "loss": 1.2778,
      "step": 716
    },
    {
      "epoch": 2.460746460746461,
      "grad_norm": 3.6461358070373535,
      "learning_rate": 8.157718073253351e-06,
      "loss": 1.2344,
      "step": 717
    },
    {
      "epoch": 2.464178464178464,
      "grad_norm": 3.369678497314453,
      "learning_rate": 8.056362000204847e-06,
      "loss": 1.2356,
      "step": 718
    },
    {
      "epoch": 2.4676104676104678,
      "grad_norm": 3.3698081970214844,
      "learning_rate": 7.955584322271854e-06,
      "loss": 1.3594,
      "step": 719
    },
    {
      "epoch": 2.471042471042471,
      "grad_norm": 3.009044885635376,
      "learning_rate": 7.85538642916015e-06,
      "loss": 1.3815,
      "step": 720
    },
    {
      "epoch": 2.4744744744744747,
      "grad_norm": 3.0374338626861572,
      "learning_rate": 7.755769702580412e-06,
      "loss": 1.4059,
      "step": 721
    },
    {
      "epoch": 2.477906477906478,
      "grad_norm": 2.7099967002868652,
      "learning_rate": 7.656735516229124e-06,
      "loss": 1.3414,
      "step": 722
    },
    {
      "epoch": 2.481338481338481,
      "grad_norm": 2.7878730297088623,
      "learning_rate": 7.558285235769646e-06,
      "loss": 1.1246,
      "step": 723
    },
    {
      "epoch": 2.484770484770485,
      "grad_norm": 3.0087642669677734,
      "learning_rate": 7.4604202188133795e-06,
      "loss": 1.2122,
      "step": 724
    },
    {
      "epoch": 2.488202488202488,
      "grad_norm": 3.3808608055114746,
      "learning_rate": 7.3631418149010535e-06,
      "loss": 1.3018,
      "step": 725
    },
    {
      "epoch": 2.4916344916344917,
      "grad_norm": 3.181326389312744,
      "learning_rate": 7.266451365484106e-06,
      "loss": 1.1705,
      "step": 726
    },
    {
      "epoch": 2.495066495066495,
      "grad_norm": 3.575660228729248,
      "learning_rate": 7.1703502039062176e-06,
      "loss": 1.0817,
      "step": 727
    },
    {
      "epoch": 2.4984984984984986,
      "grad_norm": 3.3245861530303955,
      "learning_rate": 7.074839655384835e-06,
      "loss": 1.285,
      "step": 728
    },
    {
      "epoch": 2.501930501930502,
      "grad_norm": 3.5354886054992676,
      "learning_rate": 6.979921036993042e-06,
      "loss": 1.3695,
      "step": 729
    },
    {
      "epoch": 2.5053625053625055,
      "grad_norm": 4.003744602203369,
      "learning_rate": 6.885595657641214e-06,
      "loss": 1.5217,
      "step": 730
    },
    {
      "epoch": 2.5087945087945087,
      "grad_norm": 3.564354658126831,
      "learning_rate": 6.791864818059179e-06,
      "loss": 1.4477,
      "step": 731
    },
    {
      "epoch": 2.5122265122265124,
      "grad_norm": 4.075306415557861,
      "learning_rate": 6.698729810778065e-06,
      "loss": 1.5774,
      "step": 732
    },
    {
      "epoch": 2.5156585156585156,
      "grad_norm": 5.144626140594482,
      "learning_rate": 6.6061919201126646e-06,
      "loss": 1.3629,
      "step": 733
    },
    {
      "epoch": 2.5190905190905193,
      "grad_norm": 2.9630720615386963,
      "learning_rate": 6.514252422143591e-06,
      "loss": 1.3411,
      "step": 734
    },
    {
      "epoch": 2.5225225225225225,
      "grad_norm": 3.6379408836364746,
      "learning_rate": 6.422912584699753e-06,
      "loss": 1.3256,
      "step": 735
    },
    {
      "epoch": 2.5259545259545257,
      "grad_norm": 3.056509017944336,
      "learning_rate": 6.3321736673408405e-06,
      "loss": 1.4162,
      "step": 736
    },
    {
      "epoch": 2.5293865293865294,
      "grad_norm": 4.265598297119141,
      "learning_rate": 6.242036921339972e-06,
      "loss": 1.3278,
      "step": 737
    },
    {
      "epoch": 2.532818532818533,
      "grad_norm": 3.210228204727173,
      "learning_rate": 6.152503589666425e-06,
      "loss": 1.2709,
      "step": 738
    },
    {
      "epoch": 2.5362505362505363,
      "grad_norm": 3.3316073417663574,
      "learning_rate": 6.063574906968511e-06,
      "loss": 1.3902,
      "step": 739
    },
    {
      "epoch": 2.5396825396825395,
      "grad_norm": 4.0203447341918945,
      "learning_rate": 5.975252099556544e-06,
      "loss": 1.2993,
      "step": 740
    },
    {
      "epoch": 2.543114543114543,
      "grad_norm": 2.9398443698883057,
      "learning_rate": 5.887536385385917e-06,
      "loss": 1.1633,
      "step": 741
    },
    {
      "epoch": 2.5465465465465464,
      "grad_norm": 3.654632329940796,
      "learning_rate": 5.800428974040312e-06,
      "loss": 1.1823,
      "step": 742
    },
    {
      "epoch": 2.54997854997855,
      "grad_norm": 2.750408887863159,
      "learning_rate": 5.713931066715078e-06,
      "loss": 1.1513,
      "step": 743
    },
    {
      "epoch": 2.5534105534105533,
      "grad_norm": 2.7185122966766357,
      "learning_rate": 5.6280438562005435e-06,
      "loss": 1.0285,
      "step": 744
    },
    {
      "epoch": 2.5568425568425566,
      "grad_norm": 2.861016035079956,
      "learning_rate": 5.542768526865677e-06,
      "loss": 1.2086,
      "step": 745
    },
    {
      "epoch": 2.5602745602745602,
      "grad_norm": 2.569660186767578,
      "learning_rate": 5.458106254641715e-06,
      "loss": 1.1639,
      "step": 746
    },
    {
      "epoch": 2.563706563706564,
      "grad_norm": 2.9734063148498535,
      "learning_rate": 5.374058207005944e-06,
      "loss": 1.0664,
      "step": 747
    },
    {
      "epoch": 2.567138567138567,
      "grad_norm": 2.9306201934814453,
      "learning_rate": 5.29062554296561e-06,
      "loss": 1.206,
      "step": 748
    },
    {
      "epoch": 2.5705705705705704,
      "grad_norm": 2.9184021949768066,
      "learning_rate": 5.207809413041914e-06,
      "loss": 1.3262,
      "step": 749
    },
    {
      "epoch": 2.574002574002574,
      "grad_norm": 3.02815580368042,
      "learning_rate": 5.1256109592542125e-06,
      "loss": 1.2908,
      "step": 750
    },
    {
      "epoch": 2.5774345774345777,
      "grad_norm": 3.1658856868743896,
      "learning_rate": 5.0440313151041364e-06,
      "loss": 1.2763,
      "step": 751
    },
    {
      "epoch": 2.580866580866581,
      "grad_norm": 4.110093593597412,
      "learning_rate": 4.963071605560143e-06,
      "loss": 1.4741,
      "step": 752
    },
    {
      "epoch": 2.584298584298584,
      "grad_norm": 2.803586006164551,
      "learning_rate": 4.882732947041818e-06,
      "loss": 1.2432,
      "step": 753
    },
    {
      "epoch": 2.587730587730588,
      "grad_norm": 3.824662208557129,
      "learning_rate": 4.803016447404629e-06,
      "loss": 1.3018,
      "step": 754
    },
    {
      "epoch": 2.591162591162591,
      "grad_norm": 3.5892648696899414,
      "learning_rate": 4.723923205924558e-06,
      "loss": 1.2249,
      "step": 755
    },
    {
      "epoch": 2.5945945945945947,
      "grad_norm": 3.437448263168335,
      "learning_rate": 4.645454313282965e-06,
      "loss": 1.3503,
      "step": 756
    },
    {
      "epoch": 2.598026598026598,
      "grad_norm": 3.3272268772125244,
      "learning_rate": 4.567610851551568e-06,
      "loss": 1.2922,
      "step": 757
    },
    {
      "epoch": 2.601458601458601,
      "grad_norm": 2.6399238109588623,
      "learning_rate": 4.490393894177508e-06,
      "loss": 0.9922,
      "step": 758
    },
    {
      "epoch": 2.604890604890605,
      "grad_norm": 2.8182778358459473,
      "learning_rate": 4.413804505968533e-06,
      "loss": 1.1946,
      "step": 759
    },
    {
      "epoch": 2.6083226083226085,
      "grad_norm": 3.0050463676452637,
      "learning_rate": 4.3378437430783295e-06,
      "loss": 1.1966,
      "step": 760
    },
    {
      "epoch": 2.6117546117546118,
      "grad_norm": 4.887689590454102,
      "learning_rate": 4.262512652991968e-06,
      "loss": 1.2408,
      "step": 761
    },
    {
      "epoch": 2.615186615186615,
      "grad_norm": 3.9970507621765137,
      "learning_rate": 4.187812274511427e-06,
      "loss": 1.379,
      "step": 762
    },
    {
      "epoch": 2.6186186186186187,
      "grad_norm": 2.826645612716675,
      "learning_rate": 4.113743637741296e-06,
      "loss": 1.2603,
      "step": 763
    },
    {
      "epoch": 2.622050622050622,
      "grad_norm": 2.8498330116271973,
      "learning_rate": 4.040307764074585e-06,
      "loss": 1.1531,
      "step": 764
    },
    {
      "epoch": 2.6254826254826256,
      "grad_norm": 3.5629312992095947,
      "learning_rate": 3.967505666178556e-06,
      "loss": 1.3222,
      "step": 765
    },
    {
      "epoch": 2.628914628914629,
      "grad_norm": 2.39949107170105,
      "learning_rate": 3.895338347980898e-06,
      "loss": 1.0607,
      "step": 766
    },
    {
      "epoch": 2.6323466323466325,
      "grad_norm": 3.2179932594299316,
      "learning_rate": 3.823806804655727e-06,
      "loss": 1.2585,
      "step": 767
    },
    {
      "epoch": 2.6357786357786357,
      "grad_norm": 3.3043341636657715,
      "learning_rate": 3.7529120226100055e-06,
      "loss": 1.207,
      "step": 768
    },
    {
      "epoch": 2.6392106392106394,
      "grad_norm": 3.073673725128174,
      "learning_rate": 3.6826549794698074e-06,
      "loss": 1.314,
      "step": 769
    },
    {
      "epoch": 2.6426426426426426,
      "grad_norm": 3.022268056869507,
      "learning_rate": 3.6130366440669693e-06,
      "loss": 1.2568,
      "step": 770
    },
    {
      "epoch": 2.646074646074646,
      "grad_norm": 2.6287262439727783,
      "learning_rate": 3.544057976425619e-06,
      "loss": 1.1299,
      "step": 771
    },
    {
      "epoch": 2.6495066495066495,
      "grad_norm": 3.087599277496338,
      "learning_rate": 3.4757199277490105e-06,
      "loss": 1.2085,
      "step": 772
    },
    {
      "epoch": 2.652938652938653,
      "grad_norm": 3.120027780532837,
      "learning_rate": 3.408023440406355e-06,
      "loss": 1.3024,
      "step": 773
    },
    {
      "epoch": 2.6563706563706564,
      "grad_norm": 3.5091335773468018,
      "learning_rate": 3.340969447919873e-06,
      "loss": 1.29,
      "step": 774
    },
    {
      "epoch": 2.6598026598026596,
      "grad_norm": 4.269645690917969,
      "learning_rate": 3.2745588749518775e-06,
      "loss": 1.3988,
      "step": 775
    },
    {
      "epoch": 2.6632346632346633,
      "grad_norm": 2.774585485458374,
      "learning_rate": 3.2087926372920573e-06,
      "loss": 1.2309,
      "step": 776
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 4.910566806793213,
      "learning_rate": 3.1436716418448307e-06,
      "loss": 1.1772,
      "step": 777
    },
    {
      "epoch": 2.67009867009867,
      "grad_norm": 3.6239495277404785,
      "learning_rate": 3.079196786616839e-06,
      "loss": 1.093,
      "step": 778
    },
    {
      "epoch": 2.6735306735306734,
      "grad_norm": 3.0379135608673096,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 1.3953,
      "step": 779
    },
    {
      "epoch": 2.676962676962677,
      "grad_norm": 4.87697172164917,
      "learning_rate": 2.9521890442821276e-06,
      "loss": 1.2358,
      "step": 780
    },
    {
      "epoch": 2.6803946803946803,
      "grad_norm": 3.411156415939331,
      "learning_rate": 2.889657908589e-06,
      "loss": 1.3234,
      "step": 781
    },
    {
      "epoch": 2.683826683826684,
      "grad_norm": 3.2104408740997314,
      "learning_rate": 2.8277764159181485e-06,
      "loss": 1.1761,
      "step": 782
    },
    {
      "epoch": 2.687258687258687,
      "grad_norm": 3.2257437705993652,
      "learning_rate": 2.7665454196040664e-06,
      "loss": 1.1844,
      "step": 783
    },
    {
      "epoch": 2.6906906906906904,
      "grad_norm": 4.737374305725098,
      "learning_rate": 2.7059657640110202e-06,
      "loss": 1.2336,
      "step": 784
    },
    {
      "epoch": 2.694122694122694,
      "grad_norm": 2.824472427368164,
      "learning_rate": 2.646038284521413e-06,
      "loss": 1.2472,
      "step": 785
    },
    {
      "epoch": 2.697554697554698,
      "grad_norm": 4.501304626464844,
      "learning_rate": 2.5867638075242453e-06,
      "loss": 1.2955,
      "step": 786
    },
    {
      "epoch": 2.700986700986701,
      "grad_norm": 4.873347282409668,
      "learning_rate": 2.5281431504037556e-06,
      "loss": 1.223,
      "step": 787
    },
    {
      "epoch": 2.7044187044187042,
      "grad_norm": 2.677095651626587,
      "learning_rate": 2.470177121528089e-06,
      "loss": 1.1641,
      "step": 788
    },
    {
      "epoch": 2.707850707850708,
      "grad_norm": 4.811108112335205,
      "learning_rate": 2.4128665202382326e-06,
      "loss": 1.3602,
      "step": 789
    },
    {
      "epoch": 2.711282711282711,
      "grad_norm": 3.600935220718384,
      "learning_rate": 2.356212136836894e-06,
      "loss": 1.4152,
      "step": 790
    },
    {
      "epoch": 2.714714714714715,
      "grad_norm": 3.014711380004883,
      "learning_rate": 2.300214752577712e-06,
      "loss": 1.1803,
      "step": 791
    },
    {
      "epoch": 2.718146718146718,
      "grad_norm": 3.0696933269500732,
      "learning_rate": 2.2448751396543787e-06,
      "loss": 1.295,
      "step": 792
    },
    {
      "epoch": 2.7215787215787217,
      "grad_norm": 2.7728734016418457,
      "learning_rate": 2.1901940611900706e-06,
      "loss": 1.2718,
      "step": 793
    },
    {
      "epoch": 2.725010725010725,
      "grad_norm": 3.3822007179260254,
      "learning_rate": 2.1361722712268772e-06,
      "loss": 1.3446,
      "step": 794
    },
    {
      "epoch": 2.7284427284427286,
      "grad_norm": 2.466613531112671,
      "learning_rate": 2.0828105147154273e-06,
      "loss": 1.2006,
      "step": 795
    },
    {
      "epoch": 2.731874731874732,
      "grad_norm": 2.8500888347625732,
      "learning_rate": 2.0301095275046146e-06,
      "loss": 1.3212,
      "step": 796
    },
    {
      "epoch": 2.735306735306735,
      "grad_norm": 3.3068315982818604,
      "learning_rate": 1.9780700363314253e-06,
      "loss": 1.4559,
      "step": 797
    },
    {
      "epoch": 2.7387387387387387,
      "grad_norm": 3.623490810394287,
      "learning_rate": 1.926692758810955e-06,
      "loss": 1.4994,
      "step": 798
    },
    {
      "epoch": 2.7421707421707424,
      "grad_norm": 3.230107307434082,
      "learning_rate": 1.8759784034264928e-06,
      "loss": 1.2245,
      "step": 799
    },
    {
      "epoch": 2.7456027456027456,
      "grad_norm": 3.3349721431732178,
      "learning_rate": 1.825927669519728e-06,
      "loss": 1.3577,
      "step": 800
    },
    {
      "epoch": 2.749034749034749,
      "grad_norm": 3.2465734481811523,
      "learning_rate": 1.7765412472811771e-06,
      "loss": 1.2676,
      "step": 801
    },
    {
      "epoch": 2.7524667524667525,
      "grad_norm": 5.96055793762207,
      "learning_rate": 1.7278198177405613e-06,
      "loss": 1.0722,
      "step": 802
    },
    {
      "epoch": 2.7558987558987558,
      "grad_norm": 3.664353132247925,
      "learning_rate": 1.679764052757532e-06,
      "loss": 1.2865,
      "step": 803
    },
    {
      "epoch": 2.7593307593307594,
      "grad_norm": 2.6456642150878906,
      "learning_rate": 1.6323746150122997e-06,
      "loss": 1.263,
      "step": 804
    },
    {
      "epoch": 2.7627627627627627,
      "grad_norm": 4.099923610687256,
      "learning_rate": 1.5856521579965867e-06,
      "loss": 1.1137,
      "step": 805
    },
    {
      "epoch": 2.7661947661947663,
      "grad_norm": 3.102048635482788,
      "learning_rate": 1.539597326004527e-06,
      "loss": 1.2785,
      "step": 806
    },
    {
      "epoch": 2.7696267696267696,
      "grad_norm": 3.9695444107055664,
      "learning_rate": 1.4942107541238704e-06,
      "loss": 1.2638,
      "step": 807
    },
    {
      "epoch": 2.7730587730587732,
      "grad_norm": 3.309433937072754,
      "learning_rate": 1.449493068227159e-06,
      "loss": 1.4894,
      "step": 808
    },
    {
      "epoch": 2.7764907764907765,
      "grad_norm": 3.5339407920837402,
      "learning_rate": 1.4054448849631085e-06,
      "loss": 1.364,
      "step": 809
    },
    {
      "epoch": 2.7799227799227797,
      "grad_norm": 3.0705151557922363,
      "learning_rate": 1.3620668117481472e-06,
      "loss": 1.1483,
      "step": 810
    },
    {
      "epoch": 2.7833547833547834,
      "grad_norm": 3.194613218307495,
      "learning_rate": 1.3193594467579728e-06,
      "loss": 1.33,
      "step": 811
    },
    {
      "epoch": 2.786786786786787,
      "grad_norm": 3.251204252243042,
      "learning_rate": 1.2773233789193818e-06,
      "loss": 1.439,
      "step": 812
    },
    {
      "epoch": 2.7902187902187903,
      "grad_norm": 4.036497592926025,
      "learning_rate": 1.2359591879020526e-06,
      "loss": 1.3824,
      "step": 813
    },
    {
      "epoch": 2.7936507936507935,
      "grad_norm": 3.26621150970459,
      "learning_rate": 1.1952674441106482e-06,
      "loss": 1.3796,
      "step": 814
    },
    {
      "epoch": 2.797082797082797,
      "grad_norm": 3.6302595138549805,
      "learning_rate": 1.1552487086768872e-06,
      "loss": 1.3436,
      "step": 815
    },
    {
      "epoch": 2.8005148005148004,
      "grad_norm": 4.530152320861816,
      "learning_rate": 1.1159035334518343e-06,
      "loss": 1.3127,
      "step": 816
    },
    {
      "epoch": 2.803946803946804,
      "grad_norm": 4.543561935424805,
      "learning_rate": 1.0772324609982788e-06,
      "loss": 1.2526,
      "step": 817
    },
    {
      "epoch": 2.8073788073788073,
      "grad_norm": 3.667804002761841,
      "learning_rate": 1.03923602458324e-06,
      "loss": 1.0849,
      "step": 818
    },
    {
      "epoch": 2.810810810810811,
      "grad_norm": 3.2232139110565186,
      "learning_rate": 1.0019147481706625e-06,
      "loss": 1.3299,
      "step": 819
    },
    {
      "epoch": 2.814242814242814,
      "grad_norm": 3.08416748046875,
      "learning_rate": 9.652691464141273e-07,
      "loss": 1.4974,
      "step": 820
    },
    {
      "epoch": 2.817674817674818,
      "grad_norm": 4.025609016418457,
      "learning_rate": 9.292997246497958e-07,
      "loss": 1.513,
      "step": 821
    },
    {
      "epoch": 2.821106821106821,
      "grad_norm": 4.267979621887207,
      "learning_rate": 8.94006978889439e-07,
      "loss": 1.432,
      "step": 822
    },
    {
      "epoch": 2.8245388245388243,
      "grad_norm": 3.4838855266571045,
      "learning_rate": 8.593913958135691e-07,
      "loss": 1.345,
      "step": 823
    },
    {
      "epoch": 2.827970827970828,
      "grad_norm": 3.1995439529418945,
      "learning_rate": 8.25453452764785e-07,
      "loss": 1.3791,
      "step": 824
    },
    {
      "epoch": 2.8314028314028317,
      "grad_norm": 3.760732889175415,
      "learning_rate": 7.921936177411049e-07,
      "loss": 1.4453,
      "step": 825
    },
    {
      "epoch": 2.834834834834835,
      "grad_norm": 2.7276768684387207,
      "learning_rate": 7.596123493895991e-07,
      "loss": 1.2556,
      "step": 826
    },
    {
      "epoch": 2.838266838266838,
      "grad_norm": 2.9255459308624268,
      "learning_rate": 7.277100970000062e-07,
      "loss": 1.3407,
      "step": 827
    },
    {
      "epoch": 2.841698841698842,
      "grad_norm": 5.771472930908203,
      "learning_rate": 6.964873004985717e-07,
      "loss": 1.3197,
      "step": 828
    },
    {
      "epoch": 2.845130845130845,
      "grad_norm": 3.6182003021240234,
      "learning_rate": 6.659443904419637e-07,
      "loss": 1.326,
      "step": 829
    },
    {
      "epoch": 2.8485628485628487,
      "grad_norm": 2.9368889331817627,
      "learning_rate": 6.360817880113334e-07,
      "loss": 1.1357,
      "step": 830
    },
    {
      "epoch": 2.851994851994852,
      "grad_norm": 3.724241256713867,
      "learning_rate": 6.06899905006525e-07,
      "loss": 1.377,
      "step": 831
    },
    {
      "epoch": 2.8554268554268556,
      "grad_norm": 3.823715925216675,
      "learning_rate": 5.783991438403801e-07,
      "loss": 1.276,
      "step": 832
    },
    {
      "epoch": 2.858858858858859,
      "grad_norm": 2.9134669303894043,
      "learning_rate": 5.505798975331933e-07,
      "loss": 1.2894,
      "step": 833
    },
    {
      "epoch": 2.8622908622908625,
      "grad_norm": 3.522534132003784,
      "learning_rate": 5.234425497072981e-07,
      "loss": 1.2788,
      "step": 834
    },
    {
      "epoch": 2.8657228657228657,
      "grad_norm": 2.7879645824432373,
      "learning_rate": 4.969874745817671e-07,
      "loss": 1.307,
      "step": 835
    },
    {
      "epoch": 2.869154869154869,
      "grad_norm": 4.184776306152344,
      "learning_rate": 4.712150369672652e-07,
      "loss": 1.1761,
      "step": 836
    },
    {
      "epoch": 2.8725868725868726,
      "grad_norm": 2.7760372161865234,
      "learning_rate": 4.461255922609986e-07,
      "loss": 1.3293,
      "step": 837
    },
    {
      "epoch": 2.8760188760188763,
      "grad_norm": 3.5623717308044434,
      "learning_rate": 4.2171948644182947e-07,
      "loss": 1.4707,
      "step": 838
    },
    {
      "epoch": 2.8794508794508795,
      "grad_norm": 3.0642480850219727,
      "learning_rate": 3.979970560655133e-07,
      "loss": 1.2685,
      "step": 839
    },
    {
      "epoch": 2.8828828828828827,
      "grad_norm": 2.954220771789551,
      "learning_rate": 3.749586282600359e-07,
      "loss": 1.2099,
      "step": 840
    },
    {
      "epoch": 2.8863148863148864,
      "grad_norm": 3.7304904460906982,
      "learning_rate": 3.5260452072110594e-07,
      "loss": 1.326,
      "step": 841
    },
    {
      "epoch": 2.8897468897468896,
      "grad_norm": 3.531951665878296,
      "learning_rate": 3.3093504170779723e-07,
      "loss": 0.9974,
      "step": 842
    },
    {
      "epoch": 2.8931788931788933,
      "grad_norm": 4.878880023956299,
      "learning_rate": 3.0995049003826325e-07,
      "loss": 1.454,
      "step": 843
    },
    {
      "epoch": 2.8966108966108965,
      "grad_norm": 3.02441668510437,
      "learning_rate": 2.896511550856462e-07,
      "loss": 1.1144,
      "step": 844
    },
    {
      "epoch": 2.9000429000428998,
      "grad_norm": 2.9913268089294434,
      "learning_rate": 2.700373167740744e-07,
      "loss": 1.2097,
      "step": 845
    },
    {
      "epoch": 2.9034749034749034,
      "grad_norm": 3.811717987060547,
      "learning_rate": 2.511092455747932e-07,
      "loss": 1.1527,
      "step": 846
    },
    {
      "epoch": 2.906906906906907,
      "grad_norm": 3.3261005878448486,
      "learning_rate": 2.3286720250246252e-07,
      "loss": 1.3899,
      "step": 847
    },
    {
      "epoch": 2.9103389103389103,
      "grad_norm": 2.7068874835968018,
      "learning_rate": 2.153114391115152e-07,
      "loss": 1.2403,
      "step": 848
    },
    {
      "epoch": 2.9137709137709136,
      "grad_norm": 2.892756700515747,
      "learning_rate": 1.984421974927375e-07,
      "loss": 1.4297,
      "step": 849
    },
    {
      "epoch": 2.9172029172029172,
      "grad_norm": 2.8190383911132812,
      "learning_rate": 1.8225971026987753e-07,
      "loss": 1.2455,
      "step": 850
    },
    {
      "epoch": 2.9206349206349205,
      "grad_norm": 3.116333246231079,
      "learning_rate": 1.6676420059649755e-07,
      "loss": 1.3036,
      "step": 851
    },
    {
      "epoch": 2.924066924066924,
      "grad_norm": 2.756434917449951,
      "learning_rate": 1.5195588215283773e-07,
      "loss": 1.2246,
      "step": 852
    },
    {
      "epoch": 2.9274989274989274,
      "grad_norm": 3.1864383220672607,
      "learning_rate": 1.3783495914291844e-07,
      "loss": 1.2823,
      "step": 853
    },
    {
      "epoch": 2.930930930930931,
      "grad_norm": 2.7921688556671143,
      "learning_rate": 1.244016262916814e-07,
      "loss": 1.15,
      "step": 854
    },
    {
      "epoch": 2.9343629343629343,
      "grad_norm": 3.142005205154419,
      "learning_rate": 1.1165606884234181e-07,
      "loss": 1.1334,
      "step": 855
    },
    {
      "epoch": 2.937794937794938,
      "grad_norm": 3.192143678665161,
      "learning_rate": 9.959846255381266e-08,
      "loss": 1.3889,
      "step": 856
    },
    {
      "epoch": 2.941226941226941,
      "grad_norm": 3.0913584232330322,
      "learning_rate": 8.822897369827333e-08,
      "loss": 1.242,
      "step": 857
    },
    {
      "epoch": 2.9446589446589444,
      "grad_norm": 2.7667527198791504,
      "learning_rate": 7.754775905891576e-08,
      "loss": 1.1711,
      "step": 858
    },
    {
      "epoch": 2.948090948090948,
      "grad_norm": 4.110960960388184,
      "learning_rate": 6.755496592773525e-08,
      "loss": 1.3835,
      "step": 859
    },
    {
      "epoch": 2.9515229515229517,
      "grad_norm": 2.846388816833496,
      "learning_rate": 5.825073210352083e-08,
      "loss": 1.2665,
      "step": 860
    },
    {
      "epoch": 2.954954954954955,
      "grad_norm": 2.8710858821868896,
      "learning_rate": 4.963518588996796e-08,
      "loss": 1.279,
      "step": 861
    },
    {
      "epoch": 2.958386958386958,
      "grad_norm": 3.4410057067871094,
      "learning_rate": 4.170844609387992e-08,
      "loss": 1.1493,
      "step": 862
    },
    {
      "epoch": 2.961818961818962,
      "grad_norm": 2.6700844764709473,
      "learning_rate": 3.4470622023557995e-08,
      "loss": 1.1581,
      "step": 863
    },
    {
      "epoch": 2.965250965250965,
      "grad_norm": 2.315128803253174,
      "learning_rate": 2.792181348726941e-08,
      "loss": 1.0219,
      "step": 864
    },
    {
      "epoch": 2.9686829686829688,
      "grad_norm": 2.6725008487701416,
      "learning_rate": 2.2062110791892798e-08,
      "loss": 1.2894,
      "step": 865
    },
    {
      "epoch": 2.972114972114972,
      "grad_norm": 3.2444818019866943,
      "learning_rate": 1.6891594741663686e-08,
      "loss": 1.3299,
      "step": 866
    },
    {
      "epoch": 2.9755469755469757,
      "grad_norm": 2.912459135055542,
      "learning_rate": 1.2410336637047605e-08,
      "loss": 1.3672,
      "step": 867
    },
    {
      "epoch": 2.978978978978979,
      "grad_norm": 2.785473585128784,
      "learning_rate": 8.618398273779749e-09,
      "loss": 1.3057,
      "step": 868
    },
    {
      "epoch": 2.9824109824109826,
      "grad_norm": 3.459935188293457,
      "learning_rate": 5.515831941993455e-09,
      "loss": 1.4323,
      "step": 869
    },
    {
      "epoch": 2.985842985842986,
      "grad_norm": 3.0918006896972656,
      "learning_rate": 3.102680425520754e-09,
      "loss": 1.1898,
      "step": 870
    },
    {
      "epoch": 2.989274989274989,
      "grad_norm": 3.6506688594818115,
      "learning_rate": 1.3789770012762049e-09,
      "loss": 1.0177,
      "step": 871
    },
    {
      "epoch": 2.9927069927069927,
      "grad_norm": 2.7286884784698486,
      "learning_rate": 3.447454388127991e-10,
      "loss": 1.1826,
      "step": 872
    },
    {
      "epoch": 2.9961389961389964,
      "grad_norm": 3.356982707977295,
      "learning_rate": 0.0,
      "loss": 1.0512,
      "step": 873
    },
    {
      "epoch": 2.9961389961389964,
      "step": 873,
      "total_flos": 1.1144584904199635e+19,
      "train_loss": 1.571273456870622,
      "train_runtime": 47894.2658,
      "train_samples_per_second": 2.336,
      "train_steps_per_second": 0.018
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 873,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1144584904199635e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
