{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9460647223332,
  "eval_steps": 500,
  "global_step": 920,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03196164602477027,
      "grad_norm": 1.3855066299438477,
      "learning_rate": 2.884615384615385e-06,
      "loss": 1.3809,
      "step": 10
    },
    {
      "epoch": 0.06392329204954054,
      "grad_norm": 1.6167359352111816,
      "learning_rate": 6.08974358974359e-06,
      "loss": 1.3462,
      "step": 20
    },
    {
      "epoch": 0.09588493807431082,
      "grad_norm": 1.0972557067871094,
      "learning_rate": 9.294871794871795e-06,
      "loss": 1.2911,
      "step": 30
    },
    {
      "epoch": 0.1278465840990811,
      "grad_norm": 1.7273517847061157,
      "learning_rate": 1.25e-05,
      "loss": 1.3023,
      "step": 40
    },
    {
      "epoch": 0.15980823012385137,
      "grad_norm": 1.50972318649292,
      "learning_rate": 1.5705128205128205e-05,
      "loss": 1.2844,
      "step": 50
    },
    {
      "epoch": 0.19176987614862165,
      "grad_norm": 1.1636816263198853,
      "learning_rate": 1.891025641025641e-05,
      "loss": 1.2171,
      "step": 60
    },
    {
      "epoch": 0.22373152217339193,
      "grad_norm": 0.901265025138855,
      "learning_rate": 2.2115384615384616e-05,
      "loss": 1.0927,
      "step": 70
    },
    {
      "epoch": 0.2556931681981622,
      "grad_norm": 0.7921156287193298,
      "learning_rate": 2.5320512820512822e-05,
      "loss": 1.0002,
      "step": 80
    },
    {
      "epoch": 0.2876548142229325,
      "grad_norm": 0.60505610704422,
      "learning_rate": 2.8525641025641025e-05,
      "loss": 0.949,
      "step": 90
    },
    {
      "epoch": 0.31961646024770274,
      "grad_norm": 0.601495623588562,
      "learning_rate": 3.1730769230769234e-05,
      "loss": 0.9258,
      "step": 100
    },
    {
      "epoch": 0.35157810627247305,
      "grad_norm": 0.3815161883831024,
      "learning_rate": 3.4935897435897436e-05,
      "loss": 0.8656,
      "step": 110
    },
    {
      "epoch": 0.3835397522972433,
      "grad_norm": 0.45404112339019775,
      "learning_rate": 3.814102564102564e-05,
      "loss": 0.8682,
      "step": 120
    },
    {
      "epoch": 0.4155013983220136,
      "grad_norm": 0.3541334271430969,
      "learning_rate": 4.134615384615385e-05,
      "loss": 0.8363,
      "step": 130
    },
    {
      "epoch": 0.44746304434678386,
      "grad_norm": 0.3834032714366913,
      "learning_rate": 4.455128205128206e-05,
      "loss": 0.8281,
      "step": 140
    },
    {
      "epoch": 0.4794246903715541,
      "grad_norm": 0.521278977394104,
      "learning_rate": 4.775641025641026e-05,
      "loss": 0.8156,
      "step": 150
    },
    {
      "epoch": 0.5113863363963244,
      "grad_norm": 0.5595067739486694,
      "learning_rate": 5.096153846153846e-05,
      "loss": 0.8033,
      "step": 160
    },
    {
      "epoch": 0.5433479824210947,
      "grad_norm": 0.5253210067749023,
      "learning_rate": 5.4166666666666664e-05,
      "loss": 0.8193,
      "step": 170
    },
    {
      "epoch": 0.575309628445865,
      "grad_norm": 0.5089788436889648,
      "learning_rate": 5.737179487179487e-05,
      "loss": 0.7859,
      "step": 180
    },
    {
      "epoch": 0.6072712744706352,
      "grad_norm": 0.43394535779953003,
      "learning_rate": 6.0576923076923076e-05,
      "loss": 0.7891,
      "step": 190
    },
    {
      "epoch": 0.6392329204954055,
      "grad_norm": 0.48105573654174805,
      "learning_rate": 6.378205128205128e-05,
      "loss": 0.8022,
      "step": 200
    },
    {
      "epoch": 0.6711945665201758,
      "grad_norm": 0.45582297444343567,
      "learning_rate": 6.698717948717949e-05,
      "loss": 0.7854,
      "step": 210
    },
    {
      "epoch": 0.7031562125449461,
      "grad_norm": 0.5584686398506165,
      "learning_rate": 7.019230769230769e-05,
      "loss": 0.773,
      "step": 220
    },
    {
      "epoch": 0.7351178585697163,
      "grad_norm": 0.4524954557418823,
      "learning_rate": 7.339743589743589e-05,
      "loss": 0.7763,
      "step": 230
    },
    {
      "epoch": 0.7670795045944866,
      "grad_norm": 0.5703016519546509,
      "learning_rate": 7.660256410256411e-05,
      "loss": 0.7518,
      "step": 240
    },
    {
      "epoch": 0.7990411506192568,
      "grad_norm": 0.4602644145488739,
      "learning_rate": 7.980769230769231e-05,
      "loss": 0.7908,
      "step": 250
    },
    {
      "epoch": 0.8310027966440272,
      "grad_norm": 0.41008132696151733,
      "learning_rate": 8.301282051282053e-05,
      "loss": 0.7835,
      "step": 260
    },
    {
      "epoch": 0.8629644426687975,
      "grad_norm": 0.4589976668357849,
      "learning_rate": 8.621794871794873e-05,
      "loss": 0.7757,
      "step": 270
    },
    {
      "epoch": 0.8949260886935677,
      "grad_norm": 0.5390843749046326,
      "learning_rate": 8.942307692307693e-05,
      "loss": 0.7882,
      "step": 280
    },
    {
      "epoch": 0.926887734718338,
      "grad_norm": 0.43230140209198,
      "learning_rate": 9.262820512820513e-05,
      "loss": 0.7498,
      "step": 290
    },
    {
      "epoch": 0.9588493807431082,
      "grad_norm": 0.6846857666969299,
      "learning_rate": 9.583333333333334e-05,
      "loss": 0.7651,
      "step": 300
    },
    {
      "epoch": 0.9908110267678786,
      "grad_norm": 0.3025312125682831,
      "learning_rate": 9.903846153846155e-05,
      "loss": 0.7784,
      "step": 310
    },
    {
      "epoch": 1.0255693168198161,
      "grad_norm": 0.3889737129211426,
      "learning_rate": 9.999846665667335e-05,
      "loss": 0.8472,
      "step": 320
    },
    {
      "epoch": 1.0575309628445866,
      "grad_norm": 0.5407306551933289,
      "learning_rate": 9.99909566300429e-05,
      "loss": 0.776,
      "step": 330
    },
    {
      "epoch": 1.0894926088693568,
      "grad_norm": 0.5456501245498657,
      "learning_rate": 9.997718922447667e-05,
      "loss": 0.7717,
      "step": 340
    },
    {
      "epoch": 1.121454254894127,
      "grad_norm": 0.438216894865036,
      "learning_rate": 9.995716616324383e-05,
      "loss": 0.7895,
      "step": 350
    },
    {
      "epoch": 1.1534159009188973,
      "grad_norm": 0.36179059743881226,
      "learning_rate": 9.993088995263531e-05,
      "loss": 0.7351,
      "step": 360
    },
    {
      "epoch": 1.1853775469436676,
      "grad_norm": 0.4258705973625183,
      "learning_rate": 9.989836388165012e-05,
      "loss": 0.7864,
      "step": 370
    },
    {
      "epoch": 1.2173391929684378,
      "grad_norm": 0.3823302984237671,
      "learning_rate": 9.985959202158365e-05,
      "loss": 0.7386,
      "step": 380
    },
    {
      "epoch": 1.249300838993208,
      "grad_norm": 0.45831844210624695,
      "learning_rate": 9.981457922551807e-05,
      "loss": 0.7817,
      "step": 390
    },
    {
      "epoch": 1.2812624850179783,
      "grad_norm": 0.44038277864456177,
      "learning_rate": 9.976333112771487e-05,
      "loss": 0.7798,
      "step": 400
    },
    {
      "epoch": 1.3132241310427486,
      "grad_norm": 0.34715718030929565,
      "learning_rate": 9.970585414290965e-05,
      "loss": 0.7306,
      "step": 410
    },
    {
      "epoch": 1.345185777067519,
      "grad_norm": 0.365780234336853,
      "learning_rate": 9.964215546550908e-05,
      "loss": 0.766,
      "step": 420
    },
    {
      "epoch": 1.3771474230922893,
      "grad_norm": 0.37662550806999207,
      "learning_rate": 9.957224306869053e-05,
      "loss": 0.7548,
      "step": 430
    },
    {
      "epoch": 1.4091090691170596,
      "grad_norm": 0.4111752212047577,
      "learning_rate": 9.949612570340391e-05,
      "loss": 0.7593,
      "step": 440
    },
    {
      "epoch": 1.4410707151418298,
      "grad_norm": 0.405527800321579,
      "learning_rate": 9.941381289727646e-05,
      "loss": 0.7678,
      "step": 450
    },
    {
      "epoch": 1.4730323611666,
      "grad_norm": 0.3408769369125366,
      "learning_rate": 9.932531495342004e-05,
      "loss": 0.7723,
      "step": 460
    },
    {
      "epoch": 1.5049940071913703,
      "grad_norm": 0.4166918992996216,
      "learning_rate": 9.923064294914156e-05,
      "loss": 0.7664,
      "step": 470
    },
    {
      "epoch": 1.5369556532161406,
      "grad_norm": 0.39542579650878906,
      "learning_rate": 9.912980873455643e-05,
      "loss": 0.7361,
      "step": 480
    },
    {
      "epoch": 1.568917299240911,
      "grad_norm": 0.32997310161590576,
      "learning_rate": 9.902282493110529e-05,
      "loss": 0.7539,
      "step": 490
    },
    {
      "epoch": 1.600878945265681,
      "grad_norm": 0.36467525362968445,
      "learning_rate": 9.89097049299741e-05,
      "loss": 0.7771,
      "step": 500
    },
    {
      "epoch": 1.6328405912904516,
      "grad_norm": 0.42533642053604126,
      "learning_rate": 9.879046289041809e-05,
      "loss": 0.7628,
      "step": 510
    },
    {
      "epoch": 1.6648022373152216,
      "grad_norm": 0.46707338094711304,
      "learning_rate": 9.86651137379893e-05,
      "loss": 0.7421,
      "step": 520
    },
    {
      "epoch": 1.696763883339992,
      "grad_norm": 0.47216886281967163,
      "learning_rate": 9.853367316266847e-05,
      "loss": 0.7686,
      "step": 530
    },
    {
      "epoch": 1.7287255293647623,
      "grad_norm": 0.4544522762298584,
      "learning_rate": 9.839615761690107e-05,
      "loss": 0.771,
      "step": 540
    },
    {
      "epoch": 1.7606871753895326,
      "grad_norm": 0.47851043939590454,
      "learning_rate": 9.82525843135379e-05,
      "loss": 0.7462,
      "step": 550
    },
    {
      "epoch": 1.7926488214143028,
      "grad_norm": 0.4728396236896515,
      "learning_rate": 9.810297122368067e-05,
      "loss": 0.7637,
      "step": 560
    },
    {
      "epoch": 1.824610467439073,
      "grad_norm": 0.3352217376232147,
      "learning_rate": 9.794733707443241e-05,
      "loss": 0.7509,
      "step": 570
    },
    {
      "epoch": 1.8565721134638435,
      "grad_norm": 0.3662264943122864,
      "learning_rate": 9.778570134655352e-05,
      "loss": 0.7913,
      "step": 580
    },
    {
      "epoch": 1.8885337594886136,
      "grad_norm": 0.3283475339412689,
      "learning_rate": 9.761808427202328e-05,
      "loss": 0.7654,
      "step": 590
    },
    {
      "epoch": 1.920495405513384,
      "grad_norm": 0.4011043906211853,
      "learning_rate": 9.744450683150742e-05,
      "loss": 0.7083,
      "step": 600
    },
    {
      "epoch": 1.952457051538154,
      "grad_norm": 0.3212358057498932,
      "learning_rate": 9.726499075173201e-05,
      "loss": 0.7747,
      "step": 610
    },
    {
      "epoch": 1.9844186975629245,
      "grad_norm": 0.35136324167251587,
      "learning_rate": 9.707955850276389e-05,
      "loss": 0.7551,
      "step": 620
    },
    {
      "epoch": 2.019176987614862,
      "grad_norm": 0.40859776735305786,
      "learning_rate": 9.688823329519805e-05,
      "loss": 0.8376,
      "step": 630
    },
    {
      "epoch": 2.0511386336396322,
      "grad_norm": 0.36119282245635986,
      "learning_rate": 9.669103907725243e-05,
      "loss": 0.7647,
      "step": 640
    },
    {
      "epoch": 2.0831002796644027,
      "grad_norm": 0.37478360533714294,
      "learning_rate": 9.648800053177027e-05,
      "loss": 0.7762,
      "step": 650
    },
    {
      "epoch": 2.115061925689173,
      "grad_norm": 0.3641397953033447,
      "learning_rate": 9.627914307313052e-05,
      "loss": 0.7425,
      "step": 660
    },
    {
      "epoch": 2.147023571713943,
      "grad_norm": 0.4734686315059662,
      "learning_rate": 9.606449284406685e-05,
      "loss": 0.7398,
      "step": 670
    },
    {
      "epoch": 2.1789852177387137,
      "grad_norm": 0.3648460805416107,
      "learning_rate": 9.584407671239516e-05,
      "loss": 0.7564,
      "step": 680
    },
    {
      "epoch": 2.2109468637634837,
      "grad_norm": 0.5508327484130859,
      "learning_rate": 9.561792226765072e-05,
      "loss": 0.7463,
      "step": 690
    },
    {
      "epoch": 2.242908509788254,
      "grad_norm": 0.412383496761322,
      "learning_rate": 9.538605781763463e-05,
      "loss": 0.7699,
      "step": 700
    },
    {
      "epoch": 2.274870155813024,
      "grad_norm": 0.5046977996826172,
      "learning_rate": 9.514851238487066e-05,
      "loss": 0.7412,
      "step": 710
    },
    {
      "epoch": 2.3068318018377947,
      "grad_norm": 0.4059431552886963,
      "learning_rate": 9.490531570297239e-05,
      "loss": 0.7717,
      "step": 720
    },
    {
      "epoch": 2.3387934478625647,
      "grad_norm": 0.39455029368400574,
      "learning_rate": 9.465649821292149e-05,
      "loss": 0.7631,
      "step": 730
    },
    {
      "epoch": 2.370755093887335,
      "grad_norm": 0.3986467719078064,
      "learning_rate": 9.440209105925739e-05,
      "loss": 0.7738,
      "step": 740
    },
    {
      "epoch": 2.4027167399121057,
      "grad_norm": 0.49419450759887695,
      "learning_rate": 9.414212608617898e-05,
      "loss": 0.7588,
      "step": 750
    },
    {
      "epoch": 2.4346783859368757,
      "grad_norm": 0.428225576877594,
      "learning_rate": 9.387663583355861e-05,
      "loss": 0.7421,
      "step": 760
    },
    {
      "epoch": 2.466640031961646,
      "grad_norm": 0.4350247383117676,
      "learning_rate": 9.360565353286903e-05,
      "loss": 0.7413,
      "step": 770
    },
    {
      "epoch": 2.498601677986416,
      "grad_norm": 0.3852648437023163,
      "learning_rate": 9.332921310302389e-05,
      "loss": 0.7589,
      "step": 780
    },
    {
      "epoch": 2.5305633240111867,
      "grad_norm": 0.3545990288257599,
      "learning_rate": 9.3047349146132e-05,
      "loss": 0.7576,
      "step": 790
    },
    {
      "epoch": 2.5625249700359567,
      "grad_norm": 0.3656037151813507,
      "learning_rate": 9.276009694316632e-05,
      "loss": 0.7306,
      "step": 800
    },
    {
      "epoch": 2.594486616060727,
      "grad_norm": 0.3778512477874756,
      "learning_rate": 9.246749244954767e-05,
      "loss": 0.7548,
      "step": 810
    },
    {
      "epoch": 2.626448262085497,
      "grad_norm": 0.4106166958808899,
      "learning_rate": 9.21695722906443e-05,
      "loss": 0.7567,
      "step": 820
    },
    {
      "epoch": 2.6584099081102677,
      "grad_norm": 0.3946329355239868,
      "learning_rate": 9.186637375718738e-05,
      "loss": 0.7552,
      "step": 830
    },
    {
      "epoch": 2.690371554135038,
      "grad_norm": 0.38399869203567505,
      "learning_rate": 9.155793480060348e-05,
      "loss": 0.7256,
      "step": 840
    },
    {
      "epoch": 2.722333200159808,
      "grad_norm": 0.4251652657985687,
      "learning_rate": 9.124429402826397e-05,
      "loss": 0.7539,
      "step": 850
    },
    {
      "epoch": 2.7542948461845786,
      "grad_norm": 0.3407718539237976,
      "learning_rate": 9.092549069865268e-05,
      "loss": 0.7256,
      "step": 860
    },
    {
      "epoch": 2.7862564922093487,
      "grad_norm": 0.3791249096393585,
      "learning_rate": 9.060156471645187e-05,
      "loss": 0.6958,
      "step": 870
    },
    {
      "epoch": 2.818218138234119,
      "grad_norm": 0.326463520526886,
      "learning_rate": 9.02725566275473e-05,
      "loss": 0.7187,
      "step": 880
    },
    {
      "epoch": 2.850179784258889,
      "grad_norm": 0.40324896574020386,
      "learning_rate": 8.993850761395318e-05,
      "loss": 0.7401,
      "step": 890
    },
    {
      "epoch": 2.8821414302836597,
      "grad_norm": 0.4228034019470215,
      "learning_rate": 8.959945948865734e-05,
      "loss": 0.752,
      "step": 900
    },
    {
      "epoch": 2.9141030763084297,
      "grad_norm": 0.5019176602363586,
      "learning_rate": 8.92554546903875e-05,
      "loss": 0.7148,
      "step": 910
    },
    {
      "epoch": 2.9460647223332,
      "grad_norm": 0.4311498701572418,
      "learning_rate": 8.890653627829924e-05,
      "loss": 0.7049,
      "step": 920
    }
  ],
  "logging_steps": 10,
  "max_steps": 3120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.136218436041441e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
