{
    "name": "c4_original-d=512_l=8_h=4-32.0",
    "dataset_name": "c4_original",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf1",
    "hyperparameters": {
        "model": "training/open_lm_configs/d=512_l=8_h=4.json",
        "tokens": 50504990720,
        "warmup": 400,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 512,
        "acc": 8,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 78914048,
        "params_no_embed": 53092864,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp"
        ],
        "chinchilla_multiplier": 32.0
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--train-num-samples",
        "10100998144",
        "--workers",
        "2",
        "--dataset-manifest",
        "<scrub>/original_c4/manifest.jsonl",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "512",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "400",
        "--model",
        "training/open_lm_configs/d=512_l=8_h=4.json",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--data-key",
        "txt",
        "--accum-freq",
        "8",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--name",
        "c4_original-d=512_l=8_h=4-32.0",
        "--logs",
        "/admin/<scrub>/scrub_logs",
        "--val-data",
        "/admin/<scrub>/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/admin/<scrub>/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-batch-size",
        "8",
        "--val-data-key",
        "json",
        "txt",
        "--val-num-samples",
        "245760",
        "--fsdp",
        "--fsdp-amp",
        "--report-to",
        "wandb",
        "--wandb-project-name",
        "scrub",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/scrub_experiments_v3"
    ],
    "results": [
        {
            "loss": 4.304671643177668,
            "data_time": 0.03193015232682228,
            "batch_time": 0.3288675881922245,
            "samples_per_second": 1736294.3731379933,
            "samples_per_second_per_gpu": 217036.79664224916,
            "loss_sequences_lower_95": 4.124465624491374,
            "loss_sequences_upper_95": 4.487782936096192,
            "loss_tokens_lower_95": 4.28895617167155,
            "loss_tokens_upper_95": 4.320034980773926,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.304514085873326,
            "data_time": 0.0014795106800713797,
            "batch_time": 0.015359816336815204,
            "samples_per_second": 2241034.995910177,
            "samples_per_second_per_gpu": 280129.3744887721,
            "loss_sequences_lower_95": 3.301653061727197,
            "loss_sequences_upper_95": 3.307291547179967,
            "loss_tokens_lower_95": 3.2938505625000003,
            "loss_tokens_upper_95": 3.3152458333333334,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.7063526815297654,
            "data_time": 0.009898286819458008,
            "batch_time": 0.023521021842956544,
            "samples_per_second": 2207102.30303833,
            "samples_per_second_per_gpu": 275887.7878797912,
            "loss_sequences_lower_95": 3.6708981136399874,
            "loss_sequences_upper_95": 3.7487848647759887,
            "loss_tokens_lower_95": 3.6911856354166668,
            "loss_tokens_upper_95": 3.72179928125,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.409325382389973,
            "data_time": 0.0016265868356353358,
            "batch_time": 0.015089805581067739,
            "samples_per_second": 2318159.3141884306,
            "samples_per_second_per_gpu": 289769.9142735538,
            "loss_sequences_lower_95": 3.382255305653995,
            "loss_sequences_upper_95": 3.4369399061694588,
            "loss_tokens_lower_95": 3.3977236770833334,
            "loss_tokens_upper_95": 3.4205919322916665,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.3565808012626324,
            "data_time": 0.01012171407144858,
            "batch_time": 0.023868126698224194,
            "samples_per_second": 2185089.472649993,
            "samples_per_second_per_gpu": 273136.1840812491,
            "loss_sequences_lower_95": 3.309510514206896,
            "loss_sequences_upper_95": 3.411514313303526,
            "loss_tokens_lower_95": 3.345596859375,
            "loss_tokens_upper_95": 3.3671875989583335,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.8668377138496983,
            "data_time": 0.003790406429249307,
            "batch_time": 0.0174312601270883,
            "samples_per_second": 2281619.7290228973,
            "samples_per_second_per_gpu": 285202.46612786216,
            "loss_sequences_lower_95": 3.8217126655708276,
            "loss_sequences_upper_95": 3.91379358847882,
            "loss_tokens_lower_95": 3.854285927083333,
            "loss_tokens_upper_95": 3.8792793229166667,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.8074933076391413,
            "data_time": 0.0016453866079619813,
            "batch_time": 0.015552004826593944,
            "samples_per_second": 2253684.2228990677,
            "samples_per_second_per_gpu": 281710.52786238346,
            "loss_sequences_lower_95": 3.7683463309151786,
            "loss_sequences_upper_95": 3.8463645368303574,
            "loss_tokens_lower_95": 3.7916989895833333,
            "loss_tokens_upper_95": 3.823524041666667,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.217007354966009,
            "data_time": 0.001703840277275746,
            "batch_time": 0.015103866544441916,
            "samples_per_second": 2328389.5603724574,
            "samples_per_second_per_gpu": 291048.6950465572,
            "loss_sequences_lower_95": 4.201773069371727,
            "loss_sequences_upper_95": 4.233419696907722,
            "loss_tokens_lower_95": 4.205062479166667,
            "loss_tokens_upper_95": 4.228916979166667,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.766179751089918,
            "data_time": 0.01190903451707628,
            "batch_time": 0.02554089209390065,
            "samples_per_second": 2217438.44456675,
            "samples_per_second_per_gpu": 277179.80557084375,
            "loss_sequences_lower_95": 3.7022784969670983,
            "loss_sequences_upper_95": 3.839416950504954,
            "loss_tokens_lower_95": 3.7544641354166663,
            "loss_tokens_upper_95": 3.7778613645833334,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.99283816691915,
            "data_time": 0.009895694442093372,
            "batch_time": 0.02389077376574278,
            "samples_per_second": 2169764.743932225,
            "samples_per_second_per_gpu": 271220.5929915281,
            "loss_sequences_lower_95": 4.929454118932189,
            "loss_sequences_upper_95": 5.070395247267169,
            "loss_tokens_lower_95": 4.97909325,
            "loss_tokens_upper_95": 5.00686059375,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.7232096566718718,
            "data_time": 0.00125999782600936,
            "batch_time": 0.014606385924908259,
            "samples_per_second": 2343633.087992217,
            "samples_per_second_per_gpu": 292954.1359990271,
            "loss_sequences_lower_95": 3.713659716482373,
            "loss_sequences_upper_95": 3.733097218088341,
            "loss_tokens_lower_95": 3.7115207708333333,
            "loss_tokens_upper_95": 3.73488275,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.5369171500454244,
            "data_time": 0.0025380598714607743,
            "batch_time": 0.01601677750866181,
            "samples_per_second": 2312739.2805708684,
            "samples_per_second_per_gpu": 289092.41007135855,
            "loss_sequences_lower_95": 3.519401034887807,
            "loss_sequences_upper_95": 3.5553068911716474,
            "loss_tokens_lower_95": 3.525122479166667,
            "loss_tokens_upper_95": 3.5487189270833333,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.273609857771992,
            "data_time": 0.009810688938547972,
            "batch_time": 0.023782111910492065,
            "samples_per_second": 2161384.6823623152,
            "samples_per_second_per_gpu": 270173.0852952894,
            "loss_sequences_lower_95": 4.210843359771172,
            "loss_sequences_upper_95": 4.346550548004088,
            "loss_tokens_lower_95": 4.259712041666667,
            "loss_tokens_upper_95": 4.287272072916666,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.375501871594586,
            "data_time": 0.010491736856589754,
            "batch_time": 0.02429530060148809,
            "samples_per_second": 2189119.4918347513,
            "samples_per_second_per_gpu": 273639.9364793439,
            "loss_sequences_lower_95": 3.300640458924707,
            "loss_sequences_upper_95": 3.4556226928462324,
            "loss_tokens_lower_95": 3.3633190729166667,
            "loss_tokens_upper_95": 3.387635484375,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.116435408592224,
            "data_time": 0.0813097528048924,
            "batch_time": 0.09860325711114067,
            "samples_per_second": 999238.953651186,
            "samples_per_second_per_gpu": 124904.86920639825,
            "loss_sequences_lower_95": 6.027278952165084,
            "loss_sequences_upper_95": 6.19984996102073,
            "loss_tokens_lower_95": 6.0883808829567645,
            "loss_tokens_upper_95": 6.1447676225142045,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.1228428595962745,
            "data_time": 0.014160085808147083,
            "batch_time": 0.028175596486438404,
            "samples_per_second": 2110829.8150493996,
            "samples_per_second_per_gpu": 263853.72688117495,
            "loss_sequences_lower_95": 4.00158200277879,
            "loss_sequences_upper_95": 4.249652339835209,
            "loss_tokens_lower_95": 4.108766520833334,
            "loss_tokens_upper_95": 4.13686621875,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.063055397023626,
            "data_time": 0.012815475463867188,
            "batch_time": 0.026780424018700916,
            "samples_per_second": 2162811.363575304,
            "samples_per_second_per_gpu": 270351.420446913,
            "loss_sequences_lower_95": 5.990919546967761,
            "loss_sequences_upper_95": 6.142645295880401,
            "loss_tokens_lower_95": 6.050159833333333,
            "loss_tokens_upper_95": 6.075936760416666,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.5486540559862485,
            "data_time": 0.037130206823349,
            "batch_time": 0.05165502429008484,
            "samples_per_second": 1913010.3993228804,
            "samples_per_second_per_gpu": 239126.29991536005,
            "loss_sequences_lower_95": 4.4416541928150615,
            "loss_sequences_upper_95": 4.700695713230821,
            "loss_tokens_lower_95": 4.533707077776799,
            "loss_tokens_upper_95": 4.563741577648726,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.2634322042564365,
            "data_time": 0.0019528571185327066,
            "batch_time": 0.015575994086156945,
            "samples_per_second": 2269730.439391751,
            "samples_per_second_per_gpu": 283716.30492396886,
            "loss_sequences_lower_95": 5.242795121888799,
            "loss_sequences_upper_95": 5.284513994289453,
            "loss_tokens_lower_95": 5.242307508278735,
            "loss_tokens_upper_95": 5.284346110663545,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.032975790225707,
            "data_time": 0.002236836370389173,
            "batch_time": 0.01587573922933287,
            "samples_per_second": 2261454.019630522,
            "samples_per_second_per_gpu": 282681.75245381525,
            "loss_sequences_lower_95": 3.04093635831072,
            "loss_sequences_upper_95": 3.0666924405776985,
            "loss_tokens_lower_95": 3.008395884925964,
            "loss_tokens_upper_95": 3.0272063685199373,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.4401164111392255,
            "data_time": 0.003064265237676996,
            "batch_time": 0.01678109685842099,
            "samples_per_second": 2250770.7642036113,
            "samples_per_second_per_gpu": 281346.3455254514,
            "loss_sequences_lower_95": 4.675171867157977,
            "loss_sequences_upper_95": 4.963576928860858,
            "loss_tokens_lower_95": 3.9276173480499463,
            "loss_tokens_upper_95": 4.141320332510572,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.656849354952573,
            "data_time": 0.003933580314859431,
            "batch_time": 0.017857480714929864,
            "samples_per_second": 2199841.7643885775,
            "samples_per_second_per_gpu": 274980.2205485722,
            "loss_sequences_lower_95": 4.779796256510417,
            "loss_sequences_upper_95": 4.988195808919271,
            "loss_tokens_lower_95": 4.356683249803459,
            "loss_tokens_upper_95": 4.5030242482311325,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.3413615872048354,
            "data_time": 0.00458064769727612,
            "batch_time": 0.018468477067904235,
            "samples_per_second": 2212106.6297157784,
            "samples_per_second_per_gpu": 276513.3287144723,
            "loss_sequences_lower_95": 3.3772147201063776,
            "loss_sequences_upper_95": 3.442347699932477,
            "loss_tokens_lower_95": 3.250405251538654,
            "loss_tokens_upper_95": 3.283086887815394,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.8415483030405913,
            "data_time": 0.02314694651535579,
            "batch_time": 0.038014584353991916,
            "samples_per_second": 2010151.9125711306,
            "samples_per_second_per_gpu": 251268.98907139132,
            "loss_sequences_lower_95": 3.72577561118386,
            "loss_sequences_upper_95": 4.012703677090731,
            "loss_tokens_lower_95": 3.7437408120654037,
            "loss_tokens_upper_95": 3.8213461623249017,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.796043128383403,
            "data_time": 0.020147982984781265,
            "batch_time": 0.03423963859677315,
            "samples_per_second": 2022714.7560904245,
            "samples_per_second_per_gpu": 252839.34451130306,
            "loss_sequences_lower_95": 3.7901746477399554,
            "loss_sequences_upper_95": 4.006700246382733,
            "loss_tokens_lower_95": 3.6674436410375764,
            "loss_tokens_upper_95": 3.768994444767688,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.3769817248980205,
            "data_time": 0.017266953602815285,
            "batch_time": 0.03150899746479132,
            "samples_per_second": 2010889.1655217486,
            "samples_per_second_per_gpu": 251361.14569021857,
            "loss_sequences_lower_95": 4.348150472005208,
            "loss_sequences_upper_95": 4.4711309305826825,
            "loss_tokens_lower_95": 4.216086588742854,
            "loss_tokens_upper_95": 4.438155045291254,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.800472305159911,
            "data_time": 0.001762838277299624,
            "batch_time": 0.015420461895310753,
            "samples_per_second": 2264054.0773997842,
            "samples_per_second_per_gpu": 283006.75967497303,
            "loss_sequences_lower_95": 5.808242137520915,
            "loss_sequences_upper_95": 5.889769675228212,
            "loss_tokens_lower_95": 5.661374720408882,
            "loss_tokens_upper_95": 5.745713222382818,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.51162493886169,
            "data_time": 0.002910823830022108,
            "batch_time": 0.016827132877887497,
            "samples_per_second": 2215172.768070095,
            "samples_per_second_per_gpu": 276896.5960087619,
            "loss_sequences_lower_95": 5.050302781641283,
            "loss_sequences_upper_95": 5.361105860443629,
            "loss_tokens_lower_95": 3.778983893814572,
            "loss_tokens_upper_95": 3.917184816892314,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.168460723788258,
            "data_time": 0.005199213688438003,
            "batch_time": 0.018866922807049106,
            "samples_per_second": 2222426.2302376344,
            "samples_per_second_per_gpu": 277803.2787797043,
            "loss_sequences_lower_95": 4.584622536096149,
            "loss_sequences_upper_95": 4.941819789222483,
            "loss_tokens_lower_95": 3.7600205664251574,
            "loss_tokens_upper_95": 3.926242493798403,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.086460448835538,
            "data_time": 0.023438721895217896,
            "batch_time": 0.03793318143912724,
            "samples_per_second": 1988401.244753865,
            "samples_per_second_per_gpu": 248550.15559423313,
            "loss_sequences_lower_95": 5.9830454926512555,
            "loss_sequences_upper_95": 6.187581408078267,
            "loss_tokens_lower_95": 5.986448537051405,
            "loss_tokens_upper_95": 6.187279729973779,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.4288441920280457,
            "data_time": 0.0484315432035006,
            "batch_time": 0.06342673760194045,
            "samples_per_second": 1717140.5744002564,
            "samples_per_second_per_gpu": 214642.57180003205,
            "loss_sequences_lower_95": 3.284000343322754,
            "loss_sequences_upper_95": 3.6595716247558596,
            "loss_tokens_lower_95": 3.1219708328383553,
            "loss_tokens_upper_95": 3.5739053961629303,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.068424780390767,
            "data_time": 0.0033745368321736655,
            "batch_time": 0.016965582570659114,
            "samples_per_second": 2269663.5742794136,
            "samples_per_second_per_gpu": 283707.9467849267,
            "loss_sequences_lower_95": 5.010525784448568,
            "loss_sequences_upper_95": 5.126417889785376,
            "loss_tokens_lower_95": 5.008356981072479,
            "loss_tokens_upper_95": 5.127646564339177,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.361408616752531,
            "data_time": 0.00502589592925876,
            "batch_time": 0.018768267763769452,
            "samples_per_second": 2232869.374077667,
            "samples_per_second_per_gpu": 279108.6717597084,
            "loss_sequences_lower_95": 5.305833251353271,
            "loss_sequences_upper_95": 5.415837068424959,
            "loss_tokens_lower_95": 5.303725184035243,
            "loss_tokens_upper_95": 5.4173399229307435,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.4298068039175993,
            "data_time": 0.003629450406678037,
            "batch_time": 0.017255550841790812,
            "samples_per_second": 2248140.2219522335,
            "samples_per_second_per_gpu": 281017.5277440292,
            "loss_sequences_lower_95": 3.583788484691665,
            "loss_sequences_upper_95": 3.70917052226435,
            "loss_tokens_lower_95": 3.240683512102931,
            "loss_tokens_upper_95": 3.295073748325991,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.515821394443512,
            "data_time": 0.011039407923817635,
            "batch_time": 0.02471169363707304,
            "samples_per_second": 2171087.644972396,
            "samples_per_second_per_gpu": 271385.9556215495,
            "loss_sequences_lower_95": 5.723662463378906,
            "loss_sequences_upper_95": 6.3104655639648435,
            "loss_tokens_lower_95": 4.875141047358434,
            "loss_tokens_upper_95": 5.243007261527445,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.832756921648979,
            "data_time": 0.14708876609802246,
            "batch_time": 0.16474053263664246,
            "samples_per_second": 842489.3261283376,
            "samples_per_second_per_gpu": 105311.1657660422,
            "loss_sequences_lower_95": 3.614209181070328,
            "loss_sequences_upper_95": 4.0965500831604,
            "loss_tokens_lower_95": 3.4167156175635327,
            "loss_tokens_upper_95": 4.138601903805787,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.13859171702944,
            "data_time": 0.026537428510949968,
            "batch_time": 0.041088002793332364,
            "samples_per_second": 1860341.0386648832,
            "samples_per_second_per_gpu": 232542.6298331104,
            "loss_sequences_lower_95": 5.605639534435053,
            "loss_sequences_upper_95": 6.441660072063577,
            "loss_tokens_lower_95": 3.6625529774146526,
            "loss_tokens_upper_95": 4.124413966164917,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 2.784447277461977,
            "data_time": 0.0030171190285020405,
            "batch_time": 0.01683002420597606,
            "samples_per_second": 2227276.2139292383,
            "samples_per_second_per_gpu": 278409.5267411548,
            "loss_sequences_lower_95": 2.755390990837634,
            "loss_sequences_upper_95": 2.8132651819418277,
            "loss_tokens_lower_95": 2.7549373308400913,
            "loss_tokens_upper_95": 2.813681538233126,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.20024260139076,
            "data_time": 0.0028619602478702354,
            "batch_time": 0.016556256771272683,
            "samples_per_second": 2256629.1569762602,
            "samples_per_second_per_gpu": 282078.64462203253,
            "loss_sequences_lower_95": 3.169491892806678,
            "loss_sequences_upper_95": 3.333622849781074,
            "loss_tokens_lower_95": 3.0279777145802953,
            "loss_tokens_upper_95": 3.1869788581161136,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.3186019355560834,
            "data_time": 0.01842453247971005,
            "batch_time": 0.03247590528594123,
            "samples_per_second": 1984194.3048366206,
            "samples_per_second_per_gpu": 248024.28810457757,
            "loss_sequences_lower_95": 3.1680457034827154,
            "loss_sequences_upper_95": 3.5667192843370823,
            "loss_tokens_lower_95": 3.053522327010219,
            "loss_tokens_upper_95": 3.353586749871719,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.6965142996553935,
            "data_time": 0.004858515784144401,
            "batch_time": 0.01859484761953354,
            "samples_per_second": 2213016.3782587177,
            "samples_per_second_per_gpu": 276627.0472823397,
            "loss_sequences_lower_95": 3.7249988920216306,
            "loss_sequences_upper_95": 3.8726966863931778,
            "loss_tokens_lower_95": 3.5472884836868595,
            "loss_tokens_upper_95": 3.6908265914570726,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 2.8765589340430933,
            "data_time": 0.03219746975671677,
            "batch_time": 0.046586524872552784,
            "samples_per_second": 1912823.3000276561,
            "samples_per_second_per_gpu": 239102.91250345702,
            "loss_sequences_lower_95": 2.735240936279297,
            "loss_sequences_upper_95": 3.1655573868170017,
            "loss_tokens_lower_95": 2.6167370943999795,
            "loss_tokens_upper_95": 2.962808655681268,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.690694062393602,
            "data_time": 0.0022663670066257404,
            "batch_time": 0.0159498076414138,
            "samples_per_second": 2258405.4822508716,
            "samples_per_second_per_gpu": 282300.68528135895,
            "loss_sequences_lower_95": 4.674538774942488,
            "loss_sequences_upper_95": 4.706556174516153,
            "loss_tokens_lower_95": 4.674720022129426,
            "loss_tokens_upper_95": 4.706879793927535,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 0.9863325224339383,
            "data_time": 0.04666137261824174,
            "batch_time": 0.061048117550936615,
            "samples_per_second": 1789609.6330225554,
            "samples_per_second_per_gpu": 223701.20412781942,
            "loss_sequences_lower_95": 0.9346157573959203,
            "loss_sequences_upper_95": 1.0762791328059818,
            "loss_tokens_lower_95": 0.838621497569089,
            "loss_tokens_upper_95": 1.0428170710771574,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.881493080499537,
            "data_time": 0.0016217273843458291,
            "batch_time": 0.015234601105448586,
            "samples_per_second": 2272791.9678905774,
            "samples_per_second_per_gpu": 284098.9959863222,
            "loss_sequences_lower_95": 5.2706112933208855,
            "loss_sequences_upper_95": 5.317195013594078,
            "loss_tokens_lower_95": 4.280881032398453,
            "loss_tokens_upper_95": 4.330995877659575,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.568293792247772,
            "data_time": 0.005722060090019589,
            "batch_time": 0.019566068100550817,
            "samples_per_second": 2213237.332116491,
            "samples_per_second_per_gpu": 276654.6665145614,
            "loss_sequences_lower_95": 6.555139038085938,
            "loss_sequences_upper_95": 6.860970434570312,
            "loss_tokens_lower_95": 6.280992264537413,
            "loss_tokens_upper_95": 6.567293445324585,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.315523088496664,
            "data_time": 0.022451453289743198,
            "batch_time": 0.03674524921481892,
            "samples_per_second": 2003284.7447832578,
            "samples_per_second_per_gpu": 250410.59309790723,
            "loss_sequences_lower_95": 5.1492540309740145,
            "loss_sequences_upper_95": 5.483882990298064,
            "loss_tokens_lower_95": 5.147226841138757,
            "loss_tokens_upper_95": 5.480975461213485,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.233725030133218,
            "data_time": 0.0044775400535169855,
            "batch_time": 0.018324801002640324,
            "samples_per_second": 2221199.049030926,
            "samples_per_second_per_gpu": 277649.88112886576,
            "loss_sequences_lower_95": 6.188101519960346,
            "loss_sequences_upper_95": 6.278023626154119,
            "loss_tokens_lower_95": 6.189226037227747,
            "loss_tokens_upper_95": 6.277672304095644,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 1.1028207450707754,
            "data_time": 0.004368778238905237,
            "batch_time": 0.01786336524689451,
            "samples_per_second": 2271057.9564802684,
            "samples_per_second_per_gpu": 283882.24456003355,
            "loss_sequences_lower_95": 1.1319565185546874,
            "loss_sequences_upper_95": 1.1767840270996093,
            "loss_tokens_lower_95": 1.037455620920243,
            "loss_tokens_upper_95": 1.0942729318289817,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.468850536573501,
            "data_time": 0.022810776318822588,
            "batch_time": 0.037194630929401944,
            "samples_per_second": 1910298.5535058205,
            "samples_per_second_per_gpu": 238787.31918822756,
            "loss_sequences_lower_95": 6.133846203031994,
            "loss_sequences_upper_95": 6.807180321103051,
            "loss_tokens_lower_95": 6.134113595145089,
            "loss_tokens_upper_95": 6.814831935337612,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 2.5818233229219913,
            "data_time": 0.15401680767536163,
            "batch_time": 0.1711539626121521,
            "samples_per_second": 801379.341484244,
            "samples_per_second_per_gpu": 100172.4176855305,
            "loss_sequences_lower_95": 2.3522464752197267,
            "loss_sequences_upper_95": 3.460184407234192,
            "loss_tokens_lower_95": 2.01488492041519,
            "loss_tokens_upper_95": 2.5283286623610666,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.571170433521271,
            "data_time": 0.0058073075044722785,
            "batch_time": 0.019361035218314518,
            "samples_per_second": 2243149.26413291,
            "samples_per_second_per_gpu": 280393.65801661374,
            "loss_sequences_lower_95": 7.490145495605469,
            "loss_sequences_upper_95": 7.8178986328125,
            "loss_tokens_lower_95": 7.322162268004441,
            "loss_tokens_upper_95": 7.6093434186958016,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.845387073993683,
            "data_time": 0.005566364242917015,
            "batch_time": 0.019749929507573444,
            "samples_per_second": 2257091.55649552,
            "samples_per_second_per_gpu": 282136.44456194,
            "loss_sequences_lower_95": 6.9366502197265625,
            "loss_sequences_upper_95": 7.135746838378906,
            "loss_tokens_lower_95": 6.621470499205841,
            "loss_tokens_upper_95": 6.8047401791163304,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.951281261651899,
            "data_time": 0.003862440466482105,
            "batch_time": 0.017744208339066012,
            "samples_per_second": 2208598.944855355,
            "samples_per_second_per_gpu": 276074.8681069194,
            "loss_sequences_lower_95": 4.915194541529605,
            "loss_sequences_upper_95": 4.987205852696531,
            "loss_tokens_lower_95": 4.915819166684127,
            "loss_tokens_upper_95": 4.986880400066522,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.086092257829306,
            "data_time": 0.008457869561417225,
            "batch_time": 0.02211681377491562,
            "samples_per_second": 2200321.2885940913,
            "samples_per_second_per_gpu": 275040.1610742614,
            "loss_sequences_lower_95": 4.980360677458358,
            "loss_sequences_upper_95": 5.189591921382968,
            "loss_tokens_lower_95": 4.978140075964862,
            "loss_tokens_upper_95": 5.187327507710493,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.264921094417572,
            "data_time": 0.005724877119064331,
            "batch_time": 0.01937942277817499,
            "samples_per_second": 2231429.595792037,
            "samples_per_second_per_gpu": 278928.6994740046,
            "loss_sequences_lower_95": 7.181767834472657,
            "loss_sequences_upper_95": 7.34993671875,
            "loss_tokens_lower_95": 7.181019262695313,
            "loss_tokens_upper_95": 7.351873730468751,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.5923102965756417,
            "data_time": 0.0022473962185288196,
            "batch_time": 0.015835608561683468,
            "samples_per_second": 2269811.200049919,
            "samples_per_second_per_gpu": 283726.4000062399,
            "loss_sequences_lower_95": 4.189418201572847,
            "loss_sequences_upper_95": 4.295188096839523,
            "loss_tokens_lower_95": 2.860874364815723,
            "loss_tokens_upper_95": 2.9309147953280297,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.532505532698845,
            "data_time": 0.018845108577183314,
            "batch_time": 0.03277646814073835,
            "samples_per_second": 2055038.1032451268,
            "samples_per_second_per_gpu": 256879.76290564085,
            "loss_sequences_lower_95": 5.347147790709538,
            "loss_sequences_upper_95": 5.719255931341826,
            "loss_tokens_lower_95": 5.342991034664324,
            "loss_tokens_upper_95": 5.719430917768336,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.4056607040704465,
            "data_time": 0.011279080994427204,
            "batch_time": 0.02496184315532446,
            "samples_per_second": 2200763.8770136777,
            "samples_per_second_per_gpu": 275095.4846267097,
            "loss_sequences_lower_95": 5.277097431257659,
            "loss_sequences_upper_95": 5.5297052959367345,
            "loss_tokens_lower_95": 5.280135486079198,
            "loss_tokens_upper_95": 5.5278463445925246,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.367669547528503,
            "data_time": 0.0023810014512485847,
            "batch_time": 0.016077610038763047,
            "samples_per_second": 2251999.9301608815,
            "samples_per_second_per_gpu": 281499.9912701102,
            "loss_sequences_lower_95": 4.912412497161938,
            "loss_sequences_upper_95": 5.030902471952117,
            "loss_tokens_lower_95": 3.594500775607655,
            "loss_tokens_upper_95": 3.6791886679940977,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.3582111313229515,
            "data_time": 0.027494107683499653,
            "batch_time": 0.04250164826711019,
            "samples_per_second": 1886148.5237577271,
            "samples_per_second_per_gpu": 235768.5654697159,
            "loss_sequences_lower_95": 5.167518236271288,
            "loss_sequences_upper_95": 5.541851951962425,
            "loss_tokens_lower_95": 5.170191309691736,
            "loss_tokens_upper_95": 5.544081640495825,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.512806810005725,
            "data_time": 0.0038230289615263143,
            "batch_time": 0.017564848229125306,
            "samples_per_second": 2234920.1466622837,
            "samples_per_second_per_gpu": 279365.01833278546,
            "loss_sequences_lower_95": 5.470759105624045,
            "loss_sequences_upper_95": 5.553238663512997,
            "loss_tokens_lower_95": 5.471481746941897,
            "loss_tokens_upper_95": 5.554271415687118,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.522586175538962,
            "data_time": 0.023301497372713954,
            "batch_time": 0.037327510660344905,
            "samples_per_second": 1922577.9231606338,
            "samples_per_second_per_gpu": 240322.24039507922,
            "loss_sequences_lower_95": 5.346951916148361,
            "loss_sequences_upper_95": 5.6978999461942506,
            "loss_tokens_lower_95": 5.34108772648191,
            "loss_tokens_upper_95": 5.700743991888842,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.7968986411889394,
            "data_time": 0.08008118718862534,
            "batch_time": 0.09594174474477768,
            "samples_per_second": 1330220.781054861,
            "samples_per_second_per_gpu": 166277.5976318576,
            "loss_sequences_lower_95": 3.5455662218729653,
            "loss_sequences_upper_95": 4.277604586283366,
            "loss_tokens_lower_95": 3.1591911051008434,
            "loss_tokens_upper_95": 4.126528602176242,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.458829712867737,
            "data_time": 0.08114054799079895,
            "batch_time": 0.09621217101812363,
            "samples_per_second": 1463233.198874624,
            "samples_per_second_per_gpu": 182904.149859328,
            "loss_sequences_lower_95": 3.2530136171976727,
            "loss_sequences_upper_95": 4.02398686726888,
            "loss_tokens_lower_95": 2.6426092169258033,
            "loss_tokens_upper_95": 3.690669987710674,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 2.232508313848976,
            "data_time": 0.0034131824163354937,
            "batch_time": 0.01721777306927731,
            "samples_per_second": 2231941.007375604,
            "samples_per_second_per_gpu": 278992.6259219505,
            "loss_sequences_lower_95": 2.217420458337169,
            "loss_sequences_upper_95": 2.24813590902292,
            "loss_tokens_lower_95": 2.2171417209361195,
            "loss_tokens_upper_95": 2.2478033240519144,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 0.6741540418506119,
            "data_time": 0.0016433921045495331,
            "batch_time": 0.015296158843410877,
            "samples_per_second": 2260719.7796418183,
            "samples_per_second_per_gpu": 282589.9724552273,
            "loss_sequences_lower_95": 0.7921577650309016,
            "loss_sequences_upper_95": 0.814240669397097,
            "loss_tokens_lower_95": 0.5418166486897478,
            "loss_tokens_upper_95": 0.5529181388031793,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.624923645980715,
            "data_time": 0.0399819016456604,
            "batch_time": 0.05461578816175461,
            "samples_per_second": 1907733.7372735357,
            "samples_per_second_per_gpu": 238466.71715919196,
            "loss_sequences_lower_95": 4.6474841140386625,
            "loss_sequences_upper_95": 5.049246131716751,
            "loss_tokens_lower_95": 4.256058933423914,
            "loss_tokens_upper_95": 4.464141901176001,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.034163165736842,
            "data_time": 0.12015803654988606,
            "batch_time": 0.1362081141698928,
            "samples_per_second": 964575.6428780267,
            "samples_per_second_per_gpu": 120571.95535975334,
            "loss_sequences_lower_95": 6.634957926982158,
            "loss_sequences_upper_95": 7.615816085403029,
            "loss_tokens_lower_95": 6.317697623923973,
            "loss_tokens_upper_95": 7.443062261887537,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.428858672700277,
            "data_time": 0.03229774179912749,
            "batch_time": 0.04642844767797561,
            "samples_per_second": 1976480.2267264265,
            "samples_per_second_per_gpu": 247060.0283408033,
            "loss_sequences_lower_95": 4.398271151286799,
            "loss_sequences_upper_95": 4.757533850320955,
            "loss_tokens_lower_95": 4.056698315285955,
            "loss_tokens_upper_95": 4.23298828946331,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.54738793576636,
            "data_time": 0.03186112642288208,
            "batch_time": 0.046758671601613365,
            "samples_per_second": 1837714.1785125576,
            "samples_per_second_per_gpu": 229714.2723140697,
            "loss_sequences_lower_95": 4.531392362641125,
            "loss_sequences_upper_95": 4.85477406571551,
            "loss_tokens_lower_95": 4.198191234919227,
            "loss_tokens_upper_95": 4.344568780143221,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.6333318657991365,
            "data_time": 0.030224825654711043,
            "batch_time": 0.044777574993315195,
            "samples_per_second": 1948379.3553144927,
            "samples_per_second_per_gpu": 243547.4194143116,
            "loss_sequences_lower_95": 4.5837296276557735,
            "loss_sequences_upper_95": 4.997995981355992,
            "loss_tokens_lower_95": 4.240250716557648,
            "loss_tokens_upper_95": 4.47204029682109,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.637161362461928,
            "data_time": 0.030267726807367234,
            "batch_time": 0.04467057046436128,
            "samples_per_second": 1951428.622593052,
            "samples_per_second_per_gpu": 243928.5778241315,
            "loss_sequences_lower_95": 4.610891919019745,
            "loss_sequences_upper_95": 4.905187448641149,
            "loss_tokens_lower_95": 4.314208204798238,
            "loss_tokens_upper_95": 4.452312614464685,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.824091360435723,
            "data_time": 0.030711050386782044,
            "batch_time": 0.045220248493147486,
            "samples_per_second": 2010106.1170975058,
            "samples_per_second_per_gpu": 251263.26463718823,
            "loss_sequences_lower_95": 4.814237606451378,
            "loss_sequences_upper_95": 5.12242424058618,
            "loss_tokens_lower_95": 4.562442337397631,
            "loss_tokens_upper_95": 4.676455606536999,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.838710883768593,
            "data_time": 0.02981026399703253,
            "batch_time": 0.04415049155553182,
            "samples_per_second": 1977411.1323754496,
            "samples_per_second_per_gpu": 247176.3915469312,
            "loss_sequences_lower_95": 4.862084505034656,
            "loss_sequences_upper_95": 5.177954427207388,
            "loss_tokens_lower_95": 4.507155312876733,
            "loss_tokens_upper_95": 4.641904891406721,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-32.0/params.txt",
    "uuid": "52432727-eedb-4b3e-b9ef-cf73660109a9",
    "creation_date": "2023_12_14-04_59_30"
}