{
    "name": "c4_original-d=512_l=8_h=4-1.0",
    "dataset_name": "c4_original",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf1",
    "hyperparameters": {
        "model": "training/open_lm_configs/d=512_l=8_h=4.json",
        "tokens": 1578280960,
        "warmup": 400,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 512,
        "acc": 8,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 78914048,
        "params_no_embed": 53092864,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp"
        ],
        "chinchilla_multiplier": 1.0
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--train-num-samples",
        "315656192",
        "--workers",
        "2",
        "--dataset-manifest",
        "<scrub>/original_c4/manifest.jsonl",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "512",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "400",
        "--model",
        "training/open_lm_configs/d=512_l=8_h=4.json",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--data-key",
        "txt",
        "--accum-freq",
        "8",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--name",
        "c4_original-d=512_l=8_h=4-1.0",
        "--logs",
        "/admin/<scrub>/scrub_logs",
        "--val-data",
        "/admin/<scrub>/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/admin/<scrub>/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-batch-size",
        "8",
        "--val-data-key",
        "json",
        "txt",
        "--val-num-samples",
        "245760",
        "--fsdp",
        "--fsdp-amp",
        "--report-to",
        "wandb",
        "--wandb-project-name",
        "scrub",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/scrub_experiments_v3"
    ],
    "results": [
        {
            "loss": 4.565644518534342,
            "data_time": 0.029993582516908646,
            "batch_time": 0.3292251043021679,
            "samples_per_second": 1724035.03150363,
            "samples_per_second_per_gpu": 215504.37893795376,
            "loss_sequences_lower_95": 4.439572575887044,
            "loss_sequences_upper_95": 4.692388369242351,
            "loss_tokens_lower_95": 4.550265668233235,
            "loss_tokens_upper_95": 4.581329383850098,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.800284412392762,
            "data_time": 0.0015726642681789845,
            "batch_time": 0.015558822125324773,
            "samples_per_second": 2218434.3775203936,
            "samples_per_second_per_gpu": 277304.2971900492,
            "loss_sequences_lower_95": 3.7977479330639183,
            "loss_sequences_upper_95": 3.80276757851563,
            "loss_tokens_lower_95": 3.7892141145833333,
            "loss_tokens_upper_95": 3.8114258541666666,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.9876155926256764,
            "data_time": 0.009429572105407714,
            "batch_time": 0.023260327339172365,
            "samples_per_second": 2172981.3982986035,
            "samples_per_second_per_gpu": 271622.67478732544,
            "loss_sequences_lower_95": 3.958918531768176,
            "loss_sequences_upper_95": 4.019535123863999,
            "loss_tokens_lower_95": 3.97327540625,
            "loss_tokens_upper_95": 4.00240103125,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.8644753402041405,
            "data_time": 0.0015033361943144548,
            "batch_time": 0.014978928099337378,
            "samples_per_second": 2312030.781059532,
            "samples_per_second_per_gpu": 289003.8476324415,
            "loss_sequences_lower_95": 3.8460076312822165,
            "loss_sequences_upper_95": 3.883205662048969,
            "loss_tokens_lower_95": 3.852847802083333,
            "loss_tokens_upper_95": 3.8759253125,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.826109914818509,
            "data_time": 0.009023726223949417,
            "batch_time": 0.02310001137722061,
            "samples_per_second": 2126319.4751662565,
            "samples_per_second_per_gpu": 265789.93439578207,
            "loss_sequences_lower_95": 3.7883126501638875,
            "loss_sequences_upper_95": 3.868023905394762,
            "loss_tokens_lower_95": 3.81483703125,
            "loss_tokens_upper_95": 3.8371119791666666,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.296546358708862,
            "data_time": 0.003446812538996987,
            "batch_time": 0.017089607598988907,
            "samples_per_second": 2284693.894461773,
            "samples_per_second_per_gpu": 285586.7368077216,
            "loss_sequences_lower_95": 4.257698512385282,
            "loss_sequences_upper_95": 4.336418520099529,
            "loss_tokens_lower_95": 4.283874395833333,
            "loss_tokens_upper_95": 4.309048072916666,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.216567428744569,
            "data_time": 0.0015308764591497076,
            "batch_time": 0.014980719003941845,
            "samples_per_second": 2319926.355901676,
            "samples_per_second_per_gpu": 289990.7944877095,
            "loss_sequences_lower_95": 4.181369389748086,
            "loss_sequences_upper_95": 4.251658312739158,
            "loss_tokens_lower_95": 4.200887822916667,
            "loss_tokens_upper_95": 4.23186284375,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.381892152456713,
            "data_time": 0.0015428451153584875,
            "batch_time": 0.015013393524150776,
            "samples_per_second": 2312844.5866672904,
            "samples_per_second_per_gpu": 289105.5733334113,
            "loss_sequences_lower_95": 4.369194208115183,
            "loss_sequences_upper_95": 4.395426251636126,
            "loss_tokens_lower_95": 4.370007875,
            "loss_tokens_upper_95": 4.393955020833333,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.181719190706082,
            "data_time": 0.01025281444428459,
            "batch_time": 0.024037281672159832,
            "samples_per_second": 2197735.2760335486,
            "samples_per_second_per_gpu": 274716.9095041936,
            "loss_sequences_lower_95": 4.130628551700251,
            "loss_sequences_upper_95": 4.238921039860424,
            "loss_tokens_lower_95": 4.16995146875,
            "loss_tokens_upper_95": 4.1934635,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.274071304223283,
            "data_time": 0.008710692636668682,
            "batch_time": 0.02282654494047165,
            "samples_per_second": 2170654.8509524977,
            "samples_per_second_per_gpu": 271331.8563690622,
            "loss_sequences_lower_95": 5.229294703788908,
            "loss_sequences_upper_95": 5.328862777529026,
            "loss_tokens_lower_95": 5.260964614583333,
            "loss_tokens_upper_95": 5.287281052083333,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.192753832650966,
            "data_time": 0.001226605503942647,
            "batch_time": 0.014653699369945405,
            "samples_per_second": 2326030.3498495086,
            "samples_per_second_per_gpu": 290753.7937311886,
            "loss_sequences_lower_95": 4.1852910041155615,
            "loss_sequences_upper_95": 4.200388042067802,
            "loss_tokens_lower_95": 4.180861197916667,
            "loss_tokens_upper_95": 4.2047800625,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.032566556529371,
            "data_time": 0.0023584993157557506,
            "batch_time": 0.015945380772281744,
            "samples_per_second": 2295391.839786953,
            "samples_per_second_per_gpu": 286923.9799733691,
            "loss_sequences_lower_95": 4.020471288004997,
            "loss_sequences_upper_95": 4.044704161758773,
            "loss_tokens_lower_95": 4.020828937499999,
            "loss_tokens_upper_95": 4.044325916666667,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.546067705019008,
            "data_time": 0.008654854514382103,
            "batch_time": 0.022460669868077213,
            "samples_per_second": 2173808.045755816,
            "samples_per_second_per_gpu": 271726.005719477,
            "loss_sequences_lower_95": 4.496099853515625,
            "loss_sequences_upper_95": 4.602355870368757,
            "loss_tokens_lower_95": 4.532850229166666,
            "loss_tokens_upper_95": 4.559001645833333,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.838505978496886,
            "data_time": 0.0087730666080794,
            "batch_time": 0.02288719477406536,
            "samples_per_second": 2137099.182512154,
            "samples_per_second_per_gpu": 267137.39781401923,
            "loss_sequences_lower_95": 3.7754685598089837,
            "loss_sequences_upper_95": 3.9027205333204713,
            "loss_tokens_lower_95": 3.8260169479166666,
            "loss_tokens_upper_95": 3.8506461250000004,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.320695660331032,
            "data_time": 0.07106448071343559,
            "batch_time": 0.08680394717625209,
            "samples_per_second": 1057421.7680679983,
            "samples_per_second_per_gpu": 132177.7210084998,
            "loss_sequences_lower_95": 5.252467727661133,
            "loss_sequences_upper_95": 5.389101938767867,
            "loss_tokens_lower_95": 5.2911681522022596,
            "loss_tokens_upper_95": 5.350673857602207,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.493580405983216,
            "data_time": 0.012126978148113598,
            "batch_time": 0.02616734125397422,
            "samples_per_second": 2093403.2332836024,
            "samples_per_second_per_gpu": 261675.4041604503,
            "loss_sequences_lower_95": 4.402100516894816,
            "loss_sequences_upper_95": 4.584613570944561,
            "loss_tokens_lower_95": 4.479563177083333,
            "loss_tokens_upper_95": 4.507108458333333,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.15763217352311,
            "data_time": 0.011717557907104492,
            "batch_time": 0.026057106753190357,
            "samples_per_second": 2127895.9993577506,
            "samples_per_second_per_gpu": 265986.99991971883,
            "loss_sequences_lower_95": 6.0990972433367,
            "loss_sequences_upper_95": 6.217776062494846,
            "loss_tokens_lower_95": 6.145693572916667,
            "loss_tokens_upper_95": 6.169587854166667,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.609030297545136,
            "data_time": 0.03127383813261986,
            "batch_time": 0.04602915421128273,
            "samples_per_second": 1863211.1979648068,
            "samples_per_second_per_gpu": 232901.39974560085,
            "loss_sequences_lower_95": 4.517320213943231,
            "loss_sequences_upper_95": 4.753670239057697,
            "loss_tokens_lower_95": 4.5943717706398886,
            "loss_tokens_upper_95": 4.6236412798772095,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.655233574701398,
            "data_time": 0.0020268855442490286,
            "batch_time": 0.015768239465552746,
            "samples_per_second": 2246329.1901412616,
            "samples_per_second_per_gpu": 280791.1487676577,
            "loss_sequences_lower_95": 4.634940317162353,
            "loss_sequences_upper_95": 4.675962834153789,
            "loss_tokens_lower_95": 4.634846270340764,
            "loss_tokens_upper_95": 4.675661241721264,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.48783019709174,
            "data_time": 0.002058653315161444,
            "batch_time": 0.015747400700666343,
            "samples_per_second": 2253345.065525973,
            "samples_per_second_per_gpu": 281668.13319074665,
            "loss_sequences_lower_95": 3.499041467243826,
            "loss_sequences_upper_95": 3.5253385779725153,
            "loss_tokens_lower_95": 3.462303516871434,
            "loss_tokens_upper_95": 3.482115537940119,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.723610083252741,
            "data_time": 0.0029681737865624174,
            "batch_time": 0.016678401369963132,
            "samples_per_second": 2242986.2316799224,
            "samples_per_second_per_gpu": 280373.2789599903,
            "loss_sequences_lower_95": 5.937735260687293,
            "loss_sequences_upper_95": 6.238777465532002,
            "loss_tokens_lower_95": 5.2499708334264055,
            "loss_tokens_upper_95": 5.463325924767691,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.86892608054479,
            "data_time": 0.003898066726136715,
            "batch_time": 0.017684002981541005,
            "samples_per_second": 2226591.3357894346,
            "samples_per_second_per_gpu": 278323.9169736793,
            "loss_sequences_lower_95": 6.010634440104167,
            "loss_sequences_upper_95": 6.21656337890625,
            "loss_tokens_lower_95": 5.496327363404088,
            "loss_tokens_upper_95": 5.635229350923742,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.0346076099867165,
            "data_time": 0.004180715814075499,
            "batch_time": 0.01791253133057469,
            "samples_per_second": 2232325.989869937,
            "samples_per_second_per_gpu": 279040.7487337421,
            "loss_sequences_lower_95": 4.084740248442238,
            "loss_sequences_upper_95": 4.160008368155918,
            "loss_tokens_lower_95": 3.9248781163791016,
            "loss_tokens_upper_95": 3.959882946098716,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.361897086013447,
            "data_time": 0.020736398441450938,
            "batch_time": 0.03493425462927137,
            "samples_per_second": 2043074.7171335246,
            "samples_per_second_per_gpu": 255384.33964169057,
            "loss_sequences_lower_95": 4.268826897361062,
            "loss_sequences_upper_95": 4.527542655251242,
            "loss_tokens_lower_95": 4.24938520356747,
            "loss_tokens_upper_95": 4.329891248551346,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.294298776315183,
            "data_time": 0.01853940077126026,
            "batch_time": 0.0327155776321888,
            "samples_per_second": 2021981.568316352,
            "samples_per_second_per_gpu": 252747.696039544,
            "loss_sequences_lower_95": 4.281225062779018,
            "loss_sequences_upper_95": 4.505501970563616,
            "loss_tokens_lower_95": 4.159906733983582,
            "loss_tokens_upper_95": 4.262660435267857,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.881150833765665,
            "data_time": 0.01583883395561805,
            "batch_time": 0.03021906430904682,
            "samples_per_second": 2005699.8164293093,
            "samples_per_second_per_gpu": 250712.47705366366,
            "loss_sequences_lower_95": 4.816880391438802,
            "loss_sequences_upper_95": 4.930579559326172,
            "loss_tokens_lower_95": 4.758420475039111,
            "loss_tokens_upper_95": 4.999746858764196,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.491088926576558,
            "data_time": 0.001790343254479047,
            "batch_time": 0.015507869253054389,
            "samples_per_second": 2252596.6933873314,
            "samples_per_second_per_gpu": 281574.58667341643,
            "loss_sequences_lower_95": 7.510391401598101,
            "loss_sequences_upper_95": 7.5866246163144035,
            "loss_tokens_lower_95": 7.336550771357925,
            "loss_tokens_upper_95": 7.416794000698791,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.321902312915333,
            "data_time": 0.002658416960863459,
            "batch_time": 0.016552037440690417,
            "samples_per_second": 2221020.4850278227,
            "samples_per_second_per_gpu": 277627.56062847783,
            "loss_sequences_lower_95": 5.941883341471354,
            "loss_sequences_upper_95": 6.256494859894518,
            "loss_tokens_lower_95": 4.496363357973575,
            "loss_tokens_upper_95": 4.640209162657528,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.788722163988055,
            "data_time": 0.004624372398531115,
            "batch_time": 0.018519951282320795,
            "samples_per_second": 2188219.06489879,
            "samples_per_second_per_gpu": 273527.38311234873,
            "loss_sequences_lower_95": 5.253772212214031,
            "loss_sequences_upper_95": 5.609251617327485,
            "loss_tokens_lower_95": 4.355743394416545,
            "loss_tokens_upper_95": 4.523267286952445,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.098119918614218,
            "data_time": 0.02085921381201063,
            "batch_time": 0.03554646457944598,
            "samples_per_second": 1976454.039650943,
            "samples_per_second_per_gpu": 247056.75495636786,
            "loss_sequences_lower_95": 6.028601464397831,
            "loss_sequences_upper_95": 6.167071811902469,
            "loss_tokens_lower_95": 6.02951442770762,
            "loss_tokens_upper_95": 6.1661080695722745,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.9624987411499024,
            "data_time": 0.04507071238297682,
            "batch_time": 0.06056859401556162,
            "samples_per_second": 1604130.6979986043,
            "samples_per_second_per_gpu": 200516.33724982553,
            "loss_sequences_lower_95": 3.81955891418457,
            "loss_sequences_upper_95": 4.232051551818848,
            "loss_tokens_lower_95": 3.6210393042385256,
            "loss_tokens_upper_95": 4.097115439208548,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.0148591244843015,
            "data_time": 0.003135120941817395,
            "batch_time": 0.016970093264901565,
            "samples_per_second": 2226565.7364499387,
            "samples_per_second_per_gpu": 278320.71705624234,
            "loss_sequences_lower_95": 4.95778340053656,
            "loss_sequences_upper_95": 5.073207178184173,
            "loss_tokens_lower_95": 4.956234232064035,
            "loss_tokens_upper_95": 5.072837818657241,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.825543561399618,
            "data_time": 0.004524740464924599,
            "batch_time": 0.018267174529406998,
            "samples_per_second": 2228882.850112423,
            "samples_per_second_per_gpu": 278610.35626405285,
            "loss_sequences_lower_95": 4.771039503192823,
            "loss_sequences_upper_95": 4.880162984362203,
            "loss_tokens_lower_95": 4.76928774921939,
            "loss_tokens_upper_95": 4.881958757630144,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.953752137242256,
            "data_time": 0.003234365073959977,
            "batch_time": 0.017143907351318755,
            "samples_per_second": 2202346.0821304414,
            "samples_per_second_per_gpu": 275293.2602663052,
            "loss_sequences_lower_95": 4.111394612807314,
            "loss_sequences_upper_95": 4.236789176202054,
            "loss_tokens_lower_95": 3.7660262635641693,
            "loss_tokens_upper_95": 3.822812357239856,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.017888736724854,
            "data_time": 0.00961957685649395,
            "batch_time": 0.023503179661929607,
            "samples_per_second": 2132325.030568195,
            "samples_per_second_per_gpu": 266540.6288210244,
            "loss_sequences_lower_95": 6.190288989257812,
            "loss_sequences_upper_95": 6.751069213867187,
            "loss_tokens_lower_95": 5.3595617813120455,
            "loss_tokens_upper_95": 5.723532005527176,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.487105548381805,
            "data_time": 0.13457877933979034,
            "batch_time": 0.1511252522468567,
            "samples_per_second": 1006923.4647165884,
            "samples_per_second_per_gpu": 125865.43308957355,
            "loss_sequences_lower_95": 4.196130311489105,
            "loss_sequences_upper_95": 4.873877358436585,
            "loss_tokens_lower_95": 3.986279393338609,
            "loss_tokens_upper_95": 4.8191703182527394,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.896443395779051,
            "data_time": 0.024777161314132365,
            "batch_time": 0.03852176412622979,
            "samples_per_second": 1892574.0677079582,
            "samples_per_second_per_gpu": 236571.75846349477,
            "loss_sequences_lower_95": 6.437389101927308,
            "loss_sequences_upper_95": 7.394782844631151,
            "loss_tokens_lower_95": 4.25036480031664,
            "loss_tokens_upper_95": 4.740406167977557,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.364015041349652,
            "data_time": 0.0028003056844075522,
            "batch_time": 0.016589956771996286,
            "samples_per_second": 2213558.9869789174,
            "samples_per_second_per_gpu": 276694.8733723647,
            "loss_sequences_lower_95": 3.3424540388474773,
            "loss_sequences_upper_95": 3.385069664871396,
            "loss_tokens_lower_95": 3.3421811645027852,
            "loss_tokens_upper_95": 3.3854106138136877,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.460784696409067,
            "data_time": 0.0025012075877902024,
            "batch_time": 0.016307385984575467,
            "samples_per_second": 2237393.787589128,
            "samples_per_second_per_gpu": 279674.223448641,
            "loss_sequences_lower_95": 4.430573626710169,
            "loss_sequences_upper_95": 4.633681722115758,
            "loss_tokens_lower_95": 4.224475413623884,
            "loss_tokens_upper_95": 4.421640044687614,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.6899072320470006,
            "data_time": 0.016559690237045288,
            "batch_time": 0.030728542142444186,
            "samples_per_second": 1990750.750934015,
            "samples_per_second_per_gpu": 248843.84386675188,
            "loss_sequences_lower_95": 3.5410440801264165,
            "loss_sequences_upper_95": 3.9448302663726245,
            "loss_tokens_lower_95": 3.4033440998370286,
            "loss_tokens_upper_95": 3.7112393445377543,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.9799924805678057,
            "data_time": 0.004458977654576301,
            "batch_time": 0.0183085847645998,
            "samples_per_second": 2191323.5601439616,
            "samples_per_second_per_gpu": 273915.4450179952,
            "loss_sequences_lower_95": 4.008326544155732,
            "loss_sequences_upper_95": 4.15533031051142,
            "loss_tokens_lower_95": 3.837885101159334,
            "loss_tokens_upper_95": 3.987852836348546,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.5426963306054837,
            "data_time": 0.028658407075064524,
            "batch_time": 0.0428654636655535,
            "samples_per_second": 1963764.9229829076,
            "samples_per_second_per_gpu": 245470.61537286345,
            "loss_sequences_lower_95": 3.359601872141768,
            "loss_sequences_upper_95": 3.857194788863019,
            "loss_tokens_lower_95": 3.2432215779290505,
            "loss_tokens_upper_95": 3.637056673681561,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.1955610294298165,
            "data_time": 0.002238178938996349,
            "batch_time": 0.016005998414085714,
            "samples_per_second": 2234904.7204450206,
            "samples_per_second_per_gpu": 279363.0900556276,
            "loss_sequences_lower_95": 4.177204454562787,
            "loss_sequences_upper_95": 4.213780988619599,
            "loss_tokens_lower_95": 4.177120912463743,
            "loss_tokens_upper_95": 4.213815429101445,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 1.5742942644554434,
            "data_time": 0.043544977361505685,
            "batch_time": 0.05834332812916149,
            "samples_per_second": 1774360.529191444,
            "samples_per_second_per_gpu": 221795.0661489305,
            "loss_sequences_lower_95": 1.497193753140644,
            "loss_sequences_upper_95": 1.7209184850303871,
            "loss_tokens_lower_95": 1.3618245619712368,
            "loss_tokens_upper_95": 1.653033044321144,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.805131915324139,
            "data_time": 0.00163597238233683,
            "batch_time": 0.01536230164621422,
            "samples_per_second": 2247834.233119922,
            "samples_per_second_per_gpu": 280979.2791399903,
            "loss_sequences_lower_95": 6.251282326880241,
            "loss_sequences_upper_95": 6.302831877702437,
            "loss_tokens_lower_95": 5.1288832688588,
            "loss_tokens_upper_95": 5.182665377176016,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.173166557312012,
            "data_time": 0.005512803792953491,
            "batch_time": 0.019851790061072697,
            "samples_per_second": 2218255.7224291232,
            "samples_per_second_per_gpu": 277281.9653036404,
            "loss_sequences_lower_95": 7.115024853515624,
            "loss_sequences_upper_95": 7.385050317382812,
            "loss_tokens_lower_95": 6.957969482610692,
            "loss_tokens_upper_95": 7.201332558427591,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.967702309981636,
            "data_time": 0.02006314366550769,
            "batch_time": 0.03445176350868354,
            "samples_per_second": 2005869.3323159576,
            "samples_per_second_per_gpu": 250733.6665394947,
            "loss_sequences_lower_95": 4.8049761564835265,
            "loss_sequences_upper_95": 5.131700850777004,
            "loss_tokens_lower_95": 4.80940149721892,
            "loss_tokens_upper_95": 5.130715252420177,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.093184690403215,
            "data_time": 0.0041459152497440935,
            "batch_time": 0.01803250987845731,
            "samples_per_second": 2208827.4835655894,
            "samples_per_second_per_gpu": 276103.4354456987,
            "loss_sequences_lower_95": 7.018519601532907,
            "loss_sequences_upper_95": 7.166819901899858,
            "loss_tokens_lower_95": 7.019645885120738,
            "loss_tokens_upper_95": 7.164852294921875,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 1.1023065348068872,
            "data_time": 0.0036139009480780746,
            "batch_time": 0.017591519241637373,
            "samples_per_second": 2206629.2760400423,
            "samples_per_second_per_gpu": 275828.6595050053,
            "loss_sequences_lower_95": 1.1498753906249999,
            "loss_sequences_upper_95": 1.2211341308593748,
            "loss_tokens_lower_95": 1.0173494651766957,
            "loss_tokens_upper_95": 1.075198176927021,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.073321574074882,
            "data_time": 0.02102844842842647,
            "batch_time": 0.03582720884255001,
            "samples_per_second": 1847832.9435071903,
            "samples_per_second_per_gpu": 230979.1179383988,
            "loss_sequences_lower_95": 5.781148739769344,
            "loss_sequences_upper_95": 6.365928475516183,
            "loss_tokens_lower_95": 5.781223289853051,
            "loss_tokens_upper_95": 6.36586930047898,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 3.0826688408851624,
            "data_time": 0.14269623160362244,
            "batch_time": 0.1596263200044632,
            "samples_per_second": 856080.4604504075,
            "samples_per_second_per_gpu": 107010.05755630093,
            "loss_sequences_lower_95": 2.8233795285224916,
            "loss_sequences_upper_95": 4.110901808738708,
            "loss_tokens_lower_95": 2.4712041292485503,
            "loss_tokens_upper_95": 3.038278940731717,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.598711791038514,
            "data_time": 0.005290105229332333,
            "batch_time": 0.0190534113891541,
            "samples_per_second": 2210484.453532749,
            "samples_per_second_per_gpu": 276310.5566915936,
            "loss_sequences_lower_95": 7.5561320678710935,
            "loss_sequences_upper_95": 7.897249829101563,
            "loss_tokens_lower_95": 7.273733484361781,
            "loss_tokens_upper_95": 7.57870067202702,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 6.934211656570435,
            "data_time": 0.005266270467213222,
            "batch_time": 0.018958322585575164,
            "samples_per_second": 2224700.755411689,
            "samples_per_second_per_gpu": 278087.59442646115,
            "loss_sequences_lower_95": 6.9965975463867185,
            "loss_sequences_upper_95": 7.20568212890625,
            "loss_tokens_lower_95": 6.720438506881416,
            "loss_tokens_upper_95": 6.900784379694542,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.246030422812375,
            "data_time": 0.0036483512674287013,
            "batch_time": 0.017432726027574827,
            "samples_per_second": 2226906.088866376,
            "samples_per_second_per_gpu": 278363.261108297,
            "loss_sequences_lower_95": 4.212556619672938,
            "loss_sequences_upper_95": 4.279031774128918,
            "loss_tokens_lower_95": 4.212997816074946,
            "loss_tokens_upper_95": 4.2792553228057955,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.854554704806771,
            "data_time": 0.00783641316739454,
            "batch_time": 0.02175178844763073,
            "samples_per_second": 2161974.8532049307,
            "samples_per_second_per_gpu": 270246.85665061633,
            "loss_sequences_lower_95": 4.755279606644826,
            "loss_sequences_upper_95": 4.951333435339862,
            "loss_tokens_lower_95": 4.752393853206605,
            "loss_tokens_upper_95": 4.952122148317493,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 9.474499080657958,
            "data_time": 0.004956756319318499,
            "batch_time": 0.018799167776864672,
            "samples_per_second": 2201646.97819181,
            "samples_per_second_per_gpu": 275205.8722739763,
            "loss_sequences_lower_95": 9.40387744140625,
            "loss_sequences_upper_95": 9.547438134765624,
            "loss_tokens_lower_95": 9.403816137695314,
            "loss_tokens_upper_95": 9.54465322265625,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.323399534875081,
            "data_time": 0.0022961411899790197,
            "batch_time": 0.015971664850563092,
            "samples_per_second": 2254191.5909608323,
            "samples_per_second_per_gpu": 281773.94887010404,
            "loss_sequences_lower_95": 4.928502496378312,
            "loss_sequences_upper_95": 5.035446502483444,
            "loss_tokens_lower_95": 3.5887328606886917,
            "loss_tokens_upper_95": 3.6589698700704902,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.4367407578140945,
            "data_time": 0.017739641666412353,
            "batch_time": 0.032306790351867676,
            "samples_per_second": 2015081.3691191513,
            "samples_per_second_per_gpu": 251885.1711398939,
            "loss_sequences_lower_95": 5.2562147567521285,
            "loss_sequences_upper_95": 5.617388198624796,
            "loss_tokens_lower_95": 5.257518586116051,
            "loss_tokens_upper_95": 5.615860008125874,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.1414695524701886,
            "data_time": 0.009928547777235508,
            "batch_time": 0.024255860596895218,
            "samples_per_second": 2117569.896735262,
            "samples_per_second_per_gpu": 264696.23709190777,
            "loss_sequences_lower_95": 5.022242886412378,
            "loss_sequences_upper_95": 5.258999370500153,
            "loss_tokens_lower_95": 5.022109590418198,
            "loss_tokens_upper_95": 5.260333778530944,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.8563093377969535,
            "data_time": 0.002256544687414682,
            "batch_time": 0.016035863714654555,
            "samples_per_second": 2238121.566444373,
            "samples_per_second_per_gpu": 279765.19580554665,
            "loss_sequences_lower_95": 5.376214372533822,
            "loss_sequences_upper_95": 5.491995918573061,
            "loss_tokens_lower_95": 4.084494520337647,
            "loss_tokens_upper_95": 4.169553189271181,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.015302804412034,
            "data_time": 0.02571601668993632,
            "batch_time": 0.04052413006623586,
            "samples_per_second": 1956289.671889227,
            "samples_per_second_per_gpu": 244536.20898615336,
            "loss_sequences_lower_95": 4.87019369135458,
            "loss_sequences_upper_95": 5.154758425112124,
            "loss_tokens_lower_95": 4.874742481695912,
            "loss_tokens_upper_95": 5.155899168953063,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.789387602324879,
            "data_time": 0.0035555966753371615,
            "batch_time": 0.017472593630044306,
            "samples_per_second": 2207811.6191124343,
            "samples_per_second_per_gpu": 275976.4523890543,
            "loss_sequences_lower_95": 5.7630387220470185,
            "loss_sequences_upper_95": 5.81599582497133,
            "loss_tokens_lower_95": 5.763041738340979,
            "loss_tokens_upper_95": 5.815905858777715,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.619054840027707,
            "data_time": 0.023236749388954855,
            "batch_time": 0.03824193911118941,
            "samples_per_second": 1806995.3400548873,
            "samples_per_second_per_gpu": 225874.41750686092,
            "loss_sequences_lower_95": 5.429121191524765,
            "loss_sequences_upper_95": 5.8109143599723145,
            "loss_tokens_lower_95": 5.423889515700849,
            "loss_tokens_upper_95": 5.81450370121928,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.782166250546774,
            "data_time": 0.07735549658536911,
            "batch_time": 0.09371555596590042,
            "samples_per_second": 1291918.8375511463,
            "samples_per_second_per_gpu": 161489.85469389329,
            "loss_sequences_lower_95": 4.4056849161783855,
            "loss_sequences_upper_95": 5.339612630208333,
            "loss_tokens_lower_95": 3.942333708869086,
            "loss_tokens_upper_95": 5.166838147905137,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.032996122042338,
            "data_time": 0.07339201122522354,
            "batch_time": 0.08967709541320801,
            "samples_per_second": 1381975.2199189593,
            "samples_per_second_per_gpu": 172746.9024898699,
            "loss_sequences_lower_95": 3.7688478342692058,
            "loss_sequences_upper_95": 4.647030804951985,
            "loss_tokens_lower_95": 3.1035194054078517,
            "loss_tokens_upper_95": 4.329569827304797,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 4.904891566359417,
            "data_time": 0.0034586243800656664,
            "batch_time": 0.017214135607247355,
            "samples_per_second": 2236185.196985059,
            "samples_per_second_per_gpu": 279523.14962313237,
            "loss_sequences_lower_95": 4.878478863678203,
            "loss_sequences_upper_95": 4.932786985686672,
            "loss_tokens_lower_95": 4.878139641246318,
            "loss_tokens_upper_95": 4.931996876150589,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 1.0611999352344574,
            "data_time": 0.0015704222349551887,
            "batch_time": 0.015301419222315761,
            "samples_per_second": 2246854.534489097,
            "samples_per_second_per_gpu": 280856.81681113713,
            "loss_sequences_lower_95": 1.235694477984169,
            "loss_sequences_upper_95": 1.2636001671435837,
            "loss_tokens_lower_95": 0.8759198923816176,
            "loss_tokens_upper_95": 0.8906287602335821,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.205228805541992,
            "data_time": 0.03619517385959625,
            "batch_time": 0.05108335241675377,
            "samples_per_second": 1871912.3985411506,
            "samples_per_second_per_gpu": 233989.04981764383,
            "loss_sequences_lower_95": 5.223758415162094,
            "loss_sequences_upper_95": 5.629233400840459,
            "loss_tokens_lower_95": 4.814059645395788,
            "loss_tokens_upper_95": 5.030983682218742,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 7.550848419601853,
            "data_time": 0.10680277006966728,
            "batch_time": 0.12481120654514857,
            "samples_per_second": 962391.8762159724,
            "samples_per_second_per_gpu": 120298.98452699654,
            "loss_sequences_lower_95": 7.148237795443148,
            "loss_sequences_upper_95": 8.136194919895482,
            "loss_tokens_lower_95": 6.853958657347126,
            "loss_tokens_upper_95": 7.972936747986593,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.04484843626255,
            "data_time": 0.027493400233132497,
            "batch_time": 0.04250791243144444,
            "samples_per_second": 1846024.395821828,
            "samples_per_second_per_gpu": 230753.0494777285,
            "loss_sequences_lower_95": 4.998080193124166,
            "loss_sequences_upper_95": 5.363701555205555,
            "loss_tokens_lower_95": 4.669098392162531,
            "loss_tokens_upper_95": 4.8541742831423464,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.155739896181153,
            "data_time": 0.028853816645486013,
            "batch_time": 0.04329230955668858,
            "samples_per_second": 1923395.144841754,
            "samples_per_second_per_gpu": 240424.39310521926,
            "loss_sequences_lower_95": 5.12817525165837,
            "loss_sequences_upper_95": 5.458355089513266,
            "loss_tokens_lower_95": 4.798531193653249,
            "loss_tokens_upper_95": 4.951160651460068,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.2326439808054666,
            "data_time": 0.028148080621446882,
            "batch_time": 0.043389913581666495,
            "samples_per_second": 1879473.1317230116,
            "samples_per_second_per_gpu": 234934.14146537645,
            "loss_sequences_lower_95": 5.15553232518638,
            "loss_sequences_upper_95": 5.553886357749381,
            "loss_tokens_lower_95": 4.846086449647786,
            "loss_tokens_upper_95": 5.086488522161623,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.1963382683149195,
            "data_time": 0.028347988923390705,
            "batch_time": 0.04264775628135318,
            "samples_per_second": 1914254.5149492894,
            "samples_per_second_per_gpu": 239281.81436866117,
            "loss_sequences_lower_95": 5.152401686877739,
            "loss_sequences_upper_95": 5.445227543900653,
            "loss_tokens_lower_95": 4.879578787069826,
            "loss_tokens_upper_95": 5.020890461098739,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.464079646590333,
            "data_time": 0.0292355219523112,
            "batch_time": 0.043855234428688335,
            "samples_per_second": 1950740.256327801,
            "samples_per_second_per_gpu": 243842.53204097511,
            "loss_sequences_lower_95": 5.437823050362724,
            "loss_sequences_upper_95": 5.717135866532415,
            "loss_tokens_lower_95": 5.211751471556011,
            "loss_tokens_upper_95": 5.3273615274934185,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        },
        {
            "loss": 5.509415097352935,
            "data_time": 0.027675350507100422,
            "batch_time": 0.04242299568085443,
            "samples_per_second": 1907854.6603421967,
            "samples_per_second_per_gpu": 238481.8325427746,
            "loss_sequences_lower_95": 5.534833619652725,
            "loss_sequences_upper_95": 5.853862799667731,
            "loss_tokens_lower_95": 5.141386671658379,
            "loss_tokens_upper_95": 5.269392741768384,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "d=512_l=8_h=4"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/c4_original-d=512_l=8_h=4-1.0/params.txt",
    "uuid": "b426fc01-4b92-47ee-9596-ab5226f2e046",
    "creation_date": "2023_12_13-16_17_51"
}