{
    "name": "rw_original-open_lm_1b-16.0",
    "dataset_name": "rw_original",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf7",
    "hyperparameters": {
        "model": "open_lm_1b",
        "tokens": 460734464000,
        "warmup": 5000,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 256,
        "acc": 1,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 1439795200,
        "params_no_embed": 1336510464,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp",
            "--fsdp-limit-all-gathers"
        ],
        "chinchilla_multiplier": 16.0,
        "seed": 124
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--workers",
        "2",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "256",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "5000",
        "--model",
        "open_lm_1b",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--accum-freq",
        "1",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--logs",
        "./logs/1765",
        "--train-num-samples",
        "92146892800",
        "--dataset-manifest",
        "<scrub>/openlm/scrub/datasets/refined_web_tokenized/manifest.jsonl",
        "--data-key",
        "json.gz",
        "--name",
        "rw_original-open_lm_1b-16.0",
        "--fsdp",
        "--fsdp-amp",
        "--fsdp-limit-all-gathers",
        "--val-data",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-data-key",
        "json",
        "txt",
        "--val-tok-ci",
        "--val-seq-ci",
        "--val-max-pop-ci",
        "300000",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/openlm/scrub/experiments/1b_16x_rpj-original/"
    ],
    "results": [
        {
            "loss": 2.2654811163743336,
            "data_time": 0.34142908453941345,
            "batch_time": 2.2188607156276703,
            "samples_per_second": 162735.95992433978,
            "samples_per_second_per_gpu": 20341.994990542473,
            "loss_sequences_lower_95": 2.2125960095723474,
            "loss_sequences_upper_95": 2.3199559593200685,
            "loss_tokens_lower_95": 2.2543850135803223,
            "loss_tokens_upper_95": 2.2766862297058106,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.531392897965929,
            "data_time": 0.005331478658576818,
            "batch_time": 0.5992026413757789,
            "samples_per_second": 438103.9252836142,
            "samples_per_second_per_gpu": 54762.990660451775,
            "loss_sequences_lower_95": 2.528951406781256,
            "loss_sequences_upper_95": 2.5338188169860825,
            "loss_tokens_lower_95": 2.521948104166667,
            "loss_tokens_upper_95": 2.5408021666666665,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3216477827149995,
            "data_time": 0.1441918909549713,
            "batch_time": 0.7623218819499016,
            "samples_per_second": 357924.0982191069,
            "samples_per_second_per_gpu": 44740.51227738836,
            "loss_sequences_lower_95": 2.2522335753148917,
            "loss_sequences_upper_95": 2.4133957718829717,
            "loss_tokens_lower_95": 2.310211864583333,
            "loss_tokens_upper_95": 2.3327946458333333,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.692857653676849,
            "data_time": 0.018543467709892673,
            "batch_time": 0.6032172747348484,
            "samples_per_second": 438540.9060137542,
            "samples_per_second_per_gpu": 54817.613251719275,
            "loss_sequences_lower_95": 2.63636440882732,
            "loss_sequences_upper_95": 2.7498906803721006,
            "loss_tokens_lower_95": 2.6803356458333334,
            "loss_tokens_upper_95": 2.7056223020833334,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6349414001899922,
            "data_time": 0.1427946537733078,
            "batch_time": 0.7051702961325645,
            "samples_per_second": 390254.57768331317,
            "samples_per_second_per_gpu": 48781.822210414146,
            "loss_sequences_lower_95": 2.558082639612637,
            "loss_sequences_upper_95": 2.7341213746857496,
            "loss_tokens_lower_95": 2.624170984375,
            "loss_tokens_upper_95": 2.6457909010416665,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.606223017889006,
            "data_time": 0.04782381157080332,
            "batch_time": 0.6096635833382607,
            "samples_per_second": 426183.07611607155,
            "samples_per_second_per_gpu": 53272.88451450894,
            "loss_sequences_lower_95": 2.552115525816834,
            "loss_sequences_upper_95": 2.6698567901964014,
            "loss_tokens_lower_95": 2.5953504947916666,
            "loss_tokens_upper_95": 2.617102640625,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.124961083373245,
            "data_time": 0.016645102947950362,
            "batch_time": 0.5791626095771789,
            "samples_per_second": 435240.12296509725,
            "samples_per_second_per_gpu": 54405.01537063716,
            "loss_sequences_lower_95": 2.098287149234694,
            "loss_sequences_upper_95": 2.15222509765625,
            "loss_tokens_lower_95": 2.1128336666666665,
            "loss_tokens_upper_95": 2.13724915625,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.0726435661315916,
            "data_time": 0.017240598797798157,
            "batch_time": 0.5947683108480353,
            "samples_per_second": 437248.4119882769,
            "samples_per_second_per_gpu": 54656.05149853461,
            "loss_sequences_lower_95": 3.0398515318226442,
            "loss_sequences_upper_95": 3.107768498445681,
            "loss_tokens_lower_95": 3.061362984375,
            "loss_tokens_upper_95": 3.0839532604166666,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7065925210472046,
            "data_time": 0.13628413528203964,
            "batch_time": 0.700463205575943,
            "samples_per_second": 390974.22330161004,
            "samples_per_second_per_gpu": 48871.777912701255,
            "loss_sequences_lower_95": 2.593645762621872,
            "loss_sequences_upper_95": 2.846826798353738,
            "loss_tokens_lower_95": 2.6951056510416667,
            "loss_tokens_upper_95": 2.71802803125,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.6826510434094155,
            "data_time": 0.13795724511146545,
            "batch_time": 0.7176346182823181,
            "samples_per_second": 391239.2839433758,
            "samples_per_second_per_gpu": 48904.910492921976,
            "loss_sequences_lower_95": 3.527628045967916,
            "loss_sequences_upper_95": 3.868691811542737,
            "loss_tokens_lower_95": 3.6686663229166667,
            "loss_tokens_upper_95": 3.6964348437499996,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7157944728858636,
            "data_time": 0.012619007764191463,
            "batch_time": 0.5913525357328612,
            "samples_per_second": 438937.5233107092,
            "samples_per_second_per_gpu": 54867.19041383865,
            "loss_sequences_lower_95": 2.7001365407551905,
            "loss_sequences_upper_95": 2.732435492272081,
            "loss_tokens_lower_95": 2.7047745520833333,
            "loss_tokens_upper_95": 2.72651515625,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.552498603949493,
            "data_time": 0.03077756017446518,
            "batch_time": 0.5825924545526504,
            "samples_per_second": 431187.52672133816,
            "samples_per_second_per_gpu": 53898.44084016727,
            "loss_sequences_lower_95": 2.5168220160951815,
            "loss_sequences_upper_95": 2.589618934208728,
            "loss_tokens_lower_95": 2.541193177083333,
            "loss_tokens_upper_95": 2.5636095885416665,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1353666850084223,
            "data_time": 0.1345062330365181,
            "batch_time": 0.6995844095945358,
            "samples_per_second": 391690.3001481306,
            "samples_per_second_per_gpu": 48961.28751851633,
            "loss_sequences_lower_95": 3.020494918358979,
            "loss_sequences_upper_95": 3.2784721451888945,
            "loss_tokens_lower_95": 3.1229945989583334,
            "loss_tokens_upper_95": 3.1478034843750002,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.376242092087895,
            "data_time": 0.1279933974146843,
            "batch_time": 0.6903859823942184,
            "samples_per_second": 393403.54307436704,
            "samples_per_second_per_gpu": 49175.44288429588,
            "loss_sequences_lower_95": 2.271997865882765,
            "loss_sequences_upper_95": 2.508002927201343,
            "loss_tokens_lower_95": 2.3651218437500003,
            "loss_tokens_upper_95": 2.3877795677083333,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.932269437746568,
            "data_time": 0.21661536395549774,
            "batch_time": 0.33207982778549194,
            "samples_per_second": 235264.5383340937,
            "samples_per_second_per_gpu": 29408.067291761712,
            "loss_sequences_lower_95": 2.842672343687578,
            "loss_sequences_upper_95": 3.0578813899647104,
            "loss_tokens_lower_95": 2.912741201574152,
            "loss_tokens_upper_95": 2.9522432587363503,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4545675206809974,
            "data_time": 0.1332903951406479,
            "batch_time": 0.531456783413887,
            "samples_per_second": 381029.15701434115,
            "samples_per_second_per_gpu": 47628.644626792644,
            "loss_sequences_lower_95": 2.3920911738893382,
            "loss_sequences_upper_95": 2.518871901264691,
            "loss_tokens_lower_95": 2.443575140625,
            "loss_tokens_upper_95": 2.46572690625,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.733554394704371,
            "data_time": 0.1345115453004837,
            "batch_time": 0.5665647983551025,
            "samples_per_second": 394976.055365574,
            "samples_per_second_per_gpu": 49372.00692069675,
            "loss_sequences_lower_95": 4.628020161248763,
            "loss_sequences_upper_95": 4.879080707479592,
            "loss_tokens_lower_95": 4.7212465,
            "loss_tokens_upper_95": 4.745752145833333,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6672858507906803,
            "data_time": 0.2600373923778534,
            "batch_time": 0.5371103584766388,
            "samples_per_second": 304256.7310691832,
            "samples_per_second_per_gpu": 38032.0913836479,
            "loss_sequences_lower_95": 2.485702333293977,
            "loss_sequences_upper_95": 3.0011967893506664,
            "loss_tokens_lower_95": 2.6545608770651894,
            "loss_tokens_upper_95": 2.680153781077901,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.675899848870412,
            "data_time": 0.008723026514053345,
            "batch_time": 0.5902118625965985,
            "samples_per_second": 444769.4393656237,
            "samples_per_second_per_gpu": 55596.17992070296,
            "loss_sequences_lower_95": 1.6710422513152507,
            "loss_sequences_upper_95": 1.6807607590175901,
            "loss_tokens_lower_95": 1.6711336832137516,
            "loss_tokens_upper_95": 1.6807166600088128,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.225780175112557,
            "data_time": 0.010639571398496629,
            "batch_time": 0.5838633548468352,
            "samples_per_second": 442169.8539071757,
            "samples_per_second_per_gpu": 55271.231738396964,
            "loss_sequences_lower_95": 2.238322235089188,
            "loss_sequences_upper_95": 2.261480264335105,
            "loss_tokens_lower_95": 2.216320561330686,
            "loss_tokens_upper_95": 2.2328980196157904,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.4310521594634527,
            "data_time": 0.03454465005132887,
            "batch_time": 0.5692567427953085,
            "samples_per_second": 432885.27602775086,
            "samples_per_second_per_gpu": 54110.65950346886,
            "loss_sequences_lower_95": 1.788450487773825,
            "loss_sequences_upper_95": 1.9976878602717585,
            "loss_tokens_lower_95": 1.3371520952466047,
            "loss_tokens_upper_95": 1.477459357595455,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7542452483574549,
            "data_time": 0.027699757367372513,
            "batch_time": 0.5961874984204769,
            "samples_per_second": 438025.8072638709,
            "samples_per_second_per_gpu": 54753.22590798386,
            "loss_sequences_lower_95": 1.7065068359375,
            "loss_sequences_upper_95": 1.857771378580729,
            "loss_tokens_lower_95": 1.702780906053459,
            "loss_tokens_upper_95": 1.8186904296875002,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5894797067013178,
            "data_time": 0.05096760640541712,
            "batch_time": 0.5536668077111244,
            "samples_per_second": 417976.0996632269,
            "samples_per_second_per_gpu": 52247.012457903365,
            "loss_sequences_lower_95": 1.6620248234931767,
            "loss_sequences_upper_95": 1.7036781386230098,
            "loss_tokens_lower_95": 1.5694252186229551,
            "loss_tokens_upper_95": 1.5953752472836609,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5255215200510892,
            "data_time": 0.2757440507411957,
            "batch_time": 0.7806315273046494,
            "samples_per_second": 329494.92742451746,
            "samples_per_second_per_gpu": 41186.86592806468,
            "loss_sequences_lower_95": 1.542969662059437,
            "loss_sequences_upper_95": 1.625278091430664,
            "loss_tokens_lower_95": 1.5010254152285096,
            "loss_tokens_upper_95": 1.5401310835621789,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.129580386804075,
            "data_time": 0.2894502133131027,
            "batch_time": 0.84846992790699,
            "samples_per_second": 334866.50508336554,
            "samples_per_second_per_gpu": 41858.31313542069,
            "loss_sequences_lower_95": 2.1240496141083383,
            "loss_sequences_upper_95": 2.2684203354193238,
            "loss_tokens_lower_95": 2.0950617178243482,
            "loss_tokens_upper_95": 2.172919118541596,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2520292091369627,
            "data_time": 0.14167465269565582,
            "batch_time": 0.4886198788881302,
            "samples_per_second": 366229.3613284691,
            "samples_per_second_per_gpu": 45778.67016605864,
            "loss_sequences_lower_95": 2.25751676940918,
            "loss_sequences_upper_95": 2.3453138987223308,
            "loss_tokens_lower_95": 2.173922816254153,
            "loss_tokens_upper_95": 2.3239737153495343,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.0242143042426277,
            "data_time": 0.007376712560653686,
            "batch_time": 0.5870233181864023,
            "samples_per_second": 444392.7289622317,
            "samples_per_second_per_gpu": 55549.091120278965,
            "loss_sequences_lower_95": 3.062691025832267,
            "loss_sequences_upper_95": 3.1391206343268663,
            "loss_tokens_lower_95": 2.974518059943738,
            "loss_tokens_upper_95": 3.0508539940841657,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2193492059454774,
            "data_time": 0.03375618308782578,
            "batch_time": 0.5780183389782906,
            "samples_per_second": 430807.4637350604,
            "samples_per_second_per_gpu": 53850.93296688255,
            "loss_sequences_lower_95": 2.779165038034972,
            "loss_sequences_upper_95": 2.983420400908499,
            "loss_tokens_lower_95": 2.138854487087378,
            "loss_tokens_upper_95": 2.239824253120388,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.405694637892596,
            "data_time": 0.05970706939697266,
            "batch_time": 0.5965435832738877,
            "samples_per_second": 421231.8886560866,
            "samples_per_second_per_gpu": 52653.98608201082,
            "loss_sequences_lower_95": 2.772239320514145,
            "loss_sequences_upper_95": 3.012922475769251,
            "loss_tokens_lower_95": 2.3265049311836985,
            "loss_tokens_upper_95": 2.450208232494353,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.863297288276289,
            "data_time": 0.29953132569789886,
            "batch_time": 0.80275958776474,
            "samples_per_second": 324434.4779908096,
            "samples_per_second_per_gpu": 40554.3097488512,
            "loss_sequences_lower_95": 4.769302200944456,
            "loss_sequences_upper_95": 4.95675646638217,
            "loss_tokens_lower_95": 4.771036031365939,
            "loss_tokens_upper_95": 4.956662586508276,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.441112084388733,
            "data_time": 0.24532409012317657,
            "batch_time": 0.4823460876941681,
            "samples_per_second": 284224.7722640032,
            "samples_per_second_per_gpu": 35528.0965330004,
            "loss_sequences_lower_95": 2.350277294158935,
            "loss_sequences_upper_95": 2.6807771301269527,
            "loss_tokens_lower_95": 2.2447132909020713,
            "loss_tokens_upper_95": 2.612305959520698,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5678716339742882,
            "data_time": 0.039432113990187645,
            "batch_time": 0.5964769832789898,
            "samples_per_second": 431796.86785191594,
            "samples_per_second_per_gpu": 53974.60848148949,
            "loss_sequences_lower_95": 1.5573089443429344,
            "loss_sequences_upper_95": 1.578847819926673,
            "loss_tokens_lower_95": 1.5572102854171332,
            "loss_tokens_upper_95": 1.578634053194169,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8284419662731775,
            "data_time": 0.061797702312469484,
            "batch_time": 0.6194039136171341,
            "samples_per_second": 423039.80754214234,
            "samples_per_second_per_gpu": 52879.97594276779,
            "loss_sequences_lower_95": 1.8151009524483646,
            "loss_sequences_upper_95": 1.84175595995054,
            "loss_tokens_lower_95": 1.8156023705034936,
            "loss_tokens_upper_95": 1.8416741698418944,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4189475131372116,
            "data_time": 0.03929598070681095,
            "batch_time": 0.5643627941608429,
            "samples_per_second": 426390.14707217045,
            "samples_per_second_per_gpu": 53298.76838402131,
            "loss_sequences_lower_95": 2.684845448889333,
            "loss_sequences_upper_95": 2.8165646159739697,
            "loss_tokens_lower_95": 2.382695446459861,
            "loss_tokens_upper_95": 2.434588538863614,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.979525688648224,
            "data_time": 0.15379026532173157,
            "batch_time": 0.7225649878382683,
            "samples_per_second": 390561.29425595666,
            "samples_per_second_per_gpu": 48820.16178199458,
            "loss_sequences_lower_95": 4.396637963867188,
            "loss_sequences_upper_95": 4.924520068359374,
            "loss_tokens_lower_95": 3.7573496022656765,
            "loss_tokens_upper_95": 4.098339229762988,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.399207815527916,
            "data_time": 0.22740136086940765,
            "batch_time": 0.31559114158153534,
            "samples_per_second": 215813.04717365914,
            "samples_per_second_per_gpu": 26976.630896707393,
            "loss_sequences_lower_95": 2.1992893755435943,
            "loss_sequences_upper_95": 2.619278198480606,
            "loss_tokens_lower_95": 2.040398064975081,
            "loss_tokens_upper_95": 2.6818996999455593,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8916332187323737,
            "data_time": 0.2681870609521866,
            "batch_time": 0.6724215596914291,
            "samples_per_second": 314831.5077489193,
            "samples_per_second_per_gpu": 39353.93846861491,
            "loss_sequences_lower_95": 4.016543096783518,
            "loss_sequences_upper_95": 4.668320991253031,
            "loss_tokens_lower_95": 2.6292236603368093,
            "loss_tokens_upper_95": 2.973831593062993,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7436538247262197,
            "data_time": 0.03514680597517225,
            "batch_time": 0.6136443697743945,
            "samples_per_second": 435044.9336417618,
            "samples_per_second_per_gpu": 54380.61670522022,
            "loss_sequences_lower_95": 1.7232116805876951,
            "loss_sequences_upper_95": 1.7643342304354726,
            "loss_tokens_lower_95": 1.7234489470915588,
            "loss_tokens_upper_95": 1.764671592995713,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.2369745869999913,
            "data_time": 0.017784319463230315,
            "batch_time": 0.5777359888667152,
            "samples_per_second": 437405.2670009456,
            "samples_per_second_per_gpu": 54675.6583751182,
            "loss_sequences_lower_95": 1.245211079540286,
            "loss_sequences_upper_95": 1.3320843848167936,
            "loss_tokens_lower_95": 1.1889693540767123,
            "loss_tokens_upper_95": 1.2733517303633681,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3551732891208523,
            "data_time": 0.1573091745376587,
            "batch_time": 0.47557390481233597,
            "samples_per_second": 324962.05902078224,
            "samples_per_second_per_gpu": 40620.25737759778,
            "loss_sequences_lower_95": 2.309639681302584,
            "loss_sequences_upper_95": 2.7290106189993275,
            "loss_tokens_lower_95": 2.196983045212766,
            "loss_tokens_upper_95": 2.456600124983739,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9647291013360872,
            "data_time": 0.059774792194366454,
            "batch_time": 0.6371500343084335,
            "samples_per_second": 424541.1110948169,
            "samples_per_second_per_gpu": 53067.63888685211,
            "loss_sequences_lower_95": 3.0913389330881267,
            "loss_sequences_upper_95": 3.2534323724687995,
            "loss_tokens_lower_95": 2.8950361109797864,
            "loss_tokens_upper_95": 3.029248120745083,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.65532289045613,
            "data_time": 0.26390691101551056,
            "batch_time": 0.6454712748527527,
            "samples_per_second": 312969.66564502555,
            "samples_per_second_per_gpu": 39121.208205628194,
            "loss_sequences_lower_95": 1.6616075004019388,
            "loss_sequences_upper_95": 2.041832421465618,
            "loss_tokens_lower_95": 1.5163301551905968,
            "loss_tokens_upper_95": 1.7632263233377727,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.162252563122488,
            "data_time": 0.010765436301037061,
            "batch_time": 0.5834200371021762,
            "samples_per_second": 440113.2705419865,
            "samples_per_second_per_gpu": 55014.15881774831,
            "loss_sequences_lower_95": 5.148797454803461,
            "loss_sequences_upper_95": 5.175361029237097,
            "loss_tokens_lower_95": 5.148743313115748,
            "loss_tokens_upper_95": 5.175662368801885,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0492374706036836,
            "data_time": 0.24041809141635895,
            "batch_time": 0.4818008840084076,
            "samples_per_second": 287776.954246598,
            "samples_per_second_per_gpu": 35972.11928082475,
            "loss_sequences_lower_95": 1.0426852068854766,
            "loss_sequences_upper_95": 1.1718376548544875,
            "loss_tokens_lower_95": 0.9145200637614349,
            "loss_tokens_upper_95": 1.1450832839387954,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.8678360091065462,
            "data_time": 0.005513783494631449,
            "batch_time": 0.5849963392814,
            "samples_per_second": 445234.3748135861,
            "samples_per_second_per_gpu": 55654.296851698266,
            "loss_sequences_lower_95": 4.67681938507272,
            "loss_sequences_upper_95": 4.719310581679114,
            "loss_tokens_lower_95": 3.7774986399903288,
            "loss_tokens_upper_95": 3.81997791344294,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.631687201976776,
            "data_time": 0.07709964737296104,
            "batch_time": 0.644970428198576,
            "samples_per_second": 419688.3264949654,
            "samples_per_second_per_gpu": 52461.04081187068,
            "loss_sequences_lower_95": 4.781609301757813,
            "loss_sequences_upper_95": 5.011176586914063,
            "loss_tokens_lower_95": 4.493987881335194,
            "loss_tokens_upper_95": 4.707789332130946,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8228303422098575,
            "data_time": 0.28285758197307587,
            "batch_time": 0.8082152009010315,
            "samples_per_second": 332545.069791808,
            "samples_per_second_per_gpu": 41568.133723976,
            "loss_sequences_lower_95": 1.785124057272206,
            "loss_sequences_upper_95": 1.8587518907629925,
            "loss_tokens_lower_95": 1.7860451142684273,
            "loss_tokens_upper_95": 1.8594742517885954,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.033296949755062,
            "data_time": 0.05085711429516474,
            "batch_time": 0.5526282290617625,
            "samples_per_second": 419189.45143184136,
            "samples_per_second_per_gpu": 52398.68142898017,
            "loss_sequences_lower_95": 3.99808867483428,
            "loss_sequences_upper_95": 4.0678178544477985,
            "loss_tokens_lower_95": 3.9987704467773435,
            "loss_tokens_upper_95": 4.067533828272964,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0732207543055217,
            "data_time": 0.05573208381732305,
            "batch_time": 0.6246119265755018,
            "samples_per_second": 427780.92294769385,
            "samples_per_second_per_gpu": 53472.61536846173,
            "loss_sequences_lower_95": 1.1237738179524739,
            "loss_sequences_upper_95": 1.1573165140787762,
            "loss_tokens_lower_95": 1.0388331348164266,
            "loss_tokens_upper_95": 1.1011269468724991,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.697541394687835,
            "data_time": 0.27217842638492584,
            "batch_time": 0.7545260041952133,
            "samples_per_second": 326946.86866547837,
            "samples_per_second_per_gpu": 40868.358583184796,
            "loss_sequences_lower_95": 4.363454197474889,
            "loss_sequences_upper_95": 5.050572655087426,
            "loss_tokens_lower_95": 4.361674208868118,
            "loss_tokens_upper_95": 5.046369745163691,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.2304450869560242,
            "data_time": 0.2277401089668274,
            "batch_time": 0.3160416781902313,
            "samples_per_second": 215508.26948973563,
            "samples_per_second_per_gpu": 26938.533686216953,
            "loss_sequences_lower_95": 1.1105958431959153,
            "loss_sequences_upper_95": 1.5269788086414335,
            "loss_tokens_lower_95": 0.9459087190923002,
            "loss_tokens_upper_95": 1.246798866507933,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.462788917064667,
            "data_time": 0.07544713094830513,
            "batch_time": 0.643154688179493,
            "samples_per_second": 420040.38037383463,
            "samples_per_second_per_gpu": 52505.04754672933,
            "loss_sequences_lower_95": 6.465380773925781,
            "loss_sequences_upper_95": 6.785299169921875,
            "loss_tokens_lower_95": 6.323181038741937,
            "loss_tokens_upper_95": 6.595872970038864,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.469625491142273,
            "data_time": 0.07355302944779396,
            "batch_time": 0.6416403688490391,
            "samples_per_second": 420163.5931285959,
            "samples_per_second_per_gpu": 52520.44914107449,
            "loss_sequences_lower_95": 6.782216308593751,
            "loss_sequences_upper_95": 7.025488037109375,
            "loss_tokens_lower_95": 6.330400226805051,
            "loss_tokens_upper_95": 6.573439798369429,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.955461431260961,
            "data_time": 0.028384221096833546,
            "batch_time": 0.5938219067951044,
            "samples_per_second": 437616.9739627919,
            "samples_per_second_per_gpu": 54702.12174534899,
            "loss_sequences_lower_95": 5.933239954795927,
            "loss_sequences_upper_95": 5.978043106902657,
            "loss_tokens_lower_95": 5.93331189566397,
            "loss_tokens_upper_95": 5.977706335248701,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9073698912531183,
            "data_time": 0.09886908531188965,
            "batch_time": 0.5935380657513937,
            "samples_per_second": 408832.82036741293,
            "samples_per_second_per_gpu": 51104.10254592662,
            "loss_sequences_lower_95": 1.884467321533578,
            "loss_sequences_upper_95": 1.9307558027463758,
            "loss_tokens_lower_95": 1.8844861993042554,
            "loss_tokens_upper_95": 1.9306059712821622,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.6873247761726375,
            "data_time": 0.07465143501758575,
            "batch_time": 0.6446701399981976,
            "samples_per_second": 418628.66402816377,
            "samples_per_second_per_gpu": 52328.58300352047,
            "loss_sequences_lower_95": 6.628155615234375,
            "loss_sequences_upper_95": 6.7489585815429685,
            "loss_tokens_lower_95": 6.6282302734375005,
            "loss_tokens_upper_95": 6.74665107421875,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.814600751833072,
            "data_time": 0.010488494166306086,
            "batch_time": 0.5837822872258368,
            "samples_per_second": 443533.3057730726,
            "samples_per_second_per_gpu": 55441.66322163407,
            "loss_sequences_lower_95": 2.5928991987937557,
            "loss_sequences_upper_95": 2.6626937834451865,
            "loss_tokens_lower_95": 1.7257104207879217,
            "loss_tokens_upper_95": 1.7723560496136908,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9083562835828582,
            "data_time": 0.16545126267841884,
            "batch_time": 0.5205434475626264,
            "samples_per_second": 327051.9552604653,
            "samples_per_second_per_gpu": 40881.494407558166,
            "loss_sequences_lower_95": 1.8716829385330427,
            "loss_sequences_upper_95": 1.945856601088794,
            "loss_tokens_lower_95": 1.8719817773619694,
            "loss_tokens_upper_95": 1.945889583986197,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8595558493745092,
            "data_time": 0.14354681968688965,
            "batch_time": 0.7233152464032173,
            "samples_per_second": 392963.5776519556,
            "samples_per_second_per_gpu": 49120.44720649445,
            "loss_sequences_lower_95": 1.8280356732536764,
            "loss_sequences_upper_95": 1.8908069266524967,
            "loss_tokens_lower_95": 1.8282191497204352,
            "loss_tokens_upper_95": 1.8901529558967143,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.656560125468026,
            "data_time": 0.012545166537165642,
            "batch_time": 0.5822978462092578,
            "samples_per_second": 440858.6260104018,
            "samples_per_second_per_gpu": 55107.32825130023,
            "loss_sequences_lower_95": 3.864504834002725,
            "loss_sequences_upper_95": 3.970314261555806,
            "loss_tokens_lower_95": 2.5279157105230596,
            "loss_tokens_upper_95": 2.598465554198592,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.539645326200616,
            "data_time": 0.2682514488697052,
            "batch_time": 0.7055434584617615,
            "samples_per_second": 320502.2402946415,
            "samples_per_second_per_gpu": 40062.78003683019,
            "loss_sequences_lower_95": 5.4421890823929395,
            "loss_sequences_upper_95": 5.637856167586392,
            "loss_tokens_lower_95": 5.442203582279266,
            "loss_tokens_upper_95": 5.636219585761822,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.278783689945116,
            "data_time": 0.026691224712591905,
            "batch_time": 0.6004346276705081,
            "samples_per_second": 437221.3228935313,
            "samples_per_second_per_gpu": 54652.66536169141,
            "loss_sequences_lower_95": 3.2494766132693043,
            "loss_sequences_upper_95": 3.3091264767894684,
            "loss_tokens_lower_95": 3.2496746658185205,
            "loss_tokens_upper_95": 3.308295905903574,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0125702613765752,
            "data_time": 0.29458199441432953,
            "batch_time": 0.7719478607177734,
            "samples_per_second": 319162.9822586848,
            "samples_per_second_per_gpu": 39895.3727823356,
            "loss_sequences_lower_95": 1.9730394900423809,
            "loss_sequences_upper_95": 2.0532983724353384,
            "loss_tokens_lower_95": 1.973888749057807,
            "loss_tokens_upper_95": 2.052847108563173,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.087700714667638,
            "data_time": 0.22679556906223297,
            "batch_time": 0.375978946685791,
            "samples_per_second": 256831.44652053216,
            "samples_per_second_per_gpu": 32103.93081506652,
            "loss_sequences_lower_95": 1.0044731044769286,
            "loss_sequences_upper_95": 1.3292625586191813,
            "loss_tokens_lower_95": 0.8908104684617784,
            "loss_tokens_upper_95": 1.2335786501566568,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.9032248665889104,
            "data_time": 0.23101897537708282,
            "batch_time": 0.380053848028183,
            "samples_per_second": 256534.29782646435,
            "samples_per_second_per_gpu": 32066.787228308043,
            "loss_sequences_lower_95": 0.8951848840713501,
            "loss_sequences_upper_95": 1.1824977811177568,
            "loss_tokens_lower_95": 0.7190723440620337,
            "loss_tokens_upper_95": 1.0444947403468443,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.12499104936625,
            "data_time": 0.024064786732196808,
            "batch_time": 0.5782669835856983,
            "samples_per_second": 435614.80631727714,
            "samples_per_second_per_gpu": 54451.85078965964,
            "loss_sequences_lower_95": 4.106101674682438,
            "loss_sequences_upper_95": 4.144133994730303,
            "loss_tokens_lower_95": 4.105960145031757,
            "loss_tokens_upper_95": 4.143896232683634,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.26382788349176606,
            "data_time": 0.004958513131828808,
            "batch_time": 0.5883825628257735,
            "samples_per_second": 445037.97293297015,
            "samples_per_second_per_gpu": 55629.74661662127,
            "loss_sequences_lower_95": 0.3860644646450049,
            "loss_sequences_upper_95": 0.39861496150114545,
            "loss_tokens_lower_95": 0.2536045941710578,
            "loss_tokens_upper_95": 0.25969945897947094,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0133601229960525,
            "data_time": 0.27767668664455414,
            "batch_time": 0.562614694237709,
            "samples_per_second": 305142.243109726,
            "samples_per_second_per_gpu": 38142.78038871575,
            "loss_sequences_lower_95": 2.052080313614973,
            "loss_sequences_upper_95": 2.3432057478296473,
            "loss_tokens_lower_95": 1.9077619477180439,
            "loss_tokens_upper_95": 2.0885827525773,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.014728385048944,
            "data_time": 0.2274097353219986,
            "batch_time": 0.32648690044879913,
            "samples_per_second": 223721.45083361305,
            "samples_per_second_per_gpu": 27965.18135420163,
            "loss_sequences_lower_95": 3.4661206632047086,
            "loss_sequences_upper_95": 4.655147036990604,
            "loss_tokens_lower_95": 3.1032393514374155,
            "loss_tokens_upper_95": 4.850897292148919,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9318442228363781,
            "data_time": 0.26378293335437775,
            "batch_time": 0.6437793672084808,
            "samples_per_second": 314212.7254513298,
            "samples_per_second_per_gpu": 39276.59068141622,
            "loss_sequences_lower_95": 2.0436414439503743,
            "loss_sequences_upper_95": 2.2890282886784252,
            "loss_tokens_lower_95": 1.8298287401768818,
            "loss_tokens_upper_95": 1.9790283983389403,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.045209742173916,
            "data_time": 0.2659405618906021,
            "batch_time": 0.6459241509437561,
            "samples_per_second": 314130.56816281227,
            "samples_per_second_per_gpu": 39266.321020351534,
            "loss_sequences_lower_95": 2.1771236698801926,
            "loss_sequences_upper_95": 2.4064360688372353,
            "loss_tokens_lower_95": 1.9523866170509852,
            "loss_tokens_upper_95": 2.0831434741757797,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9187600758017562,
            "data_time": 0.2630939334630966,
            "batch_time": 0.6431385725736618,
            "samples_per_second": 314188.7223604447,
            "samples_per_second_per_gpu": 39273.59029505559,
            "loss_sequences_lower_95": 1.9551524836842609,
            "loss_sequences_upper_95": 2.2235765689756812,
            "loss_tokens_lower_95": 1.8111763970066308,
            "loss_tokens_upper_95": 2.001485161893734,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.1812258326425784,
            "data_time": 0.2612014561891556,
            "batch_time": 0.6410582661628723,
            "samples_per_second": 314540.85924687126,
            "samples_per_second_per_gpu": 39317.60740585891,
            "loss_sequences_lower_95": 2.2746352265520793,
            "loss_sequences_upper_95": 2.496959518804783,
            "loss_tokens_lower_95": 2.090382261662468,
            "loss_tokens_upper_95": 2.213227791949596,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9672680441637216,
            "data_time": 0.2603752017021179,
            "batch_time": 0.6332740783691406,
            "samples_per_second": 312928.469519728,
            "samples_per_second_per_gpu": 39116.058689966,
            "loss_sequences_lower_95": 2.042881220468083,
            "loss_sequences_upper_95": 2.225341000764266,
            "loss_tokens_lower_95": 1.8899141116280347,
            "loss_tokens_upper_95": 1.9892184643718924,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7854654054816177,
            "data_time": 0.259333536028862,
            "batch_time": 0.6397641748189926,
            "samples_per_second": 314942.2726456043,
            "samples_per_second_per_gpu": 39367.784080700534,
            "loss_sequences_lower_95": 1.9823821091070406,
            "loss_sequences_upper_95": 2.2017032204604727,
            "loss_tokens_lower_95": 1.7042341106982746,
            "loss_tokens_upper_95": 1.7993948108565023,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/params.txt",
    "uuid": "6520e2d5-7160-4b01-8893-210fcaf5b5d4",
    "creation_date": "2024_01_29-13_20_48"
}