{
    "name": "c4_original-open_lm_1b-1.0",
    "dataset_name": "c4_original",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf1",
    "hyperparameters": {
        "model": "open_lm_1b",
        "tokens": 28795904000,
        "warmup": 5000,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 256,
        "acc": 2,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 1439795200,
        "params_no_embed": 1336510464,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp"
        ],
        "chinchilla_multiplier": 1.0
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--train-num-samples",
        "5759180800",
        "--workers",
        "2",
        "--dataset-manifest",
        "<scrub>/original_c4/manifest.jsonl",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "256",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "5000",
        "--model",
        "open_lm_1b",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--data-key",
        "txt",
        "--accum-freq",
        "2",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--name",
        "c4_original-open_lm_1b-1.0",
        "--logs",
        "/admin/<scrub>/scrub_logs",
        "--val-data",
        "/admin/<scrub>/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/admin/<scrub>/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-batch-size",
        "8",
        "--val-data-key",
        "json",
        "txt",
        "--val-num-samples",
        "245760",
        "--fsdp",
        "--fsdp-amp",
        "--report-to",
        "wandb",
        "--wandb-project-name",
        "scrub",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/scrub_experiments_v3"
    ],
    "results": [
        {
            "loss": 3.318723158041636,
            "data_time": 0.27494171261787415,
            "batch_time": 2.114211395382881,
            "samples_per_second": 163904.45701394073,
            "samples_per_second_per_gpu": 20488.05712674259,
            "loss_sequences_lower_95": 3.2131125640869143,
            "loss_sequences_upper_95": 3.42649959564209,
            "loss_tokens_lower_95": 3.304539731343587,
            "loss_tokens_upper_95": 3.3331171099344887,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6568587118096136,
            "data_time": 0.002754760140612863,
            "batch_time": 0.29374137860835414,
            "samples_per_second": 447259.8801624324,
            "samples_per_second_per_gpu": 55907.48502030405,
            "loss_sequences_lower_95": 2.6540212881180953,
            "loss_sequences_upper_95": 2.6597529961749546,
            "loss_tokens_lower_95": 2.6469764427083335,
            "loss_tokens_upper_95": 2.666821286458333,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.2814074793640446,
            "data_time": 0.05601593852043152,
            "batch_time": 0.3574990816414356,
            "samples_per_second": 394383.13840481493,
            "samples_per_second_per_gpu": 49297.89230060187,
            "loss_sequences_lower_95": 3.2618323314433195,
            "loss_sequences_upper_95": 3.30135775196309,
            "loss_tokens_lower_95": 3.2667490520833335,
            "loss_tokens_upper_95": 3.296442057291667,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.630916426796274,
            "data_time": 0.007303647696971893,
            "batch_time": 0.2954754943126126,
            "samples_per_second": 446993.2832386725,
            "samples_per_second_per_gpu": 55874.16040483406,
            "loss_sequences_lower_95": 2.6217221730025773,
            "loss_sequences_upper_95": 2.640008064191366,
            "loss_tokens_lower_95": 2.621522703125,
            "loss_tokens_upper_95": 2.640342973958333,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.668572883741919,
            "data_time": 0.05475934222340584,
            "batch_time": 0.3372012861073017,
            "samples_per_second": 413651.62313245423,
            "samples_per_second_per_gpu": 51706.45289155678,
            "loss_sequences_lower_95": 2.634209353569326,
            "loss_sequences_upper_95": 2.7021321780327137,
            "loss_tokens_lower_95": 2.6588849895833335,
            "loss_tokens_upper_95": 2.67836046875,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1566325977981537,
            "data_time": 0.019225746393203735,
            "batch_time": 0.30333265165487927,
            "samples_per_second": 433465.78986833926,
            "samples_per_second_per_gpu": 54183.22373354241,
            "loss_sequences_lower_95": 3.1231643728623175,
            "loss_sequences_upper_95": 3.191095074433846,
            "loss_tokens_lower_95": 3.144790463541667,
            "loss_tokens_upper_95": 3.168438098958333,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.971003735625014,
            "data_time": 0.00713828091438,
            "batch_time": 0.29100643977140767,
            "samples_per_second": 445580.5752808365,
            "samples_per_second_per_gpu": 55697.57191010456,
            "loss_sequences_lower_95": 2.9409653818558676,
            "loss_sequences_upper_95": 3.0005575474330355,
            "loss_tokens_lower_95": 2.956827234375,
            "loss_tokens_upper_95": 2.9851680885416667,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.4221912470912432,
            "data_time": 0.007566136357031371,
            "batch_time": 0.2917332559039718,
            "samples_per_second": 444973.4685015975,
            "samples_per_second_per_gpu": 55621.68356269969,
            "loss_sequences_lower_95": 3.4148365714986912,
            "loss_sequences_upper_95": 3.429678925678992,
            "loss_tokens_lower_95": 3.4107384739583333,
            "loss_tokens_upper_95": 3.434050265625,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1135703984314835,
            "data_time": 0.05451476573944092,
            "batch_time": 0.3333250395953655,
            "samples_per_second": 415219.2446884618,
            "samples_per_second_per_gpu": 51902.40558605773,
            "loss_sequences_lower_95": 3.070291894432006,
            "loss_sequences_upper_95": 3.1573144680116236,
            "loss_tokens_lower_95": 3.1023959531250003,
            "loss_tokens_upper_95": 3.1246610572916667,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.172933983708559,
            "data_time": 0.05237891897559166,
            "batch_time": 0.33757391944527626,
            "samples_per_second": 418619.6884918814,
            "samples_per_second_per_gpu": 52327.46106148518,
            "loss_sequences_lower_95": 4.157587273036068,
            "loss_sequences_upper_95": 4.187781268334672,
            "loss_tokens_lower_95": 4.1591172499999995,
            "loss_tokens_upper_95": 4.186432666666666,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.015580514497719,
            "data_time": 0.005568910402379448,
            "batch_time": 0.29455573248053696,
            "samples_per_second": 447466.79625711514,
            "samples_per_second_per_gpu": 55933.34953213939,
            "loss_sequences_lower_95": 3.008913110160854,
            "loss_sequences_upper_95": 3.0223333031768194,
            "loss_tokens_lower_95": 3.0050596666666665,
            "loss_tokens_upper_95": 3.0259035885416665,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.81151151488295,
            "data_time": 0.013764001821216783,
            "batch_time": 0.29981196005093425,
            "samples_per_second": 441791.62691784004,
            "samples_per_second_per_gpu": 55223.953364730005,
            "loss_sequences_lower_95": 2.803387575733418,
            "loss_sequences_upper_95": 2.819218930995549,
            "loss_tokens_lower_95": 2.8008356041666667,
            "loss_tokens_upper_95": 2.8224579895833335,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.5786301596411345,
            "data_time": 0.05889702960848808,
            "batch_time": 0.3379676006734371,
            "samples_per_second": 414453.19379382505,
            "samples_per_second_per_gpu": 51806.64922422813,
            "loss_sequences_lower_95": 3.545880195045084,
            "loss_sequences_upper_95": 3.611012896369485,
            "loss_tokens_lower_95": 3.5657612395833334,
            "loss_tokens_upper_95": 3.59138921875,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6880074635057003,
            "data_time": 0.056261301040649414,
            "batch_time": 0.3345053307712078,
            "samples_per_second": 414692.57050896273,
            "samples_per_second_per_gpu": 51836.57131362034,
            "loss_sequences_lower_95": 2.6348905419623536,
            "loss_sequences_upper_95": 2.7405342055435336,
            "loss_tokens_lower_95": 2.6771805677083336,
            "loss_tokens_upper_95": 2.6991450208333334,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.746119948950681,
            "data_time": 0.19168803095817566,
            "batch_time": 0.3073713332414627,
            "samples_per_second": 239666.48627938976,
            "samples_per_second_per_gpu": 29958.31078492372,
            "loss_sequences_lower_95": 3.684772040627219,
            "loss_sequences_upper_95": 3.807081127166748,
            "loss_tokens_lower_95": 3.7228493257002397,
            "loss_tokens_upper_95": 3.7699219530278985,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.186731723585212,
            "data_time": 0.06955971817175548,
            "batch_time": 0.3325296988089879,
            "samples_per_second": 396770.883917723,
            "samples_per_second_per_gpu": 49596.36048971538,
            "loss_sequences_lower_95": 3.117780795528312,
            "loss_sequences_upper_95": 3.2563794372380648,
            "loss_tokens_lower_95": 3.1744123020833332,
            "loss_tokens_upper_95": 3.1993236666666665,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.563355165295362,
            "data_time": 0.07365160187085469,
            "batch_time": 0.36063942313194275,
            "samples_per_second": 405369.1625944935,
            "samples_per_second_per_gpu": 50671.145324311685,
            "loss_sequences_lower_95": 5.50886972875268,
            "loss_sequences_upper_95": 5.61783643737632,
            "loss_tokens_lower_95": 5.54994121875,
            "loss_tokens_upper_95": 5.576757072916666,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.014191213201304,
            "data_time": 0.20694027841091156,
            "batch_time": 0.4843052476644516,
            "samples_per_second": 316798.94141311664,
            "samples_per_second_per_gpu": 39599.86767663958,
            "loss_sequences_lower_95": 2.977886456348857,
            "loss_sequences_upper_95": 3.0508623717261143,
            "loss_tokens_lower_95": 3.001393183723825,
            "loss_tokens_upper_95": 3.0268580827556675,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.5328317715425195,
            "data_time": 0.004205721752210097,
            "batch_time": 0.29126891751180994,
            "samples_per_second": 450737.1219352625,
            "samples_per_second_per_gpu": 56342.140241907815,
            "loss_sequences_lower_95": 2.51775763668815,
            "loss_sequences_upper_95": 2.547659184833891,
            "loss_tokens_lower_95": 2.517797222172767,
            "loss_tokens_upper_95": 2.5474123388762284,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.408454033725885,
            "data_time": 0.005099024765099151,
            "batch_time": 0.2920328616341458,
            "samples_per_second": 448271.0855623195,
            "samples_per_second_per_gpu": 56033.88569528994,
            "loss_sequences_lower_95": 2.4250506515214223,
            "loss_sequences_upper_95": 2.4494574544724657,
            "loss_tokens_lower_95": 2.397202996036312,
            "loss_tokens_upper_95": 2.4146271441353564,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7542755931517284,
            "data_time": 0.015495351132224588,
            "batch_time": 0.29524654851240273,
            "samples_per_second": 443349.8637533368,
            "samples_per_second_per_gpu": 55418.7329691671,
            "loss_sequences_lower_95": 3.3337084280526685,
            "loss_sequences_upper_95": 3.6212043571201877,
            "loss_tokens_lower_95": 2.5612282233775616,
            "loss_tokens_upper_95": 2.763794230346825,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.920142323454221,
            "data_time": 0.011946863805254301,
            "batch_time": 0.29303192906081676,
            "samples_per_second": 445100.35153215186,
            "samples_per_second_per_gpu": 55637.54394151898,
            "loss_sequences_lower_95": 3.1407125813802086,
            "loss_sequences_upper_95": 3.33989404296875,
            "loss_tokens_lower_95": 2.8254183863993707,
            "loss_tokens_upper_95": 2.9643137713738206,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2661094275995852,
            "data_time": 0.022979454560713333,
            "batch_time": 0.29412422532385046,
            "samples_per_second": 433000.7515672945,
            "samples_per_second_per_gpu": 54125.09394591181,
            "loss_sequences_lower_95": 2.342807513534164,
            "loss_sequences_upper_95": 2.3978513265035657,
            "loss_tokens_lower_95": 2.237034849603136,
            "loss_tokens_upper_95": 2.2676231263410287,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6165361534465443,
            "data_time": 0.11288812011480331,
            "batch_time": 0.3677891120314598,
            "samples_per_second": 368624.010237491,
            "samples_per_second_per_gpu": 46078.001279686374,
            "loss_sequences_lower_95": 2.596872086958452,
            "loss_sequences_upper_95": 2.7604501273415303,
            "loss_tokens_lower_95": 2.5634685909629145,
            "loss_tokens_upper_95": 2.622121168938325,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.978859033389967,
            "data_time": 0.11628175526857376,
            "batch_time": 0.39189428091049194,
            "samples_per_second": 382726.8021128669,
            "samples_per_second_per_gpu": 47840.85026410836,
            "loss_sequences_lower_95": 2.983210903868383,
            "loss_sequences_upper_95": 3.1757920868542726,
            "loss_tokens_lower_95": 2.927530976271581,
            "loss_tokens_upper_95": 3.0243381326167906,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.0649328462282814,
            "data_time": 0.08167866865793864,
            "batch_time": 0.3105987310409546,
            "samples_per_second": 386857.448119756,
            "samples_per_second_per_gpu": 48357.1810149695,
            "loss_sequences_lower_95": 3.0725445251464842,
            "loss_sequences_upper_95": 3.1742242635091147,
            "loss_tokens_lower_95": 2.9644268851639755,
            "loss_tokens_upper_95": 3.1474135956451827,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.6760024910842297,
            "data_time": 0.0035521582050143546,
            "batch_time": 0.291652040380352,
            "samples_per_second": 450059.3072508803,
            "samples_per_second_per_gpu": 56257.413406360036,
            "loss_sequences_lower_95": 3.7187541232745684,
            "loss_sequences_upper_95": 3.7955309374384876,
            "loss_tokens_lower_95": 3.611961049859047,
            "loss_tokens_upper_95": 3.6890916069658126,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9008483657933244,
            "data_time": 0.01509840472748405,
            "batch_time": 0.2985481321811676,
            "samples_per_second": 440338.2254042916,
            "samples_per_second_per_gpu": 55042.27817553645,
            "loss_sequences_lower_95": 3.9466425423670297,
            "loss_sequences_upper_95": 4.264789609555844,
            "loss_tokens_lower_95": 2.7256643075276026,
            "loss_tokens_upper_95": 2.854347765348263,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9097735763816703,
            "data_time": 0.02707846015691757,
            "batch_time": 0.2930816635489464,
            "samples_per_second": 423549.922066468,
            "samples_per_second_per_gpu": 52943.7402583085,
            "loss_sequences_lower_95": 3.5393578434966937,
            "loss_sequences_upper_95": 3.894682556777277,
            "loss_tokens_lower_95": 2.795167244600073,
            "loss_tokens_upper_95": 2.9451624783826436,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.533516984007674,
            "data_time": 0.11606674641370773,
            "batch_time": 0.37012535333633423,
            "samples_per_second": 367181.5450305229,
            "samples_per_second_per_gpu": 45897.693128815365,
            "loss_sequences_lower_95": 5.4068201770521185,
            "loss_sequences_upper_95": 5.66164896368436,
            "loss_tokens_lower_95": 5.40414821677012,
            "loss_tokens_upper_95": 5.6646222955015695,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.611983132362366,
            "data_time": 0.21891270577907562,
            "batch_time": 0.4568771570920944,
            "samples_per_second": 288514.3870553051,
            "samples_per_second_per_gpu": 36064.29838191314,
            "loss_sequences_lower_95": 2.5260984954833985,
            "loss_sequences_upper_95": 2.8493909759521485,
            "loss_tokens_lower_95": 2.4048926174320773,
            "loss_tokens_upper_95": 2.7856582013780185,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3797779162562125,
            "data_time": 0.016680313274264336,
            "batch_time": 0.29269283544272184,
            "samples_per_second": 437748.1699119276,
            "samples_per_second_per_gpu": 54718.52123899095,
            "loss_sequences_lower_95": 2.344574443149469,
            "loss_sequences_upper_95": 2.4144900459594103,
            "loss_tokens_lower_95": 2.34551533413031,
            "loss_tokens_upper_95": 2.415250380329612,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.0885959058194548,
            "data_time": 0.025340092182159425,
            "batch_time": 0.30184974819421767,
            "samples_per_second": 432995.5626254847,
            "samples_per_second_per_gpu": 54124.44532818559,
            "loss_sequences_lower_95": 3.037428582368768,
            "loss_sequences_upper_95": 3.140391281840256,
            "loss_tokens_lower_95": 3.0360466684502327,
            "loss_tokens_upper_95": 3.1396102867782556,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.613165015105454,
            "data_time": 0.01713313062985738,
            "batch_time": 0.29390948712825776,
            "samples_per_second": 438641.8405824488,
            "samples_per_second_per_gpu": 54830.2300728061,
            "loss_sequences_lower_95": 2.8665303791697836,
            "loss_sequences_upper_95": 2.994319606112707,
            "loss_tokens_lower_95": 2.55154221045896,
            "loss_tokens_upper_95": 2.601622723269043,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.350409778594971,
            "data_time": 0.060139287263154984,
            "batch_time": 0.3408048264682293,
            "samples_per_second": 417738.795364802,
            "samples_per_second_per_gpu": 52217.34942060025,
            "loss_sequences_lower_95": 4.764143908691406,
            "loss_sequences_upper_95": 5.3360154296875,
            "loss_tokens_lower_95": 4.078102961097783,
            "loss_tokens_upper_95": 4.4325365370744745,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8221511244773865,
            "data_time": 0.19844351708889008,
            "batch_time": 0.2868873178958893,
            "samples_per_second": 219230.13471352958,
            "samples_per_second_per_gpu": 27403.766839191198,
            "loss_sequences_lower_95": 2.57098702788353,
            "loss_sequences_upper_95": 3.054624003171921,
            "loss_tokens_lower_95": 2.4028334431264593,
            "loss_tokens_upper_95": 3.1350883834663477,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.398514424247303,
            "data_time": 0.11615730822086334,
            "batch_time": 0.3188084214925766,
            "samples_per_second": 354932.6477643246,
            "samples_per_second_per_gpu": 44366.58097054058,
            "loss_sequences_lower_95": 4.574629509586027,
            "loss_sequences_upper_95": 5.391596414851046,
            "loss_tokens_lower_95": 2.7223753437786256,
            "loss_tokens_upper_95": 3.1470558966429145,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.313934605941339,
            "data_time": 0.015151868263880411,
            "batch_time": 0.3006991131438149,
            "samples_per_second": 444775.8209351012,
            "samples_per_second_per_gpu": 55596.97761688765,
            "loss_sequences_lower_95": 2.2903017083230943,
            "loss_sequences_upper_95": 2.337521960923848,
            "loss_tokens_lower_95": 2.2908278958981816,
            "loss_tokens_upper_95": 2.3370894939110967,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.810155769291374,
            "data_time": 0.008055135607719421,
            "batch_time": 0.290823592645366,
            "samples_per_second": 447023.74834424164,
            "samples_per_second_per_gpu": 55877.968543030205,
            "loss_sequences_lower_95": 1.818663799834441,
            "loss_sequences_upper_95": 1.9306873963361757,
            "loss_tokens_lower_95": 1.741516571656831,
            "loss_tokens_upper_95": 1.852776260737559,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.696076512336731,
            "data_time": 0.08150923748811086,
            "batch_time": 0.2907326469818751,
            "samples_per_second": 361472.606207503,
            "samples_per_second_per_gpu": 45184.075775937876,
            "loss_sequences_lower_95": 2.633607902107658,
            "loss_sequences_upper_95": 3.0614407996991613,
            "loss_tokens_lower_95": 2.516998437816186,
            "loss_tokens_upper_95": 2.796226305260248,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1443428249931187,
            "data_time": 0.024692408740520477,
            "batch_time": 0.3087321475148201,
            "samples_per_second": 439532.074599965,
            "samples_per_second_per_gpu": 54941.509324995626,
            "loss_sequences_lower_95": 3.254981287690534,
            "loss_sequences_upper_95": 3.409748619940682,
            "loss_tokens_lower_95": 3.0617232560750756,
            "loss_tokens_upper_95": 3.199972618822491,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2346240405629323,
            "data_time": 0.1128491535782814,
            "batch_time": 0.30481989681720734,
            "samples_per_second": 347069.5498532105,
            "samples_per_second_per_gpu": 43383.693731651314,
            "loss_sequences_lower_95": 2.197966310454578,
            "loss_sequences_upper_95": 2.5963557917897293,
            "loss_tokens_lower_95": 2.0457645484712734,
            "loss_tokens_upper_95": 2.3482808547152194,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.5582042750322143,
            "data_time": 0.0049844460706514335,
            "batch_time": 0.28984124875862133,
            "samples_per_second": 449287.18822496705,
            "samples_per_second_per_gpu": 56160.89852812088,
            "loss_sequences_lower_95": 3.5415894409350623,
            "loss_sequences_upper_95": 3.5743269356996397,
            "loss_tokens_lower_95": 3.541956145135277,
            "loss_tokens_upper_95": 3.5747002427829315,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6781407380567014,
            "data_time": 0.23391276597976685,
            "batch_time": 0.47542476654052734,
            "samples_per_second": 290061.9743492898,
            "samples_per_second_per_gpu": 36257.74679366122,
            "loss_sequences_lower_95": 0.6571007237851041,
            "loss_sequences_upper_95": 0.7723878508632622,
            "loss_tokens_lower_95": 0.5821518241077998,
            "loss_tokens_upper_95": 0.7450991346501766,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.56786355180373,
            "data_time": 0.002857385670460985,
            "batch_time": 0.2907515107389278,
            "samples_per_second": 449818.3969503585,
            "samples_per_second_per_gpu": 56227.299618794816,
            "loss_sequences_lower_95": 4.247697881862552,
            "loss_sequences_upper_95": 4.28871652663129,
            "loss_tokens_lower_95": 3.4183214337524177,
            "loss_tokens_upper_95": 3.4597835348162476,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.774121744155884,
            "data_time": 0.03185807913541794,
            "batch_time": 0.31538779474794865,
            "samples_per_second": 430306.28183475137,
            "samples_per_second_per_gpu": 53788.28522934392,
            "loss_sequences_lower_95": 5.859321435546875,
            "loss_sequences_upper_95": 6.1224254150390625,
            "loss_tokens_lower_95": 5.592002572445379,
            "loss_tokens_upper_95": 5.831851165606269,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9776739597320556,
            "data_time": 0.11486224085092545,
            "batch_time": 0.380174919962883,
            "samples_per_second": 371038.320342902,
            "samples_per_second_per_gpu": 46379.79004286275,
            "loss_sequences_lower_95": 1.9071417568040931,
            "loss_sequences_upper_95": 2.04812199468198,
            "loss_tokens_lower_95": 1.9067367752738622,
            "loss_tokens_upper_95": 2.0469191974142324,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.963980319644466,
            "data_time": 0.02344931662082672,
            "batch_time": 0.2957092821598053,
            "samples_per_second": 431604.096706339,
            "samples_per_second_per_gpu": 53950.51208829237,
            "loss_sequences_lower_95": 6.866315770004735,
            "loss_sequences_upper_95": 7.059103633996212,
            "loss_tokens_lower_95": 6.871171911991004,
            "loss_tokens_upper_95": 7.058812551787405,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6251509061257045,
            "data_time": 0.021833054721355438,
            "batch_time": 0.30526671061913174,
            "samples_per_second": 436043.8249050218,
            "samples_per_second_per_gpu": 54505.47811312773,
            "loss_sequences_lower_95": 0.66681804300944,
            "loss_sequences_upper_95": 0.6991518636067707,
            "loss_tokens_lower_95": 0.5993133405706033,
            "loss_tokens_upper_95": 0.638023231753639,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.506229486919585,
            "data_time": 0.11833644658327103,
            "batch_time": 0.3611500859260559,
            "samples_per_second": 367035.2705623032,
            "samples_per_second_per_gpu": 45879.4088202879,
            "loss_sequences_lower_95": 5.16959958031064,
            "loss_sequences_upper_95": 5.846803370884486,
            "loss_tokens_lower_95": 5.1770582798549105,
            "loss_tokens_upper_95": 5.837600315638951,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7792618349194527,
            "data_time": 0.19578172266483307,
            "batch_time": 0.28396059572696686,
            "samples_per_second": 220429.97686868778,
            "samples_per_second_per_gpu": 27553.747108585972,
            "loss_sequences_lower_95": 1.6388927787542342,
            "loss_sequences_upper_95": 2.3707429468631744,
            "loss_tokens_lower_95": 1.3964328286082475,
            "loss_tokens_upper_95": 1.8118162395044701,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 7.532916384696961,
            "data_time": 0.030788972973823547,
            "batch_time": 0.3122917655855417,
            "samples_per_second": 433476.55350037443,
            "samples_per_second_per_gpu": 54184.569187546804,
            "loss_sequences_lower_95": 7.537202526855469,
            "loss_sequences_upper_95": 7.920468603515625,
            "loss_tokens_lower_95": 7.331753194069902,
            "loss_tokens_upper_95": 7.665718566584708,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.359621496200561,
            "data_time": 0.03174580819904804,
            "batch_time": 0.3132294248789549,
            "samples_per_second": 433366.91034137923,
            "samples_per_second_per_gpu": 54170.863792672404,
            "loss_sequences_lower_95": 6.5194555908203125,
            "loss_sequences_upper_95": 6.716305969238282,
            "loss_tokens_lower_95": 6.238799752362919,
            "loss_tokens_upper_95": 6.428736512972583,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.5225258328312803,
            "data_time": 0.012097381676236788,
            "batch_time": 0.2925597404440244,
            "samples_per_second": 442701.9426503041,
            "samples_per_second_per_gpu": 55337.742831288015,
            "loss_sequences_lower_95": 3.4838108493703905,
            "loss_sequences_upper_95": 3.5610118798189743,
            "loss_tokens_lower_95": 3.4839161007427504,
            "loss_tokens_upper_95": 3.561511602858909,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9600989191396627,
            "data_time": 0.04147327339256203,
            "batch_time": 0.30109582104525723,
            "samples_per_second": 410622.8077845994,
            "samples_per_second_per_gpu": 51327.85097307493,
            "loss_sequences_lower_95": 1.9038060295233894,
            "loss_sequences_upper_95": 2.014795226582001,
            "loss_tokens_lower_95": 1.9037619313886087,
            "loss_tokens_upper_95": 2.0161337388092835,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.991024907588959,
            "data_time": 0.03165167011320591,
            "batch_time": 0.3145461492240429,
            "samples_per_second": 431181.66873874643,
            "samples_per_second_per_gpu": 53897.708592343304,
            "loss_sequences_lower_95": 4.896524389648437,
            "loss_sequences_upper_95": 5.090328051757812,
            "loss_tokens_lower_95": 4.8963570922851565,
            "loss_tokens_upper_95": 5.0891453002929685,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.209748544135856,
            "data_time": 0.004899033221853785,
            "batch_time": 0.2915090923926916,
            "samples_per_second": 449802.26460488635,
            "samples_per_second_per_gpu": 56225.283075610794,
            "loss_sequences_lower_95": 3.1171372028736988,
            "loss_sequences_upper_95": 3.212779941757332,
            "loss_tokens_lower_95": 2.0362395249581433,
            "loss_tokens_upper_95": 2.09764089526991,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8084857290360465,
            "data_time": 0.08156357028267601,
            "batch_time": 0.30481907996264374,
            "samples_per_second": 365522.50485564885,
            "samples_per_second_per_gpu": 45690.313106956106,
            "loss_sequences_lower_95": 1.7366620647373485,
            "loss_sequences_upper_95": 1.8795134558606503,
            "loss_tokens_lower_95": 1.7359005287512026,
            "loss_tokens_upper_95": 1.879838761286949,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8041184747920316,
            "data_time": 0.05851718783378601,
            "batch_time": 0.3445434235036373,
            "samples_per_second": 418798.0548650113,
            "samples_per_second_per_gpu": 52349.75685812641,
            "loss_sequences_lower_95": 1.7530813598632813,
            "loss_sequences_upper_95": 1.8557912549785538,
            "loss_tokens_lower_95": 1.7525182836196003,
            "loss_tokens_upper_95": 1.8551228153004367,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9884956033848349,
            "data_time": 0.0056307124240057805,
            "batch_time": 0.29110898668803864,
            "samples_per_second": 448504.85789802315,
            "samples_per_second_per_gpu": 56063.107237252894,
            "loss_sequences_lower_95": 2.5372199248598584,
            "loss_sequences_upper_95": 2.6215127189222414,
            "loss_tokens_lower_95": 1.858540319568684,
            "loss_tokens_upper_95": 1.916984178663572,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.555081470933541,
            "data_time": 0.11713109165430069,
            "batch_time": 0.3364943712949753,
            "samples_per_second": 361959.58372093097,
            "samples_per_second_per_gpu": 45244.94796511637,
            "loss_sequences_lower_95": 3.3805186922588044,
            "loss_sequences_upper_95": 3.7304927523173985,
            "loss_tokens_lower_95": 3.3813843500046503,
            "loss_tokens_upper_95": 3.7302215737640543,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.302327057418473,
            "data_time": 0.011354865936132578,
            "batch_time": 0.29458210273430896,
            "samples_per_second": 445314.62099316367,
            "samples_per_second_per_gpu": 55664.32762414546,
            "loss_sequences_lower_95": 3.2772247631761275,
            "loss_sequences_upper_95": 3.3277804033471905,
            "loss_tokens_lower_95": 3.2772242032205656,
            "loss_tokens_upper_95": 3.3272703062284976,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.871573364271701,
            "data_time": 0.12007321417331696,
            "batch_time": 0.3588406518101692,
            "samples_per_second": 365646.48936776747,
            "samples_per_second_per_gpu": 45705.811170970934,
            "loss_sequences_lower_95": 1.7855683002657103,
            "loss_sequences_upper_95": 1.9549055858723168,
            "loss_tokens_lower_95": 1.7855529229617815,
            "loss_tokens_upper_95": 1.959474833960672,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.2493048151334127,
            "data_time": 0.205118328332901,
            "batch_time": 0.35438092052936554,
            "samples_per_second": 260272.57911084444,
            "samples_per_second_per_gpu": 32534.072388855555,
            "loss_sequences_lower_95": 1.166006269454956,
            "loss_sequences_upper_95": 1.5190083154042562,
            "loss_tokens_lower_95": 1.0119654496510824,
            "loss_tokens_upper_95": 1.3684910085466173,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0336682846148808,
            "data_time": 0.20764251053333282,
            "batch_time": 0.35716693103313446,
            "samples_per_second": 260260.93741864557,
            "samples_per_second_per_gpu": 32532.617177330696,
            "loss_sequences_lower_95": 1.0216603962580362,
            "loss_sequences_upper_95": 1.3588055737813314,
            "loss_tokens_lower_95": 0.8053115544694194,
            "loss_tokens_upper_95": 1.1721625231624988,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.599832086464794,
            "data_time": 0.01067106701709606,
            "batch_time": 0.2939612732993232,
            "samples_per_second": 445293.97875648655,
            "samples_per_second_per_gpu": 55661.74734456082,
            "loss_sequences_lower_95": 4.571033013277798,
            "loss_sequences_upper_95": 4.6289102770618555,
            "loss_tokens_lower_95": 4.571182661772827,
            "loss_tokens_upper_95": 4.629029003474779,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.35551905706560516,
            "data_time": 0.002656691412174467,
            "batch_time": 0.2916541519193733,
            "samples_per_second": 449877.5046559717,
            "samples_per_second_per_gpu": 56234.688081996464,
            "loss_sequences_lower_95": 0.44228605447646685,
            "loss_sequences_upper_95": 0.45239248129017645,
            "loss_tokens_lower_95": 0.3434665837904923,
            "loss_tokens_upper_95": 0.3497018681812958,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.280852004299014,
            "data_time": 0.23179063200950623,
            "batch_time": 0.5226337760686874,
            "samples_per_second": 312710.0930756108,
            "samples_per_second_per_gpu": 39088.76163445135,
            "loss_sequences_lower_95": 4.602166555810163,
            "loss_sequences_upper_95": 5.0710597841758425,
            "loss_tokens_lower_95": 4.09378346363762,
            "loss_tokens_upper_95": 4.34214833914809,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 7.111570680463636,
            "data_time": 0.19869904220104218,
            "batch_time": 0.298436239361763,
            "samples_per_second": 227659.93912341632,
            "samples_per_second_per_gpu": 28457.49239042704,
            "loss_sequences_lower_95": 6.729651456265836,
            "loss_sequences_upper_95": 7.672684684959617,
            "loss_tokens_lower_95": 6.3333679199218755,
            "loss_tokens_upper_95": 7.568368002809124,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.130011786774891,
            "data_time": 0.11729452013969421,
            "batch_time": 0.3088652938604355,
            "samples_per_second": 346500.86243574205,
            "samples_per_second_per_gpu": 43312.607804467756,
            "loss_sequences_lower_95": 4.256290631177949,
            "loss_sequences_upper_95": 4.61713582480826,
            "loss_tokens_lower_95": 3.879476376879205,
            "loss_tokens_upper_95": 4.084525163937657,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.156731349665944,
            "data_time": 0.11359977722167969,
            "batch_time": 0.3051493316888809,
            "samples_per_second": 347602.64868610987,
            "samples_per_second_per_gpu": 43450.33108576373,
            "loss_sequences_lower_95": 4.245777967499524,
            "loss_sequences_upper_95": 4.5418631390827455,
            "loss_tokens_lower_95": 3.945316600027853,
            "loss_tokens_upper_95": 4.115328045748066,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.435479683120076,
            "data_time": 0.11940112709999084,
            "batch_time": 0.3109896332025528,
            "samples_per_second": 345821.8656144553,
            "samples_per_second_per_gpu": 43227.733201806914,
            "loss_sequences_lower_95": 4.698537686975992,
            "loss_sequences_upper_95": 5.187556587777487,
            "loss_tokens_lower_95": 4.163036533770306,
            "loss_tokens_upper_95": 4.443556695096859,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.210710220220612,
            "data_time": 0.11025429517030716,
            "batch_time": 0.30179841071367264,
            "samples_per_second": 348158.8356945951,
            "samples_per_second_per_gpu": 43519.85446182439,
            "loss_sequences_lower_95": 4.282232424107994,
            "loss_sequences_upper_95": 4.571604593788705,
            "loss_tokens_lower_95": 4.015357357987734,
            "loss_tokens_upper_95": 4.170556916328977,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.533484284181773,
            "data_time": 0.12220840156078339,
            "batch_time": 0.3106377199292183,
            "samples_per_second": 342851.9966032616,
            "samples_per_second_per_gpu": 42856.4995754077,
            "loss_sequences_lower_95": 4.644404450410641,
            "loss_sequences_upper_95": 4.995670380207322,
            "loss_tokens_lower_95": 4.340575963366481,
            "loss_tokens_upper_95": 4.474236436092425,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.118170950470901,
            "data_time": 0.11557219177484512,
            "batch_time": 0.3073381558060646,
            "samples_per_second": 347045.0493065149,
            "samples_per_second_per_gpu": 43380.63116331436,
            "loss_sequences_lower_95": 4.320624095637624,
            "loss_sequences_upper_95": 4.6485040804234945,
            "loss_tokens_lower_95": 3.917845222790461,
            "loss_tokens_upper_95": 4.048263932056209,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-1.0/params.txt",
    "uuid": "ef59042d-065c-4af4-b1fa-a8a859ed8f9d",
    "creation_date": "2023_12_13-16_58_25"
}