{
    "name": "rw_original-open_lm_1b-1.0",
    "dataset_name": "rw_original",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf7",
    "hyperparameters": {
        "model": "open_lm_1b",
        "tokens": 28795904000,
        "warmup": 5000,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 256,
        "acc": 2,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 1439795200,
        "params_no_embed": 1336510464,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp"
        ],
        "chinchilla_multiplier": 1.0
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--train-num-samples",
        "5759180800",
        "--workers",
        "2",
        "--dataset-manifest",
        "<scrub>/refined_web_tokenized/manifest.jsonl",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "256",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "5000",
        "--model",
        "open_lm_1b",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--data-key",
        "json.gz",
        "--accum-freq",
        "2",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--name",
        "rw_original-open_lm_1b-1.0",
        "--logs",
        "/admin/<scrub>/scrub_logs",
        "--val-data",
        "/admin/<scrub>/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/admin/<scrub>/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-batch-size",
        "8",
        "--val-data-key",
        "json",
        "txt",
        "--val-num-samples",
        "245760",
        "--fsdp",
        "--fsdp-amp",
        "--report-to",
        "wandb",
        "--wandb-project-name",
        "scrub",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/scrub_experiments_v3"
    ],
    "results": [
        {
            "loss": 2.5776320238908133,
            "data_time": 0.3108825981616974,
            "batch_time": 2.0169080942869186,
            "samples_per_second": 160769.9146166333,
            "samples_per_second_per_gpu": 20096.23932707916,
            "loss_sequences_lower_95": 2.523696314493815,
            "loss_sequences_upper_95": 2.633267402648926,
            "loss_tokens_lower_95": 2.5658474222819008,
            "loss_tokens_upper_95": 2.5893290519714354,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7633513098392832,
            "data_time": 0.0028451382973694013,
            "batch_time": 0.29631414827001074,
            "samples_per_second": 443517.606629434,
            "samples_per_second_per_gpu": 55439.70082867925,
            "loss_sequences_lower_95": 2.760903786368075,
            "loss_sequences_upper_95": 2.765803784272462,
            "loss_tokens_lower_95": 2.7536293854166667,
            "loss_tokens_upper_95": 2.7734012916666666,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4781281310684826,
            "data_time": 0.0626351349055767,
            "batch_time": 0.3646673411130905,
            "samples_per_second": 391317.4166437599,
            "samples_per_second_per_gpu": 48914.677080469985,
            "loss_sequences_lower_95": 2.444818115234375,
            "loss_sequences_upper_95": 2.516150612344547,
            "loss_tokens_lower_95": 2.4672311302083334,
            "loss_tokens_upper_95": 2.4894819166666666,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7472907931534287,
            "data_time": 0.00868368658580278,
            "batch_time": 0.2975618329487349,
            "samples_per_second": 445516.08619255276,
            "samples_per_second_per_gpu": 55689.510774069095,
            "loss_sequences_lower_95": 2.7257643212387244,
            "loss_sequences_upper_95": 2.7694841686130798,
            "loss_tokens_lower_95": 2.7371292343749998,
            "loss_tokens_upper_95": 2.75740303125,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8060769874549445,
            "data_time": 0.06303081288933754,
            "batch_time": 0.34462086856365204,
            "samples_per_second": 411233.9869346228,
            "samples_per_second_per_gpu": 51404.24836682785,
            "loss_sequences_lower_95": 2.767149642043104,
            "loss_sequences_upper_95": 2.848654205881651,
            "loss_tokens_lower_95": 2.796208838541667,
            "loss_tokens_upper_95": 2.8161500104166666,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.806676255368117,
            "data_time": 0.02183717116713524,
            "batch_time": 0.30182843282818794,
            "samples_per_second": 433330.7925018804,
            "samples_per_second_per_gpu": 54166.34906273505,
            "loss_sequences_lower_95": 2.7747137447022645,
            "loss_sequences_upper_95": 2.8401158315320147,
            "loss_tokens_lower_95": 2.7962159583333337,
            "loss_tokens_upper_95": 2.8171513489583337,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2197355136579398,
            "data_time": 0.008130162190168332,
            "batch_time": 0.29190568969799924,
            "samples_per_second": 445419.35226568714,
            "samples_per_second_per_gpu": 55677.41903321089,
            "loss_sequences_lower_95": 2.197074657206633,
            "loss_sequences_upper_95": 2.2422477877869897,
            "loss_tokens_lower_95": 2.208237453125,
            "loss_tokens_upper_95": 2.2313217135416665,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.237283864845156,
            "data_time": 0.008362344221064919,
            "batch_time": 0.29247440123244334,
            "samples_per_second": 444864.8250417537,
            "samples_per_second_per_gpu": 55608.10313021921,
            "loss_sequences_lower_95": 3.2252464874427353,
            "loss_sequences_upper_95": 3.250030610888416,
            "loss_tokens_lower_95": 3.2270251250000004,
            "loss_tokens_upper_95": 3.247563447916667,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8580285640266854,
            "data_time": 0.059986766427755356,
            "batch_time": 0.3392263129353523,
            "samples_per_second": 413073.4176593274,
            "samples_per_second_per_gpu": 51634.17720741592,
            "loss_sequences_lower_95": 2.8042901015863184,
            "loss_sequences_upper_95": 2.9159555140549576,
            "loss_tokens_lower_95": 2.847723390625,
            "loss_tokens_upper_95": 2.8683202083333335,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.8060546020274106,
            "data_time": 0.06220634654164314,
            "batch_time": 0.34800346940755844,
            "samples_per_second": 415182.1058403823,
            "samples_per_second_per_gpu": 51897.763230047785,
            "loss_sequences_lower_95": 3.758619882937948,
            "loss_sequences_upper_95": 3.861058424395535,
            "loss_tokens_lower_95": 3.7940009375,
            "loss_tokens_upper_95": 3.8178475625,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9064105894363985,
            "data_time": 0.006209820280461557,
            "batch_time": 0.29554883699949835,
            "samples_per_second": 446806.8200375292,
            "samples_per_second_per_gpu": 55850.85250469115,
            "loss_sequences_lower_95": 2.8980717206664557,
            "loss_sequences_upper_95": 2.914669649663389,
            "loss_tokens_lower_95": 2.896421296875,
            "loss_tokens_upper_95": 2.9163183697916666,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.72590261258368,
            "data_time": 0.015130588882847837,
            "batch_time": 0.30419960461164774,
            "samples_per_second": 440174.25045372726,
            "samples_per_second_per_gpu": 55021.78130671591,
            "loss_sequences_lower_95": 2.712516543806617,
            "loss_sequences_upper_95": 2.7395766697144417,
            "loss_tokens_lower_95": 2.715845739583333,
            "loss_tokens_upper_95": 2.736175109375,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.2632457315317276,
            "data_time": 0.06101753190159798,
            "batch_time": 0.34038155525922775,
            "samples_per_second": 413783.05313973216,
            "samples_per_second_per_gpu": 51722.88164246652,
            "loss_sequences_lower_95": 3.2170476824468337,
            "loss_sequences_upper_95": 3.3161397071203886,
            "loss_tokens_lower_95": 3.252340333333333,
            "loss_tokens_upper_95": 3.27432525,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.5323127515447115,
            "data_time": 0.0640021488070488,
            "batch_time": 0.34306589141488075,
            "samples_per_second": 411655.2358306413,
            "samples_per_second_per_gpu": 51456.90447883016,
            "loss_sequences_lower_95": 2.4700541385565176,
            "loss_sequences_upper_95": 2.596023733601308,
            "loss_tokens_lower_95": 2.5217494322916667,
            "loss_tokens_upper_95": 2.5428414947916664,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.2535748156634243,
            "data_time": 0.22714179754257202,
            "batch_time": 0.34301628172397614,
            "samples_per_second": 233795.35415341827,
            "samples_per_second_per_gpu": 29224.419269177284,
            "loss_sequences_lower_95": 3.1954930045387964,
            "loss_sequences_upper_95": 3.31223124590787,
            "loss_tokens_lower_95": 3.233823958310214,
            "loss_tokens_upper_95": 3.2735572121360086,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.674812646370935,
            "data_time": 0.08255832890669505,
            "batch_time": 0.3455476810534795,
            "samples_per_second": 393437.0592177147,
            "samples_per_second_per_gpu": 49179.63240221434,
            "loss_sequences_lower_95": 2.6237553276751546,
            "loss_sequences_upper_95": 2.7257199790665427,
            "loss_tokens_lower_95": 2.6643407135416664,
            "loss_tokens_upper_95": 2.685260703125,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.7952325891379,
            "data_time": 0.08206008871396382,
            "batch_time": 0.3677578866481781,
            "samples_per_second": 403406.58315231046,
            "samples_per_second_per_gpu": 50425.82289403881,
            "loss_sequences_lower_95": 4.746127488430698,
            "loss_sequences_upper_95": 4.850972794102491,
            "loss_tokens_lower_95": 4.7841583125,
            "loss_tokens_upper_95": 4.806400260416667,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.857636164446346,
            "data_time": 0.24370945990085602,
            "batch_time": 0.521482303738594,
            "samples_per_second": 306944.9811911868,
            "samples_per_second_per_gpu": 38368.12264889835,
            "loss_sequences_lower_95": 2.781184618590308,
            "loss_sequences_upper_95": 2.9772651234611134,
            "loss_tokens_lower_95": 2.846211486566262,
            "loss_tokens_upper_95": 2.8691344714555584,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6447342449130369,
            "data_time": 0.004466351731257005,
            "batch_time": 0.29186541844498026,
            "samples_per_second": 450124.30158488464,
            "samples_per_second_per_gpu": 56265.53769811058,
            "loss_sequences_lower_95": 1.6399942958512854,
            "loss_sequences_upper_95": 1.6495484319919704,
            "loss_tokens_lower_95": 1.6398977766755538,
            "loss_tokens_upper_95": 1.6495274604478973,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4278755894316975,
            "data_time": 0.00535924676098401,
            "batch_time": 0.291090699879429,
            "samples_per_second": 450215.4578473603,
            "samples_per_second_per_gpu": 56276.932230920036,
            "loss_sequences_lower_95": 2.4358501422929946,
            "loss_sequences_upper_95": 2.459658846905497,
            "loss_tokens_lower_95": 2.4169961528784825,
            "loss_tokens_upper_95": 2.4345361214248666,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2955388807158004,
            "data_time": 0.01693246645085952,
            "batch_time": 0.2963366324410719,
            "samples_per_second": 443508.16035839234,
            "samples_per_second_per_gpu": 55438.52004479904,
            "loss_sequences_lower_95": 2.7843252435178765,
            "loss_sequences_upper_95": 3.0390231803277783,
            "loss_tokens_lower_95": 2.1220942786514176,
            "loss_tokens_upper_95": 2.301241062719651,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.520453667918841,
            "data_time": 0.012857282534241676,
            "batch_time": 0.29354175987342995,
            "samples_per_second": 445650.3945104459,
            "samples_per_second_per_gpu": 55706.299313805735,
            "loss_sequences_lower_95": 2.6193383911132813,
            "loss_sequences_upper_95": 2.796409073893229,
            "loss_tokens_lower_95": 2.448795916863207,
            "loss_tokens_upper_95": 2.5776732569280663,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.97616649613948,
            "data_time": 0.025513131510127674,
            "batch_time": 0.2962641255422072,
            "samples_per_second": 433027.6766513177,
            "samples_per_second_per_gpu": 54128.45958141471,
            "loss_sequences_lower_95": 2.049017264573659,
            "loss_sequences_upper_95": 2.0950440609970267,
            "loss_tokens_lower_95": 1.9499822981701513,
            "loss_tokens_upper_95": 1.9764340404234422,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8211875449527393,
            "data_time": 0.130597822368145,
            "batch_time": 0.38552675396203995,
            "samples_per_second": 364764.06452269526,
            "samples_per_second_per_gpu": 45595.50806533691,
            "loss_sequences_lower_95": 1.8304623517123135,
            "loss_sequences_upper_95": 1.9197261290116743,
            "loss_tokens_lower_95": 1.787996439773758,
            "loss_tokens_upper_95": 1.8301929151340321,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.497697167980428,
            "data_time": 0.12628192454576492,
            "batch_time": 0.40211252868175507,
            "samples_per_second": 380609.133361844,
            "samples_per_second_per_gpu": 47576.1416702305,
            "loss_sequences_lower_95": 2.5042798723493305,
            "loss_sequences_upper_95": 2.660510926538584,
            "loss_tokens_lower_95": 2.448037545753639,
            "loss_tokens_upper_95": 2.528522157455992,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.640722380479177,
            "data_time": 0.08940455317497253,
            "batch_time": 0.3182327300310135,
            "samples_per_second": 385403.6057665698,
            "samples_per_second_per_gpu": 48175.45072082122,
            "loss_sequences_lower_95": 2.6548035583496095,
            "loss_sequences_upper_95": 2.7565706685384113,
            "loss_tokens_lower_95": 2.5438191606028857,
            "loss_tokens_upper_95": 2.7160065489440086,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.7578483706800423,
            "data_time": 0.0037752105005132326,
            "batch_time": 0.29110618863465654,
            "samples_per_second": 451245.58266644226,
            "samples_per_second_per_gpu": 56405.69783330528,
            "loss_sequences_lower_95": 3.8007178918975444,
            "loss_sequences_upper_95": 3.879304938933246,
            "loss_tokens_lower_95": 3.6921494248411295,
            "loss_tokens_upper_95": 3.7711235942054087,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7343491480406685,
            "data_time": 0.015805792651678387,
            "batch_time": 0.29679984795419795,
            "samples_per_second": 443914.4542213787,
            "samples_per_second_per_gpu": 55489.30677767234,
            "loss_sequences_lower_95": 3.4705500541712704,
            "loss_sequences_upper_95": 3.717733117305871,
            "loss_tokens_lower_95": 2.5992730995614783,
            "loss_tokens_upper_95": 2.7138600639599653,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8263572378675277,
            "data_time": 0.028125815093517303,
            "batch_time": 0.2923744708299637,
            "samples_per_second": 426317.1338078266,
            "samples_per_second_per_gpu": 53289.64172597833,
            "loss_sequences_lower_95": 3.3081454853149,
            "loss_sequences_upper_95": 3.5872454438193255,
            "loss_tokens_lower_95": 2.7148929326370403,
            "loss_tokens_upper_95": 2.8524460700528396,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.7290447335265,
            "data_time": 0.13706886768341064,
            "batch_time": 0.3903026133775711,
            "samples_per_second": 364100.8266403232,
            "samples_per_second_per_gpu": 45512.6033300404,
            "loss_sequences_lower_95": 5.61402533544253,
            "loss_sequences_upper_95": 5.839472564279217,
            "loss_tokens_lower_95": 5.618599934338435,
            "loss_tokens_upper_95": 5.839869599364119,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6643327975273134,
            "data_time": 0.2475343942642212,
            "batch_time": 0.48485177755355835,
            "samples_per_second": 282815.1103135962,
            "samples_per_second_per_gpu": 35351.888789199525,
            "loss_sequences_lower_95": 2.5770660095214843,
            "loss_sequences_upper_95": 2.924502326965332,
            "loss_tokens_lower_95": 2.4579982696151053,
            "loss_tokens_upper_95": 2.8452795038922742,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.2032342128524274,
            "data_time": 0.018398430198431015,
            "batch_time": 0.2929258029907942,
            "samples_per_second": 439843.52788474725,
            "samples_per_second_per_gpu": 54980.44098559341,
            "loss_sequences_lower_95": 1.1935286772775797,
            "loss_sequences_upper_95": 1.2129548805884964,
            "loss_tokens_lower_95": 1.1934499469237783,
            "loss_tokens_upper_95": 1.2130971012486407,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7589860593169366,
            "data_time": 0.028070534765720367,
            "batch_time": 0.3031903252005577,
            "samples_per_second": 434916.73091656686,
            "samples_per_second_per_gpu": 54364.59136457086,
            "loss_sequences_lower_95": 1.744416652870022,
            "loss_sequences_upper_95": 1.7734301467883011,
            "loss_tokens_lower_95": 1.7445396979454595,
            "loss_tokens_upper_95": 1.7734851034228474,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.673798665388105,
            "data_time": 0.01920815308888753,
            "batch_time": 0.2952620148658752,
            "samples_per_second": 439321.8712681091,
            "samples_per_second_per_gpu": 54915.23390851364,
            "loss_sequences_lower_95": 2.9043385303318825,
            "loss_sequences_upper_95": 3.034062970216438,
            "loss_tokens_lower_95": 2.623862988787266,
            "loss_tokens_upper_95": 2.6781922146048713,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.333369071006775,
            "data_time": 0.06846440210938454,
            "batch_time": 0.34877025708556175,
            "samples_per_second": 416351.7320207042,
            "samples_per_second_per_gpu": 52043.966502588024,
            "loss_sequences_lower_95": 4.750827880859375,
            "loss_sequences_upper_95": 5.299223022460938,
            "loss_tokens_lower_95": 4.059289308417956,
            "loss_tokens_upper_95": 4.41121104737136,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.862810343503952,
            "data_time": 0.22431793808937073,
            "batch_time": 0.31327754259109497,
            "samples_per_second": 215852.3608349049,
            "samples_per_second_per_gpu": 26981.54510436311,
            "loss_sequences_lower_95": 2.610520577430725,
            "loss_sequences_upper_95": 3.07530335187912,
            "loss_tokens_lower_95": 2.4370781514836457,
            "loss_tokens_upper_95": 3.2076283246621315,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.39931272501233,
            "data_time": 0.1381853073835373,
            "batch_time": 0.3436746597290039,
            "samples_per_second": 343142.16562453244,
            "samples_per_second_per_gpu": 42892.770703066555,
            "loss_sequences_lower_95": 4.325052476203305,
            "loss_sequences_upper_95": 4.965049848885371,
            "loss_tokens_lower_95": 2.9091479244468714,
            "loss_tokens_upper_95": 3.261688025989554,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9044130222408884,
            "data_time": 0.01790898624393675,
            "batch_time": 0.30344125876824063,
            "samples_per_second": 444387.3168697511,
            "samples_per_second_per_gpu": 55548.414608718886,
            "loss_sequences_lower_95": 1.8830332398154215,
            "loss_sequences_upper_95": 1.9262734882693042,
            "loss_tokens_lower_95": 1.8830956188650203,
            "loss_tokens_upper_95": 1.9264662303066296,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6793421251873195,
            "data_time": 0.00831616106556683,
            "batch_time": 0.29133866618319254,
            "samples_per_second": 446486.5925576913,
            "samples_per_second_per_gpu": 55810.824069711416,
            "loss_sequences_lower_95": 1.6911250452937003,
            "loss_sequences_upper_95": 1.7981175540188967,
            "loss_tokens_lower_95": 1.6141049909713363,
            "loss_tokens_upper_95": 1.7168044226212176,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6562056106962126,
            "data_time": 0.08634584645430247,
            "batch_time": 0.29558393855889636,
            "samples_per_second": 360263.8911323195,
            "samples_per_second_per_gpu": 45032.98639153994,
            "loss_sequences_lower_95": 2.617666726583963,
            "loss_sequences_upper_95": 3.053186264317551,
            "loss_tokens_lower_95": 2.47677564876814,
            "loss_tokens_upper_95": 2.7508138736015555,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1510110351168086,
            "data_time": 0.02783586084842682,
            "batch_time": 0.31244404762983324,
            "samples_per_second": 437958.3451681541,
            "samples_per_second_per_gpu": 54744.79314601926,
            "loss_sequences_lower_95": 3.2663523926166635,
            "loss_sequences_upper_95": 3.424289236693469,
            "loss_tokens_lower_95": 3.066117820156224,
            "loss_tokens_upper_95": 3.2037503348777103,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.1211282043922237,
            "data_time": 0.1271049678325653,
            "batch_time": 0.3193759322166443,
            "samples_per_second": 343383.51674551313,
            "samples_per_second_per_gpu": 42922.93959318914,
            "loss_sequences_lower_95": 2.084458323222835,
            "loss_sequences_upper_95": 2.496462370709675,
            "loss_tokens_lower_95": 1.9236246827764962,
            "loss_tokens_upper_95": 2.2163185007614956,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.795490304811832,
            "data_time": 0.005456875318959473,
            "batch_time": 0.29149705842255413,
            "samples_per_second": 447403.41313282546,
            "samples_per_second_per_gpu": 55925.42664160318,
            "loss_sequences_lower_95": 5.78080797800185,
            "loss_sequences_upper_95": 5.810062891484547,
            "loss_tokens_lower_95": 5.780906962642528,
            "loss_tokens_upper_95": 5.810144089364748,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0372588721293847,
            "data_time": 0.2578345537185669,
            "batch_time": 0.5005272328853607,
            "samples_per_second": 283269.3966702722,
            "samples_per_second_per_gpu": 35408.67458378403,
            "loss_sequences_lower_95": 1.0268769662357071,
            "loss_sequences_upper_95": 1.1739731705304488,
            "loss_tokens_lower_95": 0.8993750877731777,
            "loss_tokens_upper_95": 1.1362697792638898,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.910116585201937,
            "data_time": 0.0030172305760973674,
            "batch_time": 0.29101636024023775,
            "samples_per_second": 449609.84735646634,
            "samples_per_second_per_gpu": 56201.23091955829,
            "loss_sequences_lower_95": 4.642102137791536,
            "loss_sequences_upper_95": 4.683407240893605,
            "loss_tokens_lower_95": 3.752320756769826,
            "loss_tokens_upper_95": 3.795724431818182,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.149532157182693,
            "data_time": 0.034228552132844925,
            "batch_time": 0.31573912128806114,
            "samples_per_second": 432756.7599756477,
            "samples_per_second_per_gpu": 54094.594996955966,
            "loss_sequences_lower_95": 4.2567975585937505,
            "loss_sequences_upper_95": 4.463121118164063,
            "loss_tokens_lower_95": 4.002783448587345,
            "loss_tokens_upper_95": 4.190844065702339,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8187200556630674,
            "data_time": 0.12977564334869385,
            "batch_time": 0.39378876239061356,
            "samples_per_second": 369574.80144193955,
            "samples_per_second_per_gpu": 46196.85018024244,
            "loss_sequences_lower_95": 1.7797064175813093,
            "loss_sequences_upper_95": 1.859049009240192,
            "loss_tokens_lower_95": 1.7788712874702783,
            "loss_tokens_upper_95": 1.8578084000297215,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.212807192585685,
            "data_time": 0.024551268328319897,
            "batch_time": 0.2954674945636229,
            "samples_per_second": 433418.84043584426,
            "samples_per_second_per_gpu": 54177.35505448053,
            "loss_sequences_lower_95": 6.121943840258049,
            "loss_sequences_upper_95": 6.302417621034564,
            "loss_tokens_lower_95": 6.124463584206321,
            "loss_tokens_upper_95": 6.30193437056108,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.229263899087906,
            "data_time": 0.0242619551718235,
            "batch_time": 0.3068373625477155,
            "samples_per_second": 436887.83120095363,
            "samples_per_second_per_gpu": 54610.978900119204,
            "loss_sequences_lower_95": 1.282384130859375,
            "loss_sequences_upper_95": 1.3209109334309896,
            "loss_tokens_lower_95": 1.1861484183517157,
            "loss_tokens_upper_95": 1.2547411073804522,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.367025731858753,
            "data_time": 0.13238981366157532,
            "batch_time": 0.3747957721352577,
            "samples_per_second": 364752.15077266097,
            "samples_per_second_per_gpu": 45594.01884658262,
            "loss_sequences_lower_95": 5.022687639508929,
            "loss_sequences_upper_95": 5.717329668317522,
            "loss_tokens_lower_95": 5.022401064918155,
            "loss_tokens_upper_95": 5.71358639671689,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6763827316462994,
            "data_time": 0.22771741449832916,
            "batch_time": 0.31620967388153076,
            "samples_per_second": 215763.63227308326,
            "samples_per_second_per_gpu": 26970.454034135408,
            "loss_sequences_lower_95": 1.5383842706680297,
            "loss_sequences_upper_95": 2.200943839550018,
            "loss_tokens_lower_95": 1.3018642842400934,
            "loss_tokens_upper_95": 1.685213031572165,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.88836642408371,
            "data_time": 0.03467722609639168,
            "batch_time": 0.31606845930218697,
            "samples_per_second": 432773.8046050762,
            "samples_per_second_per_gpu": 54096.725575634526,
            "loss_sequences_lower_95": 6.9109240234375,
            "loss_sequences_upper_95": 7.203699816894532,
            "loss_tokens_lower_95": 6.730332407168729,
            "loss_tokens_upper_95": 6.990385238208545,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.581360560894012,
            "data_time": 0.034946974366903305,
            "batch_time": 0.31650825403630733,
            "samples_per_second": 432554.8138564121,
            "samples_per_second_per_gpu": 54069.35173205151,
            "loss_sequences_lower_95": 6.837518188476563,
            "loss_sequences_upper_95": 7.065626965332031,
            "loss_tokens_lower_95": 6.445371890844079,
            "loss_tokens_upper_95": 6.638925967075614,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.182392429514215,
            "data_time": 0.01356181005636851,
            "batch_time": 0.294015280281504,
            "samples_per_second": 442376.1452848539,
            "samples_per_second_per_gpu": 55297.018160606734,
            "loss_sequences_lower_95": 6.153277017028788,
            "loss_sequences_upper_95": 6.211156481782182,
            "loss_tokens_lower_95": 6.153461558859579,
            "loss_tokens_upper_95": 6.210479435551458,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.766799136424028,
            "data_time": 0.049014426849700594,
            "batch_time": 0.3087709945636791,
            "samples_per_second": 408707.06400288094,
            "samples_per_second_per_gpu": 51088.38300036012,
            "loss_sequences_lower_95": 1.7392943210865495,
            "loss_sequences_upper_95": 1.7944091140582998,
            "loss_tokens_lower_95": 1.7384473387546804,
            "loss_tokens_upper_95": 1.7939005908878167,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.674421019077301,
            "data_time": 0.03481863625347614,
            "batch_time": 0.31785060092806816,
            "samples_per_second": 430343.08311853936,
            "samples_per_second_per_gpu": 53792.88538981742,
            "loss_sequences_lower_95": 6.590917211914062,
            "loss_sequences_upper_95": 6.760677905273438,
            "loss_tokens_lower_95": 6.589355419921875,
            "loss_tokens_upper_95": 6.759520483398437,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.1327763254730456,
            "data_time": 0.0053088700914957435,
            "batch_time": 0.29248636817357626,
            "samples_per_second": 448829.88647913176,
            "samples_per_second_per_gpu": 56103.73580989147,
            "loss_sequences_lower_95": 2.940445707929281,
            "loss_sequences_upper_95": 3.0143493201942406,
            "loss_tokens_lower_95": 1.9715461729980697,
            "loss_tokens_upper_95": 2.0221364928030225,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6773748117596357,
            "data_time": 0.09751600568944757,
            "batch_time": 0.32105775854804297,
            "samples_per_second": 361054.7548298324,
            "samples_per_second_per_gpu": 45131.84435372905,
            "loss_sequences_lower_95": 1.636962067902978,
            "loss_sequences_upper_95": 1.7177434067227948,
            "loss_tokens_lower_95": 1.636626516883053,
            "loss_tokens_upper_95": 1.7179061092547518,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5978127477215787,
            "data_time": 0.06917063891887665,
            "batch_time": 0.3553682267665863,
            "samples_per_second": 416188.6894207698,
            "samples_per_second_per_gpu": 52023.586177596226,
            "loss_sequences_lower_95": 1.5722502944048713,
            "loss_sequences_upper_95": 1.6235393150179993,
            "loss_tokens_lower_95": 1.5722343594420192,
            "loss_tokens_upper_95": 1.6233284624885111,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.959482024594923,
            "data_time": 0.006396067047876025,
            "batch_time": 0.29192505847840083,
            "samples_per_second": 448222.9164282284,
            "samples_per_second_per_gpu": 56027.86455352855,
            "loss_sequences_lower_95": 4.1379129992053425,
            "loss_sequences_upper_95": 4.246344025487756,
            "loss_tokens_lower_95": 2.7332681551765226,
            "loss_tokens_upper_95": 2.808745835548264,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.390972980115779,
            "data_time": 0.1350254863500595,
            "batch_time": 0.3545835539698601,
            "samples_per_second": 357493.75462431816,
            "samples_per_second_per_gpu": 44686.71932803977,
            "loss_sequences_lower_95": 5.289657786535838,
            "loss_sequences_upper_95": 5.490938927383019,
            "loss_tokens_lower_95": 5.287040581274285,
            "loss_tokens_upper_95": 5.490146810541709,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.9217160154190998,
            "data_time": 0.011899289603416737,
            "batch_time": 0.2951573391373341,
            "samples_per_second": 445175.8031029235,
            "samples_per_second_per_gpu": 55646.97538786544,
            "loss_sequences_lower_95": 3.8963782447558297,
            "loss_sequences_upper_95": 3.947723235617355,
            "loss_tokens_lower_95": 3.89663103856078,
            "loss_tokens_upper_95": 3.9469811078459482,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6864311122199864,
            "data_time": 0.1295371577143669,
            "batch_time": 0.3682308718562126,
            "samples_per_second": 363091.99283443094,
            "samples_per_second_per_gpu": 45386.49910430387,
            "loss_sequences_lower_95": 1.642759500892417,
            "loss_sequences_upper_95": 1.7299578324104976,
            "loss_tokens_lower_95": 1.6419971688279826,
            "loss_tokens_upper_95": 1.7301359935871605,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.2855173408985139,
            "data_time": 0.23903082311153412,
            "batch_time": 0.3882296234369278,
            "samples_per_second": 254532.73637572216,
            "samples_per_second_per_gpu": 31816.59204696527,
            "loss_sequences_lower_95": 1.2053394730885822,
            "loss_sequences_upper_95": 1.5351171811421713,
            "loss_tokens_lower_95": 1.0725643502341378,
            "loss_tokens_upper_95": 1.4442076206207273,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.2142953515052795,
            "data_time": 0.24949167668819427,
            "batch_time": 0.39903008937835693,
            "samples_per_second": 251874.37061248845,
            "samples_per_second_per_gpu": 31484.296326561056,
            "loss_sequences_lower_95": 1.1987554105122886,
            "loss_sequences_upper_95": 1.6202324644724526,
            "loss_tokens_lower_95": 0.9617500069436064,
            "loss_tokens_upper_95": 1.432739643568403,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.5786954324972298,
            "data_time": 0.01182090077135298,
            "batch_time": 0.2950624023322706,
            "samples_per_second": 445171.0648193518,
            "samples_per_second_per_gpu": 55646.38310241898,
            "loss_sequences_lower_95": 3.5635008758859534,
            "loss_sequences_upper_95": 3.5937984757570876,
            "loss_tokens_lower_95": 3.563937840862021,
            "loss_tokens_upper_95": 3.5933858169757915,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.2558472310567692,
            "data_time": 0.0027158919322412436,
            "batch_time": 0.2932531303141258,
            "samples_per_second": 447520.94473024283,
            "samples_per_second_per_gpu": 55940.118091280354,
            "loss_sequences_lower_95": 0.31741187463403114,
            "loss_sequences_upper_95": 0.3264046124897422,
            "loss_tokens_lower_95": 0.24773082059845455,
            "loss_tokens_upper_95": 0.25316493279653074,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.68700846912354,
            "data_time": 0.2709207981824875,
            "batch_time": 0.5574827492237091,
            "samples_per_second": 304395.5410749317,
            "samples_per_second_per_gpu": 38049.44263436646,
            "loss_sequences_lower_95": 2.819226903239573,
            "loss_sequences_upper_95": 3.1305618826798565,
            "loss_tokens_lower_95": 2.5511714072287215,
            "loss_tokens_upper_95": 2.749938434103261,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.946869167121681,
            "data_time": 0.231567844748497,
            "batch_time": 0.33157816529273987,
            "samples_per_second": 222041.73277674982,
            "samples_per_second_per_gpu": 27755.216597093728,
            "loss_sequences_lower_95": 4.538999691525021,
            "loss_sequences_upper_95": 5.462180534568993,
            "loss_tokens_lower_95": 3.987699805365668,
            "loss_tokens_upper_95": 5.798542597264419,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.5590575562744604,
            "data_time": 0.13350830972194672,
            "batch_time": 0.3254939317703247,
            "samples_per_second": 342079.79766242334,
            "samples_per_second_per_gpu": 42759.97470780292,
            "loss_sequences_lower_95": 2.6648114971998265,
            "loss_sequences_upper_95": 2.9228970690471368,
            "loss_tokens_lower_95": 2.395075246235019,
            "loss_tokens_upper_95": 2.5565303859598405,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.697652420619639,
            "data_time": 0.13279934227466583,
            "batch_time": 0.3249213322997093,
            "samples_per_second": 342118.66723377164,
            "samples_per_second_per_gpu": 42764.833404221456,
            "loss_sequences_lower_95": 2.816918712709008,
            "loss_sequences_upper_95": 3.0638006861616924,
            "loss_tokens_lower_95": 2.5487517362129117,
            "loss_tokens_upper_95": 2.689466793668274,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.5943396949186557,
            "data_time": 0.13509497046470642,
            "batch_time": 0.32731254398822784,
            "samples_per_second": 341955.4721439028,
            "samples_per_second_per_gpu": 42744.43401798785,
            "loss_sequences_lower_95": 2.6775789586509147,
            "loss_sequences_upper_95": 2.977207379224824,
            "loss_tokens_lower_95": 2.442421827266928,
            "loss_tokens_upper_95": 2.6558201777242667,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8319653461619123,
            "data_time": 0.132045716047287,
            "batch_time": 0.3241037502884865,
            "samples_per_second": 342565.44958621525,
            "samples_per_second_per_gpu": 42820.68119827691,
            "loss_sequences_lower_95": 2.884522479917945,
            "loss_sequences_upper_95": 3.099786683989734,
            "loss_tokens_lower_95": 2.688107432903159,
            "loss_tokens_upper_95": 2.8188155605042837,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.733418211433458,
            "data_time": 0.13089412450790405,
            "batch_time": 0.3200242519378662,
            "samples_per_second": 339085.2171415842,
            "samples_per_second_per_gpu": 42385.652142698025,
            "loss_sequences_lower_95": 2.780540532651155,
            "loss_sequences_upper_95": 2.977721243911648,
            "loss_tokens_lower_95": 2.5971442958924396,
            "loss_tokens_upper_95": 2.725695241664229,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.293411975226751,
            "data_time": 0.1279832124710083,
            "batch_time": 0.32752443104982376,
            "samples_per_second": 341770.8208430804,
            "samples_per_second_per_gpu": 42721.35260538505,
            "loss_sequences_lower_95": 2.442660885322385,
            "loss_sequences_upper_95": 2.6685155775488876,
            "loss_tokens_lower_95": 2.1467445797543703,
            "loss_tokens_upper_95": 2.2452172159904307,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/checkpoints/epoch_7.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-1.0/params.txt",
    "uuid": "adde67a9-ce24-40e1-b5bd-f51895bf663c",
    "creation_date": "2023_12_13-16_57_56"
}