{
    "name": "rpj-open_lm_1b-32.0",
    "dataset_name": "rpj",
    "dataset_uuid": "67db6b77-c7c4-48ae-b431-57254587ed43",
    "hyperparameters": {
        "model": "open_lm_1b",
        "tokens": 921468928000,
        "warmup": 5000,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 256,
        "acc": 1,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 1439795200,
        "params_no_embed": 1336510464,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp",
            "--fsdp-limit-all-gathers"
        ],
        "chinchilla_multiplier": 32.0,
        "seed": 124
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--workers",
        "2",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "256",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "5000",
        "--model",
        "open_lm_1b",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--accum-freq",
        "1",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--logs",
        "logs/11578",
        "--train-num-samples",
        "184293785600",
        "--dataset-manifest",
        "<scrub>/openlm/scrub/datasets/rpj/manifest.jsonl",
        "--data-key",
        "json.gz",
        "--name",
        "rpj-open_lm_1b-32.0",
        "--fsdp",
        "--fsdp-amp",
        "--fsdp-limit-all-gathers",
        "--val-data",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-data-key",
        "json",
        "txt",
        "--val-tok-ci",
        "--val-seq-ci",
        "--val-max-pop-ci",
        "300000",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/openlm/scrub/experiments/1b_32x_rpj-original/"
    ],
    "results": [
        {
            "loss": 1.964944910009702,
            "data_time": 0.3211192488670349,
            "batch_time": 2.0495457500219345,
            "samples_per_second": 165015.5837450794,
            "samples_per_second_per_gpu": 20626.947968134926,
            "loss_sequences_lower_95": 1.9031376107533773,
            "loss_sequences_upper_95": 2.023862336476644,
            "loss_tokens_lower_95": 1.954604393641154,
            "loss_tokens_upper_95": 1.9751269372304279,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.502053562117363,
            "data_time": 0.005180470722468865,
            "batch_time": 0.5947180263170019,
            "samples_per_second": 441314.8205500726,
            "samples_per_second_per_gpu": 55164.35256875907,
            "loss_sequences_lower_95": 2.4992918988385155,
            "loss_sequences_upper_95": 2.504789996610254,
            "loss_tokens_lower_95": 2.4926991145833335,
            "loss_tokens_upper_95": 2.511524661458333,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.1743383923355415,
            "data_time": 0.13599322736263275,
            "batch_time": 0.749525360763073,
            "samples_per_second": 361083.4369816267,
            "samples_per_second_per_gpu": 45135.42962270334,
            "loss_sequences_lower_95": 2.1526570721061864,
            "loss_sequences_upper_95": 2.1957804465780453,
            "loss_tokens_lower_95": 2.1640731927083334,
            "loss_tokens_upper_95": 2.184409046875,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3689861165862722,
            "data_time": 0.01746362212457155,
            "batch_time": 0.6021439746806496,
            "samples_per_second": 438822.92021571746,
            "samples_per_second_per_gpu": 54852.86502696468,
            "loss_sequences_lower_95": 2.3579894843347295,
            "loss_sequences_upper_95": 2.3800017719072164,
            "loss_tokens_lower_95": 2.359770302083333,
            "loss_tokens_upper_95": 2.378162765625,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.509725903784913,
            "data_time": 0.13496361672878265,
            "batch_time": 0.6982705816626549,
            "samples_per_second": 391435.1894382526,
            "samples_per_second_per_gpu": 48929.398679781574,
            "loss_sequences_lower_95": 2.475706957411135,
            "loss_sequences_upper_95": 2.542717374657905,
            "loss_tokens_lower_95": 2.5001837187500002,
            "loss_tokens_upper_95": 2.519444171875,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3258704333626277,
            "data_time": 0.047026420632998146,
            "batch_time": 0.6093151494860649,
            "samples_per_second": 426114.81685235474,
            "samples_per_second_per_gpu": 53264.35210654434,
            "loss_sequences_lower_95": 2.2902053843380883,
            "loss_sequences_upper_95": 2.362121835134209,
            "loss_tokens_lower_95": 2.3164201458333333,
            "loss_tokens_upper_95": 2.335467942708333,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.2111714526950097,
            "data_time": 0.016492604464292526,
            "batch_time": 0.5790879666805268,
            "samples_per_second": 435152.91533017193,
            "samples_per_second_per_gpu": 54394.11441627149,
            "loss_sequences_lower_95": 1.1927946752431442,
            "loss_sequences_upper_95": 1.2293929044762435,
            "loss_tokens_lower_95": 1.2029257395833335,
            "loss_tokens_upper_95": 1.2193409661458332,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9662659946661347,
            "data_time": 0.01835250697637859,
            "batch_time": 0.5959199886572989,
            "samples_per_second": 436909.40741746937,
            "samples_per_second_per_gpu": 54613.67592718367,
            "loss_sequences_lower_95": 2.958248527486911,
            "loss_sequences_upper_95": 2.9742066426701568,
            "loss_tokens_lower_95": 2.9564277083333335,
            "loss_tokens_upper_95": 2.9763396927083337,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.726024273934403,
            "data_time": 0.13171962648630142,
            "batch_time": 0.6967286616563797,
            "samples_per_second": 391757.9769989619,
            "samples_per_second_per_gpu": 48969.747124870235,
            "loss_sequences_lower_95": 2.6811039808319834,
            "loss_sequences_upper_95": 2.7716921674526804,
            "loss_tokens_lower_95": 2.715876807291667,
            "loss_tokens_upper_95": 2.7364030364583334,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.3231362514345073,
            "data_time": 0.1331704705953598,
            "batch_time": 0.7139827683568001,
            "samples_per_second": 391703.6114657732,
            "samples_per_second_per_gpu": 48962.95143322165,
            "loss_sequences_lower_95": 3.289645108309659,
            "loss_sequences_upper_95": 3.3523698407199545,
            "loss_tokens_lower_95": 3.311418223958333,
            "loss_tokens_upper_95": 3.3348975468749997,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4493034305810375,
            "data_time": 0.012962103403847793,
            "batch_time": 0.5897963586552389,
            "samples_per_second": 440275.0931834136,
            "samples_per_second_per_gpu": 55034.3866479267,
            "loss_sequences_lower_95": 2.4415124245730264,
            "loss_sequences_upper_95": 2.4573820469884886,
            "loss_tokens_lower_95": 2.4400165,
            "loss_tokens_upper_95": 2.4587927916666668,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.304931102222028,
            "data_time": 0.03185774385929108,
            "batch_time": 0.582303287088871,
            "samples_per_second": 431595.4080808156,
            "samples_per_second_per_gpu": 53949.42601010195,
            "loss_sequences_lower_95": 2.2950581097394314,
            "loss_sequences_upper_95": 2.3145935643270383,
            "loss_tokens_lower_95": 2.2954569322916667,
            "loss_tokens_upper_95": 2.314172666666667,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.930558956177191,
            "data_time": 0.14340876787900925,
            "batch_time": 0.7099181786179543,
            "samples_per_second": 389695.3106657085,
            "samples_per_second_per_gpu": 48711.91383321356,
            "loss_sequences_lower_95": 2.899086354470398,
            "loss_sequences_upper_95": 2.960302746755356,
            "loss_tokens_lower_95": 2.920509489583333,
            "loss_tokens_upper_95": 2.9408953489583336,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2946585089028972,
            "data_time": 0.13528504967689514,
            "batch_time": 0.6986820250749588,
            "samples_per_second": 391356.65811561805,
            "samples_per_second_per_gpu": 48919.582264452256,
            "loss_sequences_lower_95": 2.23742258106861,
            "loss_sequences_upper_95": 2.349581057670888,
            "loss_tokens_lower_95": 2.284682447916667,
            "loss_tokens_upper_95": 2.3049992343750003,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7600239569490608,
            "data_time": 0.22262544929981232,
            "batch_time": 0.33804138004779816,
            "samples_per_second": 234547.05587434807,
            "samples_per_second_per_gpu": 29318.381984293508,
            "loss_sequences_lower_95": 2.705270173332908,
            "loss_sequences_upper_95": 2.8152215740897435,
            "loss_tokens_lower_95": 2.742161534049294,
            "loss_tokens_upper_95": 2.777942332354459,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8308175461632865,
            "data_time": 0.1317526251077652,
            "batch_time": 0.531542956829071,
            "samples_per_second": 379243.24701866135,
            "samples_per_second_per_gpu": 47405.40587733267,
            "loss_sequences_lower_95": 1.7599680330593455,
            "loss_sequences_upper_95": 1.8998128596269703,
            "loss_tokens_lower_95": 1.8223029739583332,
            "loss_tokens_upper_95": 1.83970553125,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.132473928003638,
            "data_time": 0.1337692216038704,
            "batch_time": 0.5666319504380226,
            "samples_per_second": 394674.91889904527,
            "samples_per_second_per_gpu": 49334.36486238066,
            "loss_sequences_lower_95": 5.075131660401034,
            "loss_sequences_upper_95": 5.185534345883492,
            "loss_tokens_lower_95": 5.12034078125,
            "loss_tokens_upper_95": 5.144661114583333,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3391034837628975,
            "data_time": 0.23044295608997345,
            "batch_time": 0.5079972147941589,
            "samples_per_second": 310531.5092739777,
            "samples_per_second_per_gpu": 38816.438659247215,
            "loss_sequences_lower_95": 2.286506852947298,
            "loss_sequences_upper_95": 2.3830969013151573,
            "loss_tokens_lower_95": 2.328659439086914,
            "loss_tokens_upper_95": 2.3492799852715165,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.1520470373991496,
            "data_time": 0.008873902667652476,
            "batch_time": 0.5899350716309114,
            "samples_per_second": 445118.18455380364,
            "samples_per_second_per_gpu": 55639.773069225455,
            "loss_sequences_lower_95": 2.1424020175800726,
            "loss_sequences_upper_95": 2.161533031346808,
            "loss_tokens_lower_95": 2.1426199602921145,
            "loss_tokens_upper_95": 2.1613758195280233,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.307373331899402,
            "data_time": 0.011163568869233132,
            "batch_time": 0.585281740128994,
            "samples_per_second": 441411.69054305245,
            "samples_per_second_per_gpu": 55176.461317881556,
            "loss_sequences_lower_95": 2.31244377617911,
            "loss_sequences_upper_95": 2.3357824480930094,
            "loss_tokens_lower_95": 2.2980369612183704,
            "loss_tokens_upper_95": 2.315064091142497,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.4569676175509414,
            "data_time": 0.03649907807509104,
            "batch_time": 0.5732726487848494,
            "samples_per_second": 430772.7067856165,
            "samples_per_second_per_gpu": 53846.588348202065,
            "loss_sequences_lower_95": 1.8941988145398485,
            "loss_sequences_upper_95": 2.121989931765175,
            "loss_tokens_lower_95": 1.3536054747997082,
            "loss_tokens_upper_95": 1.497527632623563,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7756317184766133,
            "data_time": 0.028447197129329044,
            "batch_time": 0.5988541481395563,
            "samples_per_second": 436374.42061579257,
            "samples_per_second_per_gpu": 54546.80257697407,
            "loss_sequences_lower_95": 1.8204902018229168,
            "loss_sequences_upper_95": 1.9791039184570312,
            "loss_tokens_lower_95": 1.7210787391902516,
            "loss_tokens_upper_95": 1.839574789946934,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6461431678268745,
            "data_time": 0.053316342333952584,
            "batch_time": 0.557384284834067,
            "samples_per_second": 416458.70492451685,
            "samples_per_second_per_gpu": 52057.338115564606,
            "loss_sequences_lower_95": 1.7349655787632083,
            "loss_sequences_upper_95": 1.780972218314656,
            "loss_tokens_lower_95": 1.6235632749985884,
            "loss_tokens_upper_95": 1.6520503936876876,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.3576220528645948,
            "data_time": 0.27693992853164673,
            "batch_time": 0.7822374552488327,
            "samples_per_second": 329189.4565877962,
            "samples_per_second_per_gpu": 41148.68207347453,
            "loss_sequences_lower_95": 1.3771296830610795,
            "loss_sequences_upper_95": 1.4550644475763495,
            "loss_tokens_lower_95": 1.3347380887521414,
            "loss_tokens_upper_95": 1.3715215440139072,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.102746764981017,
            "data_time": 0.2966814339160919,
            "batch_time": 0.8551881164312363,
            "samples_per_second": 333476.77723617747,
            "samples_per_second_per_gpu": 41684.597154522184,
            "loss_sequences_lower_95": 2.103354573152503,
            "loss_sequences_upper_95": 2.2500919015066962,
            "loss_tokens_lower_95": 2.069075855831077,
            "loss_tokens_upper_95": 2.1473459880934747,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.145634211699168,
            "data_time": 0.1462569534778595,
            "batch_time": 0.4934762045741081,
            "samples_per_second": 365526.8453227342,
            "samples_per_second_per_gpu": 45690.855665341776,
            "loss_sequences_lower_95": 2.153273427327474,
            "loss_sequences_upper_95": 2.23029389444987,
            "loss_tokens_lower_95": 2.062956416975288,
            "loss_tokens_upper_95": 2.2215401742602747,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8617706836299464,
            "data_time": 0.007402020320296287,
            "batch_time": 0.5854188602417707,
            "samples_per_second": 445597.8926712338,
            "samples_per_second_per_gpu": 55699.736583904225,
            "loss_sequences_lower_95": 2.895218818048447,
            "loss_sequences_upper_95": 2.9722199421934206,
            "loss_tokens_lower_95": 2.8116841090711455,
            "loss_tokens_upper_95": 2.889583513754838,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3097252806629798,
            "data_time": 0.03381433635950089,
            "batch_time": 0.5757787317037583,
            "samples_per_second": 432566.9562295655,
            "samples_per_second_per_gpu": 54070.869528695686,
            "loss_sequences_lower_95": 3.0110933400163744,
            "loss_sequences_upper_95": 3.2376274751091647,
            "loss_tokens_lower_95": 2.2212967681191818,
            "loss_tokens_upper_95": 2.3262903585962333,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4085299626959062,
            "data_time": 0.06156547963619232,
            "batch_time": 0.5968795776367187,
            "samples_per_second": 422408.7755281888,
            "samples_per_second_per_gpu": 52801.0969410236,
            "loss_sequences_lower_95": 2.863123673058207,
            "loss_sequences_upper_95": 3.1202802326085215,
            "loss_tokens_lower_95": 2.330296725002017,
            "loss_tokens_upper_95": 2.4533906870789974,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.324045934633578,
            "data_time": 0.2832113057374954,
            "batch_time": 0.7859661132097244,
            "samples_per_second": 327801.79308552213,
            "samples_per_second_per_gpu": 40975.22413569027,
            "loss_sequences_lower_95": 5.215296831522902,
            "loss_sequences_upper_95": 5.4315659196409465,
            "loss_tokens_lower_95": 5.216557890312856,
            "loss_tokens_upper_95": 5.42818939348334,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.461831023693085,
            "data_time": 0.24502240121364594,
            "batch_time": 0.4820467084646225,
            "samples_per_second": 284493.7320111937,
            "samples_per_second_per_gpu": 35561.71650139921,
            "loss_sequences_lower_95": 2.3666624641418457,
            "loss_sequences_upper_95": 2.70499169921875,
            "loss_tokens_lower_95": 2.2602964779985184,
            "loss_tokens_upper_95": 2.634154902203991,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5751663141997345,
            "data_time": 0.040330881252884865,
            "batch_time": 0.5966253019869328,
            "samples_per_second": 432316.8915134643,
            "samples_per_second_per_gpu": 54039.611439183034,
            "loss_sequences_lower_95": 1.5626090105382788,
            "loss_sequences_upper_95": 1.588308628359495,
            "loss_tokens_lower_95": 1.5623879002207013,
            "loss_tokens_upper_95": 1.588259312952797,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2219777529011195,
            "data_time": 0.0611067920923233,
            "batch_time": 0.6175555139780045,
            "samples_per_second": 424033.7599981915,
            "samples_per_second_per_gpu": 53004.21999977394,
            "loss_sequences_lower_95": 2.1910978250089577,
            "loss_sequences_upper_95": 2.2530410654128916,
            "loss_tokens_lower_95": 2.191132216644912,
            "loss_tokens_upper_95": 2.2525571680967187,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.488105634869896,
            "data_time": 0.04216626472771168,
            "batch_time": 0.5663009639829397,
            "samples_per_second": 426495.03523057175,
            "samples_per_second_per_gpu": 53311.87940382147,
            "loss_sequences_lower_95": 2.781046810710521,
            "loss_sequences_upper_95": 2.914195023994321,
            "loss_tokens_lower_95": 2.447136016389647,
            "loss_tokens_upper_95": 2.4966047970928558,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.061392969608307,
            "data_time": 0.14822669327259064,
            "batch_time": 0.7160931453108788,
            "samples_per_second": 392255.63989753614,
            "samples_per_second_per_gpu": 49031.95498719202,
            "loss_sequences_lower_95": 4.529366772460937,
            "loss_sequences_upper_95": 5.096013818359374,
            "loss_tokens_lower_95": 3.829793041135837,
            "loss_tokens_upper_95": 4.18181431869416,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3714605569839478,
            "data_time": 0.22673356533050537,
            "batch_time": 0.3151725083589554,
            "samples_per_second": 216033.14007429357,
            "samples_per_second_per_gpu": 27004.142509286696,
            "loss_sequences_lower_95": 2.1214750528335573,
            "loss_sequences_upper_95": 2.6207674980163573,
            "loss_tokens_lower_95": 1.9926369371085333,
            "loss_tokens_upper_95": 2.654001507813903,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.652212766395218,
            "data_time": 0.26220642030239105,
            "batch_time": 0.6658791303634644,
            "samples_per_second": 316846.02102602494,
            "samples_per_second_per_gpu": 39605.75262825312,
            "loss_sequences_lower_95": 3.9457429030845903,
            "loss_sequences_upper_95": 4.667938021955819,
            "loss_tokens_lower_95": 2.352715465799394,
            "loss_tokens_upper_95": 2.7445096296549116,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7434054929234148,
            "data_time": 0.036436243189705744,
            "batch_time": 0.6136943830384148,
            "samples_per_second": 435790.1932893889,
            "samples_per_second_per_gpu": 54473.77416117361,
            "loss_sequences_lower_95": 1.7231494481931247,
            "loss_sequences_upper_95": 1.763832590245638,
            "loss_tokens_lower_95": 1.7228901061078459,
            "loss_tokens_upper_95": 1.7639219671526867,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0999122553080458,
            "data_time": 0.017531577320325942,
            "batch_time": 0.5759905938591275,
            "samples_per_second": 438757.9934765895,
            "samples_per_second_per_gpu": 54844.74918457369,
            "loss_sequences_lower_95": 1.104628407356,
            "loss_sequences_upper_95": 1.1867799746696404,
            "loss_tokens_lower_95": 1.0540570733671506,
            "loss_tokens_upper_95": 1.1357052504386165,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.426259769624843,
            "data_time": 0.14356756210327148,
            "batch_time": 0.46141061931848526,
            "samples_per_second": 327945.60991067044,
            "samples_per_second_per_gpu": 40993.201238833804,
            "loss_sequences_lower_95": 2.3695794409447974,
            "loss_sequences_upper_95": 2.82042734334757,
            "loss_tokens_lower_95": 2.2587947312601178,
            "loss_tokens_upper_95": 2.5279197389389165,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.0318236879786826,
            "data_time": 0.061328154802322385,
            "batch_time": 0.6375709146261215,
            "samples_per_second": 425089.32228415564,
            "samples_per_second_per_gpu": 53136.165285519455,
            "loss_sequences_lower_95": 3.1528309907137926,
            "loss_sequences_upper_95": 3.318671738188758,
            "loss_tokens_lower_95": 2.960181923957808,
            "loss_tokens_upper_95": 3.0973277415321903,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5290328664023702,
            "data_time": 0.2825384885072708,
            "batch_time": 0.66397625207901,
            "samples_per_second": 308962.7884362026,
            "samples_per_second_per_gpu": 38620.34855452533,
            "loss_sequences_lower_95": 1.5104551454869712,
            "loss_sequences_upper_95": 1.8520900261111375,
            "loss_tokens_lower_95": 1.398462160493188,
            "loss_tokens_upper_95": 1.6300837605462377,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6668943995403085,
            "data_time": 0.01125064389459018,
            "batch_time": 0.5831513666433974,
            "samples_per_second": 440566.0765574062,
            "samples_per_second_per_gpu": 55070.75956967578,
            "loss_sequences_lower_95": 1.6614355976664084,
            "loss_sequences_upper_95": 1.6724542369411382,
            "loss_tokens_lower_95": 1.6613117057005151,
            "loss_tokens_upper_95": 1.6725394444123198,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.7714015274371916,
            "data_time": 0.25470970571041107,
            "batch_time": 0.496071994304657,
            "samples_per_second": 285285.6714730136,
            "samples_per_second_per_gpu": 35660.7089341267,
            "loss_sequences_lower_95": 0.7603440349541822,
            "loss_sequences_upper_95": 0.8693212638780909,
            "loss_tokens_lower_95": 0.6543722724719325,
            "loss_tokens_upper_95": 0.8625679726732424,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.975282896274166,
            "data_time": 0.005496984223524729,
            "batch_time": 0.5905772601564725,
            "samples_per_second": 441026.2218791005,
            "samples_per_second_per_gpu": 55128.27773488756,
            "loss_sequences_lower_95": 4.959506303639282,
            "loss_sequences_upper_95": 5.012135774944314,
            "loss_tokens_lower_95": 3.8676058933752415,
            "loss_tokens_upper_95": 3.9191309961315284,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.793185929059982,
            "data_time": 0.07771621271967888,
            "batch_time": 0.6515209041535854,
            "samples_per_second": 415419.75999265246,
            "samples_per_second_per_gpu": 51927.46999908156,
            "loss_sequences_lower_95": 3.907629333496094,
            "loss_sequences_upper_95": 4.121188671875,
            "loss_tokens_lower_95": 3.6689816975722644,
            "loss_tokens_upper_95": 3.867740045180177,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9672085720559824,
            "data_time": 0.2882167398929596,
            "batch_time": 0.8194108605384827,
            "samples_per_second": 328010.45811093727,
            "samples_per_second_per_gpu": 41001.30726386716,
            "loss_sequences_lower_95": 1.9290974492612094,
            "loss_sequences_upper_95": 2.0057537244713823,
            "loss_tokens_lower_95": 1.9305964096732762,
            "loss_tokens_upper_95": 2.0047299691905147,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.438965373147617,
            "data_time": 0.05509720494349798,
            "batch_time": 0.561083234846592,
            "samples_per_second": 415082.31559760653,
            "samples_per_second_per_gpu": 51885.28944970082,
            "loss_sequences_lower_95": 5.363606271454783,
            "loss_sequences_upper_95": 5.512434202252012,
            "loss_tokens_lower_95": 5.363423887310606,
            "loss_tokens_upper_95": 5.512614265210701,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0872889201641083,
            "data_time": 0.05404383937517802,
            "batch_time": 0.6274349341789881,
            "samples_per_second": 424728.70277323993,
            "samples_per_second_per_gpu": 53091.08784665499,
            "loss_sequences_lower_95": 1.167101377360026,
            "loss_sequences_upper_95": 1.2211231811523438,
            "loss_tokens_lower_95": 1.047252383375225,
            "loss_tokens_upper_95": 1.1172244288340334,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.53565939381009,
            "data_time": 0.27766360342502594,
            "batch_time": 0.7622204422950745,
            "samples_per_second": 324353.8685252423,
            "samples_per_second_per_gpu": 40544.23356565529,
            "loss_sequences_lower_95": 4.1891650826590405,
            "loss_sequences_upper_95": 4.904651373000372,
            "loss_tokens_lower_95": 4.18351800101144,
            "loss_tokens_upper_95": 4.890637076241629,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0828799195587635,
            "data_time": 0.2245246171951294,
            "batch_time": 0.3130076676607132,
            "samples_per_second": 216240.65711092448,
            "samples_per_second_per_gpu": 27030.08213886556,
            "loss_sequences_lower_95": 0.9537903860211372,
            "loss_sequences_upper_95": 1.376335173845291,
            "loss_tokens_lower_95": 0.8099717444980268,
            "loss_tokens_upper_95": 1.104750775209407,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.800106726169586,
            "data_time": 0.07953863590955734,
            "batch_time": 0.6505479626357555,
            "samples_per_second": 417005.4051618612,
            "samples_per_second_per_gpu": 52125.67564523265,
            "loss_sequences_lower_95": 6.812289587402344,
            "loss_sequences_upper_95": 7.150411730957031,
            "loss_tokens_lower_95": 6.65262257016048,
            "loss_tokens_upper_95": 6.943154747151279,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.339926495075225,
            "data_time": 0.07888749986886978,
            "batch_time": 0.6497599072754383,
            "samples_per_second": 417242.05384510953,
            "samples_per_second_per_gpu": 52155.25673063869,
            "loss_sequences_lower_95": 5.523125671386719,
            "loss_sequences_upper_95": 5.70696806640625,
            "loss_tokens_lower_95": 5.254549500031296,
            "loss_tokens_upper_95": 5.406674574850557,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9878179266514333,
            "data_time": 0.028362996876239777,
            "batch_time": 0.5967113288740317,
            "samples_per_second": 435367.06296631397,
            "samples_per_second_per_gpu": 54420.882870789246,
            "loss_sequences_lower_95": 1.9687868707179224,
            "loss_sequences_upper_95": 2.007517939384376,
            "loss_tokens_lower_95": 1.9689562263961928,
            "loss_tokens_upper_95": 2.00664668554753,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4462991727661976,
            "data_time": 0.10173760851224263,
            "batch_time": 0.5990679264068604,
            "samples_per_second": 406064.9515009679,
            "samples_per_second_per_gpu": 50758.11893762099,
            "loss_sequences_lower_95": 2.406853319862471,
            "loss_sequences_upper_95": 2.4866068428379413,
            "loss_tokens_lower_95": 2.4054463657549086,
            "loss_tokens_upper_95": 2.4872428694811464,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.20727054643631,
            "data_time": 0.07737145572900772,
            "batch_time": 0.650117177516222,
            "samples_per_second": 416200.72692148306,
            "samples_per_second_per_gpu": 52025.09086518538,
            "loss_sequences_lower_95": 5.093959936523438,
            "loss_sequences_upper_95": 5.324870385742187,
            "loss_tokens_lower_95": 5.096459399414063,
            "loss_tokens_upper_95": 5.320839892578125,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.4905661962002996,
            "data_time": 0.010848809211027054,
            "batch_time": 0.5873722171499616,
            "samples_per_second": 441003.22632084176,
            "samples_per_second_per_gpu": 55125.40329010522,
            "loss_sequences_lower_95": 2.1070258267058897,
            "loss_sequences_upper_95": 2.1849641711506624,
            "loss_tokens_lower_95": 1.4123081658504137,
            "loss_tokens_upper_95": 1.4627294628193916,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8871361390868229,
            "data_time": 0.1679909655026027,
            "batch_time": 0.5249407802309308,
            "samples_per_second": 325740.33554114995,
            "samples_per_second_per_gpu": 40717.541942643744,
            "loss_sequences_lower_95": 1.8364011166700676,
            "loss_sequences_upper_95": 1.9382520362512388,
            "loss_tokens_lower_95": 1.8358440370702032,
            "loss_tokens_upper_95": 1.9379482667837569,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8915783737220015,
            "data_time": 0.1562657728791237,
            "batch_time": 0.7392511144280434,
            "samples_per_second": 388156.56450524565,
            "samples_per_second_per_gpu": 48519.57056315571,
            "loss_sequences_lower_95": 1.8595143905340457,
            "loss_sequences_upper_95": 1.9237019378063724,
            "loss_tokens_lower_95": 1.8598839853324143,
            "loss_tokens_upper_95": 1.923763667087929,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.1291079504566355,
            "data_time": 0.013266671914607286,
            "batch_time": 0.5859412988647819,
            "samples_per_second": 438537.61438699247,
            "samples_per_second_per_gpu": 54817.20179837406,
            "loss_sequences_lower_95": 1.3661205293181604,
            "loss_sequences_upper_95": 1.4369485198772,
            "loss_tokens_lower_95": 1.0815007335048759,
            "loss_tokens_upper_95": 1.1261491033396955,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7007290238425845,
            "data_time": 0.27526380121707916,
            "batch_time": 0.7147639989852905,
            "samples_per_second": 317668.23686999944,
            "samples_per_second_per_gpu": 39708.52960874993,
            "loss_sequences_lower_95": 1.664393260491588,
            "loss_sequences_upper_95": 1.7361958175739913,
            "loss_tokens_lower_95": 1.6647179739815847,
            "loss_tokens_upper_95": 1.735980184242208,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.047785943728339,
            "data_time": 0.026351582545500535,
            "batch_time": 0.6030879731361682,
            "samples_per_second": 435044.52990567696,
            "samples_per_second_per_gpu": 54380.56623820962,
            "loss_sequences_lower_95": 4.020927599985665,
            "loss_sequences_upper_95": 4.073908131450688,
            "loss_tokens_lower_95": 4.021395849758697,
            "loss_tokens_upper_95": 4.074610987672018,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8893479074089272,
            "data_time": 0.2872786521911621,
            "batch_time": 0.764705628156662,
            "samples_per_second": 320426.7672476513,
            "samples_per_second_per_gpu": 40053.34590595641,
            "loss_sequences_lower_95": 1.8352609504773778,
            "loss_sequences_upper_95": 1.9420483598431337,
            "loss_tokens_lower_95": 1.8370568618033696,
            "loss_tokens_upper_95": 1.9420143312620883,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0852180997530618,
            "data_time": 0.23306787014007568,
            "batch_time": 0.3822778910398483,
            "samples_per_second": 255596.16575635754,
            "samples_per_second_per_gpu": 31949.520719544693,
            "loss_sequences_lower_95": 0.9562899414698284,
            "loss_sequences_upper_95": 1.3818937842051187,
            "loss_tokens_lower_95": 0.8558430711428324,
            "loss_tokens_upper_95": 1.2611687739690143,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.7742755681276321,
            "data_time": 0.23863568902015686,
            "batch_time": 0.38775216042995453,
            "samples_per_second": 254994.16133356933,
            "samples_per_second_per_gpu": 31874.270166696166,
            "loss_sequences_lower_95": 0.7219648710886638,
            "loss_sequences_upper_95": 1.06141064008077,
            "loss_tokens_lower_95": 0.582966112286857,
            "loss_tokens_upper_95": 0.9167574271727142,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.017357056551724,
            "data_time": 0.024707725005490438,
            "batch_time": 0.5781880830015454,
            "samples_per_second": 436004.5634256829,
            "samples_per_second_per_gpu": 54500.57042821036,
            "loss_sequences_lower_95": 4.000859813662095,
            "loss_sequences_upper_95": 4.034028183680044,
            "loss_tokens_lower_95": 4.000021257133652,
            "loss_tokens_upper_95": 4.034469801350792,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.23434082227779124,
            "data_time": 0.004947079191041305,
            "batch_time": 0.5898447221543591,
            "samples_per_second": 443951.00837010046,
            "samples_per_second_per_gpu": 55493.87604626256,
            "loss_sequences_lower_95": 0.3840631718333276,
            "loss_sequences_upper_95": 0.3996903739907808,
            "loss_tokens_lower_95": 0.22133284290512165,
            "loss_tokens_upper_95": 0.22836016517399302,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.7233725593784662,
            "data_time": 0.2848297357559204,
            "batch_time": 0.569572925567627,
            "samples_per_second": 303049.7744000512,
            "samples_per_second_per_gpu": 37881.2218000064,
            "loss_sequences_lower_95": 0.6941222724013442,
            "loss_sequences_upper_95": 0.8430512856310747,
            "loss_tokens_lower_95": 0.6665161762504606,
            "loss_tokens_upper_95": 0.7443827401153932,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.8076974830111943,
            "data_time": 0.2326105833053589,
            "batch_time": 0.3318314552307129,
            "samples_per_second": 222685.2716774397,
            "samples_per_second_per_gpu": 27835.658959679964,
            "loss_sequences_lower_95": 3.3359451139295424,
            "loss_sequences_upper_95": 4.278513119671796,
            "loss_tokens_lower_95": 3.095077156726225,
            "loss_tokens_upper_95": 4.414127698356723,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.620301254093647,
            "data_time": 0.2669455111026764,
            "batch_time": 0.6470836251974106,
            "samples_per_second": 313738.37276440917,
            "samples_per_second_per_gpu": 39217.296595551146,
            "loss_sequences_lower_95": 0.6170763480954055,
            "loss_sequences_upper_95": 0.7390827295256824,
            "loss_tokens_lower_95": 0.5852086242213782,
            "loss_tokens_upper_95": 0.6485705515073065,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6617678374778934,
            "data_time": 0.26666586101055145,
            "batch_time": 0.6464231908321381,
            "samples_per_second": 313948.6966317203,
            "samples_per_second_per_gpu": 39243.587078965036,
            "loss_sequences_lower_95": 0.67969367445969,
            "loss_sequences_upper_95": 0.7921358852851682,
            "loss_tokens_lower_95": 0.626972860678763,
            "loss_tokens_upper_95": 0.681571511471832,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6434393365208696,
            "data_time": 0.28390292823314667,
            "batch_time": 0.6643244624137878,
            "samples_per_second": 309366.5371020961,
            "samples_per_second_per_gpu": 38670.81713776201,
            "loss_sequences_lower_95": 0.5834407283038627,
            "loss_sequences_upper_95": 0.711987074410043,
            "loss_tokens_lower_95": 0.6014030890878828,
            "loss_tokens_upper_95": 0.6838844075567629,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.7282547016696233,
            "data_time": 0.27381160855293274,
            "batch_time": 0.653631716966629,
            "samples_per_second": 311988.2635793891,
            "samples_per_second_per_gpu": 38998.532947423635,
            "loss_sequences_lower_95": 0.7299849638124791,
            "loss_sequences_upper_95": 0.8304349736469547,
            "loss_tokens_lower_95": 0.693352136582229,
            "loss_tokens_upper_95": 0.7468500202689958,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6921826871285527,
            "data_time": 0.27089977264404297,
            "batch_time": 0.6442825198173523,
            "samples_per_second": 310536.7575389778,
            "samples_per_second_per_gpu": 38817.09469237222,
            "loss_sequences_lower_95": 0.6838295433091821,
            "loss_sequences_upper_95": 0.7549777155337126,
            "loss_tokens_lower_95": 0.6643487879085624,
            "loss_tokens_upper_95": 0.7056895291020766,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6648931790415834,
            "data_time": 0.27303753793239594,
            "batch_time": 0.6533578187227249,
            "samples_per_second": 312358.9566837405,
            "samples_per_second_per_gpu": 39044.86958546756,
            "loss_sequences_lower_95": 0.7083288006666231,
            "loss_sequences_upper_95": 0.7853797842816608,
            "loss_tokens_lower_95": 0.629973403535168,
            "loss_tokens_upper_95": 0.6692926926753692,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-32.0/params.txt",
    "uuid": "34c2e23f-c52a-45b1-9090-faf4348156c0",
    "creation_date": "2024_02_04-09_58_11"
}