{
    "name": "c4_original-open_lm_1b-4.0",
    "dataset_name": "c4_original",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf1",
    "hyperparameters": {
        "model": "open_lm_1b",
        "tokens": 115183616000,
        "warmup": 5000,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 256,
        "acc": 1,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 1439795200,
        "params_no_embed": 1336510464,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp",
            "--fsdp-limit-all-gathers"
        ],
        "chinchilla_multiplier": 4,
        "seed": 124
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--workers",
        "2",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "256",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "5000",
        "--model",
        "open_lm_1b",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--accum-freq",
        "1",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--logs",
        "logs/448",
        "--train-num-samples",
        "23036723200",
        "--dataset-manifest",
        "<scrub>/openlm/scrub/datasets/original_c4/manifest.jsonl",
        "--data-key",
        "txt",
        "--name",
        "c4_original-open_lm_1b-4.0",
        "--fsdp",
        "--fsdp-amp",
        "--fsdp-limit-all-gathers",
        "--val-data",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/paloma_val/00000001.tar",
        "--val-frequency",
        "5",
        "--val-data-key",
        "json",
        "txt",
        "json.gz",
        "--val-tok-ci",
        "--val-seq-ci",
        "--val-num-samples",
        "245760",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/openlm/scrub/experiments/1b_4x_c4_original/"
    ],
    "results": [
        {
            "loss": 3.2246621986230215,
            "data_time": 0.3084774315357208,
            "batch_time": 2.675518751144409,
            "samples_per_second": 159471.0616128212,
            "samples_per_second_per_gpu": 19933.88270160265,
            "loss_sequences_lower_95": 3.1071359062194825,
            "loss_sequences_upper_95": 3.34733341217041,
            "loss_tokens_lower_95": 3.2100239690144856,
            "loss_tokens_upper_95": 3.2394119453430172,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.472413073245337,
            "data_time": 0.004985833755223503,
            "batch_time": 0.5963476088782589,
            "samples_per_second": 439970.34516552376,
            "samples_per_second_per_gpu": 54996.29314569047,
            "loss_sequences_lower_95": 2.4695525783759225,
            "loss_sequences_upper_95": 2.475274709114225,
            "loss_tokens_lower_95": 2.4631761041666667,
            "loss_tokens_upper_95": 2.481734901041667,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.2697616985866,
            "data_time": 0.10888956487178802,
            "batch_time": 0.7225517630577087,
            "samples_per_second": 366970.0094788998,
            "samples_per_second_per_gpu": 45871.25118486248,
            "loss_sequences_lower_95": 3.2488529625717475,
            "loss_sequences_upper_95": 3.2900819552674583,
            "loss_tokens_lower_95": 3.255260828125,
            "loss_tokens_upper_95": 3.2847950416666665,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.412572120145424,
            "data_time": 0.014794046157284788,
            "batch_time": 0.6005021270952726,
            "samples_per_second": 438673.29509197833,
            "samples_per_second_per_gpu": 54834.16188649729,
            "loss_sequences_lower_95": 2.4023906300338274,
            "loss_sequences_upper_95": 2.4229197406572163,
            "loss_tokens_lower_95": 2.4032374947916666,
            "loss_tokens_upper_95": 2.4217241979166664,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.475885019282945,
            "data_time": 0.12395358830690384,
            "batch_time": 0.7836640030145645,
            "samples_per_second": 378228.0447629368,
            "samples_per_second_per_gpu": 47278.5055953671,
            "loss_sequences_lower_95": 2.4399355327039047,
            "loss_sequences_upper_95": 2.510401770442178,
            "loss_tokens_lower_95": 2.4665486302083335,
            "loss_tokens_upper_95": 2.4857167291666666,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.991414949895572,
            "data_time": 0.03888398905595144,
            "batch_time": 0.6012891953190168,
            "samples_per_second": 427688.34003352514,
            "samples_per_second_per_gpu": 53461.04250419064,
            "loss_sequences_lower_95": 2.957087090321741,
            "loss_sequences_upper_95": 3.025644076494519,
            "loss_tokens_lower_95": 2.9794709270833333,
            "loss_tokens_upper_95": 3.002974125,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9390650437802686,
            "data_time": 0.013792432099580764,
            "batch_time": 0.5753192007541656,
            "samples_per_second": 436685.67911954515,
            "samples_per_second_per_gpu": 54585.709889943144,
            "loss_sequences_lower_95": 2.910118438097895,
            "loss_sequences_upper_95": 2.9684835927535076,
            "loss_tokens_lower_95": 2.923945317708333,
            "loss_tokens_upper_95": 2.954103171875,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.248246759844076,
            "data_time": 0.0146989947871158,
            "batch_time": 0.5906233724794889,
            "samples_per_second": 439021.2811369974,
            "samples_per_second_per_gpu": 54877.660142124674,
            "loss_sequences_lower_95": 3.2400914952143323,
            "loss_sequences_upper_95": 3.2566569862565444,
            "loss_tokens_lower_95": 3.2367342708333333,
            "loss_tokens_upper_95": 3.259695338541667,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9182862702423966,
            "data_time": 0.10685862600803375,
            "batch_time": 0.6699267849326134,
            "samples_per_second": 398783.34134915855,
            "samples_per_second_per_gpu": 49847.91766864482,
            "loss_sequences_lower_95": 2.873404166368934,
            "loss_sequences_upper_95": 2.9633302952215925,
            "loss_tokens_lower_95": 2.9073670052083336,
            "loss_tokens_upper_95": 2.9292232916666667,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.984993414445357,
            "data_time": 0.1073659285902977,
            "batch_time": 0.6858314275741577,
            "samples_per_second": 399500.2662150416,
            "samples_per_second_per_gpu": 49937.5332768802,
            "loss_sequences_lower_95": 3.949953626549762,
            "loss_sequences_upper_95": 4.0160298102458,
            "loss_tokens_lower_95": 3.9711666458333332,
            "loss_tokens_upper_95": 3.9986008854166664,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8139630460209824,
            "data_time": 0.010531839111755634,
            "batch_time": 0.5860429467826054,
            "samples_per_second": 441955.30433672434,
            "samples_per_second_per_gpu": 55244.41304209054,
            "loss_sequences_lower_95": 2.8066996710977112,
            "loss_sequences_upper_95": 2.8213136070474167,
            "loss_tokens_lower_95": 2.803748328125,
            "loss_tokens_upper_95": 2.824229223958333,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6045380046793642,
            "data_time": 0.023802998661994933,
            "batch_time": 0.5736578091979027,
            "samples_per_second": 433802.9196899238,
            "samples_per_second_per_gpu": 54225.36496124048,
            "loss_sequences_lower_95": 2.5957851729259946,
            "loss_sequences_upper_95": 2.613040297133681,
            "loss_tokens_lower_95": 2.594032901041667,
            "loss_tokens_upper_95": 2.614854265625,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.423226529154284,
            "data_time": 0.1067693680524826,
            "batch_time": 0.6995982900261879,
            "samples_per_second": 391909.4911174476,
            "samples_per_second_per_gpu": 48988.68638968095,
            "loss_sequences_lower_95": 3.38698134973615,
            "loss_sequences_upper_95": 3.4583198346182495,
            "loss_tokens_lower_95": 3.41062878125,
            "loss_tokens_upper_95": 3.436049859375,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.516448086002451,
            "data_time": 0.10499705374240875,
            "batch_time": 0.6671143621206284,
            "samples_per_second": 399242.58392539294,
            "samples_per_second_per_gpu": 49905.32299067412,
            "loss_sequences_lower_95": 2.462404693168438,
            "loss_sequences_upper_95": 2.5685220131806106,
            "loss_tokens_lower_95": 2.505990166666667,
            "loss_tokens_upper_95": 2.5275657552083333,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.6353896260261536,
            "data_time": 0.17261448502540588,
            "batch_time": 0.2883654087781906,
            "samples_per_second": 243777.22325999732,
            "samples_per_second_per_gpu": 30472.152907499665,
            "loss_sequences_lower_95": 3.5356268189170144,
            "loss_sequences_upper_95": 3.7448665272105823,
            "loss_tokens_lower_95": 3.6086913975802335,
            "loss_tokens_upper_95": 3.6624239574779165,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.031979519146177,
            "data_time": 0.10894151031970978,
            "batch_time": 0.5064426437020302,
            "samples_per_second": 386509.84198834724,
            "samples_per_second_per_gpu": 48313.730248543405,
            "loss_sequences_lower_95": 2.959856457056874,
            "loss_sequences_upper_95": 3.1030915997118713,
            "loss_tokens_lower_95": 3.019373869791667,
            "loss_tokens_upper_95": 3.0450525416666663,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.46920971253931,
            "data_time": 0.12396196275949478,
            "batch_time": 0.6342194303870201,
            "samples_per_second": 383935.5162050103,
            "samples_per_second_per_gpu": 47991.93952562629,
            "loss_sequences_lower_95": 5.404230911939314,
            "loss_sequences_upper_95": 5.530607150035043,
            "loss_tokens_lower_95": 5.455905541666667,
            "loss_tokens_upper_95": 5.48270725,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7535148800396527,
            "data_time": 0.1854398399591446,
            "batch_time": 0.4619716554880142,
            "samples_per_second": 323091.0553933754,
            "samples_per_second_per_gpu": 40386.381924171925,
            "loss_sequences_lower_95": 2.70582977670138,
            "loss_sequences_upper_95": 2.7965721943339363,
            "loss_tokens_lower_95": 2.7411418914794923,
            "loss_tokens_upper_95": 2.7657635047787528,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4241609906831063,
            "data_time": 0.007813267816196788,
            "batch_time": 0.5891278033906763,
            "samples_per_second": 445185.08673578984,
            "samples_per_second_per_gpu": 55648.13584197373,
            "loss_sequences_lower_95": 2.410168135381801,
            "loss_sequences_upper_95": 2.438153313497009,
            "loss_tokens_lower_95": 2.4102435369672235,
            "loss_tokens_upper_95": 2.4383293757121494,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.2460943940174434,
            "data_time": 0.00932135060429573,
            "batch_time": 0.5831498991698026,
            "samples_per_second": 442109.7388935798,
            "samples_per_second_per_gpu": 55263.717361697476,
            "loss_sequences_lower_95": 2.262690921761415,
            "loss_sequences_upper_95": 2.2860296860607328,
            "loss_tokens_lower_95": 2.2365945976290003,
            "loss_tokens_upper_95": 2.2531383149775195,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.1961480450618924,
            "data_time": 0.029715867506133184,
            "batch_time": 0.5648610707786348,
            "samples_per_second": 433591.6012143405,
            "samples_per_second_per_gpu": 54198.95015179256,
            "loss_sequences_lower_95": 2.7893674797306773,
            "loss_sequences_upper_95": 3.064402889923831,
            "loss_tokens_lower_95": 2.0597566725953804,
            "loss_tokens_upper_95": 2.244337665157032,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.312937648733457,
            "data_time": 0.023733004927635193,
            "batch_time": 0.5930038864413897,
            "samples_per_second": 438135.4930901821,
            "samples_per_second_per_gpu": 54766.93663627276,
            "loss_sequences_lower_95": 2.4556947672526044,
            "loss_sequences_upper_95": 2.6404786783854166,
            "loss_tokens_lower_95": 2.2408376695165093,
            "loss_tokens_upper_95": 2.37216401336478,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9876477643339086,
            "data_time": 0.044674488405386605,
            "batch_time": 0.5476667881011963,
            "samples_per_second": 419093.1866591845,
            "samples_per_second_per_gpu": 52386.648332398065,
            "loss_sequences_lower_95": 2.0719826976669116,
            "loss_sequences_upper_95": 2.1261584972413408,
            "loss_tokens_lower_95": 1.9637650727079503,
            "loss_tokens_upper_95": 1.9951454826997224,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4164788636294277,
            "data_time": 0.24226118624210358,
            "batch_time": 0.7467683702707291,
            "samples_per_second": 337185.04665892955,
            "samples_per_second_per_gpu": 42148.130832366194,
            "loss_sequences_lower_95": 2.406003480391069,
            "loss_sequences_upper_95": 2.569663044322621,
            "loss_tokens_lower_95": 2.3801876716340824,
            "loss_tokens_upper_95": 2.4392842071450165,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7217693747306355,
            "data_time": 0.23000510036945343,
            "batch_time": 0.7870937436819077,
            "samples_per_second": 348428.3658469248,
            "samples_per_second_per_gpu": 43553.5457308656,
            "loss_sequences_lower_95": 2.7218298588966836,
            "loss_sequences_upper_95": 2.913254581373565,
            "loss_tokens_lower_95": 2.6853194423874407,
            "loss_tokens_upper_95": 2.780421118610359,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.860835428237915,
            "data_time": 0.12433405965566635,
            "batch_time": 0.4712824150919914,
            "samples_per_second": 369554.26874785253,
            "samples_per_second_per_gpu": 46194.28359348157,
            "loss_sequences_lower_95": 2.8745152842203776,
            "loss_sequences_upper_95": 2.974487579345703,
            "loss_tokens_lower_95": 2.7642765606289594,
            "loss_tokens_upper_95": 2.9425619495519157,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1232332875072637,
            "data_time": 0.0064677208662033085,
            "batch_time": 0.5860070327296854,
            "samples_per_second": 444674.3865850124,
            "samples_per_second_per_gpu": 55584.29832312655,
            "loss_sequences_lower_95": 3.1646018416139707,
            "loss_sequences_upper_95": 3.239727808132597,
            "loss_tokens_lower_95": 3.0729438107709397,
            "loss_tokens_upper_95": 3.1486180957045247,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4744307653671163,
            "data_time": 0.027018579840660095,
            "batch_time": 0.5720642223954201,
            "samples_per_second": 431657.91793303133,
            "samples_per_second_per_gpu": 53957.23974162892,
            "loss_sequences_lower_95": 3.3657138040972883,
            "loss_sequences_upper_95": 3.6323624325119686,
            "loss_tokens_lower_95": 2.3731919679286584,
            "loss_tokens_upper_95": 2.4893227503583386,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.586960581890959,
            "data_time": 0.052839329838752745,
            "batch_time": 0.5897164046764374,
            "samples_per_second": 423031.1169879078,
            "samples_per_second_per_gpu": 52878.889623488474,
            "loss_sequences_lower_95": 3.1151587893124733,
            "loss_sequences_upper_95": 3.42462285793682,
            "loss_tokens_lower_95": 2.504459769343437,
            "loss_tokens_upper_95": 2.645745879152045,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.007857475106575,
            "data_time": 0.22948694229125977,
            "batch_time": 0.7324890494346619,
            "samples_per_second": 339168.81734163855,
            "samples_per_second_per_gpu": 42396.10216770482,
            "loss_sequences_lower_95": 5.927533906562143,
            "loss_sequences_upper_95": 6.08122964101295,
            "loss_tokens_lower_95": 5.929549627434717,
            "loss_tokens_upper_95": 6.08139811476616,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4270964896678926,
            "data_time": 0.20752662420272827,
            "batch_time": 0.4446664899587631,
            "samples_per_second": 291963.2655471702,
            "samples_per_second_per_gpu": 36495.40819339627,
            "loss_sequences_lower_95": 2.3395399284362792,
            "loss_sequences_upper_95": 2.6743859100341796,
            "loss_tokens_lower_95": 2.228004381865636,
            "loss_tokens_upper_95": 2.5987102419831034,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0890877393885545,
            "data_time": 0.03519413061439991,
            "batch_time": 0.5931061245501041,
            "samples_per_second": 432136.1999202942,
            "samples_per_second_per_gpu": 54017.02499003678,
            "loss_sequences_lower_95": 2.0658659607995618,
            "loss_sequences_upper_95": 2.1127807429771464,
            "loss_tokens_lower_95": 2.06507508698603,
            "loss_tokens_upper_95": 2.1128849275524564,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6230105174359095,
            "data_time": 0.05097776353359222,
            "batch_time": 0.6091702818870545,
            "samples_per_second": 424899.47531798936,
            "samples_per_second_per_gpu": 53112.43441474867,
            "loss_sequences_lower_95": 2.584632247437423,
            "loss_sequences_upper_95": 2.6611247044718724,
            "loss_tokens_lower_95": 2.5856496100148445,
            "loss_tokens_upper_95": 2.6620929289996673,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.397612333816595,
            "data_time": 0.032622916623950005,
            "batch_time": 0.5578807257115841,
            "samples_per_second": 427718.4370675762,
            "samples_per_second_per_gpu": 53464.80463344703,
            "loss_sequences_lower_95": 2.6878813800666483,
            "loss_sequences_upper_95": 2.8163410373559064,
            "loss_tokens_lower_95": 2.357007425678678,
            "loss_tokens_upper_95": 2.4057901069957945,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.100942585468292,
            "data_time": 0.11826501786708832,
            "batch_time": 0.6863248348236084,
            "samples_per_second": 398459.9741105826,
            "samples_per_second_per_gpu": 49807.496763822826,
            "loss_sequences_lower_95": 4.592657800292969,
            "loss_sequences_upper_95": 5.177908789062499,
            "loss_tokens_lower_95": 3.866601392845694,
            "loss_tokens_upper_95": 4.222384156808405,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.549670919775963,
            "data_time": 0.18358248472213745,
            "batch_time": 0.27218708395957947,
            "samples_per_second": 221472.186487313,
            "samples_per_second_per_gpu": 27684.023310914126,
            "loss_sequences_lower_95": 2.311948812007904,
            "loss_sequences_upper_95": 2.780357098579407,
            "loss_tokens_lower_95": 2.1589935390428567,
            "loss_tokens_upper_95": 2.868438005995476,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.749103959949537,
            "data_time": 0.2129925638437271,
            "batch_time": 0.6171577870845795,
            "samples_per_second": 327377.40930013027,
            "samples_per_second_per_gpu": 40922.17616251628,
            "loss_sequences_lower_95": 4.084371316844019,
            "loss_sequences_upper_95": 4.818010053963497,
            "loss_tokens_lower_95": 2.450393432479566,
            "loss_tokens_upper_95": 2.837348499319599,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9642351528295214,
            "data_time": 0.03260700901349386,
            "batch_time": 0.6108250717322031,
            "samples_per_second": 435761.0407042996,
            "samples_per_second_per_gpu": 54470.13008803745,
            "loss_sequences_lower_95": 1.940690680120959,
            "loss_sequences_upper_95": 1.987505967526417,
            "loss_tokens_lower_95": 1.9418784845234682,
            "loss_tokens_upper_95": 1.9873517397659322,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.4432701167205735,
            "data_time": 0.015125096553847903,
            "batch_time": 0.5749268482128779,
            "samples_per_second": 438284.8124426387,
            "samples_per_second_per_gpu": 54785.601555329835,
            "loss_sequences_lower_95": 1.4486404451517927,
            "loss_sequences_upper_95": 1.5450414082019879,
            "loss_tokens_lower_95": 1.3902745273658232,
            "loss_tokens_upper_95": 1.4860372055893696,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4344326978201396,
            "data_time": 0.12334492802619934,
            "batch_time": 0.44141070544719696,
            "samples_per_second": 331705.7663699605,
            "samples_per_second_per_gpu": 41463.22079624506,
            "loss_sequences_lower_95": 2.3761359134436524,
            "loss_sequences_upper_95": 2.78919693943345,
            "loss_tokens_lower_95": 2.2607381787154543,
            "loss_tokens_upper_95": 2.5346646375065047,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.987010103374743,
            "data_time": 0.053471150994300845,
            "batch_time": 0.6305880963802337,
            "samples_per_second": 426212.089571397,
            "samples_per_second_per_gpu": 53276.511196424624,
            "loss_sequences_lower_95": 3.1076745229719567,
            "loss_sequences_upper_95": 3.2696276826564965,
            "loss_tokens_lower_95": 2.916201613453942,
            "loss_tokens_upper_95": 3.0542104634182214,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.837502458473531,
            "data_time": 0.2135511189699173,
            "batch_time": 0.5946542620658875,
            "samples_per_second": 324649.0215415743,
            "samples_per_second_per_gpu": 40581.127692696784,
            "loss_sequences_lower_95": 1.8628611680937976,
            "loss_sequences_upper_95": 2.2786595786490094,
            "loss_tokens_lower_95": 1.6986064568623827,
            "loss_tokens_upper_95": 1.9653008863937602,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.3730957639720542,
            "data_time": 0.009423805254753854,
            "batch_time": 0.5821861675166785,
            "samples_per_second": 440393.15111260506,
            "samples_per_second_per_gpu": 55049.14388907563,
            "loss_sequences_lower_95": 3.357547886530431,
            "loss_sequences_upper_95": 3.3878128067019655,
            "loss_tokens_lower_95": 3.3578030156812613,
            "loss_tokens_upper_95": 3.3881653674484897,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6082854479261972,
            "data_time": 0.19653013348579407,
            "batch_time": 0.4380907863378525,
            "samples_per_second": 298404.64573668985,
            "samples_per_second_per_gpu": 37300.58071708623,
            "loss_sequences_lower_95": 0.5923581104834104,
            "loss_sequences_upper_95": 0.6826146820216503,
            "loss_tokens_lower_95": 0.5223405133250304,
            "loss_tokens_upper_95": 0.6727971478152787,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.976823221774971,
            "data_time": 0.005207296709219615,
            "batch_time": 0.5887560880184174,
            "samples_per_second": 442205.1993849198,
            "samples_per_second_per_gpu": 55275.64992311497,
            "loss_sequences_lower_95": 3.5654050482344077,
            "loss_sequences_upper_95": 3.6010703391149113,
            "loss_tokens_lower_95": 2.9099247400870407,
            "loss_tokens_upper_95": 2.9449116960831723,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.16680278635025,
            "data_time": 0.0654769167304039,
            "batch_time": 0.6374513246119022,
            "samples_per_second": 419011.98538315354,
            "samples_per_second_per_gpu": 52376.49817289419,
            "loss_sequences_lower_95": 4.253613024902344,
            "loss_sequences_upper_95": 4.474934167480469,
            "loss_tokens_lower_95": 4.044520978344934,
            "loss_tokens_upper_95": 4.240653511395495,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.078117378898289,
            "data_time": 0.22813627123832703,
            "batch_time": 0.7564472854137421,
            "samples_per_second": 342389.4038751313,
            "samples_per_second_per_gpu": 42798.67548439141,
            "loss_sequences_lower_95": 2.0226580843718156,
            "loss_sequences_upper_95": 2.132352361264436,
            "loss_tokens_lower_95": 2.023050540426503,
            "loss_tokens_upper_95": 2.135534973144531,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.454790184714577,
            "data_time": 0.041393871108690895,
            "batch_time": 0.5456976443529129,
            "samples_per_second": 419301.9386193506,
            "samples_per_second_per_gpu": 52412.742327418826,
            "loss_sequences_lower_95": 5.389801006895123,
            "loss_sequences_upper_95": 5.518794315222538,
            "loss_tokens_lower_95": 5.389838839444248,
            "loss_tokens_upper_95": 5.519359463778408,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.6285235975583394,
            "data_time": 0.04324223349491755,
            "batch_time": 0.615000993013382,
            "samples_per_second": 428176.981622989,
            "samples_per_second_per_gpu": 53522.122702873625,
            "loss_sequences_lower_95": 0.6601137145996094,
            "loss_sequences_upper_95": 0.6861214680989584,
            "loss_tokens_lower_95": 0.6083902408619698,
            "loss_tokens_upper_95": 0.6440299244697879,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.240024716513497,
            "data_time": 0.2183884084224701,
            "batch_time": 0.7014541029930115,
            "samples_per_second": 338525.4127651309,
            "samples_per_second_per_gpu": 42315.676595641366,
            "loss_sequences_lower_95": 4.895602010091146,
            "loss_sequences_upper_95": 5.600145408993676,
            "loss_tokens_lower_95": 4.896249738420758,
            "loss_tokens_upper_95": 5.592949494861421,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.4225094821304083,
            "data_time": 0.1713799387216568,
            "batch_time": 0.25969764590263367,
            "samples_per_second": 224011.16503810184,
            "samples_per_second_per_gpu": 28001.39562976273,
            "loss_sequences_lower_95": 1.2987400144338608,
            "loss_sequences_upper_95": 1.8665967255830764,
            "loss_tokens_lower_95": 1.1075087211058312,
            "loss_tokens_upper_95": 1.4518340277917605,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.944926582813263,
            "data_time": 0.06270810589194298,
            "batch_time": 0.6320052072405815,
            "samples_per_second": 421596.7023777455,
            "samples_per_second_per_gpu": 52699.587797218184,
            "loss_sequences_lower_95": 6.9215388671874996,
            "loss_sequences_upper_95": 7.2966560424804685,
            "loss_tokens_lower_95": 6.783773044644273,
            "loss_tokens_upper_95": 7.106918660275222,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.93292094707489,
            "data_time": 0.06245031580328941,
            "batch_time": 0.6314525678753853,
            "samples_per_second": 421903.89752034494,
            "samples_per_second_per_gpu": 52737.98719004312,
            "loss_sequences_lower_95": 6.120131921386719,
            "loss_sequences_upper_95": 6.319225146484375,
            "loss_tokens_lower_95": 5.825205875219079,
            "loss_tokens_upper_95": 6.018866727826114,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.7074737449720168,
            "data_time": 0.023188977191845577,
            "batch_time": 0.5902758166193962,
            "samples_per_second": 437463.3311724474,
            "samples_per_second_per_gpu": 54682.91639655593,
            "loss_sequences_lower_95": 3.666752982067654,
            "loss_sequences_upper_95": 3.7486873778887655,
            "loss_tokens_lower_95": 3.665533645737303,
            "loss_tokens_upper_95": 3.7485671163284655,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.086499424024661,
            "data_time": 0.08597059051195781,
            "batch_time": 0.5818254798650742,
            "samples_per_second": 410538.04246157245,
            "samples_per_second_per_gpu": 51317.255307696556,
            "loss_sequences_lower_95": 2.0393238214120704,
            "loss_sequences_upper_95": 2.135328536026306,
            "loss_tokens_lower_95": 2.0388511552239343,
            "loss_tokens_upper_95": 2.1343734483381938,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.465353039264679,
            "data_time": 0.061195146292448044,
            "batch_time": 0.6323456577956676,
            "samples_per_second": 420714.64201105083,
            "samples_per_second_per_gpu": 52589.330251381354,
            "loss_sequences_lower_95": 4.375161853027344,
            "loss_sequences_upper_95": 4.5586185668945305,
            "loss_tokens_lower_95": 4.376763330078125,
            "loss_tokens_upper_95": 4.5568572021484375,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7676385458900665,
            "data_time": 0.009109046487581162,
            "batch_time": 0.5847102747786612,
            "samples_per_second": 442116.7002569043,
            "samples_per_second_per_gpu": 55264.58753211304,
            "loss_sequences_lower_95": 2.5917973184720906,
            "loss_sequences_upper_95": 2.6796727175969726,
            "loss_tokens_lower_95": 1.6747692833081838,
            "loss_tokens_upper_95": 1.7286444614330503,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8905992005298387,
            "data_time": 0.13920091731207712,
            "batch_time": 0.4961724877357483,
            "samples_per_second": 330901.47694667155,
            "samples_per_second_per_gpu": 41362.684618333944,
            "loss_sequences_lower_95": 1.8442320240077688,
            "loss_sequences_upper_95": 1.9382134224051861,
            "loss_tokens_lower_95": 1.844436876097722,
            "loss_tokens_upper_95": 1.9396718608799266,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8598684306238211,
            "data_time": 0.1286863386631012,
            "batch_time": 0.7111549973487854,
            "samples_per_second": 394160.0711353888,
            "samples_per_second_per_gpu": 49270.0088919236,
            "loss_sequences_lower_95": 1.8340824201995252,
            "loss_sequences_upper_95": 1.8864577110140932,
            "loss_tokens_lower_95": 1.8339402142693015,
            "loss_tokens_upper_95": 1.886417957380706,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5562686102926082,
            "data_time": 0.010943084955215454,
            "batch_time": 0.5828804424963892,
            "samples_per_second": 439603.65995951113,
            "samples_per_second_per_gpu": 54950.45749493889,
            "loss_sequences_lower_95": 2.0863676499083987,
            "loss_sequences_upper_95": 2.166039522217133,
            "loss_tokens_lower_95": 1.4861756420530574,
            "loss_tokens_upper_95": 1.5377144413390862,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7814961930431386,
            "data_time": 0.22023119032382965,
            "batch_time": 0.6582066416740417,
            "samples_per_second": 330816.46978538635,
            "samples_per_second_per_gpu": 41352.05872317329,
            "loss_sequences_lower_95": 2.6500248661747685,
            "loss_sequences_upper_95": 2.9142201499333455,
            "loss_tokens_lower_95": 2.6491976662287637,
            "loss_tokens_upper_95": 2.9121078894882606,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1360536170661995,
            "data_time": 0.021896632818075325,
            "batch_time": 0.5966081000291384,
            "samples_per_second": 437556.0697019007,
            "samples_per_second_per_gpu": 54694.50871273759,
            "loss_sequences_lower_95": 3.1116704668984134,
            "loss_sequences_upper_95": 3.160716646060302,
            "loss_tokens_lower_95": 3.1112710692612766,
            "loss_tokens_upper_95": 3.1603226239965596,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8600218487017364,
            "data_time": 0.23194897174835205,
            "batch_time": 0.7090683430433273,
            "samples_per_second": 332430.68792328605,
            "samples_per_second_per_gpu": 41553.83599041076,
            "loss_sequences_lower_95": 1.8126694244088477,
            "loss_sequences_upper_95": 1.908028967403671,
            "loss_tokens_lower_95": 1.8118225690230583,
            "loss_tokens_upper_95": 1.9086627626882016,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0723992854356765,
            "data_time": 0.18043260276317596,
            "batch_time": 0.32944105565547943,
            "samples_per_second": 266335.18806990795,
            "samples_per_second_per_gpu": 33291.89850873849,
            "loss_sequences_lower_95": 0.9365048185984293,
            "loss_sequences_upper_95": 1.3538629976908367,
            "loss_tokens_lower_95": 0.8583124995231629,
            "loss_tokens_upper_95": 1.2783430046505397,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.8883772909641265,
            "data_time": 0.18889430165290833,
            "batch_time": 0.3380986154079437,
            "samples_per_second": 264412.246374393,
            "samples_per_second_per_gpu": 33051.530796799125,
            "loss_sequences_lower_95": 0.838224425315857,
            "loss_sequences_upper_95": 1.1774888451894123,
            "loss_tokens_lower_95": 0.6816917665888754,
            "loss_tokens_upper_95": 1.033143223537488,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.259225001918023,
            "data_time": 0.021114023668425425,
            "batch_time": 0.575163970036166,
            "samples_per_second": 436329.10327919,
            "samples_per_second_per_gpu": 54541.13790989875,
            "loss_sequences_lower_95": 4.237803079551731,
            "loss_sequences_upper_95": 4.281135983811211,
            "loss_tokens_lower_95": 4.23745330765602,
            "loss_tokens_upper_95": 4.281689668860456,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.29497618913948065,
            "data_time": 0.004692645702820158,
            "batch_time": 0.5916718513944784,
            "samples_per_second": 442481.8610894372,
            "samples_per_second_per_gpu": 55310.23263617965,
            "loss_sequences_lower_95": 0.42246677430246876,
            "loss_sequences_upper_95": 0.43435641439213907,
            "loss_tokens_lower_95": 0.28361160834666865,
            "loss_tokens_upper_95": 0.28959396177215585,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.817732212111706,
            "data_time": 0.20912815630435944,
            "batch_time": 0.4943315386772156,
            "samples_per_second": 319614.26267959643,
            "samples_per_second_per_gpu": 39951.782834949554,
            "loss_sequences_lower_95": 4.079603732852485,
            "loss_sequences_upper_95": 4.47132528710553,
            "loss_tokens_lower_95": 3.6574818336826023,
            "loss_tokens_upper_95": 3.8972687794575966,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.198479536417368,
            "data_time": 0.18375931680202484,
            "batch_time": 0.28268736600875854,
            "samples_per_second": 230942.35950957934,
            "samples_per_second_per_gpu": 28867.794938697418,
            "loss_sequences_lower_95": 5.824450580493823,
            "loss_sequences_upper_95": 6.770941492029138,
            "loss_tokens_lower_95": 5.387246233151283,
            "loss_tokens_upper_95": 6.769631939169801,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.6090230767319844,
            "data_time": 0.2131752371788025,
            "batch_time": 0.593609943985939,
            "samples_per_second": 324875.3431020777,
            "samples_per_second_per_gpu": 40609.417887759715,
            "loss_sequences_lower_95": 3.779660136525224,
            "loss_sequences_upper_95": 4.090787915485661,
            "loss_tokens_lower_95": 3.4502010289634146,
            "loss_tokens_upper_95": 3.647573188800988,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.659615917903621,
            "data_time": 0.20568984746932983,
            "batch_time": 0.5860971361398697,
            "samples_per_second": 327077.20498733304,
            "samples_per_second_per_gpu": 40884.65062341663,
            "loss_sequences_lower_95": 3.7933000890220083,
            "loss_sequences_upper_95": 4.0680584326023,
            "loss_tokens_lower_95": 3.522897836697036,
            "loss_tokens_upper_95": 3.688404714665548,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.8833355540182533,
            "data_time": 0.21674101054668427,
            "batch_time": 0.597313717007637,
            "samples_per_second": 324108.7296027844,
            "samples_per_second_per_gpu": 40513.59120034805,
            "loss_sequences_lower_95": 4.159753557530846,
            "loss_sequences_upper_95": 4.56449171857136,
            "loss_tokens_lower_95": 3.698950876210735,
            "loss_tokens_upper_95": 3.9659309619730267,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.706778719657805,
            "data_time": 0.2203231304883957,
            "batch_time": 0.6009649485349655,
            "samples_per_second": 323279.5411746579,
            "samples_per_second_per_gpu": 40409.942646832234,
            "loss_sequences_lower_95": 3.811300249797542,
            "loss_sequences_upper_95": 4.0718617695133865,
            "loss_tokens_lower_95": 3.5769656552703952,
            "loss_tokens_upper_95": 3.72850233416691,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.9511416684026304,
            "data_time": 0.2150983363389969,
            "batch_time": 0.5889262855052948,
            "samples_per_second": 322239.9514417429,
            "samples_per_second_per_gpu": 40279.99393021786,
            "loss_sequences_lower_95": 4.089315151427844,
            "loss_sequences_upper_95": 4.39774964137107,
            "loss_tokens_lower_95": 3.838925112880228,
            "loss_tokens_upper_95": 3.970873416477771,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.8099701535410997,
            "data_time": 0.22854536771774292,
            "batch_time": 0.6087930351495743,
            "samples_per_second": 321669.5752270875,
            "samples_per_second_per_gpu": 40208.696903385935,
            "loss_sequences_lower_95": 4.101496496433165,
            "loss_sequences_upper_95": 4.418509171648723,
            "loss_tokens_lower_95": 3.6762560512733575,
            "loss_tokens_upper_95": 3.8152039714718957,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/c4_original-open_lm_1b-4.0/params.txt",
    "uuid": "92f3b22f-6792-490f-8e0b-bf0d2426737f",
    "creation_date": "2024_01_24-21_14_59"
}