{
    "name": "rpj-open_lm_1b-1.0",
    "dataset_name": "rpj",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf6",
    "hyperparameters": {
        "model": "open_lm_1b",
        "tokens": 28795904000,
        "warmup": 5000,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 256,
        "acc": 2,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 1439795200,
        "params_no_embed": 1336510464,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp"
        ],
        "chinchilla_multiplier": 1.0
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--train-num-samples",
        "5759180800",
        "--workers",
        "2",
        "--dataset-manifest",
        "<scrub>/rpj_tokenized_upsampled_eleutherai/manifest.jsonl",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "256",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "5000",
        "--model",
        "open_lm_1b",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--data-key",
        "json",
        "--accum-freq",
        "2",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--name",
        "rpj-open_lm_1b-1.0",
        "--logs",
        "/admin/<scrub>/scrub_logs",
        "--val-data",
        "/admin/<scrub>/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/admin/<scrub>/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-batch-size",
        "8",
        "--val-data-key",
        "json",
        "txt",
        "--val-num-samples",
        "245760",
        "--fsdp",
        "--fsdp-amp",
        "--report-to",
        "wandb",
        "--wandb-project-name",
        "scrub",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/scrub_experiments_v3"
    ],
    "results": [
        {
            "loss": 2.2233477155367534,
            "data_time": 0.21009579300880432,
            "batch_time": 1.999956488609314,
            "samples_per_second": 164000.34913031408,
            "samples_per_second_per_gpu": 20500.04364128926,
            "loss_sequences_lower_95": 2.1596716435750323,
            "loss_sequences_upper_95": 2.283709093729655,
            "loss_tokens_lower_95": 2.2124574979146323,
            "loss_tokens_upper_95": 2.2340333239237467,
            "sequences": 120,
            "tokens": 245760,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/openlm/shard_00000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.768756661738063,
            "data_time": 0.0026301132882485755,
            "batch_time": 0.29490159798314347,
            "samples_per_second": 445294.68899415835,
            "samples_per_second_per_gpu": 55661.836124269794,
            "loss_sequences_lower_95": 2.766092459543642,
            "loss_sequences_upper_95": 2.7715180932275674,
            "loss_tokens_lower_95": 2.7586985677083335,
            "loss_tokens_upper_95": 2.7788959739583334,
            "sequences": 84999,
            "tokens": 174077952,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/c4_val/shard-{0000000..0000010}.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3567956442735634,
            "data_time": 0.045916344970464706,
            "batch_time": 0.3481498770415783,
            "samples_per_second": 395687.5788220204,
            "samples_per_second_per_gpu": 49460.94735275255,
            "loss_sequences_lower_95": 2.333866078902264,
            "loss_sequences_upper_95": 2.3796234193140147,
            "loss_tokens_lower_95": 2.3464257395833332,
            "loss_tokens_upper_95": 2.3673040468749997,
            "sequences": 490,
            "tokens": 1003520,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_4chan_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6203906701766337,
            "data_time": 0.006199361070206291,
            "batch_time": 0.2949191743606015,
            "samples_per_second": 446375.55581358605,
            "samples_per_second_per_gpu": 55796.944476698256,
            "loss_sequences_lower_95": 2.6089051576594717,
            "loss_sequences_upper_95": 2.631674870127255,
            "loss_tokens_lower_95": 2.61072671875,
            "loss_tokens_upper_95": 2.6299094010416666,
            "sequences": 4850,
            "tokens": 9932800,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_100_domains/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.774335279484145,
            "data_time": 0.04319484159350395,
            "batch_time": 0.32183388620615005,
            "samples_per_second": 417937.4356604617,
            "samples_per_second_per_gpu": 52242.179457557715,
            "loss_sequences_lower_95": 2.740633813426839,
            "loss_sequences_upper_95": 2.8077076525406537,
            "loss_tokens_lower_95": 2.764416765625,
            "loss_tokens_upper_95": 2.784180276041667,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_c4_en/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.581883589642782,
            "data_time": 0.01617444058259328,
            "batch_time": 0.29608234266440075,
            "samples_per_second": 434755.9710517837,
            "samples_per_second_per_gpu": 54344.49638147296,
            "loss_sequences_lower_95": 2.543711993063817,
            "loss_sequences_upper_95": 2.620179272411147,
            "loss_tokens_lower_95": 2.57179784375,
            "loss_tokens_upper_95": 2.5919552916666664,
            "sequences": 1471,
            "tokens": 3012608,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma-v1_5/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.387048371276077,
            "data_time": 0.0062285757217651755,
            "batch_time": 0.2902876826433035,
            "samples_per_second": 445493.76107216266,
            "samples_per_second_per_gpu": 55686.72013402033,
            "loss_sequences_lower_95": 1.367209397919324,
            "loss_sequences_upper_95": 1.4067813197544643,
            "loss_tokens_lower_95": 1.37817690625,
            "loss_tokens_upper_95": 1.3957735260416666,
            "sequences": 4900,
            "tokens": 10035200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_programing_languages/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.206562415966813,
            "data_time": 0.006589187995383614,
            "batch_time": 0.29112495325113597,
            "samples_per_second": 444684.18332415586,
            "samples_per_second_per_gpu": 55585.52291551948,
            "loss_sequences_lower_95": 3.1984563103321335,
            "loss_sequences_upper_95": 3.2144969987320025,
            "loss_tokens_lower_95": 3.196294458333333,
            "loss_tokens_upper_95": 3.216828640625,
            "sequences": 4775,
            "tokens": 9779200,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_dolma_100_subreddits/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.9668423979262997,
            "data_time": 0.04307296872138977,
            "batch_time": 0.321941863745451,
            "samples_per_second": 418500.8612341942,
            "samples_per_second_per_gpu": 52312.607654274274,
            "loss_sequences_lower_95": 2.9223885264823104,
            "loss_sequences_upper_95": 3.0134359282206713,
            "loss_tokens_lower_95": 2.9562344375,
            "loss_tokens_upper_95": 2.9774636354166666,
            "sequences": 492,
            "tokens": 1007616,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_falcon-refinedweb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.616389826352417,
            "data_time": 0.042179644107818604,
            "batch_time": 0.3277665264904499,
            "samples_per_second": 421183.0187576772,
            "samples_per_second_per_gpu": 52647.87734470965,
            "loss_sequences_lower_95": 3.581410410281698,
            "loss_sequences_upper_95": 3.646834886875077,
            "loss_tokens_lower_95": 3.60442709375,
            "loss_tokens_upper_95": 3.6282279791666667,
            "sequences": 506,
            "tokens": 1036288,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_gab/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.705168423333102,
            "data_time": 0.004949556384133614,
            "batch_time": 0.29409502474397325,
            "samples_per_second": 447398.23634841276,
            "samples_per_second_per_gpu": 55924.779543551595,
            "loss_sequences_lower_95": 2.697463339760775,
            "loss_sequences_upper_95": 2.7130068368743148,
            "loss_tokens_lower_95": 2.69549353125,
            "loss_tokens_upper_95": 2.7148178333333335,
            "sequences": 7297,
            "tokens": 14944256,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_s2orc_unsplit_dedup/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.60271462417056,
            "data_time": 0.011102237199482164,
            "batch_time": 0.3000418426174867,
            "samples_per_second": 441616.74193915556,
            "samples_per_second_per_gpu": 55202.092742394445,
            "loss_sequences_lower_95": 2.5931366081190133,
            "loss_sequences_upper_95": 2.611944791070127,
            "loss_tokens_lower_95": 2.5929055260416667,
            "loss_tokens_upper_95": 2.612531677083333,
            "sequences": 2401,
            "tokens": 4917248,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_m2d2_wikipedia_unsplit/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1639981792132703,
            "data_time": 0.04987405240535736,
            "batch_time": 0.34416720643639565,
            "samples_per_second": 413222.8845295822,
            "samples_per_second_per_gpu": 51652.86056619778,
            "loss_sequences_lower_95": 3.131328287182667,
            "loss_sequences_upper_95": 3.1945795952911067,
            "loss_tokens_lower_95": 3.1536713854166667,
            "loss_tokens_upper_95": 3.1744211041666666,
            "sequences": 493,
            "tokens": 1009664,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_manosphere_meta_sep/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.528011455069734,
            "data_time": 0.0434257946908474,
            "batch_time": 0.321614645421505,
            "samples_per_second": 418510.16732468526,
            "samples_per_second_per_gpu": 52313.77091558566,
            "loss_sequences_lower_95": 2.4701583769073796,
            "loss_sequences_upper_95": 2.5854725637649567,
            "loss_tokens_lower_95": 2.5176380260416664,
            "loss_tokens_upper_95": 2.538541203125,
            "sequences": 491,
            "tokens": 1005568,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_mc4/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1090340397574683,
            "data_time": 0.14811253547668457,
            "batch_time": 0.2637506425380707,
            "samples_per_second": 249951.44791633677,
            "samples_per_second_per_gpu": 31243.930989542096,
            "loss_sequences_lower_95": 3.053137770566073,
            "loss_sequences_upper_95": 3.1650502638383347,
            "loss_tokens_lower_95": 3.090330860831521,
            "loss_tokens_upper_95": 3.128221754594283,
            "sequences": 44,
            "tokens": 90112,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_ptb/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.211243706958982,
            "data_time": 0.05665086706479391,
            "batch_time": 0.3192608157793681,
            "samples_per_second": 401397.4243643415,
            "samples_per_second_per_gpu": 50174.678045542685,
            "loss_sequences_lower_95": 2.126518916805701,
            "loss_sequences_upper_95": 2.2947278748448316,
            "loss_tokens_lower_95": 2.2018848541666665,
            "loss_tokens_upper_95": 2.2207423333333334,
            "sequences": 343,
            "tokens": 702464,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_redpajama/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.269906581234491,
            "data_time": 0.05320223172505697,
            "batch_time": 0.3381320635477702,
            "samples_per_second": 412557.63761465205,
            "samples_per_second_per_gpu": 51569.704701831506,
            "loss_sequences_lower_95": 5.210168819377165,
            "loss_sequences_upper_95": 5.324372312198528,
            "loss_tokens_lower_95": 5.258262447916667,
            "loss_tokens_upper_95": 5.28172809375,
            "sequences": 379,
            "tokens": 776192,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_twitterAAE_HELM_fixed/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6836230481257206,
            "data_time": 0.1615321785211563,
            "batch_time": 0.4391207695007324,
            "samples_per_second": 330549.7892027058,
            "samples_per_second_per_gpu": 41318.72365033822,
            "loss_sequences_lower_95": 2.6248490755675267,
            "loss_sequences_upper_95": 2.731251969884654,
            "loss_tokens_lower_95": 2.6726176652751983,
            "loss_tokens_upper_95": 2.694432530637647,
            "sequences": 122,
            "tokens": 249856,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/paloma_wikitext_103/00000001.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0591402071745812,
            "data_time": 0.003742165457118641,
            "batch_time": 0.29058466242118314,
            "samples_per_second": 451242.84169025713,
            "samples_per_second_per_gpu": 56405.35521128214,
            "loss_sequences_lower_95": 2.048276525751985,
            "loss_sequences_upper_95": 2.069907343399489,
            "loss_tokens_lower_95": 2.0483908138864657,
            "loss_tokens_upper_95": 2.070081336626282,
            "sequences": 14042,
            "tokens": 14042,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/mmlu/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.547796541085592,
            "data_time": 0.004362730663033981,
            "batch_time": 0.29119406885738613,
            "samples_per_second": 448713.32330180466,
            "samples_per_second_per_gpu": 56089.16541272558,
            "loss_sequences_lower_95": 2.5459025121832055,
            "loss_sequences_upper_95": 2.5706439010841966,
            "loss_tokens_lower_95": 2.537256815164713,
            "loss_tokens_upper_95": 2.555443319519961,
            "sequences": 10042,
            "tokens": 291143,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/hellaswag/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0894166289718297,
            "data_time": 0.012889590333489812,
            "batch_time": 0.2924195186180227,
            "samples_per_second": 444433.205471024,
            "samples_per_second_per_gpu": 55554.150683878,
            "loss_sequences_lower_95": 2.577933666173388,
            "loss_sequences_upper_95": 2.83665103759189,
            "loss_tokens_lower_95": 1.9172944082315122,
            "loss_tokens_upper_95": 2.0895489547035084,
            "sequences": 2117,
            "tokens": 4197,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/jeopardy_all/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.5621778871417047,
            "data_time": 0.009708194682995478,
            "batch_time": 0.2905012934158246,
            "samples_per_second": 446182.58822218236,
            "samples_per_second_per_gpu": 55772.823527772794,
            "loss_sequences_lower_95": 2.7129166341145834,
            "loss_sequences_upper_95": 2.90224365234375,
            "loss_tokens_lower_95": 2.475389126375786,
            "loss_tokens_upper_95": 2.6098927500982705,
            "sequences": 3000,
            "tokens": 7950,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/triviaqa_sm_sub/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0823735797649263,
            "data_time": 0.01919110661203211,
            "batch_time": 0.29010056907480414,
            "samples_per_second": 434299.70655713795,
            "samples_per_second_per_gpu": 54287.463319642244,
            "loss_sequences_lower_95": 2.165004153537244,
            "loss_sequences_upper_95": 2.214064212130994,
            "loss_tokens_lower_95": 2.054259836016399,
            "loss_tokens_upper_95": 2.0822609515656763,
            "sequences": 1319,
            "tokens": 123972,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/gsm8k/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6307042988863858,
            "data_time": 0.09939922392368317,
            "batch_time": 0.3541770428419113,
            "samples_per_second": 372166.08586962434,
            "samples_per_second_per_gpu": 46520.76073370304,
            "loss_sequences_lower_95": 1.6436465662175959,
            "loss_sequences_upper_95": 1.7314249108054423,
            "loss_tokens_lower_95": 1.6010320443981154,
            "loss_tokens_upper_95": 1.6392664217600523,
            "sequences": 220,
            "tokens": 49615,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_math/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.505591638234197,
            "data_time": 0.09726261347532272,
            "batch_time": 0.3726550415158272,
            "samples_per_second": 387915.6606845931,
            "samples_per_second_per_gpu": 48489.457585574135,
            "loss_sequences_lower_95": 2.515398995535714,
            "loss_sequences_upper_95": 2.6712533382493624,
            "loss_tokens_lower_95": 2.4520583186992213,
            "loss_tokens_upper_95": 2.534003681448883,
            "sequences": 245,
            "tokens": 14770,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/aqua/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3782536872227986,
            "data_time": 0.06402574479579926,
            "batch_time": 0.2927168806393941,
            "samples_per_second": 391013.52910088125,
            "samples_per_second_per_gpu": 48876.691137610156,
            "loss_sequences_lower_95": 2.3830824330647786,
            "loss_sequences_upper_95": 2.4652166900634764,
            "loss_tokens_lower_95": 2.2991240172509944,
            "loss_tokens_upper_95": 2.4458195595570342,
            "sequences": 300,
            "tokens": 3236,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/svamp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.9767007775079324,
            "data_time": 0.0032792496231367004,
            "batch_time": 0.291001687822102,
            "samples_per_second": 450760.42696119414,
            "samples_per_second_per_gpu": 56345.05337014927,
            "loss_sequences_lower_95": 4.026034980362064,
            "loss_sequences_upper_95": 4.102745264673859,
            "loss_tokens_lower_95": 3.9095001907490805,
            "loss_tokens_upper_95": 3.9867904675480195,
            "sequences": 20321,
            "tokens": 20929,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_qa_wikidata/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7833806358603916,
            "data_time": 0.01186456256791165,
            "batch_time": 0.29496511424842636,
            "samples_per_second": 441630.6345301938,
            "samples_per_second_per_gpu": 55203.82931627423,
            "loss_sequences_lower_95": 3.619439460934212,
            "loss_sequences_upper_95": 3.893391546898017,
            "loss_tokens_lower_95": 2.6280461310474146,
            "loss_tokens_upper_95": 2.7468971134639673,
            "sequences": 2376,
            "tokens": 8808,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_easy/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.808992508023265,
            "data_time": 0.020716971158981322,
            "batch_time": 0.28617909997701646,
            "samples_per_second": 426243.5400448364,
            "samples_per_second_per_gpu": 53280.44250560455,
            "loss_sequences_lower_95": 3.35001081134679,
            "loss_sequences_upper_95": 3.6599924862181368,
            "loss_tokens_lower_95": 2.698013976282672,
            "loss_tokens_upper_95": 2.8363102418370847,
            "sequences": 1172,
            "tokens": 6198,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/arc_challenge/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 6.0901160044212865,
            "data_time": 0.09535536170005798,
            "batch_time": 0.3492366001009941,
            "samples_per_second": 372625.6783988137,
            "samples_per_second_per_gpu": 46578.20979985171,
            "loss_sequences_lower_95": 5.996243244327911,
            "loss_sequences_upper_95": 6.182740484089612,
            "loss_tokens_lower_95": 5.99611148921322,
            "loss_tokens_upper_95": 6.18362547992027,
            "sequences": 219,
            "tokens": 219,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_misconceptions/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7044754707813263,
            "data_time": 0.1751956194639206,
            "batch_time": 0.41291478276252747,
            "samples_per_second": 300200.89082789706,
            "samples_per_second_per_gpu": 37525.11135348713,
            "loss_sequences_lower_95": 2.6220987854003903,
            "loss_sequences_upper_95": 2.965617965698242,
            "loss_tokens_lower_95": 2.4927326127326976,
            "loss_tokens_upper_95": 2.890187452176389,
            "sequences": 100,
            "tokens": 559,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/copa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5141862881586277,
            "data_time": 0.013306663371622562,
            "batch_time": 0.2889016168192029,
            "samples_per_second": 439400.58802055573,
            "samples_per_second_per_gpu": 54925.07350256947,
            "loss_sequences_lower_95": 1.4903680088820208,
            "loss_sequences_upper_95": 1.5373550524365085,
            "loss_tokens_lower_95": 1.4907383242307448,
            "loss_tokens_upper_95": 1.5379372072561492,
            "sequences": 1954,
            "tokens": 1954,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/siqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8515955559256427,
            "data_time": 0.021569956839084626,
            "batch_time": 0.2971331253647804,
            "samples_per_second": 435975.426586409,
            "samples_per_second_per_gpu": 54496.92832330112,
            "loss_sequences_lower_95": 1.8289518389127764,
            "loss_sequences_upper_95": 1.8739015321473818,
            "loss_tokens_lower_95": 1.8289072097665848,
            "loss_tokens_upper_95": 1.87403266525581,
            "sequences": 1221,
            "tokens": 1221,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/commonsense_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7896710704278376,
            "data_time": 0.013853447635968526,
            "batch_time": 0.2897220363219579,
            "samples_per_second": 440862.4829317893,
            "samples_per_second_per_gpu": 55107.81036647366,
            "loss_sequences_lower_95": 3.044617316875935,
            "loss_sequences_upper_95": 3.1748630395003063,
            "loss_tokens_lower_95": 2.7249215503673434,
            "loss_tokens_upper_95": 2.777110811401725,
            "sequences": 1838,
            "tokens": 39949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/piqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.412440970420837,
            "data_time": 0.048591990023851395,
            "batch_time": 0.3285839110612869,
            "samples_per_second": 421869.6095915877,
            "samples_per_second_per_gpu": 52733.70119894846,
            "loss_sequences_lower_95": 4.853659521484375,
            "loss_sequences_upper_95": 5.426503027343751,
            "loss_tokens_lower_95": 4.126950200482917,
            "loss_tokens_upper_95": 4.489826388709361,
            "sequences": 500,
            "tokens": 1511,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/openbook_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.8871065378189087,
            "data_time": 0.14963442087173462,
            "batch_time": 0.24102309346199036,
            "samples_per_second": 227268.0373884244,
            "samples_per_second_per_gpu": 28408.50467355305,
            "loss_sequences_lower_95": 2.649208217859268,
            "loss_sequences_upper_95": 3.1162869453430178,
            "loss_tokens_lower_95": 2.4438595388127466,
            "loss_tokens_upper_95": 3.2187771676600665,
            "sequences": 32,
            "tokens": 174,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_novel_concepts/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.344803572386161,
            "data_time": 0.09553264826536179,
            "batch_time": 0.29773707687854767,
            "samples_per_second": 361389.07705044857,
            "samples_per_second_per_gpu": 45173.63463130607,
            "loss_sequences_lower_95": 4.453783223820829,
            "loss_sequences_upper_95": 5.240367100156587,
            "loss_tokens_lower_95": 2.7034091609709696,
            "loss_tokens_upper_95": 3.123794180645346,
            "sequences": 174,
            "tokens": 887,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strange_stories/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.379423148978897,
            "data_time": 0.011960248152414957,
            "batch_time": 0.2965082873900731,
            "samples_per_second": 447185.51187517634,
            "samples_per_second_per_gpu": 55898.18898439704,
            "loss_sequences_lower_95": 2.3556559298120767,
            "loss_sequences_upper_95": 2.4027093743173875,
            "loss_tokens_lower_95": 2.3555465678243777,
            "loss_tokens_upper_95": 2.402747579285441,
            "sequences": 2289,
            "tokens": 2289,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_strategy_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.5861379019952204,
            "data_time": 0.006602731783215593,
            "batch_time": 0.2887504344306341,
            "samples_per_second": 448322.27616924956,
            "samples_per_second_per_gpu": 56040.284521156194,
            "loss_sequences_lower_95": 1.5961090958467763,
            "loss_sequences_upper_95": 1.7010324026204395,
            "loss_tokens_lower_95": 1.5216238947363174,
            "loss_tokens_upper_95": 1.624180557968579,
            "sequences": 5153,
            "tokens": 5486,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/lambada_openai/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6475170036375304,
            "data_time": 0.061441476146380104,
            "batch_time": 0.27015016973018646,
            "samples_per_second": 367485.4519655245,
            "samples_per_second_per_gpu": 45935.681495690565,
            "loss_sequences_lower_95": 2.5973004351605424,
            "loss_sequences_upper_95": 3.021072840428614,
            "loss_tokens_lower_95": 2.4747506705397924,
            "loss_tokens_upper_95": 2.746307389985401,
            "sequences": 273,
            "tokens": 1081,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winograd_wsc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.2193975173871916,
            "data_time": 0.020756402611732484,
            "batch_time": 0.30456694811582563,
            "samples_per_second": 440908.77258642577,
            "samples_per_second_per_gpu": 55113.59657330322,
            "loss_sequences_lower_95": 3.337024368972598,
            "loss_sequences_upper_95": 3.496877437552412,
            "loss_tokens_lower_95": 3.134597381781602,
            "loss_tokens_upper_95": 3.2754641503561523,
            "sequences": 1267,
            "tokens": 5949,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogrande/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0971885054576687,
            "data_time": 0.09049316495656967,
            "batch_time": 0.28248684853315353,
            "samples_per_second": 353310.1486163453,
            "samples_per_second_per_gpu": 44163.76857704316,
            "loss_sequences_lower_95": 2.0569674422101274,
            "loss_sequences_upper_95": 2.46548220006431,
            "loss_tokens_lower_95": 1.9068081679974256,
            "loss_tokens_upper_95": 2.2047980652353307,
            "sequences": 164,
            "tokens": 1226,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conlang_translation/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.2444281754052073,
            "data_time": 0.0043063851430563465,
            "batch_time": 0.28904148648166805,
            "samples_per_second": 449675.0094044876,
            "samples_per_second_per_gpu": 56209.37617556095,
            "loss_sequences_lower_95": 3.2292437149539284,
            "loss_sequences_upper_95": 3.2594032917911706,
            "loss_tokens_lower_95": 3.2293709327803057,
            "loss_tokens_upper_95": 3.259498511030331,
            "sequences": 9998,
            "tokens": 9998,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_language_identification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.9141914184811046,
            "data_time": 0.18620552122592926,
            "batch_time": 0.4276329427957535,
            "samples_per_second": 301001.55996340286,
            "samples_per_second_per_gpu": 37625.19499542536,
            "loss_sequences_lower_95": 0.8915617396530596,
            "loss_sequences_upper_95": 1.058322865754655,
            "loss_tokens_lower_95": 0.7786956443513307,
            "loss_tokens_upper_95": 1.0144459610219623,
            "sequences": 103,
            "tokens": 977,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_conceptual_combinations/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7813174349363243,
            "data_time": 0.002705401949659239,
            "batch_time": 0.29114126909935356,
            "samples_per_second": 449044.836206952,
            "samples_per_second_per_gpu": 56130.604525869,
            "loss_sequences_lower_95": 3.3105774616745283,
            "loss_sequences_upper_95": 3.3462959977315907,
            "loss_tokens_lower_95": 2.6640994983075434,
            "loss_tokens_upper_95": 2.6983382616054157,
            "sequences": 38160,
            "tokens": 64625,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_elementary_math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 4.930562160253524,
            "data_time": 0.024303534999489784,
            "batch_time": 0.30759939551353455,
            "samples_per_second": 432330.1379419625,
            "samples_per_second_per_gpu": 54041.267242745314,
            "loss_sequences_lower_95": 5.0464686523437505,
            "loss_sequences_upper_95": 5.279897473144532,
            "loss_tokens_lower_95": 4.7560842657126114,
            "loss_tokens_upper_95": 4.974266860619683,
            "sequences": 1000,
            "tokens": 1293,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_dyck_languages/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9933020265206047,
            "data_time": 0.09465543925762177,
            "batch_time": 0.3602603077888489,
            "samples_per_second": 376103.9232409013,
            "samples_per_second_per_gpu": 47012.990405112665,
            "loss_sequences_lower_95": 1.9190259452488112,
            "loss_sequences_upper_95": 2.0675313501772674,
            "loss_tokens_lower_95": 1.9188880887238877,
            "loss_tokens_upper_95": 2.067664098324983,
            "sequences": 230,
            "tokens": 230,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_ar/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.887892918153242,
            "data_time": 0.018529169938781044,
            "batch_time": 0.2903666699474508,
            "samples_per_second": 433449.96817941184,
            "samples_per_second_per_gpu": 54181.24602242648,
            "loss_sequences_lower_95": 5.814670067989465,
            "loss_sequences_upper_95": 5.9591318581321016,
            "loss_tokens_lower_95": 5.817061490145597,
            "loss_tokens_upper_95": 5.95895391290838,
            "sequences": 1320,
            "tokens": 1320,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_cs_algorithms/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0128432135979335,
            "data_time": 0.017869889736175537,
            "batch_time": 0.30114366114139557,
            "samples_per_second": 437392.789865264,
            "samples_per_second_per_gpu": 54674.098733158,
            "loss_sequences_lower_95": 1.0938126505533854,
            "loss_sequences_upper_95": 1.1500021301269532,
            "loss_tokens_lower_95": 0.9679037415747549,
            "loss_tokens_upper_95": 1.0342352722338937,
            "sequences": 1500,
            "tokens": 12495,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_logical_deduction/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.499770716258458,
            "data_time": 0.09406054764986038,
            "batch_time": 0.3366844207048416,
            "samples_per_second": 373905.09431916766,
            "samples_per_second_per_gpu": 46738.13678989596,
            "loss_sequences_lower_95": 5.138102286202567,
            "loss_sequences_upper_95": 5.875414908272879,
            "loss_tokens_lower_95": 5.143106326148624,
            "loss_tokens_upper_95": 5.863666120256696,
            "sequences": 210,
            "tokens": 210,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_operators/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.6479255482554436,
            "data_time": 0.15162724256515503,
            "batch_time": 0.24016328155994415,
            "samples_per_second": 227679.58774481082,
            "samples_per_second_per_gpu": 28459.948468101353,
            "loss_sequences_lower_95": 1.5043102622032165,
            "loss_sequences_upper_95": 2.1647587060928344,
            "loss_tokens_lower_95": 1.24162900944346,
            "loss_tokens_upper_95": 1.6448741731938628,
            "sequences": 32,
            "tokens": 485,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_repeat_copy_logic/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 7.200643270015717,
            "data_time": 0.025459017604589462,
            "batch_time": 0.30717430636286736,
            "samples_per_second": 434539.8346421161,
            "samples_per_second_per_gpu": 54317.47933026451,
            "loss_sequences_lower_95": 7.187488305664063,
            "loss_sequences_upper_95": 7.554352954101563,
            "loss_tokens_lower_95": 7.0133735153275705,
            "loss_tokens_upper_95": 7.33284028080716,
            "sequences": 1000,
            "tokens": 1182,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_nospaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 5.92965897846222,
            "data_time": 0.025230538100004196,
            "batch_time": 0.30711819790303707,
            "samples_per_second": 434393.2109139602,
            "samples_per_second_per_gpu": 54299.15136424502,
            "loss_sequences_lower_95": 6.088707934570313,
            "loss_sequences_upper_95": 6.285430737304687,
            "loss_tokens_lower_95": 5.828928610200457,
            "loss_tokens_upper_95": 5.983017543893966,
            "sequences": 1000,
            "tokens": 1997,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/simple_arithmetic_withspaces/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.1799768646251745,
            "data_time": 0.010041574637095133,
            "batch_time": 0.2903919133047263,
            "samples_per_second": 443374.57439779426,
            "samples_per_second_per_gpu": 55421.82179972428,
            "loss_sequences_lower_95": 3.150274635696028,
            "loss_sequences_upper_95": 3.2097446156475233,
            "loss_tokens_lower_95": 3.151183758747486,
            "loss_tokens_upper_95": 3.209861660738979,
            "sequences": 2983,
            "tokens": 2983,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/math_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.0788964307802615,
            "data_time": 0.033597710368397475,
            "batch_time": 0.2932763728466663,
            "samples_per_second": 412716.877879147,
            "samples_per_second_per_gpu": 51589.609734893376,
            "loss_sequences_lower_95": 2.03062018000402,
            "loss_sequences_upper_95": 2.12682620750228,
            "loss_tokens_lower_95": 2.030957757859003,
            "loss_tokens_upper_95": 2.1283380042572726,
            "sequences": 651,
            "tokens": 651,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/logi_qa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.540477602481842,
            "data_time": 0.024213362485170364,
            "batch_time": 0.3072610832750797,
            "samples_per_second": 432991.68791561254,
            "samples_per_second_per_gpu": 54123.96098945157,
            "loss_sequences_lower_95": 3.4473049255371095,
            "loss_sequences_upper_95": 3.636313732910156,
            "loss_tokens_lower_95": 3.447704010009766,
            "loss_tokens_upper_95": 3.6387091247558594,
            "sequences": 1000,
            "tokens": 1000,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/pubmed_qa_labeled/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.088405030779717,
            "data_time": 0.004269519663718809,
            "batch_time": 0.2919848272240306,
            "samples_per_second": 448283.4892173483,
            "samples_per_second_per_gpu": 56035.436152168535,
            "loss_sequences_lower_95": 2.914972407720849,
            "loss_sequences_upper_95": 3.005301865908822,
            "loss_tokens_lower_95": 1.9250848842471633,
            "loss_tokens_upper_95": 1.9853193117348462,
            "sequences": 10570,
            "tokens": 46886,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/squad/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.9045622793596182,
            "data_time": 0.06740863214839589,
            "batch_time": 0.2917390682480552,
            "samples_per_second": 367430.2581615104,
            "samples_per_second_per_gpu": 45928.7822701888,
            "loss_sequences_lower_95": 1.8424522314498673,
            "loss_sequences_upper_95": 1.963989810089567,
            "loss_tokens_lower_95": 1.844161731093677,
            "loss_tokens_upper_95": 1.9664038757779705,
            "sequences": 268,
            "tokens": 268,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_rc/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.887404837327845,
            "data_time": 0.04691661149263382,
            "batch_time": 0.33456091582775116,
            "samples_per_second": 419285.5157401414,
            "samples_per_second_per_gpu": 52410.68946751767,
            "loss_sequences_lower_95": 1.8414060854444316,
            "loss_sequences_upper_95": 1.9328453004126456,
            "loss_tokens_lower_95": 1.841679537904029,
            "loss_tokens_upper_95": 1.933180643717448,
            "sequences": 510,
            "tokens": 510,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_lsat_lr/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.7577554787495493,
            "data_time": 0.00505153669251336,
            "batch_time": 0.2920327413649786,
            "samples_per_second": 446320.8221247052,
            "samples_per_second_per_gpu": 55790.10276558815,
            "loss_sequences_lower_95": 2.14289603077438,
            "loss_sequences_upper_95": 2.2189653991294,
            "loss_tokens_lower_95": 1.6525120895122265,
            "loss_tokens_upper_95": 1.7084290980610952,
            "sequences": 7983,
            "tokens": 27277,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/coqa/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.106165820959384,
            "data_time": 0.09706329554319382,
            "batch_time": 0.31710168719291687,
            "samples_per_second": 365432.13026987796,
            "samples_per_second_per_gpu": 45679.016283734745,
            "loss_sequences_lower_95": 2.0442327065442605,
            "loss_sequences_upper_95": 2.16780839142976,
            "loss_tokens_lower_95": 2.043892439584883,
            "loss_tokens_upper_95": 2.168495840244192,
            "sequences": 189,
            "tokens": 189,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bigbench_understanding_fables/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.290389523119737,
            "data_time": 0.009470886909044705,
            "batch_time": 0.29375042823644787,
            "samples_per_second": 444140.7696753526,
            "samples_per_second_per_gpu": 55517.596209419076,
            "loss_sequences_lower_95": 2.262967518097764,
            "loss_sequences_upper_95": 2.3179228000465883,
            "loss_tokens_lower_95": 2.263380825031059,
            "loss_tokens_upper_95": 2.317362493579176,
            "sequences": 3270,
            "tokens": 3270,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/boolq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.8853646499439352,
            "data_time": 0.09187959134578705,
            "batch_time": 0.33163121342658997,
            "samples_per_second": 371427.2722111224,
            "samples_per_second_per_gpu": 46428.4090263903,
            "loss_sequences_lower_95": 1.8135874998222277,
            "loss_sequences_upper_95": 1.9584985640442487,
            "loss_tokens_lower_95": 1.812786276363632,
            "loss_tokens_upper_95": 1.9585958610460596,
            "sequences": 206,
            "tokens": 206,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/agi_eval_sat_en/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.308053473631541,
            "data_time": 0.16137158870697021,
            "batch_time": 0.3109530806541443,
            "samples_per_second": 270819.36016852607,
            "samples_per_second_per_gpu": 33852.42002106576,
            "loss_sequences_lower_95": 1.2173461755116781,
            "loss_sequences_upper_95": 1.546434825261434,
            "loss_tokens_lower_95": 1.0971911271413168,
            "loss_tokens_upper_95": 1.4794072813457912,
            "sequences": 60,
            "tokens": 72,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_female/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.4180999457836152,
            "data_time": 0.16209641098976135,
            "batch_time": 0.311676561832428,
            "samples_per_second": 270302.2465735698,
            "samples_per_second_per_gpu": 33787.78082169622,
            "loss_sequences_lower_95": 1.377202704747518,
            "loss_sequences_upper_95": 1.7779571310679119,
            "loss_tokens_lower_95": 1.1301424455106928,
            "loss_tokens_upper_95": 1.6572280969512596,
            "sequences": 60,
            "tokens": 89,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/winogender_mc_male/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.67316758193745,
            "data_time": 0.009281506692921673,
            "batch_time": 0.2931769319154598,
            "samples_per_second": 444759.0626625629,
            "samples_per_second_per_gpu": 55594.88283282036,
            "loss_sequences_lower_95": 3.647151313972754,
            "loss_sequences_upper_95": 3.6986934845015647,
            "loss_tokens_lower_95": 3.646925000287647,
            "loss_tokens_upper_95": 3.699078579482695,
            "sequences": 3395,
            "tokens": 3395,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/enterprise_pii_classification/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.3125028383486869,
            "data_time": 0.002539609369355166,
            "batch_time": 0.29239382159527333,
            "samples_per_second": 448592.3086901332,
            "samples_per_second_per_gpu": 56074.03858626665,
            "loss_sequences_lower_95": 0.40114781405069283,
            "loss_sequences_upper_95": 0.41116501759188223,
            "loss_tokens_lower_95": 0.29948609058930403,
            "loss_tokens_upper_95": 0.30519979038286416,
            "sequences": 58492,
            "tokens": 141385,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/bbq/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 1.0042401012473219,
            "data_time": 0.18678927421569824,
            "batch_time": 0.4725438505411148,
            "samples_per_second": 325482.04379024054,
            "samples_per_second_per_gpu": 40685.25547378007,
            "loss_sequences_lower_95": 0.9801664757916307,
            "loss_sequences_upper_95": 1.1550525334876354,
            "loss_tokens_lower_95": 0.9320603312804171,
            "loss_tokens_upper_95": 1.0261935993776483,
            "sequences": 127,
            "tokens": 4071,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_complex/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 3.4098434963741817,
            "data_time": 0.14815501868724823,
            "batch_time": 0.24801155924797058,
            "samples_per_second": 238261.0758476681,
            "samples_per_second_per_gpu": 29782.634480958513,
            "loss_sequences_lower_95": 3.0078222171680347,
            "loss_sequences_upper_95": 3.813383989076357,
            "loss_tokens_lower_95": 2.8043245574574414,
            "loss_tokens_upper_95": 3.8935314131371763,
            "sequences": 37,
            "tokens": 162,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval_return_simple/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.8804924226388698,
            "data_time": 0.09485907107591629,
            "batch_time": 0.286581851541996,
            "samples_per_second": 351668.14718989795,
            "samples_per_second_per_gpu": 43958.518398737244,
            "loss_sequences_lower_95": 0.8748326720260993,
            "loss_sequences_upper_95": 1.0213606927453018,
            "loss_tokens_lower_95": 0.8251899882221141,
            "loss_tokens_upper_95": 0.9003851200293052,
            "sequences": 164,
            "tokens": 5945,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.5/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.923068601183775,
            "data_time": 0.09717762470245361,
            "batch_time": 0.28897757828235626,
            "samples_per_second": 351238.0548129139,
            "samples_per_second_per_gpu": 43904.756851614235,
            "loss_sequences_lower_95": 0.9427098972041432,
            "loss_sequences_upper_95": 1.0758403196567443,
            "loss_tokens_lower_95": 0.8628361450209628,
            "loss_tokens_upper_95": 0.9280927638173229,
            "sequences": 164,
            "tokens": 8527,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.25/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.8922452254266273,
            "data_time": 0.0918767899274826,
            "batch_time": 0.2837551608681679,
            "samples_per_second": 352434.6370938891,
            "samples_per_second_per_gpu": 44054.32963673614,
            "loss_sequences_lower_95": 0.8143672873334187,
            "loss_sequences_upper_95": 0.9683012450613626,
            "loss_tokens_lower_95": 0.8460189020859915,
            "loss_tokens_upper_95": 0.9456712193020189,
            "sequences": 164,
            "tokens": 3478,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval-0.75/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.9867455548629528,
            "data_time": 0.09227565675973892,
            "batch_time": 0.2840319722890854,
            "samples_per_second": 352583.02336559823,
            "samples_per_second_per_gpu": 44072.87792069978,
            "loss_sequences_lower_95": 0.9814036299542682,
            "loss_sequences_upper_95": 1.1025744973159417,
            "loss_tokens_lower_95": 0.9249324382651261,
            "loss_tokens_upper_95": 0.9879567559263045,
            "sequences": 164,
            "tokens": 10272,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/human_eval/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.8920376582175308,
            "data_time": 0.0933821052312851,
            "batch_time": 0.28182952105998993,
            "samples_per_second": 349953.0216403972,
            "samples_per_second_per_gpu": 43744.12770504965,
            "loss_sequences_lower_95": 0.8736177716936384,
            "loss_sequences_upper_95": 0.9582891570855372,
            "loss_tokens_lower_95": 0.8529855792081018,
            "loss_tokens_upper_95": 0.9011048138047492,
            "sequences": 161,
            "tokens": 17095,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_cpp/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 0.8449624680527826,
            "data_time": 0.08867323398590088,
            "batch_time": 0.2805200591683388,
            "samples_per_second": 353836.76643751044,
            "samples_per_second_per_gpu": 44229.595804688804,
            "loss_sequences_lower_95": 0.8750270890026558,
            "loss_sequences_upper_95": 0.967089443671994,
            "loss_tokens_lower_95": 0.786276138793136,
            "loss_tokens_upper_95": 0.8299789559316606,
            "sequences": 164,
            "tokens": 16590,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/checkpoints/epoch_6.pt",
            "val_data": [
                "/admin/<scrub>/scrub/training/eval_data/processed_human_eval_js/shard-0000000.tar"
            ],
            "model": "open_lm_1b"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/rpj-open_lm_1b-1.0/params.txt",
    "uuid": "affe7ca4-aee5-4b58-bdeb-01234f3722ad",
    "creation_date": "2023_12_13-16_58_57"
}