{
    "name": "rw_original-open_lm_1b-16.0",
    "dataset_name": "rw_original",
    "dataset_uuid": "7e0f5507-aa36-4d8c-9026-d049f885adf7",
    "hyperparameters": {
        "model": "open_lm_1b",
        "tokens": 460734464000,
        "warmup": 5000,
        "lr": 0.003,
        "wd": 0.033,
        "cd": 3e-05,
        "global_bs": 256,
        "acc": 1,
        "qk_norm": true,
        "z_loss": 0.0001,
        "grad_checkpointing": false,
        "params": 1439795200,
        "params_no_embed": 1336510464,
        "fsdp_flags": [
            "--fsdp",
            "--fsdp-amp",
            "--fsdp-limit-all-gathers"
        ],
        "chinchilla_multiplier": 16.0,
        "seed": 124
    },
    "checkpoint_url": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
    "open_lm_version": "0.0.21",
    "open_lm_args": [
        "--workers",
        "2",
        "--precision",
        "amp_bfloat16",
        "--global-batch-size",
        "256",
        "--log-every-n-steps",
        "20",
        "--grad-clip-norm",
        "1",
        "--lr",
        "0.003",
        "--warmup",
        "5000",
        "--model",
        "open_lm_1b",
        "--wd",
        "0.033",
        "--beta2",
        "0.95",
        "--epochs",
        "5",
        "--resume",
        "latest",
        "--seed",
        "124",
        "--accum-freq",
        "1",
        "--model-norm",
        "gain_only_lp_layer_norm",
        "--delete-previous-checkpoint",
        "--lr-cooldown-end",
        "3e-05",
        "--logs",
        "./logs/1765",
        "--train-num-samples",
        "92146892800",
        "--dataset-manifest",
        "<scrub>/openlm/scrub/datasets/refined_web_tokenized/manifest.jsonl",
        "--data-key",
        "json.gz",
        "--name",
        "rw_original-open_lm_1b-16.0",
        "--fsdp",
        "--fsdp-amp",
        "--fsdp-limit-all-gathers",
        "--val-data",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/open_lm_val/shard_00000000.tar",
        "/<scrub>/ubuntu/research/openlm/scrub/training/eval_data/c4_val/shard-0000000.tar",
        "--val-frequency",
        "5",
        "--val-data-key",
        "json",
        "txt",
        "--val-tok-ci",
        "--val-seq-ci",
        "--val-max-pop-ci",
        "300000",
        "--qk-norm",
        "--z-loss",
        "0.0001",
        "--remote-sync",
        "<scrub>/openlm/scrub/experiments/1b_16x_rpj-original/"
    ],
    "results": [
        {
            "loss": 2.324344128370285,
            "data_time": 0.18080567568540573,
            "batch_time": 1.4381960108876228,
            "samples_per_second": 364155.9124995716,
            "samples_per_second_per_gpu": 45519.48906244645,
            "loss_sequences_lower_95": 2.274986654520035,
            "loss_sequences_upper_95": 2.3733268558979033,
            "loss_tokens_lower_95": 2.31432978125,
            "loss_tokens_upper_95": 2.3343806614583333,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_000.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.3105469197034836,
            "data_time": 0.15725654363632202,
            "batch_time": 0.7148265242576599,
            "samples_per_second": 406391.2785423117,
            "samples_per_second_per_gpu": 50798.90981778896,
            "loss_sequences_lower_95": 2.255956107378006,
            "loss_sequences_upper_95": 2.363364887237549,
            "loss_tokens_lower_95": 2.300286796875,
            "loss_tokens_upper_95": 2.3206971770833333,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_010.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.400407448410988,
            "data_time": 0.1579040363430977,
            "batch_time": 0.7147950828075409,
            "samples_per_second": 406740.23052018107,
            "samples_per_second_per_gpu": 50842.52881502263,
            "loss_sequences_lower_95": 2.3522773146629334,
            "loss_sequences_upper_95": 2.4492549180984495,
            "loss_tokens_lower_95": 2.390152796875,
            "loss_tokens_upper_95": 2.4104546406250003,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_020.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.4107843190431595,
            "data_time": 0.16022057831287384,
            "batch_time": 0.7170708924531937,
            "samples_per_second": 406490.9215882705,
            "samples_per_second_per_gpu": 50811.365198533815,
            "loss_sequences_lower_95": 2.3588970422744753,
            "loss_sequences_upper_95": 2.460795158147812,
            "loss_tokens_lower_95": 2.400778192708333,
            "loss_tokens_upper_95": 2.4210432500000003,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_030.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.432395860552788,
            "data_time": 0.16005288064479828,
            "batch_time": 0.716881513595581,
            "samples_per_second": 406476.3414737478,
            "samples_per_second_per_gpu": 50809.542684218475,
            "loss_sequences_lower_95": 2.381284350156784,
            "loss_sequences_upper_95": 2.4823881149291993,
            "loss_tokens_lower_95": 2.4222147031250003,
            "loss_tokens_upper_95": 2.44283365625,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_040.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.485835514962673,
            "data_time": 0.15774134546518326,
            "batch_time": 0.7141750678420067,
            "samples_per_second": 407225.4197962809,
            "samples_per_second_per_gpu": 50903.177474535114,
            "loss_sequences_lower_95": 2.4390162229537964,
            "loss_sequences_upper_95": 2.5327403426170347,
            "loss_tokens_lower_95": 2.4757410052083335,
            "loss_tokens_upper_95": 2.4961710624999998,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_050.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.5458331927657127,
            "data_time": 0.15664316713809967,
            "batch_time": 0.7135217636823654,
            "samples_per_second": 407082.76663740684,
            "samples_per_second_per_gpu": 50885.345829675854,
            "loss_sequences_lower_95": 2.4965257585048675,
            "loss_sequences_upper_95": 2.593232923746109,
            "loss_tokens_lower_95": 2.5353701197916667,
            "loss_tokens_upper_95": 2.5559591875,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_060.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.5835645720362663,
            "data_time": 0.1624009981751442,
            "batch_time": 0.7195647358894348,
            "samples_per_second": 405608.2591441707,
            "samples_per_second_per_gpu": 50701.032393021334,
            "loss_sequences_lower_95": 2.5351644277572634,
            "loss_sequences_upper_95": 2.63284826874733,
            "loss_tokens_lower_95": 2.5734308854166668,
            "loss_tokens_upper_95": 2.594019020833333,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_070.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6034414023160934,
            "data_time": 0.16722026467323303,
            "batch_time": 0.7244242206215858,
            "samples_per_second": 404831.86056885624,
            "samples_per_second_per_gpu": 50603.98257110703,
            "loss_sequences_lower_95": 2.5610094904899596,
            "loss_sequences_upper_95": 2.6455617964267732,
            "loss_tokens_lower_95": 2.5929959322916667,
            "loss_tokens_upper_95": 2.613821578125,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_080.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.6136318743228912,
            "data_time": 0.16552801430225372,
            "batch_time": 0.7221404388546944,
            "samples_per_second": 405752.53973520687,
            "samples_per_second_per_gpu": 50719.06746690086,
            "loss_sequences_lower_95": 2.5721293091773987,
            "loss_sequences_upper_95": 2.653644698858261,
            "loss_tokens_lower_95": 2.6032690416666666,
            "loss_tokens_upper_95": 2.6240020416666665,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_090.tar"
            ],
            "model": "open_lm_1b"
        },
        {
            "loss": 2.7552285566926003,
            "data_time": 0.155470073223114,
            "batch_time": 0.7119228541851044,
            "samples_per_second": 407546.59411879466,
            "samples_per_second_per_gpu": 50943.32426484933,
            "loss_sequences_lower_95": 2.671890467405319,
            "loss_sequences_upper_95": 2.864807790517807,
            "loss_tokens_lower_95": 2.7439778854166668,
            "loss_tokens_upper_95": 2.7666404322916667,
            "sequences": 512,
            "tokens": 1048576,
            "checkpoint_path": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/checkpoints/epoch_6.pt",
            "val_data": [
                "training/eval_data/val_tok_mult/de-en/val_de-en_100.tar"
            ],
            "model": "open_lm_1b"
        }
    ],
    "params_url": "<scrub>/scrub_experiments_v3/rw_original-open_lm_1b-16.0/params.txt",
    "uuid": "3c5b74df-39a3-47d0-87af-15d5deda1f90",
    "creation_date": "2024_01_29-13_20_48"
}