{
    "stages": [
        {
            "name": "baseline",
            "ft_forget": false,
            "log_loss": true
        },
        {
            "name": "maxent",
            "ft_forget": true,
            "me_alpha_retain": 100,
            "me_steps": 400,
            "me_lr": 5e-05
        }
    ],
    "run": {
        "seed": 0,
        "data_dirs": [
            "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/fineweb",
            "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/bigcode",
            "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/arxiv"
        ],
        "arbsub": false,
        "test_ood": false,
        "batch_size": 16,
        "epochs": 1,
        "lr": 0.0012,
        "aux_labels": [
            "bigcode",
            "biology",
            "nuclear",
            "cyber"
        ],
        "core_labels": [
            "fineweb"
        ],
        "lr_schedule": true,
        "device": "cuda",
        "timestamp": "2026-01-16_20-32-02",
        "num_gpus": 8,
        "do_compile": true,
        "do_save_model": true,
        "gen_samples": false,
        "core_batch_num": 106775,
        "aux_batch_num": 5339,
        "accumulation_steps": 1,
        "baseline_checkpoint": "src/results/realistic/02/combined_2026-01-11_05-40-00/results_2026-01-11_05-40-00/baseline/baseline_model.pth",
        "len_core": 106775,
        "len_aux": 5340
    },
    "model": {
        "ctx_len": 1024,
        "vocab_size": 50257,
        "num_layers": 22,
        "target_layers": [
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7,
            8,
            9,
            10,
            11,
            12,
            13,
            14,
            15,
            16,
            17,
            18,
            19,
            20,
            21
        ],
        "num_heads": 8,
        "num_key_value": 2,
        "attn_bias": true,
        "eos_token_id": 50256,
        "embed_dim": 1536,
        "mlp_dim": 6144
    }
}