{
    "stages": [
        {
            "name": "routed",
            "arch": "moe",
            "ordered": false,
            "ft_forget": true,
            "expert_dist": "prc_one",
            "aux_route_prc": 0.75,
            "robust_prc": 0.1
        }
    ],
    "run": {
        "seed": 3,
        "data_dirs": [
            "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
        ],
        "arbsub": false,
        "test_ood": false,
        "batch_size": 128,
        "epochs": 1,
        "lr": 0.005,
        "aux_labels": [
            "a-deadline-or-time-limit",
            "alien-encounters",
            "bygone-eras",
            "cultural-traditions"
        ],
        "core_labels": [
            "dinosaurs",
            "dream-worlds",
            "enchanted-forests",
            "fairy-tales",
            "fantasy-worlds",
            "gardens",
            "giant-creatures",
            "haunted-places",
            "hidden-treasures",
            "holidays",
            "invisibility",
            "island-adventures",
            "living-objects",
            "lost-cities",
            "lost-civilizations",
            "magical-lands",
            "magical-objects",
            "miniature-worlds",
            "mysterious-maps",
            "mystical-creatures",
            "outer-space",
            "pirates",
            "riddles",
            "robots-and-technology",
            "royal-kingdoms",
            "school-life",
            "seasonal-changes",
            "secret-societies",
            "shape-shifting",
            "sibling-rivalry",
            "snowy-adventures",
            "space-exploration",
            "sports",
            "subterranean-worlds",
            "superheroes",
            "talking-animals",
            "the-arts",
            "the-sky",
            "time-travel",
            "treasure-hunts",
            "undercover-missions",
            "underwater-adventures",
            "unusual-vehicles",
            "virtual-worlds"
        ],
        "lr_schedule": true,
        "device": "cuda",
        "timestamp": "2026-01-25_23-34-44",
        "num_gpus": 1,
        "do_compile": true,
        "core_batch_num": 15291,
        "aux_batch_num": 1406,
        "accumulation_steps": 1,
        "optimize_routed_training": false,
        "num_baseline_params": 26257920,
        "len_core": 15291,
        "len_aux": 1406
    },
    "model": {
        "ctx_len": 256,
        "vocab_size": 4096,
        "num_layers": 8,
        "target_layers": [
            0,
            1,
            2,
            3,
            4,
            5,
            6,
            7
        ],
        "num_heads": 8,
        "num_key_value": 2,
        "attn_bias": true,
        "eos_token_id": 1,
        "embed_dim": 512,
        "mlp_dim": 2048
    }
}