{
  "gpu_0/results_2025-12-31_20-50-08": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2025-12-31_20-50-08",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/01/combined_2025-12-31_20-50-01/gpu_0/results_2025-12-31_20-50-08",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "demix",
        "ft_forget": true,
        "expert_dist": "equal_one"
      },
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "aux_route_prc": 0.75,
        "robust_prc": 0.5,
        "expert_dist": "prc_one"
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 0
  },
  "gpu_2/results_2025-12-31_20-50-08": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2025-12-31_20-50-08",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/01/combined_2025-12-31_20-50-01/gpu_2/results_2025-12-31_20-50-08",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "demix",
        "ft_forget": true,
        "expert_dist": "equal_one"
      },
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "aux_route_prc": 0.75,
        "robust_prc": 0.5,
        "expert_dist": "prc_one"
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 2
  },
  "gpu_1/results_2025-12-31_20-50-08": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2025-12-31_20-50-08",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/01/combined_2025-12-31_20-50-01/gpu_1/results_2025-12-31_20-50-08",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "demix",
        "ft_forget": true,
        "expert_dist": "equal_one"
      },
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "aux_route_prc": 0.75,
        "robust_prc": 0.5,
        "expert_dist": "prc_one"
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 1
  }
}