{
  "gpu_4/results_2026-01-09_21-20-05": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-05",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_4/results_2026-01-09_21-20-05",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 1.0,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 4
  },
  "gpu_2/results_2026-01-09_21-20-05": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-05",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_2/results_2026-01-09_21-20-05",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.5,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 2
  },
  "gpu_7/results_2026-01-09_21-20-06": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-06",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_7/results_2026-01-09_21-20-06",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 7
  },
  "gpu_6/results_2026-01-09_21-20-06": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-06",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_6/results_2026-01-09_21-20-06",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.25
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 6
  },
  "gpu_1/results_2026-01-09_21-20-05": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-05",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_1/results_2026-01-09_21-20-05",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.25,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 1
  },
  "gpu_3/results_2026-01-09_21-20-05": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-05",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_3/results_2026-01-09_21-20-05",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 3
  },
  "gpu_0/results_2026-01-09_21-20-05": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-05",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_0/results_2026-01-09_21-20-05",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.0,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 0
  },
  "gpu_5/results_2026-01-09_21-20-05": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-20-05",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_5/results_2026-01-09_21-20-05",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 5
  },
  "gpu_4/results_2026-01-09_21-49-01": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-49-01",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_4/results_2026-01-09_21-49-01",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.75
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 4
  },
  "gpu_2/results_2026-01-09_21-49-26": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-49-26",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_2/results_2026-01-09_21-49-26",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 1.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 2
  },
  "gpu_1/results_2026-01-09_21-49-46": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-49-46",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_1/results_2026-01-09_21-49-46",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.5,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 1
  },
  "gpu_3/results_2026-01-09_21-49-49": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-49-49",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_3/results_2026-01-09_21-49-49",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 3
  },
  "gpu_6/results_2026-01-09_21-49-45": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-49-45",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_6/results_2026-01-09_21-49-45",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.25,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 6
  },
  "gpu_7/results_2026-01-09_21-49-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-49-44",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_7/results_2026-01-09_21-49-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.0,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 7
  },
  "gpu_0/results_2026-01-09_21-50-09": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-50-09",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_0/results_2026-01-09_21-50-09",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 1.0,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 0
  },
  "gpu_5/results_2026-01-09_21-50-15": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_21-50-15",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_5/results_2026-01-09_21-50-15",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 5
  },
  "gpu_2/results_2026-01-09_22-16-58": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-16-58",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_2/results_2026-01-09_22-16-58",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 2
  },
  "gpu_4/results_2026-01-09_22-16-39": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-16-39",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_4/results_2026-01-09_22-16-39",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.25
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 4
  },
  "gpu_1/results_2026-01-09_22-17-15": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-17-15",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_1/results_2026-01-09_22-17-15",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.75
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 1
  },
  "gpu_3/results_2026-01-09_22-17-28": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-17-28",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_3/results_2026-01-09_22-17-28",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 1.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 3
  },
  "gpu_7/results_2026-01-09_22-17-41": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-17-41",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_7/results_2026-01-09_22-17-41",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.25,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 7
  },
  "gpu_5/results_2026-01-09_22-18-14": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-18-14",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_5/results_2026-01-09_22-18-14",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 5
  },
  "gpu_0/results_2026-01-09_22-17-45": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-17-45",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_0/results_2026-01-09_22-17-45",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.5,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 0
  },
  "gpu_6/results_2026-01-09_22-17-34": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-17-34",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_6/results_2026-01-09_22-17-34",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.0,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 6
  },
  "gpu_2/results_2026-01-09_22-44-00": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-44-00",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_2/results_2026-01-09_22-44-00",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 1.0,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 2
  },
  "gpu_3/results_2026-01-09_22-44-58": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-44-58",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_3/results_2026-01-09_22-44-58",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.5
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 3
  },
  "gpu_4/results_2026-01-09_22-44-20": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-44-20",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_4/results_2026-01-09_22-44-20",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 4
  },
  "gpu_1/results_2026-01-09_22-44-58": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-44-58",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_1/results_2026-01-09_22-44-58",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.25
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 1
  },
  "gpu_7/results_2026-01-09_22-45-23": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-45-23",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_7/results_2026-01-09_22-45-23",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.75
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 7
  },
  "gpu_5/results_2026-01-09_22-45-26": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-09_22-45-26",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-09_21-19-57/gpu_5/results_2026-01-09_22-45-26",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 1.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "process_id": 5
  }
}