{
  "gpu_2/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_2/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.1
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 2
  },
  "gpu_4/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_4/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.2
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 4
  },
  "gpu_3/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_3/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.15
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 3
  },
  "gpu_6/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_6/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.05
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 6
  },
  "gpu_7/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_7/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.1
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 7
  },
  "gpu_1/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_1/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.05
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 1
  },
  "gpu_5/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_5/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 5
  },
  "gpu_0/results_2026-01-25_22-29-30": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-29-30",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_0/results_2026-01-25_22-29-30",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 0
  },
  "gpu_2/results_2026-01-25_22-50-53": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-50-53",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_2/results_2026-01-25_22-50-53",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.15
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 2
  },
  "gpu_1/results_2026-01-25_22-51-04": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-51-04",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_1/results_2026-01-25_22-51-04",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.15
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 1
  },
  "gpu_4/results_2026-01-25_22-50-55": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-50-55",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_4/results_2026-01-25_22-50-55",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.2
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 4
  },
  "gpu_7/results_2026-01-25_22-51-01": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-51-01",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_7/results_2026-01-25_22-51-01",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.1
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 7
  },
  "gpu_5/results_2026-01-25_22-51-12": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-51-12",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_5/results_2026-01-25_22-51-12",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.2
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 5
  },
  "gpu_6/results_2026-01-25_22-51-00": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-51-00",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_6/results_2026-01-25_22-51-00",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.05
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 6
  },
  "gpu_3/results_2026-01-25_22-50-58": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_22-50-58",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_22-29-22/gpu_3/results_2026-01-25_22-50-58",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.0
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 3
  }
}