{
  "gpu_7/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_7/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 4,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.2
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 7
  },
  "gpu_6/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_6/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 4,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.15
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 6
  },
  "gpu_3/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_3/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 3,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.2
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 3
  },
  "gpu_5/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_5/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 4,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.1
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 5
  },
  "gpu_4/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_4/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 4,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.05
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 4
  },
  "gpu_1/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_1/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 3,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.1
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 1
  },
  "gpu_2/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_2/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 3,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.15
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 2
  },
  "gpu_0/results_2026-01-25_23-34-44": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-34-44",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_0/results_2026-01-25_23-34-44",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 3,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.05
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 0
  },
  "gpu_6/results_2026-01-25_23-52-51": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-52-51",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_6/results_2026-01-25_23-52-51",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 5,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.1
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 6
  },
  "gpu_5/results_2026-01-25_23-52-53": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-52-53",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_5/results_2026-01-25_23-52-53",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 5,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.2
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 5
  },
  "gpu_7/results_2026-01-25_23-52-49": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-52-49",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_7/results_2026-01-25_23-52-49",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 5,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.05
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 7
  },
  "gpu_3/results_2026-01-25_23-52-51": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2026-01-25_23-52-51",
    "log_level": "DEBUG",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/06/combined_2026-01-25_23-34-36/gpu_3/results_2026-01-25_23-52-51",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 5,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "core_labels": null,
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "stages": [
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "expert_dist": "prc_one",
        "aux_route_prc": 0.75,
        "robust_prc": 0.15
      }
    ],
    "do_cleanup_distributed": true,
    "accumulation_steps": 1,
    "optimize_routed_training": false,
    "process_id": 3
  }
}