{
  "gpu_0/results_2025-12-20_04-45-58": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2025-12-20_04-45-58",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/01/combined_2025-12-20_04-45-52/gpu_0/results_2025-12-20_04-45-58",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 0,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "baseline",
        "ft_forget": false,
        "log_loss": true
      },
      {
        "name": "maxent",
        "ft_forget": true,
        "me_alpha_retain": 15
      },
      {
        "name": "maxent",
        "ft_forget": true,
        "me_alpha_retain": 30
      },
      {
        "name": "filtering",
        "ft_forget": true
      },
      {
        "name": "coreftaux",
        "ft_forget": true,
        "alpha": 1.0,
        "beta": 0.5
      },
      {
        "name": "routed",
        "arch": "lora",
        "ordered": true,
        "ft_forget": true,
        "alpha": 1.0,
        "beta": 0.5,
        "lora_attn": true,
        "lora_mlp": true,
        "lora_rank": 32
      },
      {
        "name": "routed",
        "arch": "demix",
        "ft_forget": true,
        "expert_dist": "equal"
      },
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "aux_route_prc": 0.75,
        "robust_prc": 0.5,
        "expert_dist": "prc"
      }
    ],
    "do_cleanup_distributed": true,
    "process_id": 0
  },
  "gpu_0/results_2025-12-20_08-34-31": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2025-12-20_08-34-31",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/01/combined_2025-12-20_04-45-52/gpu_0/results_2025-12-20_08-34-31",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 1,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "baseline",
        "ft_forget": false,
        "log_loss": true
      },
      {
        "name": "maxent",
        "ft_forget": true,
        "me_alpha_retain": 15
      },
      {
        "name": "maxent",
        "ft_forget": true,
        "me_alpha_retain": 30
      },
      {
        "name": "filtering",
        "ft_forget": true
      },
      {
        "name": "coreftaux",
        "ft_forget": true,
        "alpha": 1.0,
        "beta": 0.5
      },
      {
        "name": "routed",
        "arch": "lora",
        "ordered": true,
        "ft_forget": true,
        "alpha": 1.0,
        "beta": 0.5,
        "lora_attn": true,
        "lora_mlp": true,
        "lora_rank": 32
      },
      {
        "name": "routed",
        "arch": "demix",
        "ft_forget": true,
        "expert_dist": "equal"
      },
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "aux_route_prc": 0.75,
        "robust_prc": 0.5,
        "expert_dist": "prc"
      }
    ],
    "do_cleanup_distributed": true,
    "process_id": 0
  },
  "gpu_0/results_2025-12-20_12-22-28": {
    "data_dirs": [
      "/workspace/gradient-routing/experiments/ICML-Codebase/src/data/stories"
    ],
    "timestamp": "2025-12-20_12-22-28",
    "log_level": "INFO",
    "res_dir": "/workspace/gradient-routing/experiments/ICML-Codebase/src/results/stories/01/combined_2025-12-20_04-45-52/gpu_0/results_2025-12-20_12-22-28",
    "aux_batch_limit": null,
    "core_batch_limit": null,
    "seed": 2,
    "batch_size": 128,
    "epochs": 1,
    "lr": 0.005,
    "lr_schedule": true,
    "arbsub": false,
    "test_ood": false,
    "do_compile": true,
    "do_save_model": false,
    "aux_labels": [
      "a-deadline-or-time-limit",
      "alien-encounters",
      "bygone-eras",
      "cultural-traditions"
    ],
    "ctx_len": 256,
    "num_layers": 8,
    "embed_dim": 512,
    "mlp_dim": 2048,
    "gen_samples": false,
    "stages": [
      {
        "name": "baseline",
        "ft_forget": false,
        "log_loss": true
      },
      {
        "name": "maxent",
        "ft_forget": true,
        "me_alpha_retain": 15
      },
      {
        "name": "maxent",
        "ft_forget": true,
        "me_alpha_retain": 30
      },
      {
        "name": "filtering",
        "ft_forget": true
      },
      {
        "name": "coreftaux",
        "ft_forget": true,
        "alpha": 1.0,
        "beta": 0.5
      },
      {
        "name": "routed",
        "arch": "lora",
        "ordered": true,
        "ft_forget": true,
        "alpha": 1.0,
        "beta": 0.5,
        "lora_attn": true,
        "lora_mlp": true,
        "lora_rank": 32
      },
      {
        "name": "routed",
        "arch": "demix",
        "ft_forget": true,
        "expert_dist": "equal"
      },
      {
        "name": "routed",
        "arch": "moe",
        "ordered": false,
        "ft_forget": true,
        "aux_route_prc": 0.75,
        "robust_prc": 0.5,
        "expert_dist": "prc"
      }
    ],
    "do_cleanup_distributed": true,
    "process_id": 0
  }
}