# Value grids, presumably swept one at a time in ablation runs -- confirm
# with the experiment launcher. Each grid contains the main-run value that
# is set elsewhere in this file.
ablations:
  # model.code_dim is 16.
  code_dims: [8, 16, 32]
  # protocol.gumbel_tau_start (1.0) and gumbel_tau_end (0.3) both appear here.
  gumbel_temperatures: [0.1, 0.3, 0.5, 1.0]
  # regularizers.ib_weight is 1.0.
  ib_weights: [0.5, 1.0, 2.0]
  # Two-element pairs. The middle pair equals regularizers.kl_budget_a /
  # kl_budget_h (0.05 / 0.03), so the order is presumably
  # [kl_budget_a, kl_budget_h] -- confirm with the loader.
  kl_budgets:
  - [0.01, 0.005]
  - [0.05, 0.03]
  - [0.1, 0.06]
# Named baseline variants. Each entry lists the keys that differ from the
# main configuration; presumably applied as overrides on top of it --
# confirm with the code that loads this section.
baseline:
  # Zeroes lambda_A, lambda_H and mu_rep (0.02, 0.01 and 0.1 under
  # `regularizers`).
  # NOTE(review): mu_rep is also zeroed by `no_mapper`; confirm it belongs
  # in this variant as well.
  no_budget:
    lambda_A: 0.0
    lambda_H: 0.0
    mu_rep: 0.0
  # Zeroes both beta_ib and ib_weight (each 1.0 under `regularizers`).
  no_ib:
    beta_ib: 0.0
    ib_weight: 0.0
  # Sets disable_rep_mapper and zeroes mu_rep.
  no_mapper:
    disable_rep_mapper: true
    mu_rep: 0.0
  # Sets disable_instructor and zeroes kappa_teach (0.05 under
  # `regularizers`).
  no_teacher:
    disable_instructor: true
    kappa_teach: 0.0
  # Sets every disable_* flag used above plus unidirectional_adaptation.
  # Unlike the other variants it leaves all regularizer weights untouched.
  one_way:
    disable_instructor: true
    disable_protocol_learning: true
    disable_rep_mapper: true
    unidirectional_adaptation: true
# Compute device selector. "auto" presumably lets the training code pick the
# device -- confirm which values the consumer accepts.
device: auto
# Environment settings: in-distribution values plus an `ood` sub-section.
env:
  grid_size: 8
  max_steps: 80
  # Two values; presumably a [low, high] range -- confirm with the
  # environment code.
  obstacle_rate: [0.1, 0.15]
  ood:
    # Both values are higher than the in-distribution obstacle_rate above.
    obstacle_rate: [0.25, 0.35]
    # NOTE(review): evaluation.ood_variants spells this "corridors"; confirm
    # whether the two lists are expected to share names.
    patterns: [corridor, rooms]
    sensor_flip: 0.1
  # Reward terms: collision, step and token cost are negative; success is
  # the only positive term.
  reward_collision: -5.0
  reward_step: -0.5
  reward_success: 50.0
  reward_token_cost: -0.05
# Evaluation settings. Units and exact semantics are defined by the
# evaluation code, which is not part of this file.
evaluation:
  calibration_bins: 10
  ccm_lambda: 0.5
  num_episodes: 100
  # NOTE(review): "corridors" here vs "corridor" in env.ood.patterns --
  # confirm whether these names are expected to match.
  ood_variants: [high_obstacles, sensor_noise, corridors, rooms, stress_test]
  perturbation_kl_target: 0.02
  perturbation_tolerance: 0.005
# Experiment identifier for this configuration.
experiment: maptalk_main
# Logging and checkpointing settings. The unit of the *_interval values
# (steps vs. episodes) is not stated in this file -- confirm with the trainer.
logging:
  checkpoint_interval: 100
  # Explicit null: no entity is configured here.
  entity: null
  log_interval: 10
  project: bica-maptalk
  save_best: true
  tags: [maptalk, main_experiment]
  use_wandb: true
# Mixed-precision toggle; off for this configuration.
mixed_precision: false
# Model sizes and architecture flags. Which component each dimension feeds
# is decided by the model code, which is not visible here; the notes below
# only record relationships between values in this file.
model:
  ai_latent_dim: 256
  # Same value as message_embed_dim below (32).
  ai_message_embed_dim: 32
  ai_obs_dim: 22
  ai_vocab_size: 64
  cca_components: 10
  # 16 is the middle entry of ablations.code_dims.
  code_dim: 16
  context_dim: 64
  embed_dim: 128
  gru_hidden_dim: 128
  history_dim: 64
  history_window_size: 10
  human_gru_hidden: 128
  # The human_* sizes differ from their ai_* counterparts
  # (latent 112 vs 256, obs 192 vs 22, vocab 32 vs 64).
  human_latent_dim: 112
  human_obs_dim: 192
  human_vocab_size: 32
  instructor_embed_dim: 16
  instructor_gru_hidden: 64
  instructor_hidden_dim: 128
  mapper_hidden_dim: 64
  message_embed_dim: 32
  num_interventions: 8
  policy_hidden_dim: 256
  protocol_hidden_dim: 128
  protocol_state_dim: 64
  use_gru: true
  value_hidden_dim: 256
# Gumbel temperature schedule: starts at 1.0 and ends at 0.3.
# tau_decay is presumably a multiplicative factor applied per update
# interval -- confirm the schedule and interval with the training code.
protocol:
  gumbel_tau_end: 0.3
  gumbel_tau_start: 1.0
  tau_decay: 0.95
# Regularizer weights and related switches for the main run. Several of
# these keys are zeroed by the variants under `baseline`.
regularizers:
  alpha_lambda: 0.01
  # beta_ib and ib_weight are both zeroed by baseline.no_ib.
  beta_ib: 1.0
  consistency_weight: 0.2
  diversity_weight: 0.1
  ib_weight: 1.0
  # Zeroed by baseline.no_teacher.
  kappa_teach: 0.05
  # This pair (0.05, 0.03) is the middle entry of ablations.kl_budgets.
  kl_budget_a: 0.05
  kl_budget_h: 0.03
  # lambda_A and lambda_H are zeroed by baseline.no_budget.
  lambda_A: 0.02
  lambda_H: 0.01
  mdl_weight: 0.5
  # Zeroed by both baseline.no_budget and baseline.no_mapper.
  mu_rep: 0.1
  # NOTE(review): unclear from this file whether num_projections is used
  # when use_sinkhorn is true -- confirm with the loss code.
  num_projections: 50
  sinkhorn_reg: 0.1
  use_sinkhorn: true
# Five integer seeds; presumably one run per seed -- confirm with the
# experiment launcher.
seeds: [13, 42, 15213, 2025, 4096]
# Target outcomes. Units are not stated in this file; the two-element lists
# are presumably [low, high] ranges -- confirm with whatever consumes them.
targets:
  ccm:
    initial_range: [0.3, 0.5]
    stability_threshold: 0.05
  maptalk_id:
    bas_improvement: 0.1
    steps_reduction: [20, 35]
    tokens_reduction: [40, 60]
  maptalk_ood:
    collision_reduction: 20
    rc_improvement: true
    success_improvement: [8, 15]
# Training hyperparameters. The ppo_* and gae_lambda keys indicate a
# PPO-style trainer; exact usage is defined by the training code.
train:
  batch_episodes: 32
  entropy_coeff: 0.01
  episodes: 16000
  # Unit (episodes vs. updates) is not stated here -- confirm with the trainer.
  eval_interval: 50
  gae_lambda: 0.95
  gamma: 0.99
  grad_clip: 1.0
  kl_coeff: 0.02
  lr: 0.0003
  lr_decay_on_plateau: true
  optimizer: adamw
  # Presumably the plateau patience used with lr_decay_on_plateau -- confirm
  # it is not an early-stopping patience.
  patience: 5
  ppo_clip: 0.2
  ppo_epochs: 5
  value_coeff: 0.5
  weight_decay: 0.0001
