# Compute-device selector. `auto` presumably lets the consumer choose the
# device at runtime — confirm the accepted values against the config loader.
device: auto
# Environment settings: grid size, episode step cap, obstacle rates, an `ood`
# sub-section, and the per-event reward terms.
env:
  grid_size: 8
  max_steps: 80
  # Two-element list; presumably a [low, high] sampling range — TODO confirm
  # against the environment code.
  obstacle_rate:
  - 0.1
  - 0.15
  # Settings for the `ood` variants (presumably "out-of-distribution" —
  # confirm). Both obstacle_rate values here are higher than the pair above.
  ood:
    obstacle_rate:
    - 0.25
    - 0.35
    patterns:
    - corridor
    - rooms
    # NOTE(review): looks like a probability of flipping a sensor reading —
    # confirm the unit and range with the consumer.
    sensor_flip: 0.1
  # Reward terms. Only reward_success is positive; the other three are
  # negative values, i.e. penalties.
  reward_collision: -5.0
  reward_step: -0.5
  reward_success: 50.0
  reward_token_cost: -0.05
# Evaluation settings.
evaluation:
  calibration_bins: 10
  ccm_lambda: 0.5
  # NOTE(review): logging.eval_interval carries the same value (2). Confirm
  # which of the two keys the consumer actually reads.
  eval_frequency: 2
  num_episodes: 20
  # Four variant names. NOTE(review): `corridors` is plural here while
  # env.ood.patterns lists `corridor` (singular) — verify the consumer maps
  # between the two spellings.
  ood_variants:
  - high_obstacles
  - sensor_noise
  - corridors
  - rooms
  # NOTE(review): the same two keys, with the same values, also appear under
  # `regularizers`. Confirm which copy is authoritative and keep them in sync.
  perturbation_kl_target: 0.02
  perturbation_tolerance: 0.005
# Experiment identifier; presumably used to label runs and outputs — confirm.
experiment: maptalk_one_way_baseline
# Logging and checkpointing settings. The entity/project/tags/use_wandb keys
# presumably configure Weights & Biases — confirm against the logger setup.
logging:
  checkpoint_interval: 2
  # Explicit null: no entity is set in this file. NOTE(review): use_wandb is
  # true below, so the consumer presumably falls back to a default entity —
  # confirm.
  entity: null
  eval_interval: 2
  log_interval: 1
  project: bica-baselines
  save_best: true
  # Free-form labels attached to the run.
  tags:
  - maptalk
  - single_directional
  - one_way
  - baseline
  - rlhf_style
  use_wandb: true
# Mixed precision is switched off for this experiment.
mixed_precision: false
# Model size settings; every value is an integer dimension or vocabulary
# size. Keys prefixed `ai_` / `human_` hold per-side sizes.
# NOTE(review): some unprefixed keys repeat a prefixed value (message_embed_dim
# vs ai_/human_message_embed_dim = 32; gru_hidden vs human_gru_hidden = 128).
# Confirm which keys the model code reads.
model:
  ai_latent_dim: 256
  ai_message_embed_dim: 32
  ai_obs_dim: 22
  ai_vocab_size: 64
  code_dim: 16
  embed_dim: 128
  gru_hidden: 128
  hidden_dim: 256
  human_gru_hidden: 128
  human_latent_dim: 256
  human_message_embed_dim: 32
  human_obs_dim: 192
  human_vocab_size: 32
  instructor_embed_dim: 16
  message_embed_dim: 32
  policy_hidden_dim: 256
  protocol_hidden_dim: 128
  value_hidden_dim: 256
# Relative path; presumably resolved against the working directory of the
# run — confirm.
output_dir: results
# Protocol settings. Both enable_* switches are off in this configuration.
protocol:
  enable_mutual_adaptation: false
  enable_protocol_learning: false
  # NOTE(review): presumably a Gumbel-softmax temperature schedule going from
  # gumbel_tau_start down to gumbel_tau_end with tau_decay as the decay
  # factor — confirm, and confirm whether these are read at all while
  # enable_protocol_learning is false.
  gumbel_tau_end: 0.3
  gumbel_tau_start: 1.0
  tau_decay: 0.95
# Regularizer weights. Every weight is 0.0 except lambda_A (0.05).
regularizers:
  beta_ib: 0.0
  ib_weight: 0.0
  kappa_teach: 0.0
  lambda_A: 0.05
  lambda_H: 0.0
  mu_rep: 0.0
  # NOTE(review): the same two keys, with the same values, also appear under
  # `evaluation`. Confirm which copy is authoritative and keep them in sync.
  perturbation_kl_target: 0.02
  perturbation_tolerance: 0.005
# Persistence switches: models are saved, trajectories are not.
save_models: true
save_trajectories: false
# Three random seeds; presumably one run is launched per seed — confirm.
seeds:
- 42
- 123
- 456
# Flags for the one-way setup: ai_adapts_to_human is true while
# human_adapts_to_ai is false.
# NOTE(review): disable_protocol_learning: true agrees with
# protocol.enable_protocol_learning: false, but the same switch appears to be
# encoded twice — confirm which flag the consumer honors.
single_directional:
  ai_adapts_to_human: true
  disable_instructor: true
  disable_protocol_learning: true
  disable_rep_mapper: true
  fixed_human_policy: true
  human_adapts_to_ai: false
  human_uses_simple_heuristics: true
  unidirectional_adaptation: true
# Training hyperparameters. Key names such as ppo_clip and gae_lambda suggest
# PPO with GAE — confirm against the trainer.
train:
  # NOTE(review): batch_episodes and update_frequency (below) are both 32;
  # confirm whether they are meant to move together.
  batch_episodes: 32
  entropy_coef: 0.01
  episodes: 16000
  gae_lambda: 0.95
  gamma: 0.99
  gradient_clip: 0.5
  lr: 0.0003
  optimizer: adamw
  ppo_clip: 0.2
  update_frequency: 32
  value_loss_coef: 0.5
  # Keep the `1.0e-05` spelling (dot plus signed exponent): some YAML 1.1
  # loaders resolve shorter forms such as `1e-5` as a string, not a float.
  weight_decay: 1.0e-05
