# @package _global_

# Hydra defaults list: each `override /<group>: <option>` entry replaces the
# option already selected for that config group (here `env` and `agent`).
defaults:
  - override /env: gridworld/doors-dynamics
  - override /agent: dynamics_fb

# Top-level run settings.
# Underscores are digit separators accepted by YAML 1.1 loaders such as PyYAML
# (500_000 loads as the integer 500000).
train_steps: 500_000

# NOTE(review): presumably both intervals are counted in training steps --
# confirm against the training loop.
log_interval: 5_000
eval_interval: 20_000

# Labels for this run.
# NOTE(review): presumably consumed by the experiment logger -- confirm.
tags: [dynamics-fb, doors, discrete]

# Agent settings for this experiment. With the `_global_` package directive at
# the top of the file, these keys land in the root-level `agent` node, i.e. the
# same node populated by the `/agent: dynamics_fb` entry in the defaults list.
agent:
  # Written with a decimal point so that YAML 1.1 loaders (e.g. plain PyYAML)
  # read a float; a bare `3e-4` is loaded as the string "3e-4" by such loaders.
  lr: 3.0e-4
  batch_size: 512

  # FB-specific
  z_dim: 100
  z_mix_ratio: 0.5
  boltzmann: true

  # F network
  f_hidden_dims: [1024, 512, 512]  # F network hidden dimensions.
  f_layer_norm: true  # Whether to use layer normalization.

  # B network
  b_hidden_dims: [512, 512, 512]  # B network hidden dimensions.
  b_layer_norm: true  # Whether to use layer normalization.

  # Dynamics-adaptive settings
  # NOTE(review): n_blocks / n_heads / emb_dim / mlp_dim look like the sizes of
  # an attention-based encoder -- confirm against the agent implementation.
  number_of_meta_envs: 3
  use_context: true
  dyn_encoder_warmup_steps: 20000
  n_blocks: 4
  n_heads: 4
  emb_dim: 72
  mlp_dim: 256
  output_dim: 4
  kappa: 10
  context_len: 100
  world_pred_hidden: [128, 128, 128]
  use_masked_attention: false
  use_mean_embedding: true
  cosine_schedule: false

  # Misc
  discount: 0.99
  tau: 0.01  # Target network update rate.
