# @package _global_

# Hydra defaults list: replace the `env` and `agent` config-group choices
# made by the primary config with the options named below.
defaults:
  - override /env: gridworld/4rooms-dynamics
  - override /agent: dynamics_fb

# Training length (units presumably environment/gradient steps — confirm
# against the training loop).
train_steps: 200_001

# Logging and evaluation cadence (presumably in the same step units as
# `train_steps` — confirm).
log_interval: 10_000
eval_interval: 30_000

# Labels attached to this experiment configuration.
tags:
  - dynamics-fb
  - 4rooms
  - discrete

# Overrides for the agent config group (`dynamics_fb` is selected in the
# defaults list of this file).
agent:
  # Written with an explicit mantissa dot: stock YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `3e-4` as a string instead of a float.
  lr: 3.0e-4
  batch_size: 1024

  # FB specific
  z_dim: 150
  z_mix_ratio: 0.5
  boltzmann: true

  # F network
  f_hidden_dims: [1024, 512, 512]  # Hidden layer sizes of the F network.
  f_layer_norm: true  # Whether to use layer normalization.

  # B network
  b_hidden_dims: [512, 512, 512]  # Hidden layer sizes of the B network.
  b_layer_norm: true  # Whether to use layer normalization.

  # Dynamics-adaptive settings
  use_context: true  # Whether the agent is dynamics-adaptive.
  number_of_meta_envs: 50
  # NOTE(review): this value was flagged with a bare "###" marker and no
  # explanation — confirm it is the intended warmup length.
  dyn_encoder_warmup_steps: 80000
  n_blocks: 4
  n_heads: 4
  emb_dim: 96
  mlp_dim: 256
  output_dim: 8
  context_len: 100
  use_masked_attention: false
  use_mean_embedding: true
  cosine_schedule: false
  world_pred_hidden: [256, 256, 256]

  # Misc
  discount: 0.99
  tau: 0.01  # Target network update rate.
  num_eval_episodes: 20
