# @package _global_

# Hydra defaults list for this experiment: the `override` entries pick the
# `gridworld/doors-dynamics` option for the `env` group and the `gciql` option
# for the `agent` group.
defaults:
  - override /env: gridworld/doors-dynamics
  - override /agent: gciql

# Total length of the training run.
# NOTE(review): presumably counted in training steps; confirm against the
# training loop that consumes this key.
train_steps: 1_000_000
# Logging and evaluation cadence. The original note here read "For debug",
# so these intervals may be debug settings; confirm before long runs.
log_interval: 5_000
eval_interval: 5_000

# Tags for Weights & Biases (wandb) experiment tracking.
tags: [dynamics_gciql, discrete]

# Agent hyperparameters, layered over the `gciql` agent option selected in the
# defaults list of this file.
agent:
  # Written with an explicit decimal point: a bare `3e-4` does not match the
  # YAML 1.1 float grammar and is loaded as a string by stock PyYAML.
  lr: 3.0e-4
  batch_size: 1024

  # GCIQL
  const_std: true
  actor_hidden_dims: [512, 512, 512]
  value_hidden_dims: [512, 512, 512]
  layer_norm: true
  actor_loss: awr
  alpha: 0.003
  discrete: true
  use_film: false

  # Transformer
  use_context: true
  dyn_encoder_warmup_steps: 50000
  n_blocks: 4
  n_heads: 4
  emb_dim: 96
  mlp_dim: 512
  # NOTE(review): the original inline note on this key only said "changed";
  # the previous value is not recorded in this file.
  output_dim: 4
  context_len: 100
  number_of_meta_envs: 5
  use_masked_attention: false
  use_mean_embedding: true
  cosine_schedule: false
  world_pred_hidden: [128, 128, 128]

  # MISC
  discount: 0.99
  tau: 0.01  # Target network update rate.
  expectile: 0.9
