# @package _global_

# Hydra defaults list: `override` replaces config-group selections that are
# already made elsewhere in the composition (here the `env` and `agent` groups).
defaults:
  - override /env: gridworld/doors
  - override /agent: gciql

# Length of the run (presumably counted in training steps — confirm against the training loop).
train_steps: 100_000

# Logging/evaluation cadence and run tags. The author marked these as debug
# settings — TODO confirm which values should change for non-debug runs.
log_interval: 10_000
eval_interval: 20_000
tags: [gciql, discrete]

# Agent hyperparameter overrides for this experiment (the `agent` config group
# itself is chosen in the `defaults` list of this file).
agent:
  # Mantissa is written with a dot on purpose: stock PyYAML reads a dot-less
  # `3e-4` as a string, while `3.0e-4` is a float under every YAML 1.1 loader.
  lr: 3.0e-4
  batch_size: 1024

  # IQL
  const_std: true
  actor_hidden_dims: [512, 512, 512]
  value_hidden_dims: [512, 512, 512]
  layer_norm: true
  actor_loss: awr
  alpha: 0.003
  discrete: true
  use_film: false
  number_of_meta_envs: 1  # Only for debugging on different dynamics.

  # Transformer
  use_context: false

  # MISC
  discount: 0.99
  tau: 0.01  # Target network update rate.
  expectile: 0.9
