---
# Environment selection. How "type" and "name" are resolved is decided by the
# code that loads this file (not visible here).
environment:
    type: "Minigrid"
    name: "MiniGrid-MemoryS9-v0"
# Top-level training hyperparameters. Their exact meaning is defined by the
# code that consumes this file; the per-key notes below are reviewer
# assumptions based on the key names and should be confirmed against it.

# Presumably the reward discount factor -- TODO confirm.
gamma: 0.995
# Presumably the GAE lambda. The key really is spelled "lamda" in this file;
# do not rename it without checking the reader.
lamda: 0.95
# Presumably the total number of training updates -- TODO confirm.
updates: 100
# Presumably optimisation passes over each collected batch -- TODO confirm.
epochs: 5
# NOTE(review): if one update collects n_workers * worker_steps samples, that
# is 16 * 512 = 8192 samples, i.e. 1024 per mini batch with n_mini_batch: 8.
# Confirm how the trainer actually combines these three values.
n_workers: 16
worker_steps: 512
n_mini_batch: 8
# Presumably the weight of the value loss in the total loss -- TODO confirm.
value_loss_coefficient: 0.5
# Same value as transformer.embed_dim below (384); whether the two must match
# is not visible from this file -- verify before changing only one of them.
hidden_layer_size: 384
# Presumably the gradient-norm clipping threshold -- TODO confirm.
max_grad_norm: 0.5
# Transformer settings. Semantics are defined by the model code that reads
# this mapping (not visible here); notes below are hedged where not grounded
# in this file.
transformer:
    num_blocks: 3
    # 384 equals hidden_layer_size above and is a multiple of num_heads
    # (384 / 4 = 96). NOTE(review): presumably both are required by the
    # model -- confirm before changing embed_dim or num_heads independently.
    embed_dim: 384
    num_heads: 4
    memory_length: 64
    positional_encoding: "relative"  # options: "" "relative" "learned"
    layer_norm: "post"  # options: "" "pre" "post"
    # Canonical lowercase boolean: parses to the same value as "True" but does
    # not depend on loader-specific handling of capitalised truthy words.
    gtrxl: true
    gtrxl_bias: 0.0
# Learning-rate schedule described by a start value, an end value, an exponent
# and a step budget. The decay formula itself lives in the consuming code.
# NOTE(review): max_decay_steps (250) is larger than "updates" (100) at the
# top of this file. If the schedule advances once per update, training ends
# before the rate reaches "final" -- confirm this is intended.
learning_rate_schedule:
    initial: 3.5e-4
    final: 1.0e-4
    power: 1.0
    max_decay_steps: 250
# Schedule for "beta" (presumably the entropy bonus coefficient -- TODO
# confirm against the trainer). initial and final are equal, so any
# interpolation between them yields a constant 0.001; power and
# max_decay_steps then have no visible effect.
beta_schedule:
    initial: 0.001
    final: 0.001
    power: 1.0
    max_decay_steps: 10000
# Schedule for the clip range (presumably the PPO surrogate clipping
# parameter -- TODO confirm against the trainer). initial and final are
# equal, so any interpolation between them yields a constant 0.1; power and
# max_decay_steps then have no visible effect.
clip_range_schedule:
    initial: 0.1
    final: 0.1
    power: 1.0
    max_decay_steps: 10000