# Environment selection.
# NOTE(review): the set of valid `type` names is defined by the consuming
# code, which is not visible from this file — confirm before changing.
environment:
    type: "CartPoleMasked"
# Top-level training hyperparameters.
# NOTE(review): the meanings below are inferred from the key names only; the
# trainer that reads this file is not visible here — confirm against it.

# Presumably the reward discount factor.
gamma: 0.99
# Presumably the GAE lambda. NOTE(review): the key is spelled `lamda`, not
# `lambda`; it is a name the consumer looks up, so do not rename it without
# checking the loader.
lamda: 0.95
# Presumably the number of training updates. Every schedule in this file uses
# the same value (300) for its `max_decay_steps`.
updates: 300
# Presumably the number of optimization passes per update.
epochs: 4
# Presumably the number of parallel environment workers.
n_workers: 16
# Presumably the number of steps each worker collects per update.
worker_steps: 256
# Presumably the number of mini-batches each epoch is split into.
n_mini_batch: 4
# Presumably the weight of the value loss in the total loss.
value_loss_coefficient: 0.2
# Presumably the width of the model's hidden layer(s).
hidden_layer_size: 128
# Presumably the gradient-norm clipping threshold.
max_grad_norm: 0.5
# Transformer (episodic memory) settings.
# NOTE(review): key meanings are inferred from their names; the model code
# that reads them is not visible here — confirm against it.
transformer:
    # Presumably the number of stacked transformer blocks.
    num_blocks: 4
    # Presumably the embedding width used inside the transformer.
    embed_dim: 128
    # Presumably the number of attention heads.
    num_heads: 1
    # Presumably how many past steps are kept in the memory window.
    memory_length: 32
    positional_encoding: ""  # options: "" "relative" "learned"
    layer_norm: "pre"  # options: "" "pre" "post"
    # Written as the canonical lowercase boolean so that every YAML 1.1 / 1.2
    # loader and linter resolves it to the same value.
    # Presumably toggles GTrXL-style gating, with `gtrxl_bias` as the gate bias.
    gtrxl: true
    gtrxl_bias: 0.0
# Learning-rate schedule.
# NOTE(review): presumably a polynomial decay from `initial` to `final` over
# `max_decay_steps` steps with exponent `power` (1.0 would be linear) — the
# decay function itself is not visible here; confirm against the trainer.
learning_rate_schedule:
    # Keep the mantissa dot and the signed exponent in these literals: some
    # YAML 1.1 loaders read forms such as `3e-4` as a string, not a float.
    initial: 3.0e-4
    final: 3.0e-5
    power: 1.0
    max_decay_steps: 300
# Schedule for `beta`.
# NOTE(review): presumably the entropy-bonus coefficient, decayed from
# `initial` to `final` over `max_decay_steps` steps with exponent `power` —
# confirm against the trainer.
beta_schedule:
    initial: 0.001
    final: 0.0001
    power: 1.0
    max_decay_steps: 300
# Schedule for the clip range.
# `initial` and `final` are equal, so presumably the clip range stays at 0.2
# for the whole run regardless of `power` and `max_decay_steps`.
# NOTE(review): presumably the PPO clipping epsilon — confirm against the
# trainer.
clip_range_schedule:
    initial: 0.2
    final: 0.2
    power: 1.0
    max_decay_steps: 300