# Environment selection plus the options grouped under `reset_params`
# (presumably handed to the environment on reset - confirm in the loader).
environment:
    type: "MysteryPath-Grid"
    name: "MysteryPath-Grid-v0"
    reset_params:
        # NOTE(review): these two keys are kebab-case while all sibling
        # keys are snake_case. Left unchanged because the consumer
        # presumably looks them up by exact name - verify before renaming.
        start-seed: 0
        num-seeds: 100000
        agent_scale: 0.25
        cardinal_origin_choice: [0, 1, 2, 3]
        # Booleans use the canonical lowercase spelling (`true`/`false`).
        show_origin: false
        show_goal: false
        visual_feedback: true
        # Only `reward_goal` is non-zero; the other two reward terms are 0.0.
        reward_goal: 1.0
        reward_fall_off: 0.0
        reward_path_progress: 0.0
# Top-level scalar training hyperparameters.
# NOTE(review): the meanings below are inferred from the key names only;
# confirm against the code that reads this file.
# Presumably the reward discount factor.
gamma: 0.995
# The key is spelled `lamda` (not `lambda`); keep the spelling unless the
# reader is updated too. Presumably the advantage-estimation lambda.
lamda: 0.95
# NOTE(review): the three schedules in this file use max_decay_steps: 10000,
# which is twice this value - confirm that is intended.
updates: 5000
epochs: 3
# If one batch is n_workers x worker_steps samples (32 x 512 = 16384), then
# n_mini_batch: 8 would give 2048 samples per mini-batch - TODO confirm.
n_workers: 32
worker_steps: 512
n_mini_batch: 8
value_loss_coefficient: 0.5
# Same value as transformer.embed_dim (256) in this file.
hidden_layer_size: 256
max_grad_norm: 0.25
# Settings grouped under `transformer`.
# NOTE(review): semantics are inferred from key names; confirm in the model
# code that consumes them.
transformer:
    num_blocks: 2
    # 256 is evenly divisible by num_heads (4), i.e. 64 per head if the
    # embedding is split across heads - TODO confirm.
    embed_dim: 256
    num_heads: 4
    memory_length: 96
    # Explicit empty string (not null). Presumably selects "no positional
    # encoding" - verify which values the consumer accepts.
    positional_encoding: ""
    layer_norm: "pre"
    # Boolean uses the canonical lowercase spelling.
    gtrxl: false
    # Presumably only relevant when `gtrxl` is true - TODO confirm.
    gtrxl_bias: 0.0
# Learning-rate schedule: goes from `initial` to `final`. How `power` and
# `max_decay_steps` shape the decay is defined by the consumer, not here;
# power: 1.0 presumably means linear - TODO confirm.
# NOTE(review): max_decay_steps (10000) is larger than `updates` (5000). If
# decay steps are counted in updates, `final` is never reached - confirm.
learning_rate_schedule:
    initial: 2.75e-4
    final: 1.0e-5
    power: 1.0
    max_decay_steps: 10000
# Schedule for `beta`, using the same four keys as the other schedules in
# this file. Presumably the entropy-bonus coefficient - TODO confirm.
# `final` (0.000001) is 1000x smaller than `initial` (0.001).
# NOTE(review): max_decay_steps (10000) exceeds `updates` (5000); verify the
# decay is meant to stop part-way.
beta_schedule:
    initial: 0.001
    final: 0.000001
    power: 1.0
    max_decay_steps: 10000
# Schedule for the clip range, using the same four keys as the other
# schedules in this file.
# `initial` equals `final` (0.1), so the value presumably stays constant and
# `power` / `max_decay_steps` have no effect here - confirm in the consumer.
clip_range_schedule:
    initial: 0.1
    final: 0.1
    power: 1.0
    max_decay_steps: 10000