# Environment selection plus the options grouped under `reset_params`.
environment:
    type: "POPGym"
    name: "RepeatPreviousHard"
    seed: 42
    # The keys below must be indented deeper than `reset_params` to belong to
    # it. At the same indentation, `reset_params` loads as null and these keys
    # become direct children of `environment` instead.
    # NOTE(review): confirm that no consumer reads these keys straight from
    # `environment`, and that the POPGym environment actually uses options
    # such as cardinal_origin_choice or reward_fall_off.
    reset_params:
        start-seed: 0
        num-seeds: 100000
        agent_scale: 0.25
        cardinal_origin_choice: [0, 1, 2, 3]
        show_origin: false
        show_goal: false
        visual_feedback: true
        reward_goal: 1.0
        reward_fall_off: 0.0
        reward_path_progress: 0.0
# Top-level scalar training settings.
# NOTE(review): the meanings suggested below are inferred from the key names
# only; confirm each against the code that reads this file.
# Presumably the reward discount factor.
gamma: 0.99
# The key is spelled `lamda` (not `lambda`); keep this exact spelling unless
# the consumer is changed as well. Presumably the GAE lambda; confirm.
lamda: 0.95
# NOTE(review): 50000000 is far larger than the max_decay_steps (200) used by
# the schedules in this file; confirm both are counted in the same unit.
updates: 50000000
save_model_steps: 1000
eval_steps: 200
epochs: 10
# 256 workers x 256 steps = 65536 samples if both apply to a single update,
# i.e. 8192 per mini batch with n_mini_batch: 8 (TODO confirm).
n_workers: 256
worker_steps: 256
n_mini_batch: 8
value_loss_coefficient: 0.1
# Same value as transformer.embed_dim (256) in this file.
hidden_layer_size: 256
max_grad_norm: 0.5
# Settings grouped under `transformer`.
transformer:
    embed_per_obs_dim: 8
    num_blocks: 4
    # embed_dim (256) divides evenly by num_heads (8), i.e. 32 per head.
    embed_dim: 256
    num_heads: 8
    # NOTE(review): the value 32 was recorded next to this setting, presumably
    # a previously used memory length; confirm before relying on it.
    memory_length: 155
    # Allowed values: "", "relative", "learned"
    positional_encoding: ""
    # Allowed values: "", "pre", "post"
    layer_norm: "pre"
    gtrxl: false
    # Presumably only relevant when gtrxl is enabled; confirm.
    gtrxl_bias: 0.0
# Learning-rate schedule. `initial` and `final` are equal (3.0e-4), so any
# interpolation between the two endpoints stays at 3.0e-4.
# NOTE(review): presumably `power` is the exponent of a polynomial decay that
# runs for `max_decay_steps` steps; confirm against the code that consumes it.
learning_rate_schedule:
    initial: 3.0e-4
    final: 3.0e-4
    power: 1.0
    max_decay_steps: 200
# Beta schedule. The endpoints go from 0.001 down to 0.0001, a 10x reduction.
# NOTE(review): presumably beta is the entropy-bonus coefficient, decayed over
# `max_decay_steps` steps with polynomial exponent `power`; confirm against
# the code that consumes this section.
beta_schedule:
    initial: 0.001
    final: 0.0001
    power: 1.0
    max_decay_steps: 200
# Clip-range schedule. `initial` and `final` are equal (0.2), so any
# interpolation between the two endpoints stays at 0.2.
# NOTE(review): presumably the PPO clipping range; `power` and
# `max_decay_steps` use the same keys as the other schedules in this file.
# Confirm against the code that consumes this section.
clip_range_schedule:
    initial: 0.2
    final: 0.2
    power: 1.0
    max_decay_steps: 200