# Environment selection plus the parameters passed along under
# `reset_params`.
# NOTE(review): the exact meaning of every key is defined by the training
# code that loads this file, which is not visible here. Remarks that go
# beyond the literal values are assumptions to confirm.
environment:
  type: 'MemoryGym'
  name: 'MysteryPath-Grid-v0'

  # Observation handling. All optional extras are switched off: frame_skip
  # and obs_stacks are both 1, nothing from the previous step is appended,
  # and grayscale conversion is disabled.
  frame_skip: 1
  last_action_to_obs: false
  last_reward_to_obs: false
  obs_stacks: 1
  grayscale: false
  # Two-element size for visual observations (presumably height, width in
  # pixels; TODO confirm the order expected by the consumer).
  resize_vis_obs: [84, 84]

  reset_params:
    # These two keys are kebab-case while their siblings are snake_case.
    # Keep the spelling exactly as-is: the consumer presumably looks them
    # up by these literal names.
    start-seed: 0
    num-seeds: 100000
    agent_scale: 0.25
    cardinal_origin_choice: [0, 1, 2, 3]
    # The origin is shown, the goal is not.
    show_origin: true
    show_goal: false
    visual_feedback: true
    # Only `reward_goal` is non-zero; the fall-off and path-progress
    # rewards are both set to 0.0.
    reward_goal: 1.0
    reward_fall_off: 0.0
    reward_path_progress: 0.0

# Model / network settings.
# NOTE(review): which keys are actually honoured depends on the model
# code that loads this file; confirm before relying on any of them.
model:
  # Loading is disabled and no path is given (empty string), so this
  # configuration does not point at an existing checkpoint.
  load_model: false
  model_path: ''
  checkpoint_interval: 1000

  # Encoder and hidden-layer selection, referenced by name.
  activation: 'relu'
  vis_encoder: 'cnn'
  vec_encoder: 'linear'
  num_vec_encoder_units: 128
  hidden_layer: 'default'
  num_hidden_layers: 1
  num_hidden_units: 512

  # NOTE(review): presumably the settings of a HELM-style memory
  # component; confirm the semantics of `memory_length` and `beta`
  # against the model implementation.
  helm:
    memory_length: 128
    beta: 1000

# Evaluation settings. `evaluate` is false, so the remaining keys are
# presumably only consulted once evaluation is switched on (TODO confirm).
evaluation:
  evaluate: false
  n_workers: 5
  # Ten consecutive seeds, 2000000 through 2000009. Written as a block
  # sequence to keep the line short; the parsed list is unchanged.
  seeds:
    - 2000000
    - 2000001
    - 2000002
    - 2000003
    - 2000004
    - 2000005
    - 2000006
    - 2000007
    - 2000008
    - 2000009
  interval: 200

# Data collection settings.
# NOTE(review): n_workers * worker_steps = 32 * 512 = 16384, which is
# presumably the number of samples gathered per update; confirm against
# the sampler implementation.
sampler:
  type: 'TrajectorySampler'
  n_workers: 32
  worker_steps: 512

# Optimisation settings for the selected algorithm.
# NOTE(review): key semantics are defined by the trainer implementation,
# which is not visible here; remarks beyond the literal values are
# assumptions to confirm.
trainer:
  algorithm: 'PPO'
  resume_at: 0
  gamma: 0.99
  # The key is spelled `lamda` (not `lambda`); keep it exactly as-is,
  # since the consumer presumably reads it under this name.
  lamda: 0.95
  updates: 10000
  epochs: 3
  refresh_buffer_epoch: -1
  n_mini_batches: 8
  value_coefficient: 0.25
  max_grad_norm: 0.5
  share_parameters: true

  # Each schedule below has the same four fields: `initial`, `final`,
  # `power` and `max_decay_steps`. Every `max_decay_steps` equals
  # `updates` (10000).
  learning_rate_schedule:
    initial: 3.0e-4
    final: 1.0e-4
    power: 1.0
    max_decay_steps: 10000

  beta_schedule:
    initial: 0.0001
    final: 0.00001
    power: 1.0
    max_decay_steps: 10000

  # `initial` and `final` are identical here, so this value does not
  # change over the course of the schedule.
  clip_range_schedule:
    initial: 0.2
    final: 0.2
    power: 1.0
    max_decay_steps: 10000