# Environment selection, observation options and the parameters handed to the
# environment on reset.
environment:
  type: 'MemoryGym'
  name: 'MysteryPath-v0'
  frame_skip: 1
  # Neither the last action nor the last reward is added to the observation.
  last_action_to_obs: false
  last_reward_to_obs: false
  obs_stacks: 1
  grayscale: false
  resize_vis_obs: [84, 84]
  reset_params:
    # NOTE(review): these two keys are hyphenated while every other key uses
    # underscores. They are presumably looked up by exact name, so they are
    # left untouched.
    start-seed: 0
    num-seeds: 100000
    agent_scale: 0.25
    agent_speed: 2.5
    cardinal_origin_choice:
      - 0
      - 1
      - 2
      - 3
    show_origin: true
    show_goal: true
    visual_feedback: true
    # Reward terms: only the goal reward and the (negative) per-step reward
    # are non-zero in this configuration.
    reward_goal: 1.0
    reward_fall_off: 0.0
    reward_path_progress: 0.0
    reward_step: -0.001

# Model settings: checkpoint loading, encoders, hidden layers and the
# recurrent layer.
model:
  # Loading is switched off and no path is given.
  load_model: false
  model_path: ''
  checkpoint_interval: 1000
  activation: 'relu'
  vis_encoder: 'cnn'
  vec_encoder: 'linear'
  num_vec_encoder_units: 128
  hidden_layer: 'default'
  num_hidden_layers: 1
  num_hidden_units: 512
  recurrence:
    layer_type: 'gru'
    sequence_length: 128
    # Same size as num_hidden_units above (512).
    hidden_state_size: 512
    hidden_state_init: 'zero'
    reset_hidden_state: true
    residual: false

# Evaluation settings. `evaluate` is off here; the remaining keys presumably
# only take effect once it is switched on (verify against the consumer).
evaluation:
  evaluate: false
  n_workers: 5
  # Ten consecutive seeds starting at 200000.
  seeds:
    - 200000
    - 200001
    - 200002
    - 200003
    - 200004
    - 200005
    - 200006
    - 200007
    - 200008
    - 200009
  interval: 200

# Sampler settings: sampler type, number of workers and steps per worker.
sampler:
  type: 'TrajectorySampler'
  n_workers: 32
  worker_steps: 512

# Trainer settings: algorithm, update counts, loss coefficients and the three
# decay schedules.
trainer:
  algorithm: 'PPO'
  resume_at: 0
  gamma: 0.99
  # NOTE(review): the key is spelled "lamda" (not "lambda"); it is kept
  # exactly as-is because the consumer reads it by this name.
  lamda: 0.95
  updates: 20000
  epochs: 3
  refresh_buffer_epoch: -1
  n_mini_batches: 8
  value_coefficient: 0.25
  max_grad_norm: 0.5
  share_parameters: true
  # Each schedule below has the same four fields (initial, final, power,
  # max_decay_steps), and every max_decay_steps equals `updates` (20000).
  learning_rate_schedule:
    initial: 0.0003
    final: 0.0001
    power: 1.0
    max_decay_steps: 20000
  beta_schedule:
    initial: 0.0001
    final: 0.00001
    power: 1.0
    max_decay_steps: 20000
  # initial and final are identical here (0.2).
  clip_range_schedule:
    initial: 0.2
    final: 0.2
    power: 1.0
    max_decay_steps: 20000