# Environment selection and observation options. The meaning of each key is
# defined by the consuming framework, which is not visible in this file.
# Booleans use the canonical lowercase `true`/`false` spelling.
environment:
  type: "MemoryGym"
  name: "SearingSpotlights-v0"
  frame_skip: 2
  last_action_to_obs: false
  last_reward_to_obs: false
  obs_stacks: 1
  grayscale: false
  resize_vis_obs: [84, 84]
  # Presumably forwarded to the environment on reset; confirm against the
  # consumer.
  reset_params:
    # NOTE(review): these two keys use hyphens while every other key in this
    # file uses underscores. Kept verbatim because renaming a key changes what
    # the consumer can look up; confirm before normalizing.
    start-seed: 0
    num-seeds: 100000
    # Same value as sampler.worker_steps (512).
    max_steps: 512
    # Spawn settings.
    initial_spawns: 4
    num_spawns: 30
    initial_spawn_interval: 30
    spawn_interval_threshold: 10
    spawn_interval_decay: 0.95
    # Spot (spotlight) settings; units are not stated in this file.
    spot_min_radius: 7.5
    spot_max_radius: 13.75
    spot_min_speed: 0.0025
    spot_max_speed: 0.0075
    spot_damage: 1.0
    # Rendering and lighting settings.
    visual_feedback: true
    black_background: false
    hide_chessboard: false
    light_dim_off_duration: 6
    light_threshold: 255
    # Coin and exit settings; both visibility flags are off.
    num_coins: [1]
    coin_scale: 0.375
    coins_visible: false
    use_exit: true
    exit_visible: false
    exit_scale: 0.5
    # Agent settings.
    agent_speed: 2.5
    agent_health: 100.0
    agent_scale: 0.25
    sample_agent_position: true
    # Rewards: only reward_exit and reward_coin are non-zero.
    reward_inside_spotlight: 0.0
    reward_outside_spotlight: 0.0
    reward_death: 0.0
    reward_exit: 1.0
    reward_max_steps: 0.0
    reward_coin: 0.25

# Model options: loading, checkpoint interval, encoders, hidden layers and
# recurrence. Booleans use the canonical lowercase `true`/`false` spelling.
model:
  # Loading is disabled and no path is set.
  load_model: false
  model_path: ""
  checkpoint_interval: 1000
  activation: "relu"
  vis_encoder: "cnn"
  vec_encoder: "linear"
  num_vec_encoder_units: 128
  hidden_layer: "default"
  num_hidden_layers: 1
  num_hidden_units: 512
  recurrence:
    layer_type: "gru"
    # 128 divides sampler.worker_steps (512) evenly into 4 parts.
    sequence_length: 128
    # Same value as num_hidden_units (512).
    hidden_state_size: 512
    hidden_state_init: "zero"
    reset_hidden_state: true
    residual: false

# Evaluation options. `evaluate` is off here; whether the other keys are read
# while it is off cannot be determined from this file.
evaluation:
  evaluate: false
  n_workers: 5
  # Ten consecutive seeds, 200000-200009. All lie outside the range described
  # by environment.reset_params (start-seed 0, num-seeds 100000), presumably
  # so that evaluation runs on seeds not used for training; verify.
  seeds: [200000, 200001, 200002, 200003, 200004, 200005, 200006, 200007, 200008, 200009]
  interval: 200

# Sampler options: sampler type, worker count and steps per worker.
sampler:
  type: "TrajectorySampler"
  # 32 workers x 512 steps = 16384; presumably the number of samples gathered
  # per update, TODO confirm against the sampler implementation.
  n_workers: 32
  # Same value as environment.reset_params.max_steps (512), and a multiple of
  # model.recurrence.sequence_length (128).
  worker_steps: 512

# Trainer options for the "PPO" algorithm. Booleans use the canonical
# lowercase `true`/`false` spelling.
trainer:
  algorithm: "PPO"
  resume_at: 0
  gamma: 0.99
  # NOTE(review): the key is spelled `lamda`, not `lambda`. Kept verbatim
  # because the consumer presumably reads it under this exact name; confirm
  # before renaming.
  lamda: 0.95
  updates: 10000
  epochs: 3
  refresh_buffer_epoch: -1
  n_mini_batches: 8
  value_coefficient: 0.25
  max_grad_norm: 0.5
  share_parameters: true
  # The three schedules below share one shape (initial, final, power,
  # max_decay_steps). Each max_decay_steps equals `updates` (10000).
  learning_rate_schedule:
    initial: 3.0e-4
    final: 1.0e-4
    power: 1.0
    max_decay_steps: 10000
  beta_schedule:
    initial: 0.0001
    final: 0.00001
    power: 1.0
    max_decay_steps: 10000
  # initial equals final, so this schedule presumably holds 0.2 throughout.
  clip_range_schedule:
    initial: 0.2
    final: 0.2
    power: 1.0
    max_decay_steps: 10000