# Environment section: selects the environment to instantiate, the
# observation-related options and the parameters passed on reset.
environment:
  type: 'Procgen'
  name: 'procgen:procgen-bossfight-v0'
  frame_skip: 1
  last_action_to_obs: false
  last_reward_to_obs: false
  obs_stacks: 1
  grayscale: false
  # NOTE(review): presumably the size visual observations are resized to;
  # the axis order is not visible here and does not matter while both are 84.
  resize_vis_obs: [84, 84]
  spotlight_perturbation: true
  reset_params:
    # These two keys are hyphenated, unlike the snake_case keys around them.
    # NOTE(review): presumably the consumer looks them up by this exact
    # spelling, so do not rename them without checking.
    start-seed: 1
    num-seeds: 100000
    # Reference for the Procgen options that follow:
    # https://github.com/openai/procgen#environment-options
    # Draw the player's velocity into the top-left corner of the frame.
    # Only some games support this.
    paint_vel_info: false
    # Replace the human-designed assets with randomly generated ones.
    use_generated_assets: false
    # Whether observations are centered on the agent or show the full level.
    # Override at your own risk.
    center_agent: false
    # When false, reaching the end of a level ends the episode and a new
    # level is selected.
    use_sequential_levels: false
    # Level variant to use; one of "easy", "hard", "extreme", "memory",
    # "exploration".
    distribution_mode: 'easy'
    # When false, games render pure black backgrounds instead of the
    # human-designed ones.
    use_backgrounds: false
    # When true, games that pick assets from several themes are limited to a
    # single theme.
    restrict_themes: false
    # When true, monochromatic rectangles replace the human-designed assets.
    use_monochrome_assets: false

# Model section: checkpoint handling and the network layout (encoders,
# hidden layers and the recurrent layer).
model:
  load_model: false
  # Empty string: no model path is configured.
  model_path: ''
  # Same value as evaluation.interval (200).
  # NOTE(review): presumably counted in updates -- confirm with the consumer.
  checkpoint_interval: 200
  activation: 'relu'
  vis_encoder: 'cnn'
  vec_encoder: 'linear'
  num_vec_encoder_units: 128
  hidden_layer: 'default'
  num_hidden_layers: 1
  num_hidden_units: 512
  # Settings of the recurrent layer.
  recurrence:
    layer_type: 'gru'
    sequence_length: 128
    # Same width as num_hidden_units above (512).
    hidden_state_size: 512
    hidden_state_init: 'zero'
    reset_hidden_state: true
    residual: false
# Evaluation section. Evaluation is switched off here (evaluate: false).
# NOTE(review): presumably the remaining keys only take effect once it is
# enabled -- confirm with the consumer.
evaluation:
  evaluate: false
  n_workers: 5
  # Ten consecutive seeds, 2000000 through 2000009. Assuming the training
  # seeds are the 100000 consecutive values starting at 1 (see
  # environment.reset_params), the two sets do not overlap.
  seeds:
    - 2000000
    - 2000001
    - 2000002
    - 2000003
    - 2000004
    - 2000005
    - 2000006
    - 2000007
    - 2000008
    - 2000009
  interval: 200


# Sampler section: selects the sampler type and sizes the data collection.
sampler:
  type: 'TrajectorySampler'
  # NOTE(review): if every worker contributes worker_steps steps, one
  # sampling round yields 32 * 512 = 16384 steps -- confirm with the sampler.
  n_workers: 32
  worker_steps: 512

# Trainer section: algorithm choice, optimisation hyperparameters and the
# schedules for learning rate, beta and clip range.
trainer:
  algorithm: 'PPO'
  resume_at: 0
  gamma: 0.99
  # The key is spelled "lamda" (sic).
  # NOTE(review): presumably the consumer reads this exact spelling, so do
  # not "correct" it without checking.
  lamda: 0.95
  updates: 2000
  epochs: 3
  refresh_buffer_epoch: -1
  n_mini_batches: 8
  value_coefficient: 0.5
  max_grad_norm: 0.5
  share_parameters: true
  # Every schedule below has the same four keys: initial, final, power and
  # max_decay_steps.
  # NOTE(review): max_decay_steps (10000) is larger than updates (2000). If
  # decay steps are counted in updates, the `final` values are never reached
  # within this run -- confirm that this is intended.
  learning_rate_schedule:
    initial: 3.0e-4
    final: 1.0e-5
    power: 1.0
    max_decay_steps: 10000
  # Written in the same exponent notation as the learning-rate schedule; the
  # values are 0.001 and 0.00001.
  beta_schedule:
    initial: 1.0e-3
    final: 1.0e-5
    power: 1.0
    max_decay_steps: 10000
  # initial and final are both 0.2.
  clip_range_schedule:
    initial: 0.2
    final: 0.2
    power: 1.0
    max_decay_steps: 10000