# Defaults list (Hydra-style composition): the `ppo` config is placed under
# the `algo` package, and `_self_` is listed last.
# NOTE(review): under Hydra's ordering rules, later entries override earlier
# ones, so the values in this file should win over those from `ppo` — confirm
# that this is the intended precedence.
defaults:
  - ppo@algo
  - _self_

# Top-level run settings.
# Booleans are written in canonical lowercase (`true`/`false`) so they load
# identically under YAML 1.1 and 1.2 parsers.
parallel: false
# batch_T * batch_B = 128 * 16 = 2048; the iteration counts quoted in the
# `runner` section comments are derived from this product, so revisit them
# whenever either value changes.
batch_T: 128
batch_B: 16
# Lower/upper reward bounds (symmetric around zero).
# NOTE(review): presumably rewards are clipped to [-5, 5] — confirm in the
# consuming code.
reward_clip_min: -5
reward_clip_max: 5
max_steps_decorrelate: 50
render_during_training: false
# Environment settings.
# NOTE(review): how these keys are consumed is not visible in this file;
# presumably they are forwarded to the environment constructor — confirm.
env:
  max_episode_steps: 1000
  reward_type: sparse
  # Canonical lowercase boolean (was `True`); the loaded value is unchanged.
  headless: true
# Explicit null: no device is pinned by this config.
# NOTE(review): presumably the training code selects a default device when
# this is null — confirm against the consumer.
device: null
# Network hyperparameters.
model:
  # The four list-valued keys below each have exactly four entries.
  # NOTE(review): presumably one entry per convolutional layer, so the lists
  # must be kept the same length — confirm against the model constructor.
  channels: [32, 64, 128, 256]
  kernel_sizes: [3, 3, 3, 3]
  strides: [2, 2, 2, 2]
  paddings: [0, 0, 0, 0]
  # Canonical lowercase boolean (was `False`); the loaded value is unchanged.
  use_maxpool: false
  # NOTE(review): the key name is plural but the value is a single integer;
  # verify the consumer accepts a scalar as well as a list.
  post_conv_hidden_sizes: 1024
  # Explicit null: no output size is set here.
  post_conv_output_size: null
  post_conv_nonlinearity: ReLU
  lstm_size: 512
  # NOTE(review): plural key name with a scalar value, as above.
  post_lstm_hidden_sizes: 512
  post_lstm_nonlinearity: ReLU
# Run length and logging cadence, expressed in steps.
# The iteration counts in the comments below assume
# batch_T * batch_B = 128 * 16 = 2048 steps per iteration; recompute them if
# either batch dimension changes.
runner:
  n_steps: 409600  # 200 iterations (200 * 2048)
  log_interval_steps: 20480  # log every 10 iterations (10 * 2048)
# Video recording settings.
video_recorder:
  # Scientific notation with an explicit `.` and exponent sign: this loads as
  # the float 50000.0, whereas the other step counts in this file are ints.
  # NOTE(review): confirm the consumer accepts a float step interval; if an
  # int is required, write 50000 instead.
  record_every_n_steps: 5.e+4
  video_length: 250
  output_root: videos
