# Hydra-style defaults list: entries are composed in order to build the
# final configuration.
defaults:
  # Pull in the `ppo` config and mount it under the `algo` key.
  - ppo@algo
  # `_self_` is listed last, so values written in this file are merged after
  # (and therefore override) the entries above.
  - _self_

# Top-level sampler / training-loop settings.
# NOTE(review): the meanings noted below are inferred from the key names;
# confirm against the code that consumes this config.
# Booleans are written in canonical lowercase (`true`/`false`).
parallel: false
# Presumably the per-batch time-step count (T) and environment count (B).
batch_T: 64
batch_B: 16
# Reward clipping bounds, symmetric around zero.
reward_clip_min: -5
reward_clip_max: 5
max_steps_decorrelate: 50
render_during_training: false
# Environment settings.
# NOTE(review): semantics are inferred from key names; confirm with the
# environment constructor that receives these values.
env:
  max_episode_steps: 1000
  # Plain string; the set of accepted values is defined by the consumer.
  reward_type: sparse
  # Canonical lowercase boolean.
  headless: true
# No device is pinned here (explicit null).
# NOTE(review): presumably the consumer chooses a default when this is
# null; confirm.
device: null
# Model settings for the "cart" input.
# NOTE(review): layer semantics are inferred from key names; confirm with
# the model class that receives these values.
cart_model:
  # The four lists below each have three entries, presumably one per
  # convolutional layer; keep their lengths in sync when editing.
  channels: [32, 64, 64]
  kernel_sizes: [8, 4, 3]
  strides: [4, 2, 1]
  paddings: [0, 0, 0]
  # Canonical lowercase boolean.
  use_maxpool: false
  post_conv_hidden_sizes: 256
  post_conv_output_size: null
  post_conv_nonlinearity: ReLU
  lstm_size: 256
  post_lstm_hidden_sizes: null
  post_lstm_nonlinearity: ReLU
# OmegaConf-style interpolation: `camera_model` resolves to the `cart_model`
# node above, so both models share the same settings.
camera_model: ${cart_model}
# Overrides applied on top of whatever is already mounted under `algo`.
algo:
  # Scientific notation keeps an explicit decimal point and a signed
  # exponent so that YAML 1.1 loaders resolve it as a float, not a string.
  learning_rate: 3.0e-4
# Runner settings.
# Both values are written in scientific notation and load as floats
# (2000000.0 and 10000.0).
# NOTE(review): presumably the consumer casts these step counts to int;
# confirm before relying on the float type.
runner:
  n_steps: 2.0e+6
  log_interval_steps: 1.0e+4
