# Hydra defaults list: the configs composed into this one, in listed order.
defaults:
  # NOTE(review): presumably loads the `ppo` config into the `algo` package
  # and the `cartpole` config into the `env` package -- confirm against the
  # config tree.
  - ppo@algo
  - cartpole@env
  # Per Hydra's defaults-list rules, placing `_self_` last lets the values
  # defined in this file take precedence over the entries above.
  - _self_

# Hydra runtime settings: job name and output directories.
hydra:
  job:
    # Referenced below via ${hydra.job.name} to build both output paths.
    name: cartpole-recurrent-ppo
  run:
    # Output directory for a run: outputs/<job name>/<timestamp>.
    dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}
  sweep:
    # Directory for sweeps: multirun/<job name>/<timestamp>.
    dir: multirun/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}

# Sampling and training settings.
# NOTE(review): the key meanings noted below are inferred from their names --
# confirm against the code that consumes this config.
parallel: true
# batch_T * batch_B = 128 * 16 = 2048 steps (presumably time steps x
# environments per sampler batch -- TODO confirm).
batch_T: 128
batch_B: 16
# Lower and upper bounds, presumably applied when clipping rewards.
reward_clip_min: -5
reward_clip_max: 5
obs_norm_initial_count: 10000
max_steps_decorrelate: 50
render_during_training: false
# No device is pinned here; how null is resolved is up to the consumer.
device: null
# Model settings (key names suggest layers before and after an LSTM --
# confirm against the model code).
model:
  # NOTE(review): the `*_hidden_sizes` keys are plural but hold a single
  # integer -- verify whether the consumer expects a list such as [32].
  pre_lstm_hidden_sizes: 32
  lstm_size: 16
  post_lstm_hidden_sizes: 32
  hidden_nonlinearity: Tanh
# Runner settings, expressed in steps. The iteration counts in the comments
# below correspond to batch_T * batch_B = 128 * 16 = 2048 steps per iteration.
runner:
  n_steps: 204800  # 100 iterations (100 * 2048)
  log_interval_steps: 20480  # log every 10 iterations (10 * 2048)
