# Hydra defaults list: the configs composed, in order, into the final config.
defaults:
  # `name@package` form: load the config `ppo` and place it under the `algo` key.
  - ppo@algo
  # Likewise: load the config `cartpole` and place it under the `env` key.
  - cartpole@env
  # `_self_` listed last: values defined in this file are merged after the
  # entries above and therefore take precedence over them.
  - _self_

# Settings for Hydra itself (job naming and output directories).
hydra:
  job:
    # Job name; interpolated into both output directories below.
    name: cartpole-ppo
  run:
    # Output directory for a single run. `${now:...}` is Hydra's timestamp
    # resolver (strftime pattern), so each launch gets its own directory.
    dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}
  sweep:
    # Base output directory for multirun sweeps; same layout under `multirun/`.
    dir: multirun/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}

# Top-level experiment settings. How each key is consumed is decided by the
# training script, which is not visible from this file.
# Canonical lowercase boolean (parses to the same value as `False`).
parallel: false
# batch_T * batch_B = 128 * 16 = 2048, which matches the per-iteration step
# count implied by the iteration comments under `runner`.
# NOTE(review): presumably T = time steps per environment and B = number of
# environments - confirm against the training script.
batch_T: 128
batch_B: 16
# Lower and upper bounds of the reward clipping range.
reward_clip_min: -5
reward_clip_max: 5
# NOTE(review): presumably the initial sample count used to seed the
# observation-normalization statistics - confirm.
obs_norm_initial_count: 10000
# NOTE(review): presumably the maximum number of warm-up steps used to
# decorrelate environments before training - confirm.
max_steps_decorrelate: 50
# Explicit null. NOTE(review): presumably lets the script choose the device
# automatically - confirm.
device: null
# Model settings.
# NOTE(review): presumably forwarded to the model constructor - confirm in the
# training script.
model:
  # Two entries of 64; presumably one width per hidden layer - confirm.
  hidden_sizes: [64, 64]
  # Plain string. NOTE(review): looks like the name of an activation class the
  # script resolves (e.g. a torch.nn module) - confirm.
  hidden_nonlinearity: Tanh
# Run-length and logging settings, expressed in steps. The iteration counts in
# the comments are consistent with batch_T * batch_B = 128 * 16 = 2048 steps
# per iteration.
runner:
  n_steps: 102400  # 50 iterations (102400 / 2048)
  log_interval_steps: 10240  # log every 5 iterations (10240 / 2048)
