# Top-level run settings.
# NOTE(review): -1 presumably asks the consumer to choose a seed itself;
# confirm against the code that loads this file.
seed: -1

# NOTE(review): presumably clamp magnitudes applied to observations and
# actions; confirm against the consumer.
clip_observations: 5.0
clip_actions: 1.0

# Network settings for the policy.
# NOTE(review): pi_* / vf_* presumably list the hidden-layer widths of the
# policy and value-function networks respectively; confirm against the consumer.
policy: # only works for MlpPolicy right now
  pi_hid_sizes: [1024, 1024, 512]
  vf_hid_sizes: [1024, 1024, 512]
  activation: elu # can be elu, relu, selu, crelu, lrelu, tanh, sigmoid
learn:
  # Run identification and bookkeeping.
  # Booleans below use the canonical lowercase `true` / `false` spelling so
  # they resolve identically under YAML 1.1 and YAML 1.2 loaders.
  agent_name: shadow_hand
  test: false
  # NOTE(review): presumably 0 means "start fresh, do not resume"; confirm
  # against the consumer.
  resume: 0
  save_interval: 1000  # check for potential saves once every this many iterations
  print_log: true

  # rollout params
  max_iterations: 1000000

  # training params
  cliprange: 0.2
  nsteps: 8
  noptepochs: 5
  nminibatches: 4  # this is per agent
  max_grad_norm: 10
  # Keep the "digits, dot, signed exponent" form: some YAML 1.1 loaders
  # (e.g. PyYAML) resolve a bare `1e-3` as a string rather than a float.
  optim_stepsize: 1.e-3  # 3e-4 is default for single agent training with constant schedule
  schedule: adaptive  # could be adaptive or linear or fixed
  gamma: 0.99
  lam: 0.95
  init_noise_std: 0.8
  value_loss_coef: 2.0

  # optimize the actor
  # NOTE(review): damping / cg_nsteps / max_kl / max_num_backtrack /
  # accept_ratio look like trust-region (conjugate-gradient plus backtracking
  # line-search) settings; confirm which algorithm actually reads them.
  damping: 0.1
  cg_nsteps: 3
  max_kl: 0.1
  max_num_backtrack: 10
  accept_ratio: 0.01
  step_fraction: 1

  log_interval: 1
  asymmetric: false

