# Top-level run settings. How each value is interpreted is decided by the code
# that loads this file, which is not visible here.

# NOTE(review): -1 presumably means "do not fix the seed / choose one at
# runtime" -- confirm against the loader.
seed: -1

# NOTE(review): presumably the magnitude bounds used to clamp observations and
# actions (i.e. to [-5.0, 5.0] and [-1.0, 1.0]) -- confirm against the
# environment wrapper.
clip_observations: 5.0
clip_actions: 1.0


# Settings for the learning algorithm. The meaning of every key is defined by
# the code that loads this file, which is not visible here; notes marked
# "presumably" or "NOTE(review)" are assumptions to confirm against that code.
learn:
  agent_name: shadow_hand
  test: false
  # NOTE(review): presumably a checkpoint/iteration index to resume from, with
  # 0 meaning "start from scratch" -- confirm.
  resume: 0
  save_interval: 1000  # check for potential saves every this many iterations
  print_log: true

  # rollout params
  max_iterations: 2500

  # training params
  hidden_nodes: 256
  hidden_layer: 3

  # NOTE(review): cliprange / nsteps / noptepochs / nminibatches read like
  # on-policy (PPO-style) settings, while polyak / policy_delay /
  # target_noise / noise_clip below read like off-policy (TD3-style)
  # settings. Confirm which of these keys the agent actually consumes.
  cliprange: 0.2
  nsteps: 8
  noptepochs: 5
  nminibatches: 4  # this is per agent
  # NOTE(review): units of replay_size are not stated here (transitions vs.
  # steps across all environments) -- confirm before tuning.
  replay_size: 100
  polyak: 0.995
  learning_rate: 0.0001
  max_grad_norm: 1
  policy_delay: 2
  gamma: 0.99
  act_noise: 0.1
  target_noise: 0.2
  noise_clip: 0.5
  reward_scale: 1
  batch_size: 64

  log_interval: 1
  asymmetric: false
