# Training-loop settings.
# NOTE(review): the `_kwargs` suffix suggests this mapping is forwarded as
# keyword arguments to the training algorithm — confirm against the launcher.
algorithm_kwargs:
  # Presumably the number of transitions sampled per gradient update — confirm.
  batch_size: 256
  # Presumably the step cap for a single rollout/episode — confirm.
  max_path_length: 1000
  # 10000 is ten times num_expl_steps_per_train_loop below; presumably the
  # number of environment steps collected before the first update — confirm.
  min_num_steps_before_training: 10000
  num_epochs: 1000
  # Same value (10) as the top-level eval_env_num in this file.
  num_eval_paths_per_epoch: 10
  # The next two values are equal (1000 each), which presumably gives one
  # training update per exploration step — TODO confirm how the loop uses them.
  num_expl_steps_per_train_loop: 1000
  num_trains_per_train_loop: 1000
# Top-level experiment settings (environment, parallelism, model size, buffer).
# Environment identifier; looks like a Gym-style ID — confirm how it is resolved.
env: Reacher-v2
# Presumably the number of parallel environment copies used for evaluation and
# exploration respectively — confirm against the launcher.
eval_env_num: 10
expl_env_num: 10
# Presumably the hidden-layer width of the networks — confirm.
layer_size: 256
# Presumably the number of atoms of the categorical return distribution; 51 is
# consistent with `tau_type: C51` under trainer_kwargs — confirm.
num_atoms: 51
# Presumably the replay buffer capacity in transitions (one million) — confirm.
replay_buffer_size: 1000000
# Trainer hyperparameters.
# NOTE(review): the `_kwargs` suffix suggests this mapping is forwarded as
# keyword arguments to the trainer — confirm against the launcher.
trainer_kwargs:
  # Presumably the entropy temperature; use_automatic_entropy_tuning is false
  # below, so this looks like a fixed value rather than a starting point —
  # confirm.
  alpha: 0.2
  discount: 0.99
  # policy_lr and zf_lr (below) are set to the same value, 0.0003.
  policy_lr: 0.0003
  # Presumably the soft (Polyak) target-network update coefficient — confirm.
  soft_target_tau: 0.005
  # Plain string; safe unquoted because it does not parse as a number or bool.
  tau_type: C51
  use_automatic_entropy_tuning: false
  zf_lr: 0.0003
  # Presumably the lower bound of the value support used with C51.
  # NOTE(review): no matching `v_max` is set in this section — confirm that the
  # trainer supplies a default upper bound.
  v_min: -300
# Free-form label for this configuration; it is a plain string, so no quoting is
# required. Presumably used to name the experiment run or its output — confirm.
version: normal-C51-neutral
