# Training-loop settings, grouped under one mapping.
# NOTE(review): presumably forwarded as keyword arguments to the training
# algorithm by the code that loads this file — confirm against the loader.
algorithm_kwargs:
  # Presumably the number of samples per training batch — confirm.
  batch_size: 256
  # Presumably the cap on steps in a single path/episode — confirm.
  max_path_length: 1000
  # 10000 = 10 x max_path_length; presumably steps collected before the first
  # training update — confirm.
  min_num_steps_before_training: 10000
  num_epochs: 1000
  # Same value as the top-level `eval_env_num` (10).
  num_eval_paths_per_epoch: 10
  # Same value as `max_path_length` (1000) and as `num_trains_per_train_loop`,
  # i.e. a 1:1 ratio of exploration steps to training updates per loop if the
  # names mean what they suggest — TODO confirm.
  num_expl_steps_per_train_loop: 1000
  num_trains_per_train_loop: 1000
# Environment identifier, read as a plain string.
# NOTE(review): looks like a Gym MuJoCo environment id — confirm with the loader.
env: Ant-v2
# Evaluation and exploration environment counts are both set to 10.
# NOTE(review): presumably the number of parallel environment instances for
# each phase — confirm.
eval_env_num: 10
expl_env_num: 10
# Presumably the hidden-layer width of the networks — confirm.
layer_size: 256
# 1,000,000; presumably the replay buffer capacity in transitions — confirm.
replay_buffer_size: 1000000
# Optimisation settings, grouped under one mapping.
# NOTE(review): presumably forwarded as keyword arguments to the trainer; the
# key names resemble a Soft Actor-Critic setup — confirm against the loader.
trainer_kwargs:
  # NOTE(review): with `use_automatic_entropy_tuning: false` below, this is
  # presumably used as a fixed entropy coefficient — confirm.
  alpha: 0.2
  discount: 0.99
  # Policy and Q-function learning rates are both 0.0003 (3e-4).
  policy_lr: 0.0003
  qf_lr: 0.0003
  # Presumably the soft (Polyak) target-update coefficient — confirm.
  soft_target_tau: 0.005
  # Presumably a target update after every training step — confirm.
  target_update_period: 1
  use_automatic_entropy_tuning: false
# Plain string value.
# NOTE(review): presumably a label selecting or tagging the experiment variant;
# the set of accepted values is not visible in this file — confirm.
version: normal
