# Experiment configuration for the `Ant-v2` environment, `normal` version.
# NOTE(review): the code that loads this file is not visible from here. The
# per-key notes below are inferred from key names only; confirm them against
# the consumer before relying on them.

# Settings grouped for the training algorithm (presumably forwarded as keyword
# arguments to it; confirm).
algorithm_kwargs:
  # Presumably the number of samples drawn per gradient update; confirm.
  batch_size: 256
  # Presumably the cap on steps in a single rollout; confirm.
  max_path_length: 1000
  # Presumably steps collected before the first update; confirm. The value is
  # 10x num_expl_steps_per_train_loop below.
  min_num_steps_before_training: 10000
  num_epochs: 1000
  # Same value as eval_env_num below; whether the two must match is not
  # established here. TODO confirm.
  num_eval_paths_per_epoch: 10
  # The next two values are equal (1000 and 1000), which looks like one update
  # per collected step; verify how the consumer interprets them.
  num_expl_steps_per_train_loop: 1000
  num_trains_per_train_loop: 1000
# Environment identifier, a plain string. Looks like a Gym MuJoCo id; confirm.
env: Ant-v2
# Presumably the number of evaluation / exploration environment instances;
# confirm.
eval_env_num: 10
expl_env_num: 10
# Presumably the hidden-layer width of the networks; confirm.
layer_size: 256
# 1,000,000; presumably the maximum number of stored transitions; confirm.
replay_buffer_size: 1000000
# Settings grouped for the trainer (presumably forwarded as keyword arguments;
# confirm).
trainer_kwargs:
  # Presumably the reward discount factor (gamma); confirm.
  discount: 0.99
# Variant selector, a plain string; the set of accepted values is defined by
# the consumer and is not visible here.
version: normal
