# Experiment-level settings (project_name below is "benchmarl").
# NOTE(review): the exact meaning of each key is defined by the consuming
# framework's schema, which is not visible in this file; the group headings
# below only summarize the key names and the values set here.
experiment:
  # Devices: sampling, training and the buffer are all set to cpu.
  sampling_device: cpu
  train_device: cpu
  buffer_device: cpu

  # Policy and collection switches.
  share_policy_params: true
  prefer_continuous_actions: true
  collect_with_grad: false
  parallel_collection: false

  # Optimization hyperparameters.
  gamma: 0.99
  lr: 5.0e-05
  adam_eps: 1.0e-06
  clip_grad_norm: true
  clip_grad_val: 5.0

  # Target-update settings: soft_target_update is enabled.
  # NOTE(review): hard_target_update_frequency is presumably unused while
  # soft_target_update is true — confirm against the consumer.
  soft_target_update: true
  polyak_tau: 0.005
  hard_target_update_frequency: 5

  # Exploration epsilon from 0.8 to 0.01; the anneal length is left null.
  exploration_eps_init: 0.8
  exploration_eps_end: 0.01
  exploration_anneal_frames: null

  # Run length: max_n_iters is null, max_n_frames is 3,000,000.
  max_n_iters: null
  max_n_frames: 3000000

  # On-policy collection / training sizes.
  on_policy_collected_frames_per_batch: 6000
  on_policy_n_envs_per_worker: 10
  on_policy_n_minibatch_iters: 45
  on_policy_minibatch_size: 400

  # Off-policy collection / training / replay sizes.
  off_policy_collected_frames_per_batch: 6000
  off_policy_n_envs_per_worker: 10
  off_policy_n_optimizer_steps: 1000
  off_policy_train_batch_size: 128
  off_policy_memory_size: 1000000
  off_policy_init_random_frames: 0
  # Prioritized replay is disabled; alpha/beta are still given values.
  off_policy_use_prioritized_replay_buffer: false
  off_policy_prb_alpha: 0.6
  off_policy_prb_beta: 0.4

  # Evaluation and rendering. evaluation_interval (120000) equals 20 times
  # the 6000 collected-frames-per-batch values set above.
  evaluation: true
  render: true
  evaluation_interval: 120000
  evaluation_episodes: 10
  evaluation_deterministic_actions: true
  evaluation_static: false

  # Logging outputs.
  loggers: [csv, wandb]
  project_name: benchmarl
  create_json: true

  # Saving / restoring: no save folder or restore file is set,
  # checkpoint_interval is 0 and checkpoint_at_end is false.
  save_folder: null
  restore_file: null
  restore_map_location: null
  checkpoint_interval: 0
  checkpoint_at_end: false
  keep_checkpoints_num: 3
# Algorithm-specific hyperparameters.
# NOTE(review): clip_epsilon / lmbda / critic_coef / entropy_coef look like
# PPO-style settings, but the algorithm name is not recorded in this file —
# confirm which algorithm consumes this block.
algorithm:
  share_param_critic: true
  clip_epsilon: 0.2
  # entropy_coef is 0.0 here.
  entropy_coef: 0.0
  critic_coef: 1.0
  loss_critic_type: l2
  lmbda: 0.9
  # Plain string value; the trailing "1.0" is part of the name, not a number.
  scale_mapping: biased_softplus_1.0
  use_tanh_normal: true
  minibatch_advantage: false
# Task settings; only max_steps is set.
# NOTE(review): presumably the per-episode step limit — the task name itself
# is not recorded in this file; confirm against the consumer.
task:
  max_steps: 80
# Model settings: name "mlp", num_cells [256, 256], torch.nn.Linear as the
# layer class and torch.nn.Tanh as the activation class; activation kwargs
# and normalization are left null.
# NOTE(review): num_cells presumably lists hidden-layer widths — confirm.
model:
  name: mlp
  num_cells: [256, 256]
  layer_class: torch.nn.Linear
  activation_class: torch.nn.Tanh
  activation_kwargs: null
  norm_class: null
  norm_kwargs: null
# Critic model settings: name "mlp", num_cells [256, 256], torch.nn.Linear as
# the layer class and torch.nn.Tanh as the activation class; activation
# kwargs and normalization are left null.
# NOTE(review): num_cells presumably lists hidden-layer widths — confirm.
critic_model:
  name: mlp
  num_cells: [256, 256]
  layer_class: torch.nn.Linear
  activation_class: torch.nn.Tanh
  activation_kwargs: null
  norm_class: null
  norm_kwargs: null
# Top-level seed value for this run (presumably the RNG seed — confirm).
seed: 0
