# Top-level run settings.
# NOTE(review): `Ddpg` holds no value, and the keys below are its siblings
# rather than its children -- confirm whether it was meant to be a section
# header. Written as an explicit null, which parses the same as a bare key.
Ddpg: null
exp_base: "results/"
exp_name: "ddpg_coverage_task"
system: "DDPG"
gpu_device: 0
# presumably 0 means no extra worker processes -- TODO confirm with consumer
num_workers: 0
seed: 1
total_env_steps: 200000
# Scenario (task) definition.
scenario:
  name: "sampling"
  use_continuous_actions: true
  n_agents: 1
  n_gaussians: 3
  # presumably the spatial extent of the scenario in x and y -- TODO confirm
  xdim: 1
  ydim: 1
  # presumably the covariance used for each Gaussian -- TODO confirm
  cov: 0.05
# Environment settings (presumably for training, since evaluation has its
# own `eval_env` section -- TODO confirm).
env:
  max_steps: 150
  num_envs: 1
  # `???` parses as a plain string. It looks like the OmegaConf/Hydra
  # "mandatory value" marker, i.e. to be filled in at runtime -- TODO confirm.
  seed: ???
# Evaluation environment settings.
eval_env:
  max_steps: 150
  evaluation_episodes: 200
  # NOTE(review): the unit of this interval (iterations, episodes, steps) is
  # not visible here -- confirm against the training loop.
  evaluation_interval: 20
# Loss hyperparameters.
loss:
  # presumably the reward discount factor -- TODO confirm
  gamma: 0.9
  # presumably the soft-update coefficient for target networks -- TODO confirm
  tau: 0.005
# Exploration settings.
exploration:
  # Allowed values: "none", "additive gaussian", "pink noise".
  explore_type: "none"
  # NOTE(review): presumably the start/end of an exploration schedule, and
  # possibly unused while explore_type is "none" -- confirm.
  eps_init: 0.8
  eps_end: 0.01
  eval_deterministic_actions: true
# Optimizer / learning-rate settings.
learning_rate:
  use_scheduler: false
  lr: 0.00005
  # presumably the epsilon term passed to the Adam optimizer -- TODO confirm
  adam_eps: 0.000001
# Policy / network settings.
policy:
  shared_parameters: true
  centralized_critic: false
  num_epochs: 40
  max_grad_norm: 40.0
  # `???` parses as a plain string. It looks like the OmegaConf/Hydra
  # "mandatory value" marker, i.e. to be filled in at runtime -- TODO confirm.
  device: ???
  # Allowed values: "Mish", "Tanh".
  activation: "Tanh"
# Replay buffer settings.
replay_buffer:
  use_priority: false
  # NOTE(review): memory_size equals minibatch_size (and env.max_steps), so a
  # minibatch spans the whole buffer -- confirm this is intentional.
  memory_size: 150
  minibatch_size: 150
# Logging settings.
logger:
  backend: "wandb"
  project_name: "optimistic_rl_sampling"
  save_data: false
  output_dir: "training_logs"
# No value is configured here (explicit null).
# NOTE(review): presumably null disables periodic checkpointing -- confirm.
save_checkpoint_per_iter: null