# Top-level run settings for the DDPG navigation experiment.
# NOTE(review): `Ddpg` has no value and nothing is nested under it, so it
# parses as null (written explicitly here). It looks like a leftover section
# header - confirm whether any consumer reads this key before removing it.
Ddpg: null
# Output location and run name.
# NOTE(review): presumably joined by the training script - confirm.
exp_base: "results/"
exp_name: "ddpg_navigation_task"
system: "DDPG"
# NOTE(review): presumably an accelerator device index - confirm.
gpu_device: 0
num_workers: 0
# Top-level seed. `env.seed` is left as `???`; presumably it is filled from
# this value at runtime - verify against the loader.
seed: 1
total_env_steps: 50000
# Scenario definition: which task is run and how it is populated.
scenario:
  name: "navigation"
  # Booleans use canonical lowercase so every YAML schema reads them as bool.
  use_continuous_actions: true
  n_agents: 1
  n_obstacles: 3
  collisions: true
# Training environment settings.
env:
  max_steps: 100
  num_envs: 1
  # `???` is a plain YAML string; under OmegaConf/Hydra it marks a mandatory
  # value that must be supplied before it is read.
  # TODO confirm the loader and where this seed gets filled in.
  seed: ???
# Evaluation environment settings.
eval_env:
  max_steps: 100
  evaluation_episodes: 200
  # NOTE(review): the unit is not visible here (env steps vs. training
  # iterations) - confirm against the training loop.
  evaluation_interval: 20
# Loss hyperparameters.
loss:
  # NOTE(review): presumably the reward discount factor - confirm.
  gamma: 0.94
  # NOTE(review): presumably the soft target-update coefficient - confirm.
  tau: 0.005
# Exploration settings.
exploration:
  # One of: "none", "additive gaussian", "pink noise".
  explore_type: "none"
  # NOTE(review): eps_init/eps_end look like the start and end of a schedule;
  # presumably unused while explore_type is "none" - confirm.
  eps_init: 0.8
  eps_end: 0.01
  eval_deterministic_actions: true
# Optimizer step-size settings.
learning_rate:
  use_scheduler: false
  lr: 0.00005
  # NOTE(review): presumably the Adam optimizer's epsilon term - confirm.
  adam_eps: 0.000001
# Policy / network training settings.
policy:
  shared_parameters: true
  centralized_critic: false
  num_epochs: 40
  max_grad_norm: 40.0
  # `???` is a plain YAML string; under OmegaConf/Hydra it marks a mandatory
  # value that must be supplied before it is read.
  # NOTE(review): presumably derived from the top-level `gpu_device` - confirm.
  device: ???
  # One of: "Mish", "Tanh".
  activation: "Tanh"
# Replay buffer settings.
replay_buffer:
  use_priority: false
  # NOTE(review): memory_size equals minibatch_size, so (assuming both count
  # transitions) a minibatch would span the whole buffer - confirm that this
  # is intentional and not a placeholder value.
  memory_size: 100
  minibatch_size: 100
# Experiment logging settings.
logger:
  # Quoted to match how every other string value in this file is written.
  backend: "wandb"
  project_name: "optimistic_rl_navigation"
  save_data: false
  output_dir: "training_logs"
# Explicitly null.
# NOTE(review): presumably null disables periodic checkpointing - confirm
# against the code that reads this key.
save_checkpoint_per_iter: null