dqn:
  # Batch and replay sizes.
  batch_size: 256
  warmup_replay_size: 0
  max_replay_size: 1_000_000
  # Target update schedule. tau is -1 here, as in every section of this file
  # that sets a positive target_update_frequency (presumably "unused").
  target_update_frequency: 200
  tau: -1
  # Model and optimiser.
  n_features: 64
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `5e-4` to the string '5e-4' rather than a float.
  lr: 5.0e-4
  grad_norm_clip: 10
  # Epsilon schedule. NOTE(review): presumably epsilon moves from
  # epsilon_start_value to epsilon_end_value between epsilon_decay_start and
  # epsilon_decay_end (units are not stated in this file) -- confirm.
  epsilon_decay_start: 0
  epsilon_decay_end: 50000
  epsilon_start_value: 1.0
  epsilon_end_value: 0.05
  # Observation and parameter sharing.
  # Include the agent's own last action in its observation.
  obs_last_action: true
  share_agent_params: true

dqn_continuous:
  # Batch and replay sizes.
  batch_size: 100
  warmup_replay_size: 10_000
  max_replay_size: 1_000_000
  # Target update: soft mode driven by tau. The frequency is -1, as in every
  # section of this file that sets a positive tau (presumably "unused").
  target_update_mode: soft
  target_update_frequency: -1
  tau: 0.001
  # Model and optimiser.
  # The parenthesised value is a plain YAML string; it is quoted to make that
  # explicit. NOTE(review): presumably parsed into layer sizes by the
  # consumer -- confirm.
  n_features: '(400,300)'
  lr: 0.001
  grad_norm_clip: 0.5
  # Observation and parameter sharing.
  # Include the agent's own last action in its observation.
  obs_last_action: true
  share_agent_params: true

gru_dqn:
  # Batch and replay sizes.
  batch_size: 32
  warmup_replay_size: 32
  max_replay_size: 5000
  # Target update: hard mode every target_update_frequency; tau is -1, as in
  # every section of this file that uses a positive frequency (presumably
  # "unused").
  target_update_mode: hard
  target_update_frequency: 200
  tau: -1
  # Model and optimiser.
  n_features: 64
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `5e-4` to the string '5e-4' rather than a float.
  lr: 5.0e-4
  grad_norm_clip: 10
  # Epsilon schedule. NOTE(review): presumably epsilon moves from
  # epsilon_start_value to epsilon_end_value between epsilon_decay_start and
  # epsilon_decay_end (units are not stated in this file) -- confirm.
  epsilon_decay_start: 0
  epsilon_decay_end: 50000
  epsilon_start_value: 1.0
  epsilon_end_value: 0.05
  # Observation and parameter sharing.
  # Include the agent's own last action in its observation.
  obs_last_action: true
  share_agent_params: true

ddpg:
  # Batch and replay sizes.
  batch_size: 100
  warmup_replay_size: 10_000
  max_replay_size: 1_000_000
  # Target update schedule. The frequency is -1, as in every section of this
  # file that sets a positive tau (presumably "unused").
  target_update_frequency: -1
  tau: 0.001
  # Actor and critic models / optimisers.
  # The parenthesised values are plain YAML strings; they are quoted to make
  # that explicit. NOTE(review): presumably parsed into layer sizes by the
  # consumer -- confirm.
  n_features_actor: '(400,300)'
  lr_actor: 0.001
  n_features_critic: '(400,300)'
  lr_critic: 0.001
  grad_norm_clip: 0.5
  # NOTE(review): presumably the scale of the policy's exploration noise --
  # confirm against the agent code.
  pi_sigma: 0.1
  # Critic layout, observation and parameter sharing.
  centralized_critic: false
  obs_last_action: false
  share_agent_params: false

discrete_ddpg:
  # Batch and replay sizes.
  batch_size: 256
  warmup_replay_size: 1000
  max_replay_size: 1_000_000
  # Target update schedule (this section sets no tau or target_update_mode).
  target_update_frequency: 200
  # Actor and critic models / optimisers.
  n_features_actor: 64
  n_features_critic: 64
  # Both learning rates carry an explicit decimal point: YAML 1.1 loaders
  # (e.g. PyYAML) resolve bare `1e-4` / `3e-4` to strings rather than floats.
  lr_actor: 1.0e-4
  lr_critic: 3.0e-4
  grad_norm_clip: 10
  # Epsilon schedule. NOTE(review): presumably epsilon moves from
  # epsilon_start_value to epsilon_end_value between epsilon_decay_start and
  # epsilon_decay_end (units are not stated in this file) -- confirm.
  epsilon_decay_start: 10_000
  epsilon_decay_end: 100_000
  epsilon_start_value: 1.0
  epsilon_end_value: 0.01
  # Observation and parameter sharing.
  # Include the agent's own last action in the actor observation.
  obs_last_action: true
  share_agent_params: true

gru_discrete_ddpg:
  # Batch and replay sizes.
  batch_size: 32
  warmup_replay_size: 100
  max_replay_size: 5000
  # Target update schedule (this section sets no tau or target_update_mode).
  target_update_frequency: 200
  # Actor and critic models / optimisers.
  n_features_actor: 64
  n_features_critic: 64
  lr_actor: 0.0025
  actor_optimizer_class: adam
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `3e-4` to the string '3e-4' rather than a float.
  lr_critic: 3.0e-4
  critic_optimizer_class: adam
  grad_norm_clip: 10
  # Epsilon schedule. NOTE(review): presumably epsilon moves from
  # epsilon_start_value to epsilon_end_value between epsilon_decay_start and
  # epsilon_decay_end (units are not stated in this file) -- confirm.
  epsilon_decay_start: 0
  epsilon_decay_end: 50000
  epsilon_start_value: 0.5
  epsilon_end_value: 0.05
  # Observation and parameter sharing.
  # Include the agent's own last action (one_hot) in the actor observation.
  obs_last_action: true
  # Include the agent's own last action (one_hot) in the critic observation.
  critic_obs_last_action: true
  critic_agent_encoding: true
  share_agent_params: true

maddpg:
  # Batch and replay sizes.
  batch_size: 100
  warmup_replay_size: 10_000
  max_replay_size: 1_000_000
  # Target update: soft mode driven by tau. The frequency is -1, as in every
  # section of this file that sets a positive tau (presumably "unused").
  target_update_mode: soft
  target_update_frequency: -1
  tau: 0.001
  # Critic model and actor/critic optimisers.
  n_features_critic: 64
  lr_actors: 0.0025
  actor_optimizer_class: adam
  lr_critics: 0.0005
  critic_optimizer_class: adam
  # Scale the actor / critic loss by the number of agents.
  scale_actor_loss: false
  scale_critic_loss: false
  grad_norm_clip: 0.5
  # Agent wiring, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: ddpg
  obs_last_action: false
  share_agent_params: true

kmaddpg:
  # NOTE(review): the meaning of k_level is not shown in this file -- confirm
  # against the algorithm code.
  k_level: 2
  # Batch and replay sizes.
  batch_size: 100
  warmup_replay_size: 10_000
  max_replay_size: 1_000_000
  # Target update: soft mode driven by tau. The frequency is -1, as in every
  # section of this file that sets a positive tau (presumably "unused").
  target_update_mode: soft
  target_update_frequency: -1
  tau: 0.001
  # Critic model and actor/critic optimisers.
  n_features_critic: 64
  lr_actors: 0.0025
  actor_optimizer_class: adam
  lr_critics: 0.0005
  critic_optimizer_class: adam
  # Scale the actor / critic loss by the number of agents.
  scale_actor_loss: false
  scale_critic_loss: false
  grad_norm_clip: 0.5
  # Agent wiring, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: ddpg
  obs_last_action: false
  share_agent_params: true

maddpg_discrete:
  # Batch and replay sizes.
  batch_size: 32
  warmup_replay_size: 32
  max_replay_size: 5000
  # Target update: hard mode every target_update_frequency; tau is -1, as in
  # every section of this file that uses a positive frequency (presumably
  # "unused").
  target_update_mode: hard
  target_update_frequency: 200
  tau: -1
  # Critic model and actor/critic optimisers.
  n_features_critic: 64
  lr_actors: 0.0025
  actor_optimizer_class: adam
  lr_critics: 0.0005
  critic_optimizer_class: adam
  # Scale the actor / critic loss by the number of agents.
  scale_actor_loss: false
  scale_critic_loss: false
  grad_norm_clip: 10
  # Agent wiring, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: gru_discrete_ddpg
  # Include the agent's own last action (one_hot) in the actor observation.
  obs_last_action: true
  share_agent_params: true

qmix:
  # Batch and replay sizes.
  batch_size: 32
  warmup_replay_size: 32
  max_replay_size: 5000
  # Target update: hard mode every target_update_frequency; tau is -1, as in
  # every section of this file that uses a positive frequency (presumably
  # "unused").
  target_update_mode: hard
  target_update_frequency: 200
  tau: -1
  # Mixer model and optimiser.
  mixing_embed_dim: 32
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `5e-4` to the string '5e-4' rather than a float.
  lr: 5.0e-4
  optimizer_class: adam
  # Scale the loss by the number of agents.
  scale_loss: false
  grad_norm_clip: 10
  # Agent wiring, observation/state inputs and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: gru_dqn
  # Include the agent's own last action in its observation.
  obs_last_action: true
  # NOTE(review): presumably adds the last actions to the state input --
  # confirm against the algorithm code.
  state_last_action: true
  share_agent_params: true

comix:
  # Batch and replay sizes.
  batch_size: 100
  warmup_replay_size: 10_000
  max_replay_size: 1_000_000
  # Target update: soft mode driven by tau. The frequency is -1, as in every
  # section of this file that sets a positive tau (presumably "unused").
  target_update_mode: soft
  target_update_frequency: -1
  tau: 0.001
  # Mixer model and optimiser.
  mixing_embed_dim: 32
  lr: 0.001
  optimizer_class: adam
  # Scale the loss by the number of agents.
  scale_loss: false
  grad_norm_clip: 0.5
  # Agent wiring, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: dqn_continuous
  obs_last_action: false
  share_agent_params: true

facmac:
  # Batch and replay sizes.
  batch_size: 32
  warmup_replay_size: 32
  max_replay_size: 5000
  # Target update: hard mode every target_update_frequency; tau is -1, as in
  # every section of this file that uses a positive frequency (presumably
  # "unused").
  target_update_mode: hard
  target_update_frequency: 200
  tau: -1
  # Mixer model and actor/critic optimisers.
  mixing_embed_dim: 32
  lr_actors: 0.0025
  actor_optimizer_class: adam
  lr_critics: 0.0005
  critic_optimizer_class: adam
  # Scale the actor loss by the number of agents.
  scale_actor_loss: false
  # Scale the critic loss by the number of agents.
  scale_critic_loss: false
  grad_norm_clip: 10
  # Agent wiring, critic layout, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: gru_discrete_ddpg
  centralized_critic: true
  # Include the agent's own last action (one_hot) in the actor observation.
  obs_last_action: true
  # Include the agent's own last action (one_hot) in the critic observation.
  critic_obs_last_action: false
  critic_agent_encoding: false
  share_agent_params: true

kfacmac:
  # NOTE(review): the meaning of k_level is not shown in this file -- confirm
  # against the algorithm code.
  k_level: 2
  # Batch and replay sizes.
  batch_size: 32
  warmup_replay_size: 32
  max_replay_size: 5000
  # Target update: hard mode every target_update_frequency; tau is -1, as in
  # every section of this file that uses a positive frequency (presumably
  # "unused").
  target_update_mode: hard
  target_update_frequency: 200
  tau: -1
  # Mixer model and actor/critic optimisers.
  mixing_embed_dim: 32
  lr_actors: 0.0025
  actor_optimizer_class: adam
  lr_critics: 0.0005
  critic_optimizer_class: adam
  # Scale the actor / critic loss by the number of agents.
  scale_actor_loss: false
  scale_critic_loss: false
  grad_norm_clip: 10
  # Agent wiring, critic layout, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: gru_discrete_ddpg
  centralized_critic: true
  # Include the agent's own last action (one_hot) in the actor observation.
  obs_last_action: true
  # Include the agent's own last action (one_hot) in the critic observation.
  critic_obs_last_action: false
  critic_agent_encoding: false
  share_agent_params: true

facmac_continuous:
  # Batch and replay sizes.
  batch_size: 100
  warmup_replay_size: 10_000
  max_replay_size: 1_000_000
  # Target update: soft mode driven by tau. The frequency is -1, as in every
  # section of this file that sets a positive tau (presumably "unused").
  target_update_mode: soft
  target_update_frequency: -1
  tau: 0.001
  # Mixer model and actor/critic optimisers.
  mixing_embed_dim: 32
  lr_actors: 0.001
  actor_optimizer_class: adam
  lr_critics: 0.001
  critic_optimizer_class: adam
  # Scale the actor / critic loss by the number of agents.
  scale_actor_loss: false
  scale_critic_loss: false
  grad_norm_clip: 0.5
  # Agent wiring, critic layout, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: ddpg
  centralized_critic: true
  # Include the agent's own last action in the actor observation.
  obs_last_action: false
  # Include the agent's own last action in the critic observation.
  critic_obs_last_action: false
  critic_agent_encoding: false
  share_agent_params: true

kfacmac_continuous:
  # NOTE(review): k_level is 1 here but 2 in the kmaddpg and kfacmac
  # sections -- confirm the difference is intentional.
  k_level: 1
  # Batch and replay sizes.
  batch_size: 100
  warmup_replay_size: 10_000
  max_replay_size: 1_000_000
  # Target update: soft mode driven by tau. The frequency is -1, as in every
  # section of this file that sets a positive tau (presumably "unused").
  target_update_mode: soft
  target_update_frequency: -1
  tau: 0.001
  # Mixer model and actor/critic optimisers.
  mixing_embed_dim: 32
  lr_actors: 0.001
  actor_optimizer_class: adam
  lr_critics: 0.001
  critic_optimizer_class: adam
  # Scale the actor / critic loss by the number of agents.
  scale_actor_loss: false
  scale_critic_loss: false
  grad_norm_clip: 0.5
  # Agent wiring, critic layout, observation and parameter sharing.
  # host_agent matches the name of another section of this file.
  # NOTE(review): presumably that section configures the per-agent learner --
  # confirm.
  host_agent: ddpg
  centralized_critic: true
  # Include the agent's own last action in the actor observation.
  obs_last_action: false
  # Include the agent's own last action in the critic observation.
  critic_obs_last_action: false
  critic_agent_encoding: false
  share_agent_params: true

