# --- population specific parameters ---
# --- This config file uses the parallel population runner (runner: "parallel_runner_population"), with batch_size_run environments at the same time ---

# Global parameters, independent of agent type.
# Per-agent-type settings live in the agent_type_1 / agent_type_2 sections below.
runner_function: "population"
runner: "parallel_runner_population"
batch_size_run: 8 # Number of environments run at the same time
# Run name. NOTE(review): presumably used to label logs and saved models — confirm.
name: "population_qvmix_vs_heuristic"
use_cuda: True

matchmaking: "duo_fair"

# NOTE(review): save_model / save_model_interval are also set per agent type
# below (as single-element lists); which value takes effect is not visible
# from this file — confirm against the loader.
save_model_interval: 20000
save_model: True # Save the models to disk

use_tensorboard: True

# Number of agent types; this file defines agent_type_1 and agent_type_2.
n_agent_type: 2

# Logging intervals. NOTE(review): presumably in environment timesteps, as the
# per-agent learner_log_interval comment states — confirm.
log_interval: 20000
runner_log_interval: 20000
learner_log_interval: 20000

t_max: 20050000
# NOTE(review): -1 presumably disables periodic test runs — confirm against the runner.
test_interval: -1
test_nepisode: 8
test_greedy: False

# First agent type: the learning agent (qv_learner with qmix / vmix mixers).
agent_type_1:
  number: 1 # Number of agents of this type in the population
  mac: "basic_mac"
  # Exploration settings. NOTE(review): presumably epsilon is annealed from
  # epsilon_start to epsilon_finish over epsilon_anneal_time timesteps — confirm.
  action_selector: "epsilon_greedy"
  epsilon_start: 1.0
  epsilon_finish: 0.05
  epsilon_anneal_time: 2000000

  gamma: 0.99

  buffer_size: 5000

  target_update_interval: 200

  agent_output_type: "q"
  learner: "qv_learner"
  mixer: "qmix"
  vmixer: "vmix"
  mixing_embed_dim: 32

  # The global section sets learner_log_interval to 20000.
  # NOTE(review): presumably this per-agent value takes precedence — confirm.
  learner_log_interval: 2000 # Log training stats every 2000 timesteps


  # --- RL hyperparameters ---
  batch_size: 32 # Number of episodes to train on
  lr: 0.0005 # Learning rate for agents
  critic_lr: 0.0005 # Learning rate for critics
  optim_alpha: 0.99 # RMSProp alpha
  optim_eps: 0.00001 # RMSProp epsilon
  grad_norm_clip: 10 # Reduce magnitude of gradients above this L2 norm

  # --- Agent parameters ---
  agent: "rnn" # Default rnn agent
  rnn_hidden_dim: 64 # Size of hidden state for default rnn agent
  obs_agent_id: True # Include the agent's one_hot id in the observation
  obs_last_action: True # Include the agent's last action (one_hot) in the observation

  # --- Logging options ---
  # NOTE(review): these values are single-element lists, unlike the scalar
  # save_model / save_model_interval in the global section; presumably one
  # entry per agent of this type (number: 1) — confirm.
  save_model: [True] # Save the models to disk
  save_model_interval: [20000] # Save models after this many timesteps
  checkpoint_path: [""] # Load a checkpoint from this path
  load_step: [0] # Load model trained on this many timesteps (0 if choose max possible)


# Second agent type.
# NOTE(review): judging by the run name (population_qvmix_vs_heuristic) and the
# do_not_* values, this is presumably the non-learning heuristic side — confirm.
agent_type_2:
  number: 1 # Number of agents of this type in the population
  mac: "do_not_mac"
  learner: "do_not_learn"

  # --- Logging options ---
  # Single-element lists, same layout as the logging options of agent_type_1.
  save_model: [False] # Do not save models for this agent type
  save_model_interval: [20000] # Save models after this many timesteps
  checkpoint_path: [""] # Load a checkpoint from this path
  load_step: [0] # Load model trained on this many timesteps (0 if choose max possible)