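# # --- MAPPO specific parameters ---
# NOTE: lines 3-34 are a commented-out copy of the active settings further
# down in this file; the only value that differs is t_max (400500 here).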

# action_selector: "soft_policies"
# mask_before_softmax: True

# runner: "parallel"

# buffer_size: 10
# batch_size_run: 10
# batch_size: 10

# # update the target network every {} training steps
# target_update_interval_or_tau: 0.01

# lr: 0.0003
# hidden_dim: 128

# obs_agent_id: True
# obs_last_action: False
# obs_individual_obs: False

# agent_output_type: "pi_logits"
# learner: "ppo_learner"
# entropy_coef: 0.001
# use_rnn: True
# standardise_returns: False
# standardise_rewards: True
# q_nstep: 5 # 1 corresponds to normal r + gammaV
# critic_type: "cv_critic"
# epochs: 4
# eps_clip: 0.2
# name: "mappo"

# t_max: 400500

# --- MAPPO specific parameters (active settings) ---
# NOTE(review): the code that consumes these keys is not part of this file;
# the descriptions below are inferred from the key names and values and
# should be confirmed against the consumer.

# Action selection.
action_selector: "soft_policies"
mask_before_softmax: true

runner: "parallel"

# The buffer and both batch sizes are deliberately kept at the same value.
buffer_size: 10
batch_size_run: 10
batch_size: 10

# Target network update setting. The key accepts either an interval or a tau;
# NOTE(review): a fractional value such as 0.01 presumably selects tau (soft
# update) rather than "every N training steps" - confirm against the learner.
target_update_interval_or_tau: 0.01

# Presumably the optimiser learning rate and network hidden size - confirm.
lr: 0.0003
hidden_dim: 128

# Observation-related switches.
obs_agent_id: true
obs_last_action: false
obs_individual_obs: false

# Agent, learner and critic selection plus PPO-related settings.
agent_output_type: "pi_logits"
learner: "ppo_learner"
entropy_coef: 0.001
use_rnn: true
standardise_returns: false
standardise_rewards: true
q_nstep: 5  # 1 corresponds to normal r + gammaV
critic_type: "cv_critic"
epochs: 4
eps_clip: 0.2
name: "mappo"

# Presumably the total number of environment timesteps to run - confirm.
t_max: 2005000