# --- IA2C specific parameters ---
name: "ia2c"
# NOTE: this SHOULD be True, but to stay faithful to the EPyMARL authors'
# original configuration it is kept False for now.
on_policy: False
action_selector: "soft_policies"
mask_before_softmax: True

# Runner selection.
runner: "parallel"

# Buffer and batch sizes; all three are set to the same value in this config.
buffer_size: 10
batch_size_run: 10
batch_size: 10
# Target-network update setting. As the key name indicates, the value is either
# an update interval (in training steps) or a soft-update coefficient tau.
# NOTE(review): 0.01 is fractional, so it presumably acts as tau here rather
# than as an interval -- confirm against the learner that reads this key.
target_update_interval_or_tau: 0.01

# Optimisation / network-size settings (presumably the learning rate and the
# hidden-layer width -- confirm against the code that consumes them).
lr: 0.0005
hidden_dim: 64

# Observation flags.
obs_agent_id: True
# Disabled entry, left commented out in this config:
# obs_last_action: False
obs_individual_obs: False

# use IA2C
# Component selection by name. All values are double-quoted for consistency
# within this stanza; each still parses as the same plain string as before.
agent_output_type: "pi_logits"
learner: "actor_critic_learner"
agent: "rnn"
mac: "basic_mac"
critic_type: "ac_critic"

# Loss / return settings.
entropy_coef: 0.01
# Returns are left unstandardised while rewards are standardised.
standardise_returns: False
standardise_rewards: True
# n-step setting for the target; see the inline note for what a value of 1 means.
q_nstep: 5 # 1 corresponds to the normal one-step target r + gamma * V

