# --- MAPPO specific parameters ---

# Action selection settings. The code that consumes these keys is not part of
# this file, so the notes below are assumptions to verify.
# NOTE(review): "soft_policies" presumably selects an action selector that
# samples from the policy's output distribution - confirm against the
# project's action-selector registry.
action_selector: "soft_policies"
# NOTE(review): the key name suggests unavailable actions are masked out
# before the softmax is applied - confirm in the controller code.
mask_before_softmax: True

# Buffer and batch sizes. All three are set to the same value (5) here.
# NOTE(review): presumably buffer_size is the number of stored episodes,
# batch_size_run the number of environments run in parallel (runner is
# "parallel" below), and batch_size the number of episodes per training
# batch - confirm, and confirm whether the three are required to match.

buffer_size: 5
batch_size_run: 5
batch_size: 5

# Optimisation hyperparameters, training length and experiment name.
# NOTE(review): presumably the optimiser learning rate - confirm.
lr: 0.0003
# NOTE(review): the comment below refers to a "default rnn agent", but this
# file sets agent: "rnn_egg" and use_rnn: False - confirm that hidden_dim is
# still read by the agent selected here.
hidden_dim: 256 # Size of hidden state for default rnn agent
# NOTE(review): presumably the weight of the entropy bonus in the policy
# loss - confirm.
entropy_coef: 0.12
# NOTE(review): presumably the number of optimisation passes over each
# collected batch - confirm.
epochs: 10
# NOTE(review): presumably the reward discount factor - confirm.
gamma: 0.8
# NOTE(review): presumably the total number of environment steps to train
# for (50 million) - confirm the unit.
t_max: 50000000
name: "mappo" # Name for the experiment. Used for sacred

# Model Specification
# Each value below is a string identifier. NOTE(review): presumably each one
# is looked up in a registry elsewhere in the project - confirm that every
# name is registered.
mac: "basic_mac" # Basic shared controller
agent: "rnn_egg"
critic_type: "cv_critic"
learner: "ppo_learner"
runner: "parallel"
# NOTE(review): the agent name above starts with "rnn" while use_rnn is
# False - confirm this combination is intentional.
use_rnn: False

# Observation-augmentation switches; all three are disabled here.
# NOTE(review): presumably these control whether the agent id, the previous
# action and the individual observation are appended to a network input -
# confirm which network each flag affects.
obs_agent_id: False
obs_last_action: False
obs_individual_obs: False
# NOTE(review): "pi_logits" presumably means the agent emits policy logits
# rather than Q-values - confirm.
agent_output_type: "pi_logits"
# Returns are left unstandardised while rewards are standardised.
# NOTE(review): confirm where in the learner each normalisation is applied.
standardise_returns: False
standardise_rewards: True
q_nstep: 5 # 1 corresponds to normal r + gammaV
# NOTE(review): presumably the PPO clipping range for the probability
# ratio - confirm.
eps_clip: 0.2
use_tensorboard: True
# Update the target network every {} training steps.
# NOTE(review): the key name ends in "_or_tau", so the value may instead be
# read as a soft-update coefficient under some condition - confirm how 2500
# is interpreted by the learner.
target_update_interval_or_tau: 2500


