# --- LIPPO (egg) specific parameters ---

# Flat experiment configuration for the run named "lippo_egg".
# Every key lives at the top level; the sections below are only a
# reading aid and carry no meaning for the loader.

# --- Action selection ---
action_selector: 'soft_policies'
mask_before_softmax: true

# --- Rollout collection ---
runner: 'parallel'

# --- "lexico" (presumably lexicographic) multi-objective settings ---
# beta_values has two entries, matching n_objectives: 2.
# NOTE(review): eta_values is a single scalar despite the plural name —
# confirm against the learner that a scalar is what it expects.
beta_values: [2.0, 1.0]
eta_values: 5.0
n_objectives: 2

# --- Buffer and batch sizes (all three kept equal here) ---
buffer_size: 5
batch_size_run: 5
batch_size: 5

# --- Target network ---
# Update the target network every N training steps.
# NOTE(review): the "_or_tau" suffix suggests the value may alternatively
# be read as a soft-update coefficient — confirm against the learner.
target_update_interval_or_tau: 2500

# --- Optimiser and network size ---
lr: 0.003
hidden_dim: 256  # Size of hidden state for default rnn agent

# --- Optional observation extras (all switched off) ---
obs_agent_id: false
obs_last_action: false
obs_individual_obs: false

# --- Component selection ---
mac: 'basic_mac'
agent: 'rnn_egg'
agent_output_type: 'pi_logits'
learner: 'ppo_learner_mo'
critic_type: 'ac_critic_mo'
use_rnn: false

# --- Learner hyper-parameters ---
entropy_coef: 0.12
standardise_returns: false
standardise_rewards: true
epochs: 10
eps_clip: 0.2
q_nstep: 5  # 1 corresponds to normal r + gammaV

# --- Run identity and logging ---
name: 'lippo_egg'
use_tensorboard: false

