# --- MAPPO specific parameters ---
# Flat key/value configuration for the "pd_mappo" setup (see `name` below).
# NOTE(review): section headings below are inferred from key names and
# values only; confirm details against the code that consumes this file.

# Action selection
action_selector: "soft_policies"
mask_before_softmax: true

# Environment runner
runner: "parallel"

# Buffer and batch sizes (all three are set to the same value here)
buffer_size: 10
batch_size_run: 10
batch_size: 10

# Target-network update setting. As the key name indicates, the value is
# either an update interval (in training steps) or a coefficient tau.
# NOTE(review): 0.01 cannot be a whole number of steps, so it is presumably
# interpreted as tau -- confirm in the learner.
target_update_interval_or_tau: 0.01

# Controller selection
mac: "pd_mac"

# Optimisation and network size
lr: 0.001
optimizer: "adam"
hidden_dim: 128

# Observation options
obs_agent_id: true
obs_last_action: false
obs_individual_obs: false

# Learner / policy settings
agent_output_type: "pi_logits"
learner: "pd_ppo_learner"
entropy_coef: 0.001
use_rnn: true
q_nstep: 5  # 1 corresponds to normal r + gammaV
critic_type: "cv_critic"
epochs: 4
eps_clip: 0.2
name: "pd_mappo"

add_value_last_step: true

# Predictor ("pd_*") and checkpoint settings
pd_lr: 0.001
# Both checkpoint paths are empty strings here.
# NOTE(review): presumably an empty path means "do not load" -- confirm.
predictor_checkpoint_path: ""
rl_checkpoint_path: ""
# "cheating" schedule: a start/end value pair and a start/end time pair.
# NOTE(review): the unit of the *_time keys and how the value moves between
# start and end are not visible in this file -- confirm in the consumer.
cheating_start_value: 1.0
cheating_end_value: 0.0
cheating_start_time: 4000000
cheating_end_time: 9000000
# Predictor mode: none, single_step, multi_step
predictor_mode: "none"
pd_hidden_dim: 64
pd_hyper_dim: 128
one_hot_delay: false
delay_aware: true