# Flat mapping of training hyperparameters. Keys are kept in alphabetical
# order. The script that consumes this file is not visible from here, so the
# notes below are review notes, not a specification of what each key does.

AGENT_INIT_SCALE: 1.0
AGENT_LR: 0.005
AGENT_OPT: sgd
BUFFER_BATCH_SIZE: 128
# NOTE(review): BUFFER_SIZE (5000) is smaller than LEARNING_STARTS (10000)
# below. The units of the two keys are not visible here -- confirm that this
# combination is intended.
BUFFER_SIZE: 5000
C_OUT: 8
# Empty mapping: no extra environment keyword arguments are supplied.
ENV_KWARGS: {}
ENV_NAME: MPE_simple_spread_v3
# presumably EPS_START/EPS_FINISH/EPS_DECAY describe an exploration-rate
# schedule; the unit of EPS_DECAY is not visible here -- TODO confirm.
EPS_DECAY: 0.1
EPS_FINISH: 0.05
EPS_START: 1.0
GAMMA: 0.9
HIDDEN_SIZE: 512
LEARNING_STARTS: 10000
LOG_AGENTS_SEPARATELY: false
LR_LINEAR_DECAY: true
MAX_GRAD_NORM: 25
MIXER_EMBEDDING_DIM: 32
MIXER_HYPERNET_HIDDEN_DIM: 128
MOMENTUM: 0.9
NUM_ENVS: 8
NUM_EPOCHS: 5
NUM_STEPS: 26
PRE_POLICY_LR: 0.0005
PRE_POLICY_OPT: radam
# SWITCH_INTERVAL and TARGET_UPDATE_INTERVAL currently hold the same value.
SWITCH_INTERVAL: 200
TARGET_UPDATE_INTERVAL: 200
TAU: 1.0
TEST_DURING_TRAINING: true
# NOTE(review): a fractional value here suggests a proportion rather than a
# step count -- confirm against the training script.
TEST_INTERVAL: 0.02
TEST_NUM_ENVS: 128
TEST_NUM_STEPS: 30
# Written with an explicit decimal point and a signed exponent so that every
# loader yields a float. The bare form "8e7" is read as a *string* by YAML 1.1
# resolvers (e.g. PyYAML's default loaders) and as a float only by YAML 1.2
# core-schema loaders.
TOTAL_TIMESTEPS: 8.0e+7