# Training hyperparameters, valid for iql, vdn, qmix
# NOTE: written as 2.0e+6 rather than 2e6. YAML 1.1 loaders (e.g. PyYAML) only
# resolve a scalar to a float when it has a decimal point and a signed
# exponent; a bare 2e6 would be loaded as the string "2e6".
"TOTAL_TIMESTEPS": 2.0e+6
"NUM_ENVS": 8
"NUM_STEPS": 26
"BUFFER_SIZE": 5000
"BUFFER_BATCH_SIZE": 32
"HIDDEN_SIZE": 64
"MIXER_EMBEDDING_DIM": 32 # just for qmix
"MIXER_HYPERNET_HIDDEN_DIM": 128 # just for qmix
"MIXER_INIT_SCALE": 0.001 # just for qmix
"EPS_START": 1.0
"EPS_FINISH": 0.05
"EPS_DECAY": 0.1 # share of updates, written as a fraction (0.1 presumably means 10% - confirm)
"MAX_GRAD_NORM": 25
"TARGET_UPDATE_INTERVAL": 200
"TAU": 1.0
"NUM_EPOCHS": 1
"LR": 0.005
"LEARNING_STARTS": 10000 # timesteps
"LR_LINEAR_DECAY": True
"GAMMA": 0.9

# Environment selection
"ENV_NAME": "MPE_simple_spread_v3" # environment identifier string
"ENV_KWARGS": {} # empty mapping: no extra options set here (presumably forwarded to the environment constructor - confirm)

# Evaluation and logging settings
"TEST_DURING_TRAINING": True
"TEST_INTERVAL": 0.05 # as a fraction of updates, i.e. log every 5% of the training process
"TEST_NUM_STEPS": 30 # presumably the number of steps per evaluation rollout - confirm against the training script
"TEST_NUM_ENVS": 512 # number of episodes to average over; can affect performance

"LOG_AGENTS_SEPARATELY": True # iql will log independent agents separately