# Algorithm / training hyperparameters.
# NOTE(review): the meaning of each key is defined by the training script that
# reads this file, not by this file itself; the hedged notes below are
# inferred from key names only and should be confirmed against that script.
#
# NOTE(review): `2e6` has no decimal point and no signed exponent, so YAML 1.1
# resolvers (e.g. PyYAML's default loader) read it as the string "2e6" rather
# than a float. Confirm the loader in use accepts this form.
"TOTAL_TIMESTEPS": 2e6
"NUM_ENVS": 256
"NUM_STEPS": 16
"MEMORY_WINDOW": 0 # extra steps in the rnn horizon from previous episodes
# Network settings (presumably hidden width and layer count -- TODO confirm).
"HIDDEN_SIZE": 256
"NUM_LAYERS": 2
"NORM_INPUTS": False
"NORM_TYPE": "layer_norm"
# Presumably an epsilon-greedy exploration schedule -- TODO confirm how
# EPS_DECAY is interpreted (fraction of training vs. absolute steps).
"EPS_START": 1.0
"EPS_FINISH": 0.1
"EPS_DECAY": 0.1
# Optimisation settings.
"MAX_GRAD_NORM": 1
"NUM_MINIBATCHES": 16
"NUM_EPOCHS": 4
"LR": 0.0005
"LR_LINEAR_DECAY": False
# Presumably the discount factor and the lambda-return coefficient -- TODO confirm.
"GAMMA": 0.9
"LAMBDA": 0.3

# Environment selection
"ENV_NAME": "MPE_simple_spread_v3"
"ENV_KWARGS": {} # empty mapping: no extra environment keyword arguments are set here

# Evaluation settings
"TEST_DURING_TRAINING": True
"TEST_INTERVAL": 0.05 # as a fraction of total updates, i.e. log test results every 5% of the training process
"TEST_NUM_STEPS": 30
"TEST_NUM_ENVS": 512 # number of episodes to average over; can affect performance (NOTE(review): unclear whether this means runtime or reported score -- confirm)

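# Uncomment to override the algorithm name reported in the metrics.
# NOTE(review): the "ff" suffix looks inconsistent with the rnn horizon mentioned on MEMORY_WINDOW above -- confirm the intended name.
#"ALG_NAME": "pqn_vdn_ff"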