"TOTAL_TIMESTEPS": 5e6
"NUM_ENVS": 32
"NUM_STEPS": 1
"BUFFER_SIZE": 1e5
"BUFFER_BATCH_SIZE": 128
"HIDDEN_SIZE": 64
"EPS_START": 1.0
"EPS_FINISH": 0.05
"EPS_DECAY": 0.1
"LEARNING_STARTS": 1e3
"MAX_GRAD_NORM": 1
"TARGET_UPDATE_INTERVAL": 10
"NUM_EPOCHS": 4
"LR": 0.00007
"LR_LINEAR_DECAY": True
"GAMMA": 0.99
"TAU": 1.
"LOSS_TYPE": "vdn"

# env
"ENV_NAME": "overcooked"
# Nested mapping of environment options.
# NOTE(review): presumably forwarded as keyword arguments to the environment
# constructor -- confirm against the consumer.
"ENV_KWARGS":
  "layout": "cramped_room"
# Written as 2.5e+6 (explicit sign on the exponent) so that YAML 1.1 resolvers
# such as PyYAML load it as a float rather than a string.
"REW_SHAPING_HORIZON": 2.5e+6

# evaluate
"TEST_DURING_TRAINING": true
# Expressed as a fraction of updates, i.e. log every 5% of the training process.
"TEST_INTERVAL": 0.05
"TEST_NUM_STEPS": 400
# Number of episodes to average over; can affect performance.
"TEST_NUM_ENVS": 512