"TOTAL_TIMESTEPS": 1e10
"NUM_ENVS": 1024
"NUM_STEPS": 1
"HIDDEN_SIZE": 512
"NUM_LAYERS": 3
"NORM_TYPE": "layer_norm"
"NORM_INPUT": False
"DUELING": True
"EPS_START": 0.01
"EPS_FINISH": 0.001
"EPS_DECAY": 0.1
"MAX_GRAD_NORM": 0.5
"NUM_MINIBATCHES": 1
"NUM_EPOCHS": 1
"LR": 3e-4
"LR_LINEAR_DECAY": False
"LAMBDA": 0.
"GAMMA": 0.99

"ENV_NAME": "hanabi"
"ENV_KWARGS": {
  "num_agents": 2,
}

# evaluation settings
"TEST_DURING_TRAINING": True
"TEST_INTERVAL": 0.005 # as a fraction of updates, i.e. test/log every 0.5% of the training process
"TEST_NUM_STEPS": 100
"TEST_NUM_ENVS": 10000 # number of episodes to average over, can affect performance

#"ALG_NAME": "pqn_vdn_ff" # if you want to change the name of the algo in the metrics