# valid for iql, vdn, qmix
# Written as 1.0e+7 (decimal point plus signed exponent) so that YAML 1.1 loaders
# such as PyYAML resolve it to a float; they read the bare form 1e7 as the string "1e7".
"TOTAL_TIMESTEPS": 1.0e+7
"NUM_ENVS": 16
"NUM_STEPS": 128
"BUFFER_SIZE": 5000
"BUFFER_BATCH_SIZE": 32
"HIDDEN_SIZE": 512

"MIXER_INIT_SCALE": 0.001
"EPS_START": 1.0
"EPS_FINISH": 0.05
"EPS_DECAY": 0.1 # presumably a fraction of updates (0.1 = 10%), written like TEST_INTERVAL below; confirm
"MAX_GRAD_NORM": 10
"TARGET_UPDATE_INTERVAL": 10
"TAU": 1.
"NUM_EPOCHS": 8

"LEARNING_STARTS": 10000 # timesteps
"LR_LINEAR_DECAY": False
"GAMMA": 0.99
"REW_SCALE": 10. # scale the reward to the original scale of SMAC


# Optimizers: the AGENT_* and PRE_POLICY_* keys are configured separately.
"AGENT_OPT": "radam"
"AGENT_LR": 0.001

"PRE_POLICY_OPT": "sgd"
"PRE_POLICY_LR": 0.0005
"MOMENTUM": 0.9 # only for sgd; in this file only PRE_POLICY_OPT is set to sgd
# ENV
"ENV_NAME": "HeuristicEnemySMAX"
# Alternative map, currently disabled:
#"MAP_NAME": "3s_vs_5z"
"MAP_NAME": "3s2z"
# Nested mapping of environment options.
# NOTE(review): presumably forwarded to the environment when it is constructed;
# confirm against the training script.
"ENV_KWARGS":
  "see_enemy_actions": True
  "walls_cause_death": True
  "attack_mode": "closest"
  "train_pre": False
  "vertical_line_reward_scale": 0.011
  "relative_horizontal_reward_scale": 0.1

# evaluate
# Settings for test runs performed while training is in progress: on/off switch,
# interval, and the number of steps and environments per test run.
"TEST_DURING_TRAINING": True
"TEST_INTERVAL": 0.05 # as a fraction of updates, i.e. log every 5% of training process
"TEST_NUM_STEPS": 128
"TEST_NUM_ENVS": 128 # number of episodes to average over, can affect performance

# Pre-policy
# NOTE(review): the unit of SWITCH_INTERVAL (updates vs. timesteps) is not stated
# in this file; confirm in the consuming code.
"C_OUT": 32
"TEMPERATURE": 0.5
"SWITCH_INTERVAL": 100

# Hypernetwork
"NUM_EMBEDDINGS": 128
"EMBEDDING_DIM": 4


# NOTE(review): purpose not evident from this file; presumably identifies the
# tuning run these values were taken from. Confirm before changing.
"TUNED_CONFIG_ID": 61