# --- QMIX specific parameters ---

# Action selection: use the epsilon-greedy action selector.
action_selector: "epsilon_greedy"
# Exploration schedule. Judging by the key names, epsilon moves from
# epsilon_start to epsilon_finish over epsilon_anneal_time; the time unit
# (environment steps vs. episodes) is not visible here -- TODO confirm.
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# Rollout runner to use. The trailing note lists other runner names,
# presumably the alternatives available in this project -- confirm.
runner: "parallel" # other values: episode_ad, parallel_ad
# presumably the number of environments the runner steps side by side --
# confirm against the runner code. The trailing note lists other values.
batch_size_run: 12 # other values: 1, 16

# Replay buffer capacity (unit is presumably episodes -- TODO confirm).
buffer_size: 5000

# Update the target network every `target_update_interval` episodes.
target_update_interval: 200

# Train with the Q-learner. agent_output_type "q" presumably means the
# agents output Q-values -- confirm against the agent/controller code.
agent_output_type: "q"
learner: "q_learner"
# NOTE(review): presumably enables double Q-learning targets -- confirm.
double_q: True
# Mixing network selection and sizes. The hypernet_* keys presumably size
# the hypernetworks that generate the mixer weights -- confirm in the mixer.
mixer: "qmix"
mixing_embed_dim: 32
hypernet_layers: 2
hypernet_embed: 64

# Random perturbation toggle and its range. What is perturbed and the unit
# of the range are not visible in this file -- TODO confirm.
random_perturbation: False
perturbation_range: 10.0
# Alternative checkpoint (sc2/3m run), currently disabled:
# checkpoint_path: 'results/run_online/sc2/3m/qmix/2022-06-22_09-28-19_qmix_sc2_3m_save_model/models'
# Model checkpoint directory. This is a relative path to one specific
# timestamped run (presumably resolved against the working directory --
# confirm); change it when loading a different run.
checkpoint_path: 'results/run_online/sc2/5m_vs_6m/qmix_save_model/qmix_save_model_2022-08-25_17-12-47/models'
# NOTE(review): 0 presumably selects a default (e.g. latest) saved step --
# confirm against the loading code.
load_step: 0
# Logging backends: TensorBoard disabled, Weights & Biases enabled.
use_tensorboard: False
use_wandb: True
# NOTE(review): the meaning of concurrent_train is not visible here --
# document it once confirmed.
concurrent_train: True

# Attacker/defender settings. The original author flagged these keys as
# possibly unnecessary for this config -- TODO confirm which are actually read.
attacker: "hyar"
defender: "qmix"
discrete_action_dim: 8
parameter_action_dim: 64
defense_module_hidden_size: 64
# presumably loads a previously trained defense module from the path and
# step below -- confirm. The path is relative and tied to one timestamped run.
defense_loaded: True
defense_load_path: "results/run_online/sc2/5m_vs_6m/qmix_defense_pert20_defense/qmix_defense_pert20_defense_2022-09-13_14-41-14/models"
defense_load_step: 0
# Encoder sizes. NOTE(review): enc_emb (32) is not divisible by enc_heads (3);
# if the encoder splits the embedding across heads this will fail -- confirm.
enc_emb: 32
enc_heads: 3
enc_depth: 16

# Name of this configuration. The result paths in this file embed similar
# labels, so it presumably tags the run's output directory -- confirm.
name: "qmix_defense"
