# --- QMIX specific parameters ---

# use epsilon greedy action selector
# NOTE(review): the keys below suggest epsilon moves from epsilon_start to
# epsilon_finish over epsilon_anneal_time; the schedule shape and the unit of
# epsilon_anneal_time are not stated in this file — confirm against the
# action selector implementation.
action_selector: "epsilon_greedy"
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# Trailing comments list what look like alternative values for each key.
# NOTE(review): confirm they are still valid options before switching.
runner: "parallel"  # episode_ad, parallel_ad
batch_size_run: 12  # 1, 16

# NOTE(review): presumably the replay buffer capacity; the unit (episodes vs.
# transitions) is not stated in this file — confirm against the buffer code.
buffer_size: 5000

# update the target network every `target_update_interval` episodes
target_update_interval: 200

# use the Q_Learner to train
agent_output_type: "q"
learner: "q_learner"
double_q: true
# mixer selection and sizing
# NOTE(review): mixing_embed_dim and the hypernet_* keys presumably size the
# selected mixer's networks — confirm against the mixer implementation.
mixer: "qmix"
mixing_embed_dim: 32
hypernet_layers: 2
hypernet_embed: 64

# random perturbation toggle and magnitude
# NOTE(review): what perturbation_range is measured in, and what is being
# perturbed, is not stated in this file — confirm against the consumer.
random_perturbation: true
perturbation_range: 10.0
# saved-model directory to load; the commented-out line is an alternative
# path from an earlier 3m run, kept for reference
# NOTE(review): these paths are relative and run-specific — verify the
# directory exists in the current working tree before launching.
# checkpoint_path: 'results/run_online/sc2/3m/qmix/2022-06-22_09-28-19_qmix_sc2_3m_save_model/models'
checkpoint_path: 'results/run_online/sc2/5m_vs_6m/qmix_save_model/qmix_save_model_2022-08-25_17-12-47/models'
# NOTE(review): the meaning of load_step 0 (exact step vs. "latest") is not
# stated in this file — confirm against the checkpoint loading code.
load_step: 0
# logging toggles
use_tensorboard: false
use_wandb: true
# NOTE(review): semantics of concurrent_train are not stated here — confirm.
concurrent_train: true

# may be unnecessary
# NOTE(review): the remark above indicates uncertainty about whether the keys
# in this stanza are still read — confirm against the consumer before
# removing any of them.
attacker: "hyar"
defender: "qmix"
discrete_action_dim: 8
parameter_action_dim: 64

# NOTE(review): presumably the identifier used to label runs of this
# configuration — confirm against the consumer.
name: "qmix_random"
