# --- QMIX specific parameters ---

# env
# Run length, evaluation, and logging cadence.
# NOTE(review): units are not stated in this file; presumably environment
# timesteps — confirm against the code that consumes these keys.

# Presumably the total training budget before the run stops.
t_max: 6050000
# Presumably selects greedy (non-exploratory) action selection during tests.
test_greedy: True
# Presumably the number of episodes run per evaluation round.
test_nepisode: 32
# Evaluation and logging intervals; all four are set to the same value here.
test_interval: 10000
log_interval: 10000
runner_log_interval: 10000
learner_log_interval: 10000

# qmix
# Exploration, rollout, and replay settings.
action_selector: "epsilon_greedy"
runner: "parallel"
# NOTE(review): presumably the number of environments run in parallel by
# the "parallel" runner — confirm.
batch_size_run: 8
buffer_size: 5000
batch_size: 128
# Epsilon schedule endpoints for the "epsilon_greedy" selector.
# NOTE(review): presumably annealed from start to finish over
# epsilon_anneal_time steps — confirm the schedule shape and units.
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 100000
# NOTE(review): presumably append the agent id / previous action to each
# agent's observation — confirm.
obs_agent_id: True
obs_last_action: True
standardise_rewards: False
# controller ("mac"), agent network, and agent output type
mac: "pd_mac"
agent: "n_rnn"
agent_output_type: "q"
# update the target network every `target_update_interval` episodes
target_update_interval: 200
# learner used for training, and the mixing network
learner: "pd_nq_learner"
mixer: "qmix"
# network
mixing_embed_dim: 32
hypernet_embed: 64
pd_hidden_dim: 64
# Learning rates; `lr` and `pd_lr` are set to the same value here.
# NOTE(review): presumably `pd_lr` applies to the "pd" components — confirm.
lr: 0.001
pd_lr: 0.001
td_lambda: 0.6
optimizer: "adam"
q_lambda: False
use_layer_norm: False
use_orthogonal: False
# NOTE(review): presumably the init gain used when use_orthogonal is
# enabled — confirm.
gain: 0.01
# Priority experience replay (disabled here: use_per is False)
use_per: False
per_alpha: 0.6
per_beta: 0.4
return_priority: False

# delay
# Delay-related settings.
# NOTE(review): the meaning of delay_type "uf" and the units of
# delay_value / delay_scope are not stated in this file — TODO document
# them from the consuming code.
delay_type: "uf"
delay_value: 3
delay_scope: 3
n_expand_action: 6
# Feature switches; the first three are off and the last two are on here.
delay_aware: False
use_history: False
one_hot_delay: False
res_classification: True
mask_prediction: True

# predictor
# NOTE(review): predictor_mode "none" presumably disables the predictor,
# leaving the remaining predictor keys unused — confirm.
predictor_mode: "none"
predictor_model: "tf"
# Empty string: no transformer structure is specified here.
transformer_structure: ""
num_minibatch_predictor: 2

# train
# Each group below has start/end values and a start/end time window.
# NOTE(review): presumably the value moves from *_start_value to
# *_end_value between *_start_time and *_end_time — confirm. In this
# file every start and end value is 0.0.
cheating_start_value: 0.0
cheating_end_value: 0.0
cheating_start_time: 1000000
cheating_end_time: 4000000
teacher_forcing_start_value: 0.0
teacher_forcing_end_value: 0.0
teacher_forcing_start_time: 1000000
teacher_forcing_end_time: 4000000
# teacher student
# Empty string: no teacher model directory is specified here.
teacher_model_dir: ""
teacher_mac: "pd_mac"
# NOTE(review): presumably weights on the mac / mixer teacher-student
# terms — confirm.
alpha_mac: 1.0
alpha_mixer: 0.05
teacher_student_start_value: 0.0
teacher_student_end_value: 0.0
teacher_student_start_time: 1000000
teacher_student_end_time: 4000000

# Identifier for this configuration.
# NOTE(review): presumably used to label runs and results — confirm.
name: "pd_qmix"