# Exploration: use the epsilon-greedy action selector.
action_selector: "epsilon_greedy"
# Epsilon schedule: start value, finish value, and anneal time.
# NOTE(review): presumably epsilon is annealed from epsilon_start down to
# epsilon_finish over epsilon_anneal_time steps; the schedule shape and the
# unit of epsilon_anneal_time are defined by the consuming code - confirm.
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# Name of the runner used to collect experience.
# NOTE(review): "episode" presumably selects a runner that rolls out whole
# episodes - confirm against the runner registry.
runner: "episode"

# Size of the buffer.
# NOTE(review): presumably the replay buffer capacity, counted in episodes
# to match the "episode" runner - confirm.
buffer_size: 5000

# Update the target network every `target_update_interval` episodes.
target_update_interval: 200

# Train with the "coin_q_learner" learner; the agent output type is "q".
# NOTE(review): "q" presumably means agents output per-action Q-values -
# confirm against the agent/controller code.
agent_output_type: "q"
learner: "coin_q_learner"
# NOTE(review): presumably enables double Q-learning targets - confirm in
# the learner.
double_q: True
# Mixer selection and its size settings.
# NOTE(review): mixing_embed_dim, hypernet_layers and hypernet_embed look
# like the mixing-network / hypernetwork dimensions of the "qmix" mixer -
# confirm against the mixer implementation.
mixer: "qmix"
mixing_embed_dim: 32
hypernet_layers: 2
hypernet_embed: 64

# beta1 / beta2 values noted per environment:
#   aloha:   beta1 0.05,   beta2 0.05   (2000000)
#   gather:  beta1 0.01,   beta2 0.01
#   sensor:  beta1 0.0005, beta2 0.0005
#   hallway: beta1 0.0005, beta2 0.0005
# NOTE(review): the "(2000000)" on the aloha entry is not explained in this
# file - presumably a training-step budget; confirm.
# The values set below match the sensor / hallway entries.
beta1: 0.0005
beta2: 0.0005
# NOTE(review): presumably the floor at which the exponential decay of the
# influence / curiosity terms stops - confirm in the learner. The matching
# enable flag, rate and cycle are the influence_exp_decay* and
# curiosity_exp_decay* keys elsewhere in this file.
influence_exp_decay_stop: 0.0001
curiosity_exp_decay_stop: 0.0001


# NOTE(review): `beta` is a separate key from beta1 / beta2 set earlier in
# this file; its role is not documented here - confirm against the learner.
beta: 0.5
# Exponential decay settings for the influence term: enable flag, rate,
# and cycle length.
# NOTE(review): presumably the term's weight is multiplied by
# influence_exp_decay_rate once every influence_exp_decay_cycle steps until
# it reaches influence_exp_decay_stop - confirm in the learner.
influence_exp_decay: True
influence_exp_decay_rate: 0.9
influence_exp_decay_cycle: 100000

# Exponential decay settings for the curiosity term: enable flag, rate,
# and cycle length. The cycle here (50000) is half the influence cycle
# (100000) configured in this file.
# NOTE(review): presumably the term's weight is multiplied by
# curiosity_exp_decay_rate once every curiosity_exp_decay_cycle steps until
# it reaches curiosity_exp_decay_stop - confirm in the learner.
curiosity_exp_decay: True
curiosity_exp_decay_rate: 0.9
curiosity_exp_decay_cycle: 50000

# Predictor network settings.
# NOTE(review): predictor_lr is presumably the learning rate of the
# predictor's own optimizer and predict_net_dim its hidden-layer width -
# confirm against the predictor implementation.
predictor_lr: 0.0005
predict_net_dim: 128

# Optimizer to use, selected by name.
optimizer: 'RMSprop'
# Name of this configuration.
# NOTE(review): presumably used to label runs / results - confirm.
name: "caiq"
