# Exploration: use the epsilon-greedy action selector.
action_selector: "epsilon_greedy"
# Epsilon schedule endpoints and anneal length.
# NOTE(review): epsilon is presumably annealed from epsilon_start down to
# epsilon_finish over epsilon_anneal_time steps; the unit (environment
# timesteps vs. episodes) is not visible here - confirm against the selector.
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# Runner used to collect experience.
# NOTE(review): "episode" presumably selects a runner that rolls out whole
# episodes - confirm against the runner registry.
runner: "episode"

# Replay buffer capacity.
# NOTE(review): unit (episodes vs. transitions) is not visible here - confirm.
buffer_size: 5000

# Update the target network every `target_update_interval` episodes.
target_update_interval: 200

# Train Q-value agents ("q" output type) with the "coin_q_learner" learner.
agent_output_type: "q"
learner: "coin_q_learner"
# Boolean flag in canonical lowercase form so every YAML loader reads a bool.
# NOTE(review): presumably toggles double Q-learning targets - confirm in the
# learner.
double_q: true
# Mixing network selection and sizes.
# NOTE(review): mixing_embed_dim and hypernet_* presumably size the QMIX mixer
# and its hypernetworks - confirm against the mixer implementation.
mixer: "qmix"
mixing_embed_dim: 32
hypernet_layers: 2
hypernet_embed: 64

# Weighting coefficients.
# NOTE(review): the exact roles of beta1, beta2 and beta are not visible in
# this file - confirm against the learner before tuning.
beta1: 0.0005
beta2: 0.0005
beta: 0.5
# Exponential decay schedule for the influence term.
# NOTE(review): presumably the coefficient is multiplied by *_decay_rate every
# *_decay_cycle steps and held once it reaches *_decay_stop - confirm.
influence_exp_decay: true
influence_exp_decay_rate: 0.9
influence_exp_decay_cycle: 100000
influence_exp_decay_stop: 0.00001
# Exponential decay schedule for the curiosity term (same key layout as the
# influence schedule, with a shorter cycle and a stop value of 0.0).
curiosity_exp_decay: true
curiosity_exp_decay_rate: 0.9
curiosity_exp_decay_cycle: 50000
curiosity_exp_decay_stop: 0.0
# Predictor network settings.
# NOTE(review): predictor_lr is presumably the predictor's learning rate and
# predict_net_dim its hidden width - confirm against the predictor model.
predictor_lr: 0.0005
predict_net_dim: 128

# Optimizer selection and the name of this configuration.
# NOTE(review): how `name` is consumed (e.g. run/log labels) is not visible
# here - confirm against the experiment launcher.
optimizer: "RMSprop"
name: "caiq"
