# use the epsilon-greedy action selector for exploration
# NOTE(review): presumably epsilon is annealed from epsilon_start down to
# epsilon_finish over epsilon_anneal_time; the unit of the anneal time and the
# shape of the schedule are not visible in this file -- confirm against the
# action selector code
action_selector: "epsilon_greedy"
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# select the runner named "episode"
# NOTE(review): presumably collects whole episodes per rollout -- confirm
runner: "episode"

# buffer capacity
# NOTE(review): the unit (episodes vs. transitions) is not visible in this
# file -- confirm against the buffer implementation
buffer_size: 5000

# update the target network every `target_update_interval` episodes
target_update_interval: 200

# train with the "coin_qplex_learner" learner on agent outputs of type "q",
# with double_q enabled and the "dmaq" mixer
agent_output_type: "q"
learner: "coin_qplex_learner"
double_q: True
mixer: "dmaq"
# embedding / hypernetwork sizes
# NOTE(review): presumably these size the mixer's networks -- the consumer is
# not visible in this file; confirm against the mixer implementation
mixing_embed_dim: 32
hypernet_embed: 64
adv_hypernet_layers: 3
adv_hypernet_embed: 64

# kernel count plus four switches, all enabled here
# NOTE(review): which component reads these keys is not visible in this
# file -- presumably the "dmaq" mixer; confirm before changing them
num_kernel: 10
is_minus_one: True
weighted_head: True
is_adv_attention: True
is_stop_gradient: True

optimizer: "RMSprop"

# beta1 / beta2 presets noted per environment:
#   aloha:   beta1 0.05    beta2 0.05    (2000000)
#   gather:  beta1 0.01    beta2 0.01
#   sensor:  beta1 0.0005  beta2 0.0005
#   hallway: beta1 0.0005  beta2 0.0005
# the values below match the aloha preset
# NOTE(review): the meaning of "(2000000)" on the aloha line is not stated
# anywhere in this file -- possibly a training-step budget; confirm
beta1: 0.05
beta2: 0.05
beta: 0.5

# exponential-decay settings for the "influence" term; per the key names:
# on/off flag, decay rate, decay cycle and stop value
# NOTE(review): which coefficient this schedule decays, and the unit of the
# cycle, are not visible in this file -- confirm against the learner
influence_exp_decay: True
influence_exp_decay_rate: 0.9
influence_exp_decay_cycle: 100000
influence_exp_decay_stop: 0.0001

# the same four settings for the "curiosity" term; its cycle (50000) is half
# the influence cycle (100000)
curiosity_exp_decay: True
curiosity_exp_decay_rate: 0.9
curiosity_exp_decay_cycle: 50000
curiosity_exp_decay_stop: 0.0001

# predictor settings; per the key names: learning rate and network dimension
# NOTE(review): which network these configure is not visible in this file --
# confirm against the learner
predictor_lr: 0.0005
predict_net_dim: 128

# visible credits: show_k switch, disabled here
# NOTE(review): what show_k displays is not visible in this file -- confirm
show_k: False

# name of this configuration
# NOTE(review): presumably used as the run / experiment label -- confirm
name: "cai_qplex"
