# --- RMC specific parameters ---

# Action selection: the "gumbel" selector is configured here
# (not epsilon-greedy). Both epsilon endpoints are 0.0.
# NOTE(review): with start == finish, the anneal time presumably has no
# effect on the epsilon value - confirm against the selector code.
action_selector: "gumbel"
epsilon_start: 0.0
epsilon_finish: 0.0
epsilon_anneal_time: 100000
# NOTE(review): by its name, applies the action mask before the softmax
# - confirm against the consumer.
mask_before_softmax: true

# NOTE(review): presumably selects the multi-agent controller
# implementation by registry name - confirm.
mac: "lica_mac"

# Rollout runner and batch/buffer sizes.
# NOTE(review): units (episodes vs. transitions) are not stated in this
# file - confirm against the runner/buffer code.
runner: "parallel"
batch_size_run: 8
buffer_size: 128
batch_size: 32

# Sizes for the separate "off" buffer, plus the run mode.
# NOTE(review): presumably an off-policy replay buffer used by the
# "on_off" run loop - confirm.
off_buffer_size: 5000
off_batch_size: 64
run: "on_off"

# NOTE(review): presumably the total training budget (likely counted in
# environment timesteps) - confirm.
t_max: 10050000

# Update the target network every `target_update_interval` episodes.
target_update_interval: 200

# Agents output policy logits ("pi_logits").
agent_output_type: "pi_logits"

# Train with "fmac_learner" and the "qmix" mixer.
# NOTE(review): the *_dim / *_embed values below are presumably hidden
# layer widths of the mixer, its hypernetwork and the critic - confirm.
learner: "fmac_learner"
mixer: "qmix"
mixing_embed_dim: 32
hypernet_embed: 64
critic_hidden_dim: 128

# Optimisation settings.
lr: 0.001  # learning rate for agents
# NOTE(review): presumably the critic's learning rate - confirm.
critic_lr: 0.001
# NOTE(review): presumably the lambda of the TD(lambda) target
# (cf. "td_lambda" in `name`) - confirm.
lambd: 0.6
entropy_coef: 0.03
optimizer: "adam"
abs: true  # monotonicity condition

# Label for this configuration. It echoes other settings in this file:
# "env=8" matches batch_size_run: 8 and "adam" matches the optimizer.
# NOTE(review): presumably used as the experiment/run name for logging
# - confirm; keep it in sync if those settings change.
name: "riit_env=8_adam_td_lambda"