# --- COMAD (Continual Offline Multi-Agent Cooperation) specific parameters ---

# Action selection: the epsilon-greedy selector is chosen, but epsilon_start and
# epsilon_finish are both 0.
# NOTE(review): with epsilon fixed at 0 this presumably means no random
# exploration (pure greedy selection) -- confirm in the selector code.
action_selector: "epsilon_greedy"
epsilon_start: 0
epsilon_finish: 0
epsilon_anneal_time: 1

# Runner implementation, selected by name.
runner: "mt_episode"

# Replay buffer capacity.
# NOTE(review): presumably counted in episodes -- confirm against the buffer code.
buffer_size: 5000

# Target-network update setting. The key name allows either a hard-update
# interval or a soft-update coefficient (tau).
# NOTE(review): 0.005 is fractional, so it presumably acts as tau here rather
# than as an episode count -- confirm in the learner.
target_update_interval_or_tau: 0.005

# Learner and critic implementations, selected by name.
# NOTE(review): "pi_logits" presumably means agents emit policy logits rather
# than Q-values -- confirm in the agent/mac code.
agent_output_type: "pi_logits"
learner: "comad_learner"
critic_type: "comad_critic"

# NOTE(review): presumably the beta weight on the CVAE regularisation term --
# confirm in the learner.
cvae_beta: 0.1
weight_decay: 0.001 # in case of divergence

# Embedding sizes and head count.
# NOTE(review): presumably for an entity-attention encoder (head = number of
# attention heads) -- confirm in the agent/mac code.
entity_embed_dim: 64
attn_embed_dim: 8
head: 1

# Multi-agent controller (mac) implementation, selected by name.
mac: "tr_comad_mac"

# NOTE(review): presumably the length of the agent-id encoding and the maximum
# number of agents supported -- confirm in the mac code.
id_length: 4
max_agent: 15

# Hidden and latent dimension sizes.
rnn_hidden_dim: 64
pa_hidden_dim: 64
rho_dim: 16 # acts like different categories of skills
xi_dim: 1

# Mixing network implementation, selected by name, and its embedding size.
mixer: "lmix"
mixing_embed_dim: 32

# Temperatures, noise scale, thresholds, clipping and loss weights.
# Their exact roles are defined by the code that reads these keys and are not
# visible in this file.
alpha_temp: 10
ebm_temp: 0.1
ebm_noise_scale: 0.1 # NOTE hyp 1
n_reuse_heads: 1 # >=1, automatically up-clipped to n_task
delta_m_thres: 1
ratio_clip_eps: 0.1
rho_lambda: 1
ent_lambda: 1
use_latest_head: True

# Name of this algorithm configuration.
name: "comad"

# Training-length settings; stage1_steps (10000) is half of cont_train_steps (20000).
# NOTE(review): how the two stages are scheduled is defined by the training
# code -- confirm there.
cont_train_steps: 20000
stage1_steps: 10000
confidence_thres: 8 # NOTE hyp 2

# Ablation switches; every switch below is False in this configuration.
abla_no_skill: False
abla_no_expand: False
abla_no_delta: False
abla_z: False
force_expand: False

# Evaluation and logging intervals, all set to 250.
# NOTE(review): the unit (environment steps vs. training steps) is defined by
# the consuming code -- confirm there.
test_interval: 250
log_interval: 250
runner_log_interval: 250
learner_log_interval: 250
# The checkpoint interval is left commented out, so this file does not set it.
# save_model_interval: 5000

