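# --- QMIX+CQL specific parameters ---
# NOTE(review): this file selects mixer "lmix", learner "encoder_learner" and
# name "csro_meta_encoder" — confirm that the "QMIX+CQL" label is still accurate.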

# Exploration: use the epsilon-greedy action selector.
# The epsilon_* keys give a start value, a finish value and an anneal time;
# presumably epsilon moves from start to finish over `epsilon_anneal_time`
# steps — TODO confirm the schedule shape and the time unit in the selector.
action_selector: "epsilon_greedy"
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# Components selected by name. The strings are presumably registry keys that
# the training code resolves — verify each one against its registry.
runner: "episode_meta"
mac: "meta_mac"
agent: "mt_updet"
# Presumably the replay buffer capacity; the unit (episodes vs. transitions)
# is not visible from this file — TODO confirm.
buffer_size: 5000

# Target-network update setting. As the key name says, the value is either an
# update interval or a soft-update coefficient (tau).
# NOTE(review): 0.005 is fractional, so it reads as a tau rather than an
# interval counted in episodes — confirm how the learner treats values < 1.
target_update_interval_or_tau: 0.005

# Train with the learner registered as "encoder_learner".
# NOTE(review): agent_output_type is "pi_logits" while action_selector is
# "epsilon_greedy" — confirm this combination is intended.
agent_output_type: "pi_logits"
learner: "encoder_learner"

# Hidden-layer widths. Judging by the key names these size the agent's
# recurrent network and the critic — TODO confirm where each is consumed.
rnn_hidden_dim: 128
critic_hidden_dim: 256

# Identifiers for the encoder, role encoder and prior role encoder.
# NOTE(review): -1 looks like a "none / do not load" sentinel — confirm
# against the code that reads these ids.
encoder_id: -1
role_encoder_id: -1
prior_role_encoder_id: -1

# Model architecture options.
# NOTE(review): the grouping comments below are inferred from key names only;
# confirm each key against the code that reads it.
pretrain_id: -1
use_encoding: True
direct_enemy_action_embedding: False
# Embedding widths; the policy_* variant is configured separately.
entity_embed_dim: 32
policy_entity_embed_dim: 64
attn_embed_dim: 16
# Presumably attention head count and layer depth, with a separate pair for
# the policy network — TODO confirm.
head: 1
depth: 1
policy_head: 3
policy_depth: 2
# Hypernetwork settings.
hypernet_layers: 2
hypernet_embed: 64
use_transformer_critic: True
# Encoder sizes.
transition_encoding_dim: 16
encoder_hidden_dim: 64
encoding_dim: 16
num_embeddings: 5

# Encoder training options.
# Loss selection and feature switches.
cl_loss: "dml"
global_only: False
mlp_encoding: False
average_temporal: False
average_gate: True
use_mg2l: False
use_club: True
wo_global: True
# Weight on the CLUB loss term; presumably only relevant while use_club is
# True — TODO confirm.
club_loss_weight: 1.0
use_decoder_loss: False
# Training switches: only is_encoder_train is enabled; the role-encoder and
# prior-role-encoder switches are off, as is use_role_encoder.
is_encoder_train: True
is_role_encoder_train: False
is_prior_role_encoder_train: False
use_role_encoder: False
# InfoNCE settings; per the key names, a temperature and a negative count.
infonce_temp: 1.0
infonce_neg_num: 32
# Logging, checkpointing and visualisation cadence and batch sizes.
# Units of the *_interval values are not visible here — TODO confirm.
encoder_learner_log_interval: 10
encoder_log_interval: 10
meta_batch_size: 16
encoder_save_model_interval: 2000
encoder_vis_interval: 1000
vis_batch_size: 100
# Optimizer settings.
lr: 0.001
optim_type: "AdamW"
weight_decay: 0.01



# Mixer selected by name ("lmix"), plus its embedding width.
# Presumably a registry key resolved by the learner — TODO confirm.
mixer: "lmix"
mixing_embed_dim: 64

# NOTE(review): the purpose of alpha_temp is not evident from this file;
# document where it is consumed and what the value scales.
alpha_temp: 10


# Name of this configuration; presumably used to label runs and results —
# TODO confirm.
name: "csro_meta_encoder"

