# --- QMIX+CQL specific parameters ("meta_encoder" config: trains the prior role encoder) ---

# Exploration: use the epsilon-greedy action selector.
action_selector: "epsilon_greedy"
# Epsilon schedule. Presumably epsilon is annealed from epsilon_start down to
# epsilon_finish over epsilon_anneal_time; the unit of the anneal time
# (environment steps vs. episodes) is not visible in this file -- TODO confirm
# in the action selector implementation.
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# Component selection. These strings are presumably registry keys resolved by
# the training framework (runner / multi-agent controller / agent network) --
# verify each name against the corresponding registry.
runner: "episode_meta"
mac: "meta_mac"
agent: "mt_updet"
# Replay buffer capacity; the unit (episodes vs. transitions) is not visible
# in this file -- TODO confirm.
buffer_size: 5000

# Target-network update setting. As the key name says, this holds either an
# update interval or a tau. The value 0.005 is fractional, so it cannot be a
# whole number of episodes; presumably it is read as a soft-update coefficient
# (tau) -- TODO confirm how the learner interprets it.
target_update_interval_or_tau: 0.005

# Learner used for training: the prior-role-encoder learner (not a plain
# Q-learner).
# NOTE(review): agent_output_type is "pi_logits" while action_selector in this
# file is "epsilon_greedy" -- confirm this combination is intended.
agent_output_type: "pi_logits"
learner: "prior_role_encoder_learner"
# Encoder selectors. The meaning of these ids and of the -1 value is not
# visible in this file -- presumably -1 is a sentinel (e.g. "none"/"latest");
# TODO confirm against the learner.
encoder_id: 0
role_encoder_id: 0
prior_role_encoder_id: -1

# Hidden layer widths; going by the key names, presumably for the recurrent
# agent network and the critic respectively -- TODO confirm.
rnn_hidden_dim: 128
critic_hidden_dim: 256

# Weighted prior learning is switched off in this config. The weight_min /
# weight_max / weight_alpha values presumably only take effect when it is
# switched on -- TODO confirm.
weighted_prior_learning: False
weight_min: -2
weight_max: 2
weight_alpha: 0.1
# Encoding feature switches.
role_use_task_encoding: True
use_encoding: True
direct_enemy_action_embedding: False
# Embedding widths.
entity_embed_dim: 32
policy_entity_embed_dim: 64
attn_embed_dim: 16
# Attention head counts and depths. Presumably head/depth size one
# transformer and policy_head/policy_depth size the policy transformer --
# TODO confirm which module reads which pair.
# NOTE(review): policy_entity_embed_dim (64) is not divisible by policy_head
# (3); that is only fine if the attention module does not split the embedding
# across heads -- verify.
head: 1
depth: 1
policy_head: 3
policy_depth: 2
# Hypernetwork settings (presumably for the mixing network -- confirm).
hypernet_layers: 2
hypernet_embed: 64
use_transformer_critic: True
# Encoder sizes.
transition_encoding_dim: 16
encoder_hidden_dim: 64
encoding_dim: 16


# Loss selector ("cl" presumably stands for contrastive learning); the set of
# accepted values is not visible in this file -- TODO confirm.
cl_loss: "dml"
# Encoder variant / ablation switches.
prior_role_use_history: True
global_only: False
mlp_encoding: False
average_temporal: False
average_gate: True
use_mg2l: False
use_club: False
wo_global: False
# use_club is False above, so club_loss_weight presumably has no effect in
# this config -- TODO confirm.
club_loss_weight: 1.0
use_decoder_loss: True
# Training-phase switches: of the three, only the prior role encoder is set
# to train in this config.
is_encoder_train: False
is_role_encoder_train: False
is_prior_role_encoder_train: True
# InfoNCE settings (temperature and number of negatives, going by the key
# names).
infonce_temp: 1.0
infonce_neg_num: 32
# Encoder training schedule: training length plus logging, checkpoint and
# visualisation intervals and batch sizes. Interval units are presumably
# episodes -- TODO confirm.
encoder_train_episode: 100000
encoder_learner_log_interval: 10
encoder_log_interval: 10
meta_batch_size: 16
encoder_save_model_interval: 1000
encoder_vis_interval: 500
vis_batch_size: 100
# Optimizer settings.
lr: 0.001
optim_type: "AdamW"
weight_decay: 0.01



# Mixing network selection and its embedding width. "lmix" is presumably a
# registry key -- verify against the mixer registry.
mixer: "lmix"
mixing_embed_dim: 64

# NOTE(review): the purpose of alpha_temp is not evident from this file;
# presumably a temperature-style coefficient -- document it at its point of
# use.
alpha_temp: 10


# Name of this configuration (presumably used to label runs / results --
# TODO confirm).
name: "meta_encoder"

