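# --- offline_role_prior specific parameters ---
# NOTE(review): this header used to read "QMIX+CQL", which matches neither
# the `learner` nor the `name` configured in this file — confirm the label.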

# Exploration: use the epsilon-greedy action selector.
# Epsilon is configured to start at 1.0 and finish at 0.05.
# NOTE(review): epsilon_anneal_time is presumably measured in environment
# timesteps — confirm against the action selector implementation.
action_selector: "epsilon_greedy"
epsilon_start: 1.0
epsilon_finish: 0.05
epsilon_anneal_time: 50000

# Runner selected by name.
# NOTE(review): "episode" presumably maps to an episode-based rollout
# runner — confirm against the runner registry.
runner: "episode"

# Role-prior, replay-buffer, clustering and codebook hyperparameters.
# NOTE(review): the exact semantics and units of these keys are defined by
# the consuming learner/runner code, which is not part of this file —
# confirm there before changing values.
role_prior_t_max: 20000
# Both buffer capacities are set to the same value (30000).
buffer_size: 30000
offline_max_buffer_size: 30000
# cluster_update_episode_itv and buffer_update_time (below) share the
# value 250; presumably they are meant to stay in step — TODO confirm.
cluster_update_episode_itv: 250
batch_size_clustering: 12000
min_batch_size_clustering: 4000
role_prior_save_model_interval: 1000
buffer_update_time: 250
t_cluster_start: 500
n_cluster: 4
n_min_cluster: 3
n_codes: 256
n_max_code: 100
k_top_seq: 30
latent_dim: 4
vae_hidden_dim: 64
codebook_update_interval: 4
# Coefficients (presumably loss-term weights — confirm against the learner).
ce_coef: 1.0
vq_coef: 0.25
commit_coef: 0.125
coverage_coef: 0.125
num_centroid_sample: 1


# Target-network update setting. Per the key name, the value is either an
# update interval or a tau coefficient.
# NOTE(review): 0.005 looks like a soft-update tau rather than an episode
# interval — confirm how the learner interprets values below 1.
target_update_interval_or_tau: 0.005

# Diffusion flag is disabled for this configuration.
is_diffusion: false
# Train with the "omiga_learner" learner; the agent output type is set to
# "pi_logits".
agent_output_type: "pi_logits"
learner: "omiga_learner"

# Hidden-layer widths.
# NOTE(review): presumably rnn_hidden_dim sizes the agent's recurrent layer
# and critic_hidden_dim sizes the critic network — confirm against the
# agent/critic modules.
rnn_hidden_dim: 128
critic_hidden_dim: 256


# Mixing network selected by name ("lmix") and its embedding dimension.
# NOTE(review): confirm which mixer class "lmix" resolves to in the learner.
mixer: "lmix"
mixing_embed_dim: 64

# NOTE(review): presumably a temperature coefficient used by the learner —
# its exact role is not visible from this file; confirm before tuning.
alpha_temp: 10


# Name of this configuration.
# NOTE(review): presumably used to label runs/results — confirm.
name: "offline_role_prior"

