# environment
env_name: DarkRoom-v0
# NOTE(review): presumably selects which dataset variant is loaded - confirm against the data loader.
data_type: mixed
max_episode_steps: 100
# Equals training_tasks (80) plus the 20 ids listed in eval_tasks.
num_tasks: 100

# training
training_tasks: 80
# Twenty ids, 80 through 99.
# NOTE(review): presumably ids 0-79 are the training tasks - confirm against the task split code.
eval_tasks: [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
warmup_steps: 10000
total_steps: 300000
# NOTE(review): presumably measured in training steps, like total_steps - confirm.
eval_freq: 3000
batch_size: 128
train_episode_horizon: 4
lr: 0.0003
weight_decay: 0.0001
# Floats are written with an explicit fractional digit (1.0, not "1.") so they
# read unambiguously as floats.
max_grad_norm: 1.0
return_scale: 50.0
# model
# NOTE(review): the keys below look like network-architecture settings rather
# than optimizer settings - confirm against the model constructor.
tstep_dim: 64
hidden_dim: 64
# Four entries, the same count as n_layer.
# NOTE(review): presumably one feed-forward width per layer - confirm.
ff_dim: [256, 256, 256, 384]
n_layer: 4
n_head: 8
# All dropout probabilities are zero; written as 0.0 (not "0.") so they read
# unambiguously as floats.
ff_pdrop: 0.0
ff_moe_pdrop: 0.0
emb_pdrop: 0.0
attn_pdrop: 0.0
activation_function: leaky_relu
# sample_by_timestep (below in this file) requires this to be "fixed" when enabled.
positional_embedding: fixed
attention: flash
sigma_reparam: false

# parameters for data sampling
sort_data: false
last_expert: false
# If true, positional_embedding must be set to "fixed".
sample_by_timestep: true

# Mixture-of-experts settings (expert pool, gate, and contrastive options).
moe_config:
  # NOTE(review): presumably the layer indices that receive the contrastive and
  # balance losses; with n_layer: 4, index 3 is the last layer if indexing is
  # zero-based - confirm.
  moe_layers_contrastive_and_balance: [3]
  task_hard_router: false
  use_top_k_indices: false
  add_softmax: true
  detach_gate_input: false

  # Expert pool and gate settings.
  # NOTE(review): presumably num_selects of num_experts are chosen per input - confirm.
  num_experts: 6
  num_selects: 2
  expert_dim: 768
  gate_balance_loss_weight: 0.1
  gate_add_noise: true
  gate_noise_epsilon: 0.01

  # Contrastive settings. The *_contrastive expert count and width are twice
  # the values above (12 vs 6, 1536 vs 768); num_selects is the same (2).
  # NOTE(review): tau is grouped with these keys; presumably the contrastive
  # temperature - confirm.
  tau: 0.005
  contrastive_loss_weight: 0.01
  num_experts_contrastive: 12
  num_selects_contrastive: 2
  expert_dim_contrastive: 1536

# evaluation
# Explicitly null (no value set).
target_return: null
eval_episodes: 100
# Equals max_episode_steps (100) * train_episode_horizon (4).
# NOTE(review): confirm whether that relation is intended or coincidental.
eval_horizon: 400