# Hydra-style object config: `_target_` is the dotted path of the class to
# build, and the remaining top-level keys are the settings supplied to it.
_target_: torch_seq_moo.algorithms.envelope_moq.EnvelopeMOQ
# NOTE(review): recursive instantiation is switched off, presumably so the
# nested `model` node reaches the algorithm as plain config -- confirm in
# EnvelopeMOQ.
_recursive_: false

num_rounds: 64
num_gens: 16
random_action_prob: 0.01

# Length bounds are not set here; both are interpolated from the task config.
max_len: ${task.max_len}
min_len: ${task.min_len}

eval_metrics:
  - hypervolume
  - r2
  - hsri

batch_size: 64
train_steps: 100

# Preference-related options (pref_*); their exact semantics live in the
# target class and are not visible from this file.
pref_use_therm: true
pref_cond: true
pref_alpha: 1.0

# NOTE(review): presumably optimizer learning rate and weight decay --
# verify against the target class.
pi_lr: 0.0001
wd: 0.0001

therm_n_bins: 50
simplex_bins: 10
eval_freq: 500
k: 10
num_samples: 128
unnormalize_rewards: false

# NOTE(review): this stanza looks specific to the Q-learning algorithm named
# in `_target_` (presumably discount factor, exploration rate, replay-memory
# size, ...) -- the meaning of each key is defined by that class; confirm
# there before tuning.
gamma: 0.99
epsilon: 0.5
epsilon_decay: false
mem_size: 4000
weight_num: 32
beta: 0.01
homotopy: false
update_freq: 100

# Nested config for the network; `_target_` names the class to construct.
model:
  _target_: torch_seq_moo.algorithms.envmoq_utils.arch.EnvMOQTransformer
  # Same task-level interpolation as the top-level `max_len`.
  max_len: ${task.max_len}
  # NOTE(review): vocab_size / num_actions are hard-coded; presumably they
  # must agree with the task's tokenizer/alphabet -- verify against the task.
  vocab_size: 26
  num_actions: 21
  # NOTE(review): 256 / 16 = 16 per head; presumably num_hid has to be
  # divisible by num_head -- confirm in EnvMOQTransformer.
  num_hid: 256
  num_layers: 3
  num_head: 16
  dropout: 0
  # Repeats the top-level `batch_size` value (64). NOTE(review): whether the
  # two must stay equal is not visible here -- check before changing one.
  batch_size: 64
