# Hydra-style config for the MOReinforce algorithm.
# Presumably consumed via `hydra.utils.instantiate`, in which case every key
# that is not an underscore-wrapped directive is passed to `_target_` as a
# keyword argument -- confirm against the caller.
# NOTE(review): the group headings below are inferred from key names only;
# verify them against MOReinforce before relying on them.
_target_: torch_seq_moo.algorithms.moreinforce.MOReinforce
# Nested `_target_` nodes (e.g. `model`) are left as config instead of being
# instantiated automatically.
_recursive_: false

# Round / generation counts.
num_rounds: 64
num_gens: 16
random_action_prob: 0.01

# Sequence-length bounds, interpolated from the task config.
max_len: ${task.max_len}
min_len: ${task.min_len}

# Metric names requested for evaluation.
eval_metrics: ["hypervolume", "r2", "hsri"]
# NOTE(review): `model.batch_size` carries the same value; keep them in sync.
batch_size: 128

# Reward shaping.
reward_exp: 4
# Written with an explicit decimal point in the mantissa: YAML 1.1 resolvers
# (e.g. PyYAML's default) read a bare `1e-80` as a string, not a float.
reward_min: 1.0e-80
reward_max: 100
sampling_temp: 1
train_steps: 100

# Conditioning on beta / preference vectors.
beta_use_therm: true
pref_use_therm: true
beta_cond: true
pref_cond: true
beta_scale: 1
beta_shape: 32
pref_alpha: 1.0

# Optimizer settings.
pi_lr: 0.0001
z_lr: 0.001
wd: 0.0001

beta_max: 32
therm_n_bins: 50
gen_clip: 10
encoder_obj: mlm
reward_type: tchebycheff
sample_beta: 4
simplex_bins: 10

# Evaluation settings.
eval_freq: 100
k: 10
num_samples: 128
use_eval_pref: false
unnormalize_rewards: false

# Network config for the algorithm above. Because `_recursive_` is disabled at
# the top level, this node is not instantiated automatically; presumably
# MOReinforce builds it itself -- confirm in the algorithm's constructor.
model:
  _target_: torch_seq_moo.algorithms.mogfn_utils.conditional_transformer.CondGFNTransformer
  # Same interpolation as the top-level `max_len`.
  max_len: ${task.max_len}
  # NOTE(review): vocab_size (26) and num_actions (21) differ; presumably the
  # tokenizer vocabulary vs. the set of emitted tokens -- verify against the
  # task's tokenizer.
  vocab_size: 26
  num_actions: 21
  # Transformer dimensions.
  num_hid: 256
  num_layers: 3
  num_head: 16
  bidirectional: false
  dropout: 0
  # NOTE(review): duplicates the top-level `batch_size` (128); keep in sync.
  batch_size: 128
