# Hydra config for the MOGFN optimizer. `_target_` names the class Hydra
# instantiates; the remaining keys are handed to it as keyword arguments.
_target_: lambo.optimizers.mogfn_v2.MOGFN
# Do not instantiate nested nodes that carry their own `_target_` (such as
# `model`) together with this object; they are passed through as config.
_recursive_: false
num_rounds: 64
random_action_prob: 0.01
# Sequence-length bounds are interpolated from the active `task` config.
max_len: ${task.max_len}
min_len: ${task.min_len}
train_batch_size: 128
# Written with an explicit mantissa dot: YAML 1.1 loaders such as stock
# PyYAML resolve a bare `1e-80` as a string rather than a float.
reward_min: 1.0e-80
sampling_temp: 1
num_opt_steps: 5000
# NOTE(review): the beta_* / pref_* keys look like paired switches and
# parameters for two conditioning inputs -- confirm against MOGFN.
beta_use_therm: false
pref_use_therm: true
beta_cond: false
pref_cond: true
beta_scale: 1
beta_shape: 32
pref_alpha: 1.0
# NOTE(review): presumably learning rates and weight decay -- verify which
# parameter groups `pi_lr` and `z_lr` apply to.
pi_lr: 0.005
z_lr: 0.01
wd: 0.0001
beta_max: 32
therm_n_bins: 50
gen_clip: 10
encoder_obj: mlm
reward_type: convex
sample_beta: 16
simplex_bins: 20
eval_freq: 100
k: 10
num_eval_samples: 1000
use_eval_pref: false
resampling_weight: 1.0
concentrate_pool: 1
val_batch_size: 64
share_encoder: true
freeze_encoder: true
use_acqf: false
offline_gamma: 0


# Nested Hydra node: `_target_` names the model class, and the sibling keys
# are its constructor arguments.
model:
  _target_: lambo.models.cond_gfn_transformer.CondGFNTransformer
  # Interpolated from the active `task` config, the same source the top-level
  # `max_len` uses.
  max_len: ${task.max_len}
  # NOTE(review): vocab_size and num_actions are hard-coded here -- confirm
  # they match the tokenizer/alphabet used by the selected task.
  vocab_size: 26
  num_actions: 21
  num_hid: 128
  num_layers: 3
  num_head: 8
  bidirectional: false
  dropout: 0