# Optimizer configuration. `_target_` names the class these keys are meant for
# (presumably built through Hydra's `instantiate` -- confirm against the caller).
_target_: lambo.optimizers.gfn.GFN
# NOTE(review): presumably keeps the nested `model` node from being instantiated
# eagerly so the optimizer can build it itself -- confirm.
_recursive_: false
num_rounds: 64
random_action_prob: 0.01
# Length bounds are not set here; they are interpolated from the `task` node of
# the composed config.
max_len: ${task.max_len}
min_len: ${task.min_len}
train_batch_size: 64
# Keep the explicit mantissa dot: YAML 1.1 loaders (e.g. plain PyYAML) resolve a
# bare `1e-20` as a string instead of a float.
reward_min: 1.0e-20
sampling_temp: 1
num_opt_steps: 250
beta_use_therm: false
beta_cond: false
beta_scale: 1
beta_shape: 32
pi_lr: 0.005
z_lr: 0.01
wd: 0.0001
beta_max: 32
therm_n_bins: 50
gen_clip: 10
encoder_obj: mlm
reward_type: convex
beta_sched: 2
sample_beta: 16
simplex_bins: 20
eval_freq: 100
k: 10
num_eval_samples: 1000
resampling_weight: 1.0
concentrate_pool: 1
val_batch_size: 64
share_encoder: false
freeze_encoder: false
use_acqf: true
offline_gamma: 0


# Nested config for the network named by `_target_` below.
# `max_len` is interpolated from the `task` node, matching the top-level value.
model:
  _target_: lambo.models.cond_gfn_transformer.CondGFNTransformer
  max_len: ${task.max_len}
  # NOTE(review): vocab_size and num_actions differ (26 vs 21); presumably the
  # vocabulary includes special tokens that are not valid actions -- confirm.
  vocab_size: 26
  num_actions: 21
  num_hid: 128
  num_layers: 3
  num_head: 8
  bidirectional: false
  dropout: 0