# Generative Flow Networks (GFN) with Trajectory Balance (TB) loss
#
# Values written as `${target...}` are interpolations that read the setting
# from the `target` config node, which is defined outside this file.
name: gfn_tb
step_size: ${target.gfn_tb.step_size}
batch_size: ${target.all.batch_size}
iters: ${target.all.iters}
num_steps: ${target.all.num_steps}
grad_clip: 1.0
loss_type: "tb"  # lv or tb
init_logZ: ${target.gfn_tb.init_logZ}
logZ_step_size: ${target.gfn_tb.logZ_step_size}
reference_process: ${target.gfn_tb.reference_process}  # ou or pinned_brownian
init_std: ${target.gfn_tb.init_std}  # for ou
max_diffusion: ${target.gfn_tb.max_diffusion}  # for pinned_brownian or ou
noise_scale: 6.0  # for ou_dds
init_invtemp: 1.0
# Written with an explicit mantissa dot and exponent sign so that YAML 1.1
# resolvers (e.g. stock PyYAML) also load it as a float instead of a string.
logr_clip: -1.0e+5

# Learning-rate schedule
lr_schedule:
  # Schedule kind: multistep | cosine | constant
  type: "multistep"
  # Iteration indices at which to decay
  milestones: ${target.all.milestones}
  # Multiply LR by gamma at each milestone
  gamma: 0.3

# NOTE(review): this looks like a Hydra defaults list -- confirm. The entries
# pick the `pisgrad_net` option for `model` and the `const` option for
# `noise_schedule`.
defaults:
  - model: pisgrad_net
  - noise_schedule: const  # const for pinned_brownian; this is not used for ou_dds

# Settings under `model`.
model:
  use_lp: ${target.all.use_lp}
  num_hid: ${target.all.num_hid}
  # Initialization of the last layers' weights of the time-dependent network.
  # Spelled `1.0e-8` rather than `1e-8`: YAML 1.1 resolvers (e.g. stock PyYAML)
  # only recognize a float when the mantissa contains a dot, and would load
  # the dot-less form as a string.
  weight_init: 1.0e-8
  # Initialization of the last layers' bias of the time-dependent network.
  bias_init: 0.1

# Settings under `noise_schedule`.
noise_schedule:
  reverse: false

# Settings under `buffer`.
buffer:
  use: true
  max_length_in_batches: 100
  prioritize_by: "piw"  # none, reward, loss, uiw, piw
  target_ess: 0.05
  sampling_method: "systematic"  # multinomial, stratified, systematic, rank
  rank_k: 0.01  # only used if sampling_method is rank
  sample_with_replacement: true
  prefill_steps: 100  # collect `prefill_steps` batches before starting training
  bwd_to_fwd_ratio: 2  # number of bwd steps per fwd step
  update_score: false