# Generative Flow Networks (GFN) with Trajectory Balance (TB) loss, combined
# with importance-weighted training steps (see iw_train_freq and target_ess).
# Values written as ${target.*} are interpolations that refer to a `target`
# node which is not defined in this file.
name: gfn_tb_iw
step_size: ${target.gfn_tb.step_size}
batch_size: ${target.all.batch_size}
iters: ${target.all.iters}
num_steps: ${target.all.num_steps}
grad_clip: 1.0
loss_type: "tb"  # lv or tb
init_logZ: ${target.gfn_tb.init_logZ}
logZ_step_size: ${target.gfn_tb.logZ_step_size}
reference_process: ${target.gfn_tb.reference_process}  # ou or pinned_brownian
init_std: ${target.gfn_tb.init_std}  # for ou
max_diffusion: ${target.gfn_tb.max_diffusion}  # for pinned_brownian or ou
noise_scale: 6.0  # for ou_dds
init_invtemp: 1.0
# Number of importance-weighted training steps per on-policy training step.
iw_train_freq: 1
target_ess: 0.05  # target ESS for importance weighting
# Written with an explicit mantissa dot and exponent sign on purpose: YAML 1.1
# loaders such as PyYAML resolve a bare `-1e5` as the string "-1e5", not a
# float. This form is a float (-100000.0) under every loader.
logr_clip: -1.0e+5

# Learning-rate scheduler settings.
lr_schedule:
  # Scheduler kind; one of: multistep | cosine | constant.
  type: "multistep"
  # Iteration indices at which to decay.
  milestones: ${target.all.milestones}
  # Multiply LR by gamma at each milestone.
  gamma: 0.3

# Config-group selections composed into this config.
# NOTE(review): this looks like a Hydra defaults list; entry order can affect
# composition, so keep the order as-is unless the consumer is checked.
defaults:
  - model: pisgrad_net
  - noise_schedule: const  # const for pinned_brownian; this is not used for ou_dds

# Settings under the `model` node (the defaults list in this file selects
# `pisgrad_net` for it).
model:
  use_lp: ${target.all.use_lp}
  num_hid: ${target.all.num_hid}
  # Initialization of the last layers' weights of the time-dependent network.
  # Written as 1.0e-8 rather than 1e-8: without the mantissa dot, YAML 1.1
  # loaders such as PyYAML resolve the value as a string instead of a float.
  weight_init: 1.0e-8
  # Initialization of the last layers' bias of the time-dependent network.
  bias_init: 0.1

# Settings under the `noise_schedule` node (the defaults list in this file
# selects `const` for it).
noise_schedule:
  # Canonical lowercase boolean, recognized as a bool by every YAML schema.
  reverse: false
