# Dotted path of the agent class this configuration is meant for.
# NOTE(review): `_target_` looks like the Hydra instantiate convention —
# confirm against the code that loads this file.
_target_: gflownet.gflownet.GFlowNetAgent
# Random seed
seed: 0
# Optimizer and training-loop settings
optimizer:
  # Loss function
  loss: flowmatch
  # Learning rate and its decay settings
  # NOTE(review): presumably lr is scaled by lr_decay_gamma every
  # lr_decay_period steps — confirm against the scheduler code.
  lr: 0.0001
  lr_decay_period: 1000000
  lr_decay_gamma: 0.5
  # Optimization method: adam, sgd
  method: adam
  # Threshold loss for early stopping
  early_stopping: 0.0
  # Coefficient for exponential moving average
  ema_alpha: 0.5
  # Beta coefficients for Adam
  adam_beta1: 0.9
  adam_beta2: 0.999
  # Momentum for SGD
  sgd_momentum: 0.9
  # Mini-batch size
  batch_size: 32
  # Train to sample ratio
  train_to_sample_ratio: 1
  # Number of training iterations
  n_train_steps: 20000
  # From original implementation
  bootstrap_tau: 0.0
  clip_grad_norm: 0.0
# If true, compute rewards in batches
batch_reward: true
# Force zero probability of sampling invalid actions
mask_invalid_actions: true
# Temperature that divides the logits (logits /= temperature_logits)
temperature_logits: 1.0
# Probability of taking a random action
random_action_prob: 0.001
# Percentage of trajectories in a batch from an empirical distribution
# NOTE(review): unclear from this file whether the scale is 0-1 or 0-100 —
# confirm against the consumer.
pct_offline: 0.0
# Replay buffer capacity
replay_capacity: 0
# NOTE(review): `al` is undocumented here — presumably an active-learning
# switch; confirm against the agent code.
al: true
# Policy models
policy:
  # Forward policy
  forward:
    type: mlp
    # NOTE(review): presumably hidden-layer width and layer count of the
    # MLP — confirm against the model code.
    n_hid: 128
    n_layers: 2
    # Checkpoint settings for the forward policy; null means none is set
    checkpoint: null
    reload_ckpt: false
  # Backward policy
  backward:
    type: uniform
  # Checkpointing period; null means none is set
  ckpt_period: null
# NOTE(review): undocumented — presumably the number of samples used to
# estimate an empirical loss; confirm against the agent code.
num_empirical_loss: 200000
# Oracle settings
oracle:
  # Number of samples for oracle metrics
  n: 500
# TODO: remove eventually
sample_only: false