# Run-level metadata for this experiment config.
# NOTE(review): `parent` looks like the base config this file extends, and
# `md5_parent_hash` like a checksum of that file — confirm against the loader.
parent: 'research/conditional/train/configs/baselines/gpt/small.yaml'
md5_parent_hash: 'cbdf4e5992f41713488c25c819086e51'
# Quoted so the digits-and-colons value is never implicitly typed as a number.
# NOTE(review): format looks like D-HH:MM:SS (job time limit) — confirm.
time: '0-20:10:00'
# Dotted module path of the entry point used to run this config.
runner: 'research.blanks.train'

# Parameters for the `blanx-small-grid` run.
# Keys prefixed with `^` hold lists of candidate values.
# NOTE(review): presumably the launcher expands `^` keys into a grid (one run
# per combination) — confirm against the grid tooling.
params:
  name: blanx-small-grid
  # NOTE(review): `1e-3` has no decimal point, so YAML 1.1 loaders (e.g.
  # PyYAML) read it as the string "1e-3" rather than a float. Left unchanged
  # here; write `1.0e-3` if the consumer needs a real float — confirm.
  learning_rate: 1e-3
  n_steps: 100000
  # `num_workers` was previously defined twice in this mapping (1 here, 0 in
  # the LOCAL section below). Duplicate keys are invalid YAML and most parsers
  # silently keep the last one, so only the LOCAL value ever took effect.
  # The non-local value is preserved as a comment; restore it when removing
  # the LOCAL overrides.
  # num_workers: 1

  tokenizer: gpt
  # blanx args
  ^n_blanks: [1]
  ^blanks_residual: [true]
  ^blanks_add_embedding: [false]
  blanks_learnable_weights: true
  ^blank_initial_weight: [0.00001, 1.0, 10.0]
  ^blanks_use_straight_through: [true, false]

  # Single-value alternatives to the two `^` lists above (currently disabled):
  # blank_initial_weight: 10.0
  # blanks_use_straight_through: true

  # LOCAL: settings for local runs (tiny batch, dummy dataset, and
  # `num_workers: 0`).
  batch_size: 2
  use_dummy_dataset: true
  num_workers: 0
