# Identifier of this configuration. Where it is consumed (logging, output
# paths, ...) is not visible in this file.
name: "rng"

# Dataset settings. The loader that reads these keys lives outside this file.
data:
  # Prompt file; a relative path, so it depends on the process working
  # directory unless the loader resolves it differently.
  path: "data/rng/prompts.txt"
  # NOTE(review): presumably the fraction of prompts assigned to the training
  # split, with the remainder held out for validation -- confirm in the loader.
  train_size: 0.95
  # NOTE(review): looks like an upper bound on how many prompts are read from
  # `path` -- confirm.
  limit_prompts: 1000

# Base language model and the LoRA adapter settings applied to it.
model:
  name: "gpt2-xl"
  # The `_target_` key names the class to build (`peft.LoraConfig`); the
  # remaining keys are passed to it as keyword arguments -- presumably via
  # Hydra's `instantiate`, confirm against the calling code.
  lora_config:
    _target_: peft.LoraConfig
    # Names of the sub-modules that receive LoRA adapters.
    target_modules:
      - "c_attn"
      - "c_proj"
      - "c_fc"
    r: 64
    lora_alpha: 16
    lora_dropout: 0.1
    bias: "none"
    # PEFT documents this flag as required for layers that store weights as
    # (fan_in, fan_out), citing GPT-2's Conv1D layers as the example.
    fan_in_fan_out: true

# Optimisation and sampling hyperparameters. The training code that interprets
# them is outside this file, so the notes below are assumptions to verify.
training:
  # NOTE(review): the name suggests the lambda weighting of a
  # sub-trajectory-balance (SubTB) objective -- confirm.
  subtb_lambda: 1.0
  # NOTE(review): `pf_temp_high` / `pf_temp_low` look like the bounds of a
  # sampling-temperature range and `pf_temp_prob` like the probability of
  # applying tempered sampling -- confirm.
  pf_temp_high: 2.0
  pf_temp_low: 0.5
  pf_temp_prob: 0.666
  # NOTE(review): presumably the probability of drawing samples from a replay
  # buffer instead of the current policy -- confirm.
  use_buffer_prob: 0.25
  n_samples: 5
  lr: 0.0001
  accumulate_grad_batches: 32
  epochs: 300

# Evaluation settings; interpreted by evaluation code outside this file.
eval:
  # NOTE(review): presumably the number of probe prompts/samples drawn per
  # evaluation pass -- confirm.
  n_probes: 10
  # Selects the diversity metric by name; the set of accepted names is defined
  # by the consuming code.
  diversity_metric: "sequence_embedding"

# Reward settings; interpreted by reward code outside this file.
reward:
  # NOTE(review): `temp_start` -> `temp_end` over `temp_horizon` looks like a
  # reward-temperature annealing schedule (1.0 down to 0.6 across 3000 units,
  # presumably optimisation steps) -- confirm the unit and the schedule shape.
  temp_start: 1.0
  temp_end: 0.6
  temp_horizon: 3000
  # NOTE(review): negative value; presumably a penalty applied to tokens
  # outside an allowed vocabulary -- confirm.
  vocab_alpha: -50
  # NOTE(review): presumably the capacity of the reward/replay buffer -- confirm
  # whether this is per prompt or global.
  buffer_size: 50

# Trainer callbacks. Each list entry names its class through `_target_`; the
# remaining keys are that class's constructor arguments.
callbacks:
  # Checkpointing driven by the validation metric "val/logP(s)" (higher is
  # better). `save_last` additionally keeps a copy of the most recent
  # checkpoint.
  - _target_: pytorch_lightning.callbacks.ModelCheckpoint
    monitor: "val/logP(s)"
    mode: "max"
    save_last: true
    dirpath: ${save_dir}/checkpoints/(...redacted...)
    filename: "epoch={epoch:03d}"
    # The template above already writes the `epoch=` prefix itself. With
    # auto-insertion enabled Lightning prepends the metric name a second time,
    # producing `epoch=epoch=003.ckpt`, so it is disabled here.
    auto_insert_metric_name: false
  # Stops training once "val/logP(s)" has not improved for `patience`
  # consecutive validation checks.
  - _target_: pytorch_lightning.callbacks.EarlyStopping
    monitor: "val/logP(s)"
    mode: "max"
    patience: 50