# Hydra-style defaults list: composes this config from config-group options.
# An entry written `group@package: option` places the chosen option under the
# `package` key, so the two `dataset` entries below populate the top-level
# `train_dataset` and `eval_dataset` sections.
defaults:
  - model: roberta_binary_reward_model
  - dataset@train_dataset: list_test
  - dataset@eval_dataset: list_eval
  # `_self_` is listed last, so the keys defined in this file are merged after
  # (and therefore override) the options selected above.
  - _self_

# Settings for the evaluation dataset. These are merged on top of the option
# chosen by the `dataset@eval_dataset` entry in the defaults list.
eval_dataset:
  cache_id: d_eval
  data:
    cache_path: null
    # NOTE(review): shift 0.0 / scale 1.0 look like an identity transform of
    # the reward — confirm how the dataset code applies them.
    reward_shift: 0.0
    reward_scale: 1.0
    # Reward function spec: a name plus the data paths it is given.
    reward_f:
      name: score_human_reward
      reddit_path: data/reddit_comments/
      index_path: data/reddit_comments/eval_idxs.json
  # NOTE(review): presumably a maximum sequence length; units (tokens vs.
  # characters) are not visible from this file — confirm.
  max_len: 256
  # Key is spelled `cuttoff` (sic). NOTE(review): the consuming code presumably
  # reads this exact spelling, so do not rename it here alone.
  cuttoff: null
  resample_timeout: 0.00

# Settings for the training dataset. These are merged on top of the option
# chosen by the `dataset@train_dataset` entry in the defaults list.
# NOTE(review): the training slot is filled from the "test" index file and
# uses cache_id `d_test` — presumably intentional for this experiment; confirm.
train_dataset:
  cache_id: d_test
  data:
    cache_path: null
    # NOTE(review): shift 0.0 / scale 1.0 look like an identity transform of
    # the reward — confirm how the dataset code applies them.
    reward_shift: 0.0
    reward_scale: 1.0
    # Reward function spec: a name plus the data paths it is given.
    reward_f:
      name: score_human_reward
      reddit_path: data/reddit_comments/
      index_path: data/reddit_comments/test_idxs.json
  # NOTE(review): presumably a maximum sequence length; units (tokens vs.
  # characters) are not visible from this file — confirm.
  max_len: 256
  # Key is spelled `cuttoff` (sic). NOTE(review): the consuming code presumably
  # reads this exact spelling, so do not rename it here alone.
  cuttoff: null
  resample_timeout: 0.00

# Overrides for the model option selected in the defaults list
# (`roberta_binary_reward_model`).
model:
  # Dataset reference for the model. `cache_id: d_test` is the same id that
  # `train_dataset.cache_id` declares in this file.
  # NOTE(review): presumably this lets the model reuse the cached training
  # dataset instead of building a second copy — confirm in the loader code.
  dataset:
    name: toxicity_list_dataset
    cache_id: d_test
  roberta_kind: roberta-base
  freeze_roberta: false
  # Key is spelled `reward_cuttoff` (sic). NOTE(review): presumably the
  # threshold used to binarize the reward — confirm; keep the spelling in sync
  # with the consuming code.
  reward_cuttoff: 0.0
  # No checkpoint is configured (`checkpoint_path: null`).
  # NOTE(review): `strict_load` presumably only matters once a path is set.
  load:
    checkpoint_path: null
    strict_load: true

# No evaluator is configured for this run (explicit null).
evaluator: null

# Training-loop settings.
train:
  save_checkpoint_dir: outputs/toxicity/toxicity_upvote_roberta_binary_reward_f_test3/
  optim_state_path: null
  # With `max_steps: null` below, this very large epoch count is the only
  # bound on training length set in this file.
  # NOTE(review): presumably the run is meant to be stopped manually — confirm.
  epochs: 10000000
  dataloader_workers: 2
  bsize: 16
  grad_accum_steps: 1
  # NOTE(review): the three intervals below are presumably counted in training
  # steps — confirm against the training loop.
  log_every: 256
  eval_every: 4096
  save_every: 32768
  max_checkpoints: 4
  eval_bsize: 16
  eval_batches: 32
  # Written with an explicit mantissa decimal point: YAML 1.1 resolvers
  # (e.g. plain PyYAML) read `1e-5` as the string '1e-5', whereas `1.0e-5` is
  # a float under every loader. The numeric value is unchanged.
  lr: 1.0e-5
  weight_decay: 0.00
  max_steps: null
  # Empty mapping: no loss-specific overrides are set here.
  loss: {}

# Weights & Biases logging settings: enabled, with the project name given
# below.
wandb:
  use_wandb: true
  wandb_project: upvote_reward
