# Dotted Python path of the task pool class this config describes
# (presumably consumed by Hydra-style `_target_` instantiation — confirm in the loader).
_target_: fusion_bench.taskpool.llama.reward_model.RewardModelEvaluationTaskPool

# Datasets to evaluate on, keyed by dataset name. Each entry names a loader
# callable via `_target_`; the remaining keys are presumably passed to it as
# keyword arguments — confirm against the loader's signature.
test_datasets:
  preference_700k:
    _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
    # Relative interpolation: the three dots climb from this entry, through
    # `test_datasets`, to the root of this file, i.e. its top-level `tokenizer` key.
    tokenizer: ${...tokenizer}
    path: hendrydong/preference_700K
    # NOTE(review): the `train` split is used under `test_datasets` — confirm
    # this is intentional (e.g. the dataset may not ship a separate test split).
    split: train
    # No cache location configured; presumably disables on-disk caching of the
    # tokenized dataset — verify in the loader.
    cache_path: null

# Options for the evaluation data loader (presumably forwarded as keyword
# arguments to the DataLoader built by the task pool — confirm in its code).
dataloader_kwargs:
  shuffle: false
  batch_size: 16

# Relative interpolation that climbs one level ABOVE this file's root, so this
# config only resolves when composed under a parent config (e.g. as a
# `taskpool` node) that has a sibling `modelpool` node exposing `tokenizer`.
tokenizer: ${..modelpool.tokenizer}

# Presumably caps how many samples are evaluated per dataset — confirm in the
# task pool implementation.
max_num_samples: 1000
# Random seed; presumably makes any subsampling reproducible — TODO confirm.
seed: 42
