# Task configuration for the STS-B subset of GLUE.
# NOTE(review): `name` looks like the identifier used to select or label this
# task config — confirm how the consuming code reads it.
name: glue-stsb

# Dataset source: names the callable `datasets.load_dataset` together with
# the values `path: glue` and `name: stsb`.
# NOTE(review): `_target_` looks like the Hydra instantiate convention, in which
# the sibling keys are passed as keyword arguments — confirm against the loader.
datasets:
  _target_: datasets.load_dataset
  path: glue
  name: stsb

# Preprocessor for this task: names the class `src.tasks.STSB_Preprocessor`
# and hands it a template file.
# NOTE(review): `template_file` is a relative path; the directory it is resolved
# against is not visible in this file — confirm in the preprocessor code.
preprocessor:
  _target_: src.tasks.STSB_Preprocessor
  template_file: glue_v1/stsb.json

# Keyword arguments for the dataset mapping step.
# NOTE(review): presumably forwarded to `Dataset.map` — confirm against the
# caller.
map_kwargs:
  # Columns listed for removal.
  # NOTE(review): these look like the raw STS-B columns — confirm.
  remove_columns:
    - sentence1
    - sentence2
    - label
    - idx
  batched: true
  num_proc: 1
  # Progress description text; keep the wording unchanged.
  desc: Running tokenizer on dataset
