---
# Configuration entry named `refinedweb`.
# NOTE(review): the code that consumes these keys is not visible from this
# file, so the comments describe the values only — confirm the exact
# semantics against the loader.
name: refinedweb

# Settings under the `train` key.
train:
  path: '/anonymous-rcp-scratch/shared/anonymous/dataset/refinedweb'
  train_batch: 16

# Settings under the `test` key; `path` is the same location as `train.path`.
test:
  path: '/anonymous-rcp-scratch/shared/anonymous/dataset/refinedweb'
  test_batch: 32

# Top-level options.
overwrite_cache: false
num_workers: 16
# Same value as `tokenizer.model_max_length` further down; presumably the two
# are meant to stay in sync (TODO confirm).
block_size: 1024

tokenizer:
  # Explicit null: no tokenizer name is set in this file — presumably it is
  # supplied elsewhere (TODO confirm).
  name: null
  model_max_length: 1024