# A part of ROOTS
# Dataset entry keyed by its dataset identifier; per the file header it is a
# part of ROOTS.
bigscience-data/roots_en_the_pile_uspto:
  provider: huggingface
  # Unset: written as an explicit null rather than a bare `partition:` so the
  # empty value is visibly intentional.
  partition: null
  split: train

  # Canonical lowercase boolean; parses to the same value as `True`.
  streaming: true

  # Source-specific cleaning rules.
  # NOTE(review): the original comment ended with a question mark -- confirm
  # that the two keys below are the intended place for such rules.
  # Unset (explicit null, not an empty list).
  remove_columns: null
  # NOTE(review): 0 presumably disables concatenation -- confirm with the
  # code that consumes this file.
  concatenate_successive_entries: 0
