# Dataset entry registered under the key `MUSE_retain`.
MUSE_retain:
  # Handler name; it is resolved by the consuming code, which is not part of
  # this file.
  handler: PretrainingDataset
  args:
    # NOTE(review): presumably forwarded to Hugging Face `load_dataset`
    # (repo path, config name, split) — confirm against the handler.
    hf_args:
      path: 'muse-bench/MUSE-News'
      name: 'raw'
      split: 'retain1'
    # NOTE(review): looks like the record field that holds the text — confirm.
    text_key: 'text'
    # NOTE(review): presumably a per-sample length cap; the unit (e.g. tokens)
    # is not visible here — confirm.
    max_length: 2048