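# Disabled alternative definition of MUSE_retain (MUSE-News, config "raw",
# split "retain1"). Uncomment it only after removing the active MUSE_retain
# entry below: a YAML mapping must not define the same key twice.
# MUSE_retain: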
#   handler: PretrainingDataset
#   args:
#     hf_args:
#       path: "muse-bench/MUSE-News"
#       name: "raw"
#       split: "retain1"
#     text_key: "text"
#     max_length: 2048

# Active dataset entry registered under the key MUSE_retain.
# It points at the "retain" split of tamarsonha/MUSE-Books-Train.
MUSE_retain:
  # Handler name as a plain string; presumably resolved to a dataset class by
  # the code that loads this config — verify against the consumer.
  handler: PretrainingDataset
  # Arguments for the handler above.
  args:
    # NOTE(review): looks like these are forwarded to Hugging Face
    # `datasets.load_dataset` (path/split) — confirm in the handler.
    # No `name` (dataset config) is given here.
    hf_args:
      path: "tamarsonha/MUSE-Books-Train"
      split: "retain"
    # Presumably the record field that holds the raw text — confirm.
    text_key: "text"
    # Presumably a per-example length cap (tokens?) — confirm the unit.
    max_length: 2048
  