# Copyrighted datasets are commented out in the lists below.
# Names of the enabled datasets, one per line, in alphabetical order.
# Disabled entries (kept for reference; to re-enable one, add its name
# to the block below):
#   pile_bookcorpus2, pile_books3, pile_opensubtitles,
#   pile_openwebtext2, pile_youtubesubtitles
# The names contain no whitespace, so str.split() yields one list item
# per line.
DATASETS = """
    pile_arxiv
    pile_dm-mathematics
    pile_enron
    pile_europarl
    pile_freelaw
    pile_github
    pile_gutenberg
    pile_hackernews
    pile_nih-exporter
    pile_philpapers
    pile_pile-cc
    pile_pubmed-abstracts
    pile_pubmed-central
    pile_stackexchange
    pile_ubuntu-irc
    pile_uspto
    pile_wikipedia
""".split()

# A six-name subset of the enabled datasets. Together with REST_DATASETS
# it accounts for every enabled name in DATASETS, with no overlap.
# The names contain no whitespace, so str.split() yields one list item
# per line.
SOME_DATASETS = """
    pile_dm-mathematics
    pile_enron
    pile_github
    pile_stackexchange
    pile_uspto
    pile_wikipedia
""".split()
# The enabled datasets that are not listed in SOME_DATASETS.
# Disabled entries (kept for reference; to re-enable one, add its name
# to the block below):
#   pile_bookcorpus2, pile_books3, pile_opensubtitles,
#   pile_openwebtext2, pile_youtubesubtitles
# The names contain no whitespace, so str.split() yields one list item
# per line.
REST_DATASETS = """
    pile_arxiv
    pile_europarl
    pile_freelaw
    pile_gutenberg
    pile_hackernews
    pile_nih-exporter
    pile_philpapers
    pile_pile-cc
    pile_pubmed-abstracts
    pile_pubmed-central
    pile_ubuntu-irc
""".split()

# The enabled datasets partitioned into four chunks (7, 6, 2 and 2 names).
# Each name appears in exactly one chunk.
# NOTE(review): the grouping rationale is not visible here -- presumably
# chunks balance work per batch; confirm against the consumer.
# Each chunk is a whitespace-split block: one name per line.
CHUNKED_DATASETS = [
    """
    pile_arxiv
    pile_freelaw
    pile_wikipedia
    pile_philpapers
    pile_ubuntu-irc
    pile_enron
    pile_europarl
    """.split(),
    """
    pile_github
    pile_stackexchange
    pile_dm-mathematics
    pile_nih-exporter
    pile_gutenberg
    pile_hackernews
    """.split(),
    """
    pile_pubmed-central
    pile_uspto
    """.split(),
    """
    pile_pile-cc
    pile_pubmed-abstracts
    """.split(),
]
