{
    "Pile-CC": 0,
    "StackExchange": 1,
    "PubMed Abstracts": 2,
    "Github": 3,
    "Wikipedia (en)": 4,
    "FreeLaw": 5,
    "PubMed Central": 6,
    "USPTO Backgrounds": 7,
    "DM Mathematics": 8,
    "NIH ExPorter": 9,
    "ArXiv": 10,
    "HackerNews": 11,
    "EuroParl": 12,
    "PhilPapers": 13,
    "Enron Emails": 14,
    "Gutenberg (PG-19)": 15,
    "Ubuntu IRC": 16
}