# Subset name identifiers: 18 entries, kept in alphabetical order.
# NOTE(review): the names resemble components of "The Pile" text corpus --
# confirm against whatever code consumes this list.
SUBSETS = [
    'arxiv',
    'bookcorpus2',
    'books3',
    'cc',
    'europarl',
    'freelaw',
    'github',
    'gutenberg',
    'hackernews',
    'math',
    'opensubtitles',
    'openwebtext2',
    'philpapers',
    'stackexchange',
    'ubuntu',
    'uspto',
    'wikipedia',
    'youtubesubtitles',
]
