# Data and tokenizer settings; train and valid both point at lm1b.
# NOTE(review): key meanings below are inferred from their names only;
# confirm against the code that consumes this file.

# Presumably the dataset identifiers for the training and validation splits.
train: lm1b
valid: lm1b
# Presumably a tokenizer hub name or a local path, per the key name; verify.
tokenizer_name_or_path: bert-base-uncased
# NOTE(review): absolute, user-specific path; unlikely to exist on other
# machines, so override it per environment.
cache_dir: /share/kuleshov/ssahoo/textdiffusion/data
# Booleans are written in canonical lowercase so that YAML 1.1 and 1.2
# loaders agree on their type.
wrap: false
streaming: true
insert_train_eos: true
insert_valid_eos: true