---
# Dataset preparation settings.
# Key meanings noted below are inferred from the key names only; the code
# that consumes this file is not visible here — confirm against it.

# Input: which dataset to read, and which split/column of it.
dataset: 'roneneldan/TinyStories'  # looks like a Hub-style "owner/name" id — TODO confirm
split: 'train'
text_column: 'text'

# Limits.
max_length: 512  # presumably a per-example length cap; unit not shown here
# NOTE(review): 10 is tiny next to shard_size — confirm the unit/scale the
# consumer expects for num_tokens.
num_tokens: 10

# Output.
save_path: 'tiny_stories'
shard_size: 100000000