# Settings for preparing the "stas/wmt14-en-de-pre-processed" dataset.
# NOTE(review): the code that consumes this file is not visible from here.
# The per-key notes below are inferred from the key names only; confirm each
# one against the loader before relying on it.

# Output location and input dataset.
# presumably a directory or path prefix for the generated output -- TODO confirm
save_path: 'wmt14'
# presumably a dataset identifier resolved by the loader -- TODO confirm
dataset: 'stas/wmt14-en-de-pre-processed'

# Sizing limits.
# NOTE(review): unit not shown here (examples, tokens, or bytes?) -- confirm
shard_size: 100000000
# presumably an upper bound on sequence length -- TODO confirm unit
max_length: 512
# NOTE(review): meaning cannot be determined from this file -- confirm
num_tokens: 10

# Column and split selection.
# "de" / "en" look like the German and English text columns; presumably the
# first is the conditioning input and the second the target -- TODO confirm
condition_column: 'de'
target_column: 'en'
# presumably the dataset split to read -- TODO confirm
split: 'train'