---
# Settings for building a code dataset from MBPP.
# NOTE(review): the consuming script is not visible from this file; the
# per-key notes below are inferred from key names and should be confirmed
# against the code that reads them.

# --- Source ---------------------------------------------------------------
# Dataset identifier (looks like a Hugging Face Hub repo id -- confirm).
dataset: 'google-research-datasets/mbpp'
# Which split of the dataset to read.
split: 'train'

# --- Column mapping -------------------------------------------------------
# Names of the columns to read from each record.
id_column: 'task_id'
prompt_column: 'text'
code_column: 'code'
test_column: 'test_list'

# --- Processing limits ----------------------------------------------------
# Presumably an upper bound on sequence length -- unit (tokens vs.
# characters) to be confirmed.
max_length: 1024
# Presumably a per-line length setting -- TODO confirm exact meaning.
line_length: 40
# TODO confirm what this counts (e.g. tokens per sample/step).
num_tokens: 10

# --- Output ---------------------------------------------------------------
# Destination path for the generated dataset.
save_path: 'mbpp_code_dataset'
# Presumably the maximum size of one output shard -- unit to be confirmed.
shard_size: 100000000