---
# Settings for a dataset-preparation job.
# NOTE(review): the consuming script is not part of this file. The notes
# below are inferred from key names only; confirm them against the consumer.

# Source data: presumably a dataset identifier and the split to read.
dataset: 'jtatman/python-code-dataset-500k'
split: 'train'

# Record fields: presumably the columns holding the prompt and the code.
prompt_column: 'instruction'
code_column: 'output'

# Size limits: units (tokens, characters, lines) are not stated here.
# TODO confirm what each of these bounds before changing them.
max_length: 1024
line_length: 40
num_tokens: 10

# Output: presumably the destination path and the size of each shard.
save_path: 'python_code_dataset'
shard_size: 100000000