---
# Dataset configuration.
# The code that consumes this file is not visible here, so the notes on
# semantics below are assumptions to confirm against the loader.

# Dataset identity and on-disk layout.
# NOTE(review): data_dir / processed_dir are presumably path components
# resolved relative to some data root — confirm.
name: uspto_50k
data_dir: uspto_50k
processed_dir: with_yields
subset: train

# Options for reading the tabular data.
# NOTE(review): header / names look like pandas-style CSV reader arguments
# (header row at index 0, no column-name override) — confirm.
header: 0
names: null

# Index bounds.
# NOTE(review): presumably a slice of the subset to process; confirm whether
# end_idx is exclusive and whether a 0..1 window is intentional.
start_idx: 0
end_idx: 1

# Similarity settings.
# NOTE(review): units and meaning of combination_weight and batch_size are
# not established by this file — verify against the consumer.
filter_by_vocab: false
similarity_type: tanimoto
combination_weight: 1
batch_size: 1024