---
# Configuration for the `uspto_full` dataset entry.
# NOTE(review): the meaning of each key is inferred from its name only;
# confirm against the code that loads this file.

# Identifier of this configuration and its data directory.
name: uspto_full
data_dir: uspto_full

# Index range to process.
# NOTE(review): presumably record/row indices into the source data, and
# whether end_idx is inclusive is not visible here — TODO confirm.
start_idx: 0
end_idx: 3000000

# Source .rsmi files (applications and grants) and the chunks directory.
# NOTE(review): paths are relative; the base directory they resolve
# against is not shown here — verify against the loader.
path_applications: uspto_full/original/2001_Sep2016_USPTOapplications_smiles.rsmi
path_grants: uspto_full/original/1976_Sep2016_USPTOgrants_smiles.rsmi
path_chunks: uspto_full/raw/chunks

# NOTE(review): units of this threshold are not shown here (presumably
# percentage points of reported yield) — confirm.
yield_difference_threshold: 5

# Vocabulary-based filtering is switched off for this dataset.
filter_by_vocab: false