# Dataset configuration for 'uspto-50k'.
# NOTE(review): the exact meaning of each option is defined by the code that
# consumes this file, which is not visible here. Comments marked "presumably"
# are inferred from the key names only and should be confirmed.

# Identification and on-disk locations.
name: 'uspto-50k'
dataset_nb: 'block-15'  # presumably a tag selecting a dataset variant - confirm
datadir: 'data/uspto-50k'
datadist_dir: 'data/uspto-50k'  # currently the same path as datadir

# Loading and graph-construction switches (key order kept as in the original).
pin_memory: false
remove_h: false
shuffle: true
with_explicit_h: false
with_formal_charge: true
add_supernode_edges: true
nb_rct_dummy_nodes: 15  # presumably tied to the '15' in dataset_nb - confirm
num_workers: 4
permute_mols: false
num_processes: 1
zero_bond_order: false
# Per-split bins. For every split the two lists have the same length
# (3 entries for 'train', 1 each for 'test' and 'val').
# NOTE(review): presumably size_bins[i] is an upper bound on example size and
# batchsize_bins[i] is the batch size used for that bin - confirm with loader.
batchsize_bins:
  train: [128, 64, 16]
  test: [64]
  val: [64]
size_bins:
  train: [64, 83, 102]
  test: [250]
  val: [250]

# Should always be true. Used in case the data has atom mapping and we want to
# canonicalize the molecule to remove the information leak from the
# atom-mapping order.
canonicalize_molecule: true
# Atom vocabulary (29 entries). Besides plain element symbols it contains
# symbols with a '+1' / '-1' suffix (e.g. 'N+1', 'O-1') and the two special
# entries 'none' (first) and 'SuNo' (last).
# NOTE(review): the suffix presumably encodes formal charge (see
# with_formal_charge) and 'SuNo' presumably is the supernode type (see
# add_supernode_edges); list order presumably defines class indices, so do
# not reorder - confirm against the consuming code.
atom_types: ['none', 'O', 'C', 'N', 'I', 'Cl', 'Si', 'F', 'Br', 'N+1', 'O-1', 'S', 'B', 'N-1', 'Zn+1', 'Cu', 'Sn', 'P+1', 'Mg+1', 'C-1', 'P', 'S+1', 'S-1', 'Se', 'Zn', 'Mg', 'U', 'Au', 'SuNo']
# Bond/edge vocabulary (7 entries): 'none', three bond orders, then
# 'mol', 'within' and 'across'.
# NOTE(review): the last three are presumably auxiliary (non-chemical) edge
# types - confirm against the consuming code.
bond_types: ['none', 'SINGLE', 'DOUBLE', 'TRIPLE', 'mol', 'within', 'across']
# Maps an atom type to a list of integers. There is one entry for every item
# of atom_types except the special entries 'none' and 'SuNo' (27 keys).
# NOTE(review): the integers are presumably the permitted total bond counts /
# valences per atom type, used for validity checks - confirm with the consumer.
# NOTE(review): 'Au' lists only 0 - verify this is intentional.
allowed_bonds:
  'O': [2]
  'C': [4]
  'N': [3]
  'I': [1, 3, 5, 7]
  'Cl': [1]
  'Si': [4, 6]
  'F': [1]
  'Br': [1]
  'N+1': [4]
  'O-1': [1]
  'S': [2, 4, 6]
  'B': [3]
  'N-1': [2]
  'Zn+1': [3]
  'Cu': [1, 2]
  'Sn': [2, 4]
  'P+1': [4, 6, 8]
  'Mg+1': [3]
  'C-1': [3]
  'P': [3, 5, 7]
  'S+1': [3, 5, 7]
  'S-1': [1, 3, 5]
  'Se': [2, 4, 6]
  'Zn': [2]
  'Mg': [2]
  'U': [6]
  'Au': [0]
# Number of examples per split (39811 + 4949 + 4951 = 49711 in total).
dataset_size:
  train: 39811
  test: 4949
  val: 4951  # alternative count noted here: 4868 - TODO confirm which is current