# Hydra defaults list: selects the `under200res_noleak` option from the
# `mdcath_names` config group and places it under the `protein_names` package.
# NOTE(review): presumably this supplies the protein names the data module
# loads -- confirm against the `mdcath_names` group and the data module.
defaults:
  - mdcath_names@protein_names: under200res_noleak

# Data module settings. `_target_` names the class to build; the other keys
# are presumably forwarded to it as keyword arguments by Hydra's instantiate
# -- confirm at the call site.
_target_: src.data.MDCATHDataModule
# Dataset location, resolved through the `paths.project_data_dir` interpolation.
dataset_path: ${paths.project_data_dir}/mdCATH
# NOTE(review): looks like the file name of precomputed sequence embeddings
# -- confirm where it is resolved relative to `dataset_path`.
seq_emb_name: esmc_6b_200res.pt
max_lag: 200  # in ns
samples_per_epoch: 10000
# Quoted so the values load as strings rather than integers.
temperatures: ["320", "348", "379", "413", "450"]
batch_size: 128
num_workers: 8
pin_memory: true
drop_last: true  # useful when compiling models
# Nested `_target_` config for the `collate_fn` argument.
collate_fn:
  _target_: src.data.FrameDataCollator
  # NOTE(review): null presumably means no fixed padding length -- confirm
  # against FrameDataCollator.
  pad_to: null