
# Datamodule settings for SCAN.
# `_target_` and `dataset_target_` are dotted import paths; presumably they
# are resolved by Hydra-style instantiation -- TODO confirm against the loader.
_target_: src.datamodules.scan.SCANDatamodule
dataset_target_: src.datamodules.scan.SCANDataset
key: scan
# NOTE(review): other entries in this file interpolate `${seed}`. If this file
# is mounted under a `datamodule` group (as `${datamodule.dataset_target_}`
# suggests), `${seed}` would resolve to a root-level `seed`, not this value --
# confirm which one is intended.
seed: 42
tokenize_in_dataset: true
# Sampling probabilities per data type, read from the `hp_init` values of the
# supervision scheduler callbacks. Order: [xz, p(z|not xz)].
data_type_sampling_probability:
  - ${callbacks.supervision_scheduler.scheduler_xz.hp_init}
  - ${callbacks.supervision_scheduler.scheduler_z.hp_init}

# Parameters grouped under `dataset_parameters`. Several values are
# interpolated from elsewhere in the composed config rather than set here.
dataset_parameters:
  seed: ${seed}
  batch_size: 128
  test_split: 'simple'
  train_ratio: 0.8
  # Order: [r(xz), r(z|not xz)]
  supervision_ratio:
    - 0.05
    - 0.5
  num_workers: 8
  overfit_batch: ${overfit_batch}
  # Length limits are taken from the model's parameters so both stay in sync.
  max_x_length: ${model.model_params.max_x_length}
  max_z_length: ${model.model_params.max_z_length}
  remove_long_data_points: true

# One entry per split ("test", "train", "val"); each takes its `_target_` from
# `datamodule.dataset_target_`.
# NOTE(review): `seed` is a sibling of the split entries -- confirm the
# consumer does not iterate over `datasets` expecting only split configs.
datasets:
  seed: ${seed}
  test:
    _target_: ${datamodule.dataset_target_}
    split: 'test'

  train:
    _target_: ${datamodule.dataset_target_}
    split: 'train'

  val:
    _target_: ${datamodule.dataset_target_}
    split: 'val'