# CFQ datamodule configuration.
# NOTE(review): `_target_` looks like a Hydra-style class path to instantiate;
# confirm against the code that loads this config.
_target_: src.datamodules.cfq.CFQDatamodule
# Class path of the dataset; the entries under `datasets` point back to it via
# the ${datamodule.dataset_target_} interpolation.
dataset_target_: src.datamodules.cfq.CFQDataset
seed: 42
key: cfq
tokenize_in_dataset: true
# Sampling probabilities in the order [xz, p(z|not xz)], read from the
# `hp_init` values of the two supervision scheduler callbacks.
data_type_sampling_probability:
  - ${callbacks.supervision_scheduler.scheduler_xz.hp_init}
  - ${callbacks.supervision_scheduler.scheduler_z.hp_init}

# Parameters describing how the dataset is split, batched and truncated.
dataset_parameters:
  # NOTE(review): `${seed}` and `${overfit_batch}` have no leading dot, so they
  # resolve from the config root. Other interpolations address this file as
  # `datamodule.*`, so `${seed}` may not be the `seed: 42` defined at the top
  # of this file. Confirm which seed is intended.
  seed: ${seed}
  batch_size: 128
  # Allowed values: 'mcd1', 'mcd2', 'mcd3', 'question_complexity_split',
  # 'question_pattern_split', 'query_complexity_split', 'query_pattern_split',
  # 'random_split'.
  test_split: 'mcd1'
  train_ratio: 0.8
  # Order: [r(xz), r(z|not xz)]
  supervision_ratio: [0.05, 0.5]
  num_workers: 8
  overfit_batch: ${overfit_batch}
  # Sequence length limits are taken from the model's parameters.
  max_x_length: ${model.model_params.max_x_length}
  max_z_length: ${model.model_params.max_z_length}
  remove_long_data_points: false
  

# Per-split dataset entries. Each one takes its class path from
# `datamodule.dataset_target_` and differs only in the `split` value.
datasets:
  seed: ${seed}

  test:
    _target_: ${datamodule.dataset_target_}
    split: 'test'

  train:
    _target_: ${datamodule.dataset_target_}
    split: 'train'

  val:
    _target_: ${datamodule.dataset_target_}
    split: 'val'