# COGS datamodule configuration. The `${datamodule....}` interpolations used
# elsewhere in this file address these keys under the `datamodule` node.

# Hydra-style `_target_`: dotted path of the datamodule class (presumably
# instantiated via hydra.utils.instantiate -- confirm against the caller).
_target_: src.datamodules.cogs.COGSDatamodule
# Dotted path of the dataset class; reused as the `_target_` of every entry
# under `datasets` through `${datamodule.dataset_target_}`.
dataset_target_: src.datamodules.cogs.COGSDataset
seed: 42
key: cogs
tokenize_in_dataset: true
# Sampling probabilities, taken from the `hp_init` values of the supervision
# scheduler callback config. Order: [xz, p(z|not xz)].
data_type_sampling_probability:
  - ${callbacks.supervision_scheduler.scheduler_xz.hp_init}
  - ${callbacks.supervision_scheduler.scheduler_z.hp_init}

# Root directory of the COGS TSV files. The `tsv_path` entries append the file
# name directly to this value, so the trailing "/" is required.
# Disabled alternatives: "~/blocks/data/cogs/", "./data/cogs/"
path_header: '${work_dir}/data/cogs/'

# Settings shared by the datasets, plus the TSV location of each split.
dataset_parameters:
  # Absolute interpolation: resolves against the root of the composed config.
  # NOTE(review): if this file is mounted under `datamodule`, this is the
  # root-level `seed`, not the `seed` key declared at the top of this file --
  # confirm that is intended.
  seed: ${seed}
  batch_size: 128
  supervision_ratio: [0.05, 0.5]  # [r(xz), r(z|not xz)]
  num_workers: 8
  overfit_batch: ${overfit_batch}
  # Sequence-length limits are read from the model's parameters.
  max_x_length: ${model.model_params.max_x_length}
  max_z_length: ${model.model_params.max_z_length}
  remove_long_data_points: false

  # Per-split TSV files. `datamodule.path_header` is prepended verbatim (no
  # separator is inserted), so it must end with "/".
  train:
    tsv_path: ${datamodule.path_header}train.tsv
  val:
    tsv_path: ${datamodule.path_header}dev.tsv
  test:
    tsv_path: ${datamodule.path_header}test.tsv
  gen:
    tsv_path: ${datamodule.path_header}gen.tsv
  train_100:
    tsv_path: ${datamodule.path_header}train_100.tsv

# One dataset entry per split. Each entry points `_target_` at the class named
# by `datamodule.dataset_target_` and passes the split name as `split`.
# NOTE(review): `dataset_parameters` also lists a `train_100` split, but no
# entry here uses it -- confirm whether that is intentional.
datasets:
  # Absolute interpolation: resolves against the root of the composed config.
  seed: ${seed}
  test:
    _target_: ${datamodule.dataset_target_}
    split: "test"

  train:
    _target_: ${datamodule.dataset_target_}
    split: "train"

  val:
    _target_: ${datamodule.dataset_target_}
    split: "val"

  gen:
    _target_: ${datamodule.dataset_target_}
    split: "gen"