# Class path of the datamodule this config describes (Hydra-style `_target_`).
_target_: src.datamodules.sfst.SFSTDatamodule
# Class path of the dataset; the split entries under `datasets` point at it
# through ${datamodule.dataset_target_}.
dataset_target_: src.datamodules.sfst.SFSTDataset
key: sfst
seed: 42
tokenize_in_dataset: true
# Two entries, in the order [xz, p(z | not xz)]. Each one is read from the
# `hp_init` value of the corresponding supervision-scheduler callback.
data_type_sampling_probability:
  # xz
  - ${callbacks.supervision_scheduler.scheduler_xz.hp_init}
  # p(z | not xz)
  - ${callbacks.supervision_scheduler.scheduler_z.hp_init}

dataset_parameters:
  # NOTE(review): an interpolation without a leading dot is resolved from the
  # config root, so ${seed} presumably reads the root-level `seed` rather than
  # a sibling key of this file - confirm this is the intended source.
  seed: ${seed}
  batch_size: 128
  test_split: 'simple'
  train_ratio: 0.8
  # Order: [r(xz), r(z | not xz)]
  supervision_ratio:
    - 0.02
    - 0.9
  num_workers: 8
  overfit_batch: ${overfit_batch}
  # Length limits are read from the model's parameters.
  max_x_length: ${model.model_params.max_x_length}
  max_z_length: ${model.model_params.max_z_length}
  remove_long_data_points: true

  # SFST specific parameters
  maximum_input_alphabet_delta: 1
  p_empty_emission: 0.2
  dataset_size: 20000
  node_size: 10
  max_length: 50
  output_alphabet_size: 7

datasets:
  seed: ${seed}
  # One entry per split. Every entry points `_target_` at the class path held
  # in datamodule.dataset_target_ and differs only in its `split` value.
  test:
    _target_: ${datamodule.dataset_target_}
    split: 'test'

  train:
    _target_: ${datamodule.dataset_target_}
    split: 'train'

  val:
    _target_: ${datamodule.dataset_target_}
    split: 'val'