# Training split configuration.
# Presumably consumed via the `_target_` instantiation convention — confirm
# against the code that loads this file.
train:
  _target_: data_module.dataset.TextToSpeechDataset
  # JSONL manifests for this split. Presumably joined with the base paths
  # below by the dataset class — verify there.
  content: data/libritts/train/phoneme_duration_filtered.jsonl
  audio: data/libritts/train/audio.jsonl
  # Base directories are read from environment variables; the quoted value
  # after the comma is a placeholder default, not a usable path.
  # NOTE(review): newer OmegaConf releases replace the `env` resolver with
  # `oc.env` — confirm which version (or custom resolver) this project uses.
  base_content_path: '${env:DATA_BASE,"/path/to/datasets"}'
  base_audio_path: '${env:LIBRITTS_BASE,"/path/to/libritts"}'
  # Interpolated from a `sample_rate` key defined outside this section.
  target_sr: '${sample_rate}'
  use_h5_cache: false
  # Interpolated from an `instruction_embedding` key defined outside this
  # section.
  task_instruction: '${instruction_embedding}'
# Validation split configuration; same keys as the training split plus a
# `max_samples` entry.
val:
  _target_: data_module.dataset.TextToSpeechDataset
  # JSONL manifests for this split. Presumably joined with the base paths
  # below by the dataset class — verify there.
  content: data/libritts/val/phoneme_duration_filtered.jsonl
  audio: data/libritts/val/audio.jsonl
  # Base directories are read from environment variables; the quoted value
  # after the comma is a placeholder default, not a usable path.
  # NOTE(review): newer OmegaConf releases replace the `env` resolver with
  # `oc.env` — confirm which version (or custom resolver) this project uses.
  base_content_path: '${env:DATA_BASE,"/path/to/datasets"}'
  base_audio_path: '${env:LIBRITTS_BASE,"/path/to/libritts"}'
  # Interpolated from a `sample_rate` key defined outside this section.
  target_sr: '${sample_rate}'
  use_h5_cache: false
  # Interpolated from an `instruction_embedding` key defined outside this
  # section.
  task_instruction: '${instruction_embedding}'
  # Interpolated from a `max_val_samples` key defined outside this section;
  # presumably caps how many validation items are used — TODO confirm.
  max_samples: '${max_val_samples}'
# Test split configuration. Unlike the train/val sections in this file, it
# sets no audio manifest, audio base path, target sample rate or cache flag.
test:
  _target_: data_module.dataset.TextToSpeechDataset
  # Alternative manifest, currently disabled; swap it with the active
  # `content` line below to use it instead.
  # content: data/libritts/voiceflow_test/phoneme.jsonl
  content: data/libritts/test/phoneme.jsonl
  # Base directory is read from an environment variable; the quoted value
  # after the comma is a placeholder default, not a usable path.
  # NOTE(review): newer OmegaConf releases replace the `env` resolver with
  # `oc.env` — confirm which version (or custom resolver) this project uses.
  base_content_path: '${env:DATA_BASE,"/path/to/datasets"}'
  # Interpolated from an `instruction_embedding` key defined outside this
  # section.
  task_instruction: '${instruction_embedding}'
  # NOTE(review): meaning of index 1 is not visible here — presumably selects
  # one of several task instructions; confirm against the dataset class.
  instruction_idx: 1
  # Interpolated from a `max_test_samples` key defined outside this section;
  # presumably caps how many test items are used — TODO confirm.
  max_samples: '${max_test_samples}'
