# Training split: instantiates SpeechEnhancementDataset over the
# LJSpeech-1.1+Musan train manifests.
train:
  _target_: data_module.dataset.SpeechEnhancementDataset

  # Base directories and metadata manifests. Each value is read from the named
  # environment variable through the `env` resolver; the quoted path is the
  # fallback argument and is only a placeholder.
  # NOTE(review): OmegaConf 2.1+ names its built-in resolver `oc.env` --
  # confirm `env` is available for the OmegaConf version in use.
  base_content_path: '${env:LJS_MUSAN_BASE,"/path/to/LJSpeech-1.1+Musan"}'
  base_audio_path: '${env:LJS_MUSAN_BASE,"/path/to/LJSpeech-1.1+Musan"}'
  content: '${env:LJS_MUSAN_CAPTION_TRAIN,"/path/to/LJSpeech-1.1+Musan/train/metadata_caption.jsonl"}'
  audio: '${env:LJS_MUSAN_AUDIO_TRAIN,"/path/to/LJSpeech-1.1+Musan/train/metadata_audio.jsonl"}'

  # Values interpolated from keys defined elsewhere in the composed config.
  target_sr: '${sample_rate}'
  downsampling_ratio: '${downsampling_ratio}'
  task_instruction: '${instruction_embedding}'

  # Literal options for this split.
  max_duration: 5.0  # presumably seconds -- TODO confirm against the dataset class
  use_h5_cache: false
# Validation split: instantiates SpeechEnhancementDataset over the
# LJSpeech-1.1+Musan val manifests.
val:
  _target_: data_module.dataset.SpeechEnhancementDataset

  # Base directories and metadata manifests. Each value is read from the named
  # environment variable through the `env` resolver; the quoted path is the
  # fallback argument and is only a placeholder.
  # NOTE(review): OmegaConf 2.1+ names its built-in resolver `oc.env` --
  # confirm `env` is available for the OmegaConf version in use.
  base_content_path: '${env:LJS_MUSAN_BASE,"/path/to/LJSpeech-1.1+Musan"}'
  base_audio_path: '${env:LJS_MUSAN_BASE,"/path/to/LJSpeech-1.1+Musan"}'
  content: '${env:LJS_MUSAN_CAPTION_VAL,"/path/to/LJSpeech-1.1+Musan/val/metadata_caption.jsonl"}'
  audio: '${env:LJS_MUSAN_AUDIO_VAL,"/path/to/LJSpeech-1.1+Musan/val/metadata_audio.jsonl"}'

  # Values interpolated from keys defined elsewhere in the composed config.
  target_sr: '${sample_rate}'
  downsampling_ratio: '${downsampling_ratio}'
  task_instruction: '${instruction_embedding}'
  max_samples: '${max_val_samples}'

  # Literal options for this split.
  max_duration: 5.0  # presumably seconds -- TODO confirm against the dataset class
  random_crop: false  # only set on this split; other splits use the class default
  use_h5_cache: false
# Test split: instantiates SpeechEnhancementDataset over the
# LJSpeech-1.1+Musan test caption manifest.
# NOTE(review): unlike `train` and `val`, this split sets no `audio`,
# `base_audio_path` or `max_duration` -- confirm the dataset class defaults
# are what is intended here.
test:
  _target_: data_module.dataset.SpeechEnhancementDataset

  # Base directory and caption manifest. Each value is read from the named
  # environment variable through the `env` resolver; the quoted path is the
  # fallback argument and is only a placeholder.
  # NOTE(review): OmegaConf 2.1+ names its built-in resolver `oc.env` --
  # confirm `env` is available for the OmegaConf version in use.
  base_content_path: '${env:LJS_MUSAN_BASE,"/path/to/LJSpeech-1.1+Musan"}'
  content: '${env:LJS_MUSAN_CAPTION_TEST,"/path/to/LJSpeech-1.1+Musan/test/metadata_caption.jsonl"}'

  # Values interpolated from keys defined elsewhere in the composed config.
  target_sr: '${sample_rate}'
  downsampling_ratio: '${downsampling_ratio}'
  task_instruction: '${instruction_embedding}'

  # Literal options for this split.
  instruction_idx: 1  # only set on this split; meaning is defined by the dataset class
  use_h5_cache: false