# Training split of the audio super-resolution dataset.
# Manifest and base paths are read from TTSHQ_* environment variables and fall
# back to the placeholder "/path/to/TTS_HQ/..." defaults when those are unset.
# NOTE(review): the bare `env` resolver is deprecated in newer OmegaConf
# releases in favour of `oc.env` — confirm which one this project relies on.
train:
  _target_: data_module.dataset.AudioSuperResolutionDataset
  # frequency-cutoff audio
  content: '${env:TTSHQ_CONTENT_TRAIN,"/path/to/TTS_HQ/train/content.jsonl"}'
  # high-frequency enhanced audio
  audio: '${env:TTSHQ_AUDIO_TRAIN,"/path/to/TTS_HQ/train/audio.jsonl"}'
  # Both base paths resolve to the same TTSHQ_BASE root, with a trailing slash.
  base_content_path: '${env:TTSHQ_BASE,"/path/to/TTS_HQ"}/'
  base_audio_path: '${env:TTSHQ_BASE,"/path/to/TTS_HQ"}/'
  # The next values are interpolated from keys defined elsewhere in the config.
  target_sr: '${sample_rate}'
  downsampling_ratio: '${downsampling_ratio}'
  use_h5_cache: false
  task_instruction: '${instruction_embedding}'
  # Presumably a per-clip length limit in seconds — TODO confirm in the dataset.
  max_duration: 5.0
  random_crop: true
# Validation split: same dataset class and settings as the training split,
# except that random cropping is turned off and a `max_samples` value is set.
val:
  _target_: data_module.dataset.AudioSuperResolutionDataset
  # Manifests fall back to the placeholder paths when the env vars are unset.
  content: '${env:TTSHQ_CONTENT_VAL,"/path/to/TTS_HQ/val/content.jsonl"}'
  audio: '${env:TTSHQ_AUDIO_VAL,"/path/to/TTS_HQ/val/audio.jsonl"}'
  # Both base paths resolve to the same TTSHQ_BASE root, with a trailing slash.
  base_content_path: '${env:TTSHQ_BASE,"/path/to/TTS_HQ"}/'
  base_audio_path: '${env:TTSHQ_BASE,"/path/to/TTS_HQ"}/'
  # The next values are interpolated from keys defined elsewhere in the config.
  target_sr: '${sample_rate}'
  downsampling_ratio: '${downsampling_ratio}'
  use_h5_cache: false
  task_instruction: '${instruction_embedding}'
  max_duration: 5.0
  random_crop: false
  # Taken from the `max_val_samples` key defined elsewhere in the config.
  max_samples: '${max_val_samples}'
# Test split: only a content manifest and its base path are configured here.
# No `audio`, `base_audio_path`, `max_duration` or `random_crop` keys are set.
# NOTE(review): presumably intentional for this split — confirm against the
# dataset class defaults.
test:
  _target_: data_module.dataset.AudioSuperResolutionDataset
  # Manifest falls back to the placeholder path when the env var is unset.
  content: '${env:TTSHQ_CONTENT_TEST,"/path/to/TTS_HQ/test/content.jsonl"}'
  base_content_path: '${env:TTSHQ_BASE,"/path/to/TTS_HQ"}/'
  # The next values are interpolated from keys defined elsewhere in the config.
  target_sr: '${sample_rate}'
  downsampling_ratio: '${downsampling_ratio}'
  use_h5_cache: false
  task_instruction: '${instruction_embedding}'
  # Meaning of this index is defined by the dataset class — TODO confirm.
  instruction_idx: 1