# Training split: dataset built from the manifests under data/m4singer/train.
# `_target_` names the class to construct; presumably consumed by Hydra's
# instantiate, with the remaining keys passed as constructor kwargs
# (TODO confirm against the loader code).
train:
  _target_: data_module.dataset.MidiSingingDataset
  # Per-split JSONL manifests (MIDI/score content and audio entries).
  content: data/m4singer/train/midi.jsonl
  audio: data/m4singer/train/audio.jsonl
  # Phoneme and speaker vocabularies; the val and test entries point at the
  # same two files.
  phoneme_set: data/m4singer/phone_set.json
  spk_set: data/m4singer/spk_set.json
  # Interpolations: `sample_rate` and `instruction_embedding` are not defined
  # in this section, so they have to be resolved from elsewhere in the
  # composed config.
  target_sr: ${sample_rate}
  task_instruction: ${instruction_embedding}
# Validation split: same dataset class and settings as the train entry,
# pointed at the manifests under data/m4singer/val.
val:
  _target_: data_module.dataset.MidiSingingDataset
  # Per-split JSONL manifests (MIDI/score content and audio entries).
  content: data/m4singer/val/midi.jsonl
  audio: data/m4singer/val/audio.jsonl
  # Phoneme and speaker vocabularies; the train and test entries point at the
  # same two files.
  phoneme_set: data/m4singer/phone_set.json
  spk_set: data/m4singer/spk_set.json
  # Interpolations: `sample_rate` and `instruction_embedding` are not defined
  # in this section, so they have to be resolved from elsewhere in the
  # composed config.
  target_sr: ${sample_rate}
  task_instruction: ${instruction_embedding}
# Test split: same dataset class as train/val, pointed at the manifest under
# data/m4singer/test. Keys follow the same order as the train and val entries
# (target, manifests, vocabularies, instruction settings, limits).
# NOTE(review): no `audio` or `target_sr` is set for this split, unlike
# train/val. Presumably the dataset class treats them as optional for
# inference-only use; confirm against MidiSingingDataset.
test:
  _target_: data_module.dataset.MidiSingingDataset
  # Per-split JSONL manifest (MIDI/score content only).
  content: data/m4singer/test/midi.jsonl
  # Phoneme and speaker vocabularies; same files as the train and val entries.
  phoneme_set: data/m4singer/phone_set.json
  spk_set: data/m4singer/spk_set.json
  # `instruction_embedding` is interpolated from outside this section.
  # `instruction_idx` is only set for this split; its exact meaning is
  # defined by the dataset class (presumably selects one instruction; verify).
  task_instruction: ${instruction_embedding}
  instruction_idx: 1
  # `max_test_samples` is interpolated from outside this section; presumably
  # caps the number of test items loaded (verify).
  max_samples: ${max_test_samples}