# Mapping from task name to an integer value.
# NOTE(review): what the integers represent (count, weight, priority, ...) is
# not visible in this file -- confirm against the code that loads it.
text_to_audio: 2
text_to_music: 3
text_to_speech: 1
audio_super_resolution: 1
speech_enhancement: 1
singing_voice_synthesis: 1
video_to_audio: 1