# Task identifiers grouped under the "time-aligned" category.
# None of these names appears in NON_TIME_ALIGNED_TASKS.
# NOTE(review): presumably these are tasks whose output is aligned in time
# with the conditioning input -- confirm against the code that reads this list.
TIME_ALIGNED_TASKS = [
    "text_to_speech",
    "singing_voice_synthesis",
    "speech_enhancement",  # also listed in SAME_LENGTH_TASKS
    "audio_super_resolution",  # also listed in SAME_LENGTH_TASKS
    "video_to_audio",
]
# Task identifiers grouped under the "non-time-aligned" category.
# None of these names appears in TIME_ALIGNED_TASKS.
# NOTE(review): presumably these are tasks whose output has no fixed temporal
# correspondence with the conditioning input -- confirm against the consumer.
NON_TIME_ALIGNED_TASKS = [
    "text_to_audio",
    "text_to_music",
]
# Task identifiers grouped under the "same-length" category.
# Every entry here also appears in TIME_ALIGNED_TASKS, so keep the two lists
# in sync when adding or renaming a task.
# NOTE(review): presumably these are tasks whose output has the same duration
# as the input audio -- confirm against the code that reads this list.
SAME_LENGTH_TASKS = [
    "speech_enhancement",
    "audio_super_resolution",
]
