# Class names counted by get_num_classes() for the 'ks' dataset key.
# NOTE(review): the order may be significant (e.g. if list position doubles
# as the label id) -- confirm with callers before reordering or sorting.
KINETICS_SOUND_CLASSES = [
    "laughing",
    "playing_clarinet",
    "singing",
    "playing_harmonica",
    "playing_keyboard",
    "playing_xylophone",
    "playing_bass_guitar",
    "tapping_guitar",
    "playing_drums",
    "playing_piano",
    "ripping_paper",
    "playing_saxophone",
    "tickling",
    "playing_trumpet",
    "tapping_pen",
    "playing_organ",
    "tap_dancing",
    "playing_accordion",
    "blowing_nose",
    "shuffling_cards",
    "playing_guitar",
    "playing_trombone",
    "playing_bagpipes",
    "shoveling_snow",
    "bowling",
    "playing_violin",
    "chopping_wood",
    "stomping_grapes",
    "strumming_guitar",
    "blowing_out_candles",
    "dribbling_basketball",
    "mowing_lawn",
]

# Class names counted by get_num_classes() for the 'AVE' dataset key.
# NOTE(review): "airplane" is lowercase unlike the other entries; it is kept
# verbatim because it may have to match labels stored elsewhere -- confirm
# before normalising. Order may also be significant; do not sort.
AVE_CLASSES = [
    "Church_bell",
    "Male_speech",
    "Bark",
    "airplane",
    "Race_car",
    "Female_speech",
    "Helicopter",
    "Violin",
    "Flute",
    "Ukulele",
    "Frying",
    "Truck",
    "Shofar",
    "Motorcycle",
    "Chainsaw",
    "Acoustic_guitar",
    "Train_horn",
    "Clock",
    "Banjo",
    "Goat",
    "Baby_cry",
    "Bus",
    "Cat",
    "Horse",
    "Toilet_flush",
    "Rodents",
    "Accordion",
    "Mandolin",
]

# Class labels counted by get_num_classes() for the 'CREMA-D' dataset key.
# Presumably the dataset's emotion codes (neutral, happy, sad, fear, disgust,
# anger) -- confirm against the data loader.
CREMA_D_CLASSES = [
    'NEU',
    'HAP',
    'SAD',
    'FEA',
    'DIS',
    'ANG',
]

# Three-element mean / standard-deviation constants.
# Presumably the per-channel (RGB) image normalisation statistics expected by
# OpenCLIP preprocessing -- confirm against the transform that consumes them.
OPEN_CLIP_MEAN = [
    0.48145466,
    0.4578275,
    0.40821073,
]
OPEN_CLIP_STD = [
    0.26862954,
    0.26130258,
    0.27577711,
]


def get_num_classes(dataset_name):
    """Return the number of classes defined for a supported dataset.

    Args:
        dataset_name: Dataset identifier. Matching is exact and
            case-sensitive; the recognised values are ``'ks'``, ``'AVE'``
            and ``'CREMA-D'``.

    Returns:
        int: Length of the class-name list associated with the dataset
        (``KINETICS_SOUND_CLASSES``, ``AVE_CLASSES`` or
        ``CREMA_D_CLASSES`` respectively).

    Raises:
        ValueError: If ``dataset_name`` is not one of the recognised
            identifiers.
    """
    if dataset_name == 'ks':
        return len(KINETICS_SOUND_CLASSES)
    if dataset_name == 'AVE':
        return len(AVE_CLASSES)
    if dataset_name == 'CREMA-D':
        return len(CREMA_D_CLASSES)

    # Raise instead of print() + exit(): the bare exit() helper is injected by
    # the `site` module (not always available) and, called without an
    # argument, terminates the process with status 0 -- i.e. it reports
    # success for a failure. An exception still aborts an unguarded script
    # (with a non-zero status and a traceback) while letting callers handle
    # the error if they want to.
    raise ValueError('unknown dataset: {!r}'.format(dataset_name))


# print(len(KINETICS_SOUND_CLASSES), len(AVE_CLASSES), len(CREMA_D_CLASSES))