# Identifier of the training loss to use.
loss_type = "clip_bce"

# Repeat mode: repeat the spectrogram instead of reshaping it.
enable_repeat_mode = False

# Enable the token-semantic layer.
enable_tscam = True

# Audio front-end settings (presumably spectrogram / mel filterbank
# parameters -- confirm against the code that consumes them).
# sample_rate: 16000 for scv2, 32000 for audioset and esc-50.
sample_rate = 32000
window_size = 1024
# hop_size: 160 for scv2, 320 for audioset and esc-50.
hop_size = 320
mel_bins = 64
fmin = 50
fmax = 14000

# NOTE(review): presumably toggles attention heatmap output; verify at the use site.
htsat_attn_heatmap = False