# Video-side transform configuration.
# NOTE(review): `name` presumably selects the transform to build and `args`
# presumably holds the parameters handed to it -- confirm against the code
# that loads this file.
video_data:
  name: VideoMAE_transform
  args:
    # NOTE(review): presumably the spatial size (in pixels) that frames are
    # resized/cropped to -- confirm against the transform implementation.
    input_size: 224
# Audio-side transform configuration.
# NOTE(review): `name` presumably selects the transform to build and `args`
# presumably holds the parameters handed to it -- confirm against the code
# that loads this file.
audio_data:
  name: CAV_Audio_transform
  args:
    # NOTE(review): presumably the number of mel filterbank bins -- confirm.
    num_mels: 128
    # NOTE(review): presumably the target length (in frames) the audio
    # features are padded/cropped to -- confirm.
    audio_size: 1024
    # NOTE(review): presumably frequency/time masking widths; 0 looks like
    # "masking disabled" -- confirm against the transform implementation.
    freqm: 0
    timem: 0
    # NOTE(review): presumably normalization statistics applied to the audio
    # features -- confirm which dataset they were computed on.
    mean: -5.081
    std: 4.4849
    # Canonical lowercase boolean: parses to the same value as `True` under
    # YAML 1.1 and the 1.2 core schema, and is also accepted by strict
    # YAML 1.2 JSON-schema loaders.
    # NOTE(review): presumably toggles a noise augmentation -- confirm.
    noise: true
