# Architecture selector plus the argument overrides passed along with it.
# NOTE(review): the meaning of every key is defined by the consuming code,
# which is not visible from this file; confirm before relying on a comment.
arch: 'Transformer'
args:
  # Loss terms listed for this configuration (order kept as authored).
  loss_names:
    - 'mae_audio'
    - 'mae_frame'
    - 'vam_tvlt'
    - 'vam'

  # Empty string here; presumably means "no local checkpoint" - TODO confirm.
  load_local_path: ''
  init_classifier: false
  norm_pix_loss: false

  # Per-loss weights. contrast_loss_weight is deliberately left as the
  # integer 0 so the parsed type matches the value as originally authored.
  mae_loss_weight: 0.3
  contrast_loss_weight: 0
  vam_tvlt_loss_weight: 1.0

  # Recall-metric switches; both are off in this configuration.
  get_va_recall_metric: false
  get_tvlt_va_recall_metric: false