# Configuration layered on projects/task/test_crosstask.yaml (see `includes`),
# setting the model-related keys below.
# NOTE(review): presumably the loader reads the included file first and lets
# the keys in this file override it -- confirm against the config loader.
includes: projects/task/test_crosstask.yaml
model:
  model_cls: MMFusionSeparateActionLocalization
  # Written as an explicit null rather than a bare empty value: both parse to
  # null, but the explicit form cannot be mistaken for a forgotten value.
  mm_encoder_cls: null
  video_encoder_cls: MMBertForEncoder
  text_encoder_cls: BertModel  # dummy, not used.
  num_hidden_video_layers: 6
