---
# Job-level settings for this CrossTask prediction config.
# NOTE(review): `local_predict` presumably selects a local (non-distributed)
# prediction job — confirm against the task runner.
task_type: local_predict
# NOTE(review): `big` looks like the name of a Slurm resource profile
# defined elsewhere — confirm against the launcher.
slurm_config: big
# Dataset section: which split to load, which processor classes to use,
# where the CrossTask files live, and the windowing / length limits.
dataset:
  # Split to load; fairseq.dataset.valid_subset uses the same name.
  split: test

  # Processor / aligner class names (resolved by name elsewhere in the
  # project) and the BERT checkpoint name used on the text side.
  meta_processor: CrossTaskMetaProcessor
  video_processor: CrossTaskVideoProcessor
  text_processor: CrossTaskTextProcessor
  aligner: CrossTaskAligner
  bert_name: bert-base-uncased

  # Video-list CSVs. test_path, val_path and val_csv_path all point at the
  # same file (videos_val.csv); only train_csv_path differs.
  train_csv_path: data/crosstask/crosstask_release/videos.csv
  val_csv_path: data/crosstask/crosstask_release/videos_val.csv
  val_path: data/crosstask/crosstask_release/videos_val.csv
  test_path: data/crosstask/crosstask_release/videos_val.csv

  # Task definition files, annotations and the video feature directory.
  primary_path: data/crosstask/crosstask_release/tasks_primary.txt
  related_path: data/crosstask/crosstask_release/tasks_related.txt
  annotation_path: data/crosstask/crosstask_release/annotations
  vfeat_dir: data/feat/feat_crosstask_s3d

  # NOTE(review): presumably the number of training videos per task —
  # confirm against CrossTaskMetaProcessor.
  n_train: 30

  # Windowing and sequence-length limits. sliding_window_size equals
  # max_video_len (both 32); sliding_window is half of that.
  # NOTE(review): sliding_window looks like the stride between windows —
  # confirm against the video processor.
  sliding_window_size: 32
  sliding_window: 16
  max_video_len: 32
  max_len: 96

  # NOTE(review): model-sounding option that lives under `dataset`;
  # presumably read by the aligner/model via this section — confirm.
  num_iso_layer: 12
# Options grouped under fairseq's own config namespaces.
fairseq:
  common_eval:
    # Checkpoint file to load for prediction.
    path: runs/mtm/vlm/checkpoint_best.pt
  dataset:
    # Subset name; matches dataset.split (`test`).
    valid_subset: test
    # One item per batch.
    # NOTE(review): presumably because each item already expands into
    # several sliding windows — confirm.
    batch_size: 1
    num_workers: 2
# Model section: class names are resolved by name elsewhere in the project.
model:
  # Multimodal encoder class wrapped by the model class below.
  mm_encoder_cls: MMBertForJoint
  model_cls: MMFusionActionLocalization
  # NOTE(review): presumably enables segment (token-type) embeddings in the
  # encoder — confirm against MMBertForJoint.
  use_seg_emb: true
# Evaluation output settings.
eval:
  # Output location for evaluation artifacts; sits under the same
  # runs/mtm/vlm directory as the checkpoint in fairseq.common_eval.path.
  # NOTE(review): the `_zs` suffix presumably stands for zero-shot — confirm.
  save_path: runs/mtm/vlm/crosstask_zs/eval
# Class names for producing predictions and scoring them; both are
# CrossTask-specific and resolved by name elsewhere in the project.
predictor: CrossTaskPredictor
metric: CrossTaskMetric
