# Model-specific settings for the `movie_mcan` model.
model_config:
  movie_mcan:
    # Losses used for this model.
    losses:
    - type: triple_logit_bce
    # Image feature dimensionality.
    # NOTE(review): presumably the width of each feature vector -- confirm
    # that 2048 matches the feature files listed under
    # dataset_config.vqa2.features.
    image_feature_dim: 2048

# Dataset settings for the `vqa2` dataset.
dataset_config:
  vqa2:
    depth_first: true
    use_features: true
    zoo_requirements:
    - vqa2.defaults
    - vqa2.grid_features
    # Feature locations per split. The train list has three entries, the same
    # count as annotations.train below; presumably the two lists pair up by
    # position -- verify against the dataset loader before reordering.
    features:
      train:
      - grid-feats-vqa/clip/ViT
      - grid-feats-vqa/clip/ViT
      - vqa2/grid_features/features
      val:
      - grid-feats-vqa/clip/ViT
      test:
      - grid-feats-vqa/clip/ViT
    # Annotation files per split.
    annotations:
      train:
      - vqa2/grid_features/annotations/imdb_train2014.npy
      - vqa2/grid_features/annotations/imdb_val2014.npy
      - vqa2/grid_features/annotations/imdb_visualgenome.npy
      val:
      - vqa2/grid_features/annotations/imdb_oneval2014.npy
      # Disabled alternative validation entry:
      # - vqa2/grid_features/annotations/imdb_val2014.npy
      test:
      - vqa2/grid_features/annotations/imdb_test2015.npy
    # Set to >= max number of features for the dataset
    max_features: 1024

# Optimizer selection and hyper-parameters.
optimizer:
  type: adam_w
  params:
    # lr and eps are written with an explicit decimal point: YAML 1.1
    # resolvers (e.g. stock PyYAML) load dot-less exponent forms such as
    # `5e-05` as strings rather than floats. The numeric values are unchanged.
    lr: 5.0e-05
    weight_decay: 0
    eps: 1.0e-09
    betas:
    - 0.9
    - 0.98

# Learning-rate scheduler: `multi_step` with warmup enabled.
scheduler:
  type: multi_step
  params:
    # Warmup settings.
    use_warmup: true
    warmup_iterations: 54000
    warmup_factor: 0.25
    # Step settings. NOTE(review): lr_steps are presumably update counts;
    # compare with training.max_updates when changing the run length.
    lr_steps: [180000, 216000]
    lr_ratio: 0.2

# Metrics listed for evaluation.
evaluation:
  metrics: [vqa_accuracy]

# Training-loop settings.
training:
  # Run length and batching.
  max_updates: 236000
  batch_size: 64
  task_size_proportional_sampling: true
  # Learning-rate handling.
  lr_scheduler: true
  encoder_lr_multiply: 1
  # Gradient clipping is switched off here; the mode and norm values are
  # presumably only consulted when clip_gradients is true -- confirm.
  clip_gradients: false
  clip_norm_mode: all
  max_grad_l2_norm: 5
  # Early stopping criterion; minimize is false for this criterion.
  early_stop:
    criteria: vqa2/vqa_accuracy
    minimize: false
  find_unused_parameters: true
