_BASE_: "Base.yaml"
# Model-level overrides applied on top of the file named by _BASE_.
MODEL:
  # Two tasks are listed for training.
  TRAIN_TASK:
    - ObjectDet
    - DenseCap
  # Only one task is used for testing: set this to DenseCap or ObjectDet.
  TEST_TASK: 'DenseCap'
  MASK_ON: true
  BEAM_SIZE: 1
  # NOTE(review): 12 presumably matches the depth of the ViT-Base checkpoint
  # referenced by WEIGHTS below -- confirm against the backbone builder.
  VIT_LAYERS: 12
  WEIGHTS: 'detectron2://ImageNetPretrained/MAE/mae_pretrain_vit_base.pth'
  BACKBONE:
    NAME: build_vit_fpn_backbone
  ROI_HEADS:
    SOFT_NMS_ENABLED: false
# Solver override: only the ViT layer decay rate is set in this file; any
# other solver settings are expected to come from the _BASE_ config.
# NOTE(review): presumably a layer-wise learning-rate decay factor applied
# across the backbone's transformer layers -- confirm against the optimizer
# construction code.
SOLVER:
  VIT_LAYER_DECAY_RATE: 0.7
# Dataset names for training and testing. Each value is written as a
# Python-style tuple literal and is a plain string as far as YAML is
# concerned; the quotes make that explicit without changing the value.
# NOTE(review): MODEL.TEST_TASK in this file is "DenseCap" while TEST names a
# COCO split -- confirm this pairing is intended for evaluation.
DATASETS:
  TEST: '("coco_2017_test-dev",)'
  TRAIN: '("GRiT_coco2017_train", "vg_train")'
# Data-loading overrides. Every list below has two entries.
# NOTE(review): presumably DATASET_RATIO and DATASET_INPUT_SCALE hold one entry
# per dataset in DATASETS.TRAIN (same order) -- confirm against the loader.
# Whether DATASET_INPUT_SIZE is per-dataset or a (height, width) pair cannot
# be told from this file.
DATALOADER:
  DATASET_BS: 2
  DATASET_RATIO:
    - 1
    - 1
  DATASET_INPUT_SIZE:
    - 1024
    - 1024
  DATASET_INPUT_SCALE:
    - [0.1, 2.0]
    - [0.1, 2.0]
# Output directory for this configuration, given as a relative path.
OUTPUT_DIR: './output/GRiT_B_DenseCap_ObjectDet'