_BASE_: "../Base-RCNN-C4.yaml"
MODEL:
  META_ARCHITECTURE: "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_clip_resnet_backbone" #"build_clip_resnet_fpn_backbone" # "build_resnet_fpn_backbone"
    FREEZE_AT: 2
  WEIGHTS: "" # "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    OUT_FEATURES: ["res4"]
    NORM: FrozenBN
    STEM_OUT_CHANNELS: 64
    RES2_OUT_CHANNELS: 256
  RPN:
    HEAD_NAME: StandardRPNHead
    IN_FEATURES: ["res4"]
  ROI_HEADS:
    NAME: "CLIPRes5ROIHeads" # "Res5ROIHeads" # "StandardROIHeads"
    IN_FEATURES: ["res4"]
    NUM_CLASSES: 48
  ROI_BOX_HEAD:
    NAME: ""
    NUM_FC: 0
    POOLER_RESOLUTION: 14
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 0
    POOLER_RESOLUTION: 14
  PIXEL_MEAN: [0.48145466, 0.4578275, 0.40821073] # [103.530, 116.280, 123.675] # 
  PIXEL_STD: [0.26862954, 0.26130258, 0.27577711] # [1.0, 1.0, 1.0] # 
INPUT:
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
DATASETS:
  TRAIN: ("coco_2017_ovd_b_train",)
  TEST: ("coco_2017_ovd_b_test",)
TEST:
  EVAL_PERIOD: 50000
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (60000, 80000)
  MAX_ITER: 90000
INPUT:
  MIN_SIZE_TRAIN_SAMPLING: choice
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
  FORMAT: "RGB" # "BGR"