# Settings not overridden in this file come from the base config named here
# (presumably resolved relative to this file by the config loader — confirm).
_BASE_: "Base_RCNN_FPN.yaml"
# Model overrides: input normalisation, initial weights, backbone and ROI heads.
MODEL:
  # Per-channel normalisation constants on the 0-255 scale. Numerically these
  # are 255 * (0.485, 0.456, 0.406) and 255 * (0.229, 0.224, 0.225);
  # presumably given in the channel order selected by INPUT.FORMAT — confirm.
  PIXEL_MEAN: [ 123.675, 116.280, 103.530 ]
  PIXEL_STD: [ 58.395, 57.120, 57.375 ]
  # NOTE(review): "pretrained" looks like a placeholder rather than a real
  # checkpoint path or URL — confirm how the training script resolves it.
  WEIGHTS: "pretrained"
  MASK_ON: True
  RESNETS:
    DEPTH: 50
    NORM: "SyncBN"
  ROI_HEADS:
    # Presumably the category count of the lvis_v1 datasets listed under
    # DATASETS — verify against the dataset metadata.
    NUM_CLASSES: 1203
    # Very low score cut-off; presumably chosen so that low-confidence
    # detections can still fill the TEST.DETECTIONS_PER_IMAGE budget — confirm.
    SCORE_THRESH_TEST: 0.0001
# Input pipeline overrides.
INPUT:
  # Candidate training sizes in steps of 32 from 640 to 800 (presumably the
  # shorter image side, one value picked per sample — confirm in base config).
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  FORMAT: "RGB" # torchvision models take RGB input
# Dataset names used for training and for evaluation; each entry is written
# as a one-element tuple, so more names can be appended later.
DATASETS:
  TRAIN: ("lvis_v1_train",)
  TEST: ("lvis_v1_val",)
# Evaluation-time overrides.
TEST:
  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
# Optimisation schedule. The iteration counts below describe a "0.5x"
# schedule, i.e. presumably half the length of the reference schedule.
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  # Iterations at which the learning rate is stepped (presumably decayed —
  # the factor is not set here, so it comes from the base config or defaults).
  STEPS: (60000, 80000)
  MAX_ITER: 90000 # 90000 * 16 / 100000 ~ 14.4 epochs  # 0.5x
  WARMUP_ITERS: 1000
# Training data sampling overrides.
DATALOADER:
  # NOTE(review): presumably this sampler repeats images that contain
  # categories rarer than REPEAT_THRESHOLD (a fraction of images) — confirm
  # against the sampler's documentation.
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001

