# Model section: initial checkpoint plus backbone, language-backbone and
# RoI box-head settings.
MODEL:
  # Relative path to a .pth checkpoint file.
  # NOTE(review): how the path is resolved (working directory vs. a model
  # root) is decided by the loader, which is not visible here — confirm.
  WEIGHT: "MODEL/groundingdino_swint_ogc.pth"

  BACKBONE:
    OUT_CHANNELS: 256

  LANGUAGE_BACKBONE:
    FREEZE: False
    # TOKENIZER_TYPE and MODEL_TYPE carry the same identifier, which also
    # matches GROUNDINGDINO.text_encoder_type further down this file.
    # NOTE(review): presumably all three must be changed together — confirm.
    TOKENIZER_TYPE: "bert-base-uncased"
    MODEL_TYPE: "bert-base-uncased" # other values noted by the author: "roberta-base", "clip"
    MASK_SPECIAL: False
    
  ROI_BOX_HEAD:
    # NOTE(review): VISION_QUERY.QUERY_BANK_PATH in this file contains
    # "pool7"; presumably the query bank was built at this resolution —
    # confirm before changing it.
    POOLER_RESOLUTION: 7
    # The four scales are 1/8, 1/16, 1/32 and 1/64. The value is written as a
    # parenthesised tuple literal, which a plain YAML parser reads as a
    # string. NOTE(review): presumably the config loader converts it to a
    # tuple — confirm before rewriting it as a YAML sequence.
    POOLER_SCALES: (0.125, 0.0625, 0.03125, 0.015625)
    POOLER_SAMPLING_RATIO: 0


# Evaluation settings.
TEST:
  EVAL_TASK: 'detection'
  # NOTE(review): presumably controls whether evaluation is run while
  # training is in progress — confirm with the trainer.
  DURING_TRAINING: False
  # Evaluation batch size; SOLVER.IMS_PER_BATCH in this file is set
  # separately (16).
  IMS_PER_BATCH: 8

# Dataset selection and prompt-construction options (the original author's
# note: these are used for the grounding model).
DATASETS:
  # Both entries are one-element tuple literals (note the trailing comma),
  # written as plain scalars. NOTE(review): presumably evaluated into tuples
  # of registered dataset names by the config loader — confirm.
  TRAIN: ("object365_grounding_train", )
  TEST: ("coco_2017_val", )
  DISABLE_SHUFFLE: False
  ADD_DET_PROMPT: False
  RANDOM_SAMPLE_NEG: 85
  # Alternative value left by the author (commented out):
  # RANDOM_SAMPLE_NEG: 365
  # Four-element tuple literal; only the third entry is non-zero.
  # NOTE(review): the meaning of each position is defined by the dataset
  # code, not visible here — confirm before editing.
  CONTROL_PROB: (0.0, 0.0, 0.5, 0.0)

  # A period followed by a space; quoted so the trailing space is kept.
  SEPARATION_TOKENS: ". "

  EXCLUDE_CROWD: True
  SPECIAL_SAFEGUARD_FOR_COCO_GROUNDING: True
  SEP_AT_LAST: True
  ADD_NORMED_CXCY: True

# Image pre-processing: channel order, normalisation statistics and resize
# bounds.
INPUT:
  FORMAT: 'rgb'
  # Three per-channel values on a 0-1 scale; they match the widely used
  # ImageNet mean / standard deviation.
  # NOTE(review): this implies pixels are scaled to [0, 1] before
  # normalisation — confirm against the transform code.
  PIXEL_MEAN: [0.485, 0.456, 0.406]
  PIXEL_STD: [0.229, 0.224, 0.225]
  # Train and test use the same min/max size bounds.
  MIN_SIZE_TRAIN: 800
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333

# Training-time augmentation.
AUGMENT:
  # Five candidate sizes from 480 to 800 in steps of 80; the largest equals
  # INPUT.MIN_SIZE_TRAIN in this file. Written as a tuple literal.
  # NOTE(review): presumably one size is picked per sample for multi-scale
  # training — confirm with the augmentation code.
  MULT_MIN_SIZE_TRAIN: (480,560,640,720,800)

# Data-loading settings.
DATALOADER:
  # NOTE(review): presumably batched images are padded so height and width
  # are multiples of this value — confirm with the collate code.
  SIZE_DIVISIBILITY: 32

# Optimiser, learning-rate schedule, checkpointing and gradient-clipping
# settings.
SOLVER:
  OPTIMIZER: ADAMW
  BASE_LR: 0.0001
  # The next two learning rates are flagged by the author as values that
  # should be modified during fine-tuning.
  # Relative to BASE_LR: GATE_LR is 50x, QUERY_LR is 0.1x.
  GATE_LR: 0.005
  QUERY_LR: 0.00001
  # End of the fine-tuning-specific learning rates.
  # LANG_LR is 0.1x BASE_LR.
  LANG_LR: 0.00001
  WEIGHT_DECAY: 0.0001
  # STEPS holds fractions rather than iteration counts, written as a tuple
  # literal. NOTE(review): presumably fractions of the total schedule at
  # which the learning rate is decayed — confirm with the trainer.
  # Earlier setting left by the author (commented out):
  # STEPS: (0.67, 0.89)
  STEPS: (0.95,)
  # Earlier setting left by the author (commented out):
  # MAX_EPOCH: 10
  MAX_EPOCH: 1
  IMS_PER_BATCH: 16
  WARMUP_ITERS: 2000
  WARMUP_FACTOR: 0.001
  USE_AMP: True
  MODEL_EMA: 0.999
  FIND_UNUSED_PARAMETERS: False
  # CHECKPOINT_PERIOD is set to a very large number.
  # NOTE(review): presumably this effectively disables iteration-based
  # checkpointing in favour of CHECKPOINT_PER_EPOCH — confirm.
  CHECKPOINT_PERIOD: 99999999
  CHECKPOINT_PER_EPOCH: 32.0
  # NOTE(review): presumably selects which parameter groups are trained;
  # the accepted values are defined by the training code — confirm.
  TUNING_HIGHLEVEL_OVERRIDE:  "vision_query"
  MAX_TO_KEEP: 4


  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 1.0
    # Alternative value left by the author (commented out):
    # CLIP_VALUE: 0.1
    NORM_TYPE: 2.0

# Vision-query settings. The semantics of the individual switches are
# defined by the model code, which is not visible from this file.
VISION_QUERY:
  ENABLED: True
  # Relative path to a .pth query-bank file. The file name mentions
  # "object365", "5000" and "pool7".
  # NOTE(review): presumably these reflect the source dataset, bank size and
  # MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION — confirm before swapping banks.
  QUERY_BANK_PATH: 'MODEL/object365_query_5000_pool7_sel_gd.pth'
  # "0." is a float zero.
  PURE_TEXT_RATE: 0.
  TEXT_DROPOUT: 0.4
  VISION_SCALE: 1.0
  NUM_QUERY_PER_CLASS: 5
  RANDOM_KSHOT: False
  ADD_ADAPT_LAYER: False
  CONDITION_GATE: True
  NONLINEAR_GATE: True
  NO_CAT: True
  # NOTE(review): presumably a suffix appended to generated query-bank
  # names — confirm with the extraction script.
  QUERY_ADDITION_NAME: '_groundingdino-T'

# GroundingDINO-specific settings. Unlike every other section in this file,
# the keys here are lower-case; keep them as written, since the consumer
# looks keys up by exact name.
GROUNDINGDINO:
  enabled: True
  use_checkpoint: False
  use_transformer_ckpt: False
  # Same identifier as MODEL.LANGUAGE_BACKBONE.TOKENIZER_TYPE / MODEL_TYPE
  # in this file.
  text_encoder_type: 'bert-base-uncased'
  # NOTE(review): presumably the minimum box score kept at inference —
  # confirm with the post-processing code.
  box_threshold: 0.05


