# Top-level switches: ATTENTION_ARCH is an empty mapping and the cuDNN
# benchmark flag is off.
ATTENTION_ARCH: {}
CUDNN_BENCHMARK: false
# Data-loading settings. DATASET_ANN, DATASET_BS, DATASET_FILTERS,
# DATASET_RATIO and USE_RFS each carry two entries.
# NOTE(review): DATASETS.TRAIN names a single dataset, so these two-entry
# lists are presumably positional per-dataset values — confirm how the loader
# pairs them when SAMPLER_TRAIN is TrainingSampler.
DATALOADER:
  ASPECT_RATIO_GROUPING: true
  DATASET_ANN: [box, box]
  DATASET_BS: [2, 2]
  DATASET_FILTERS: [true, true]
  DATASET_RATIO: [1, 1]
  FILTER_EMPTY_ANNOTATIONS: true
  MULTI_DATASET_GROUPING: true
  NUM_WORKERS: 8
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
  USE_DIFF_BS_SIZE: true
  USE_RFS: [false, false]
# Dataset selection: one TRAIN entry, one TEST entry, and empty
# proposal-file lists for both.
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: []
  PROPOSAL_FILES_TRAIN: []
  TEST: [coco_2017_val]
  TRAIN: [coco_2017_train]
# FIND_UNUSED_PARAMETERS is enabled.
# NOTE(review): presumably forwarded to the distributed-training wrapper —
# confirm against the trainer.
FIND_UNUSED_PARAMETERS: true
# GLOBAL holds a single scalar, HACK; its purpose is not visible in this file.
GLOBAL:
  HACK: 1.0
# Input pipeline: crop, resize, flip and frame-sampling settings.
INPUT:
  AUGMENTATIONS: []
  CROP:
    ENABLED: true
    SINGLE_CATEGORY_MAX_AREA: 1.0
    SIZE: [384, 600]
    TYPE: absolute_range
  DATASET_MAPPER_NAME: null
  FORMAT: RGB
  IMAGE_SIZE: 1024
  MASK_FORMAT: polygon
  MAX_SCALE: 2.0
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  # Two entries, as many as MIN_SIZE_TRAIN_MULTI has rows (presumably paired
  # by position — confirm against the dataset mapper).
  MAX_SIZE_TRAIN_MULTI: [1333, 768]
  MIN_SCALE: 0.1
  MIN_SIZE_TEST: 800
  # 480 through 800 in steps of 32.
  MIN_SIZE_TRAIN: [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
  # Row 1 repeats MIN_SIZE_TRAIN.
  # NOTE(review): row 2 contains 392 where the otherwise uniform step of 32
  # would give 384 — value kept as-is; confirm it is intentional.
  MIN_SIZE_TRAIN_MULTI:
  - [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
  - [320, 352, 392, 416, 448, 480, 512, 544, 576, 608, 640]
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
  SAMPLING_FRAME_NUM: 1
  SAMPLING_FRAME_RANGE: 10
  SAMPLING_FRAME_SHUFFLE: false
  SAMPLING_INTERVAL: 1
  SIZE_DIVISIBILITY: -1
# Model definition; the MODEL mapping starts here.
MODEL:
  ABLATION: false
  # ANGLES, ASPECT_RATIOS and SIZES each hold exactly one inner list.
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32, 64, 128, 256, 512]]
  # Backbone selection: builder name plus the FREEZE_AT value.
  BACKBONE:
    FREEZE_AT: 0
    NAME: build_resnet_backbone
  # Model-level boolean switches, both off.
  CONTRAS_MEAN: false
  CROSS_TRACK: false
  # Decoder settings. The four TOP_*_LAYERS values all equal DEC_LAYERS (10).
  DECODER:
    DEC_LAYERS: 10
    DIM_FEEDFORWARD: 2048
    DROPOUT: 0.0
    ENFORCE_INPUT_PROJ: false
    HIDDEN_DIM: 512
    IMPORTANCE_SAMPLE_RATIO: 0.75
    MASK: true
    # Four identical entries.
    MAX_SPATIAL_LEN: [512, 512, 512, 512]
    NHEADS: 8
    NUM_OBJECT_QUERIES: 101
    OVERSAMPLE_RATIO: 3.0
    PRE_NORM: false
    SIZE_DIVISIBILITY: 32
    TOP_CAPTION_LAYERS: 10
    TOP_GROUNDING_LAYERS: 10
    TOP_OPENIMAGE_LAYERS: 10
    TOP_SPATIAL_LAYERS: 10
    TRAIN_NUM_POINTS: 12544
    TRANSFORMER_IN_FEATURE: multi_scale_pixel_decoder
  # Model-level scalars: device string, DIM_PROJ and the EARLYFUSION switch.
  DEVICE: cuda
  DIM_PROJ: 256
  EARLYFUSION: true
  # Encoder settings. DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES is
  # IN_FEATURES without res2.
  ENCODER:
    COMMON_STRIDE: 4
    CONVS_DIM: 512
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: [res3, res4, res5]
    IGNORE_VALUE: 255
    IN_FEATURES: [res2, res3, res4, res5]
    LOSS_WEIGHT: 1.0
    MASK_DIM: 512
    NAME: transformer_encoder_fpn
    NORM: GN
    NUM_CLASSES: 133
    TRANSFORMER_ENC_LAYERS: 6
  # EVA01 backbone hyper-parameters.
  # NOTE(review): the key is spelled DMBED_DIM (SWIN uses EMBED_DIM); the
  # spelling is kept because the consuming code presumably reads this exact
  # name — confirm before renaming.
  EVA01:
    BEIT_LIKE_GAMMA: false
    BEIT_LIKE_QKV_BIAS: true
    DEPTH: 40
    DMBED_DIM: 1408
    DROP_PATH_RATE: 0.6
    FREEZE_PATH_EMBED: true
    IMAGE_SIZE: 1280
    MLP_RATIO: 4.363636363636363
    NUM_HEADS: 16
    PATCH_SIZE: 16
    PRETRAINED_WEIGHT: null
    # Of the indices 0-39 (DEPTH is 40), every fourth one (3, 7, ..., 39) is
    # absent from this list.
    WINDOW_BLOCK_INDEXES: [0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18,
      20, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38]
    WINDOW_SIZE: 16
  # EVA02 backbone hyper-parameters.
  # NOTE(review): DMBED_DIM spelling kept verbatim (SWIN uses EMBED_DIM) —
  # presumably the name the consuming code reads; confirm before renaming.
  EVA02:
    CHECKPOINT: true
    DEPTH: 24
    DMBED_DIM: 1024
    DROP_PATH_RATE: 0.3
    IMAGE_SIZE: 1536
    MLP_RATIO: 2.6666666666666665
    NUM_HEADS: 16
    PATCH_SIZE: 16
    PRETRAINED_WEIGHT: null
    # Of the indices 0-23 (DEPTH is 24), every third one (2, 5, ..., 23) is
    # absent from this list.
    WINDOW_BLOCK_INDEXES: [0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16,
      18, 19, 21, 22]
    WINDOW_SIZE: 16
  # FPN settings; IN_FEATURES is an empty list and NORM is the empty string.
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: []
    NORM: ''
    OUT_CHANNELS: 256
  # Model-level switches. Every HIER_* boolean is false, HIER_NUM_REF is 0
  # and both HIER_*_WEIGHT values are 1.0.
  FREEZE_WHOLE: false
  GROUNDING: 0
  HIER_CENTER: false
  HIER_ELOSS_WEIGHT: 1.0
  HIER_EOT: false
  HIER_NEG: false
  HIER_NEG_WEIGHT: 1.0
  HIER_NORM: false
  HIER_NUM_REF: 0
  HIER_POS: false
  HIER_POSNEG: false
  HIER_PP: false
  HIER_TEACHER: false
  HIER_TRAINING: false
  # InternImage backbone hyper-parameters. DEPTHS, GROUPS and OUT_IINDICES
  # each have four entries.
  # NOTE(review): OUT_IINDICES (double "I") is kept verbatim — presumably the
  # name the consuming code reads; confirm before renaming.
  INTERNIMAGE:
    CENTER_FEATURE_SCALE: false
    CHANNELS: 160
    CORE_OP: DCNv3
    DEPTHS: [5, 5, 22, 5]
    DROP_PATH_RATE: 0.0
    DW_KERNEL_SIZE: null
    GROUPS: [10, 20, 40, 80]
    LAYER_SCALE: 1.0
    LEVEL2_POST_NORM: false
    LEVEL2_POST_NORM_BLOCK_IDS: null
    MLP_RATIO: 4.0
    NORM_LAYER: LN
    OFFSET_SCALE: 2.0
    OUT_IINDICES: [0, 1, 2, 3]
    POST_NORM: true
    PRETRAINED_WEIGHT: null
    RES_POST_NORM: false
    WITH_CP: false
  # Keypoint switch is off.
  KEYPOINT_ON: false
  # Language backbone: MODEL_TYPE and TOKENIZER_TYPE carry the same
  # identifier (bert-base-uncased).
  LANGUAGE_BACKBONE:
    LANG_DIM: 512
    MAX_QUERY_LEN: 77
    MODEL_TYPE: bert-base-uncased
    N_LAYERS: 1
    PAD_MAX: true
    TOKENIZER_TYPE: bert-base-uncased
    USE_CHECKPOINT: false
  # Model-level scalars. LORA is off; LORA_ALPHA and LORA_RANK are both 16.
  LOAD_PROPOSALS: false
  LORA: false
  LORA_ALPHA: 16
  LORA_RANK: 16
  MASK_ON: true
  MAX_CATEGORY_LEN: 100
  # Name of the meta-architecture selected for this run.
  META_ARCHITECTURE: GLEE
  # MaskDINO settings. Each COST_*_WEIGHT equals the correspondingly named
  # *_WEIGHT (BOX 5.0, CLASS 4.0, DICE 5.0, GIOU 2.0, MASK 5.0).
  MaskDINO:
    BOX_LOSS: true
    BOX_WEIGHT: 5.0
    CLASS_WEIGHT: 4.0
    COST_BOX_WEIGHT: 5.0
    COST_CLASS_WEIGHT: 4.0
    COST_DICE_WEIGHT: 5.0
    COST_GIOU_WEIGHT: 2.0
    COST_MASK_WEIGHT: 5.0
    DEC_LAYERS: 9
    DEEP_SUPERVISION: true
    DICE_WEIGHT: 5.0
    DIM_FEEDFORWARD: 2048
    DN: standard
    DN_NOISE_SCALE: 0.4
    DN_NUM: 100
    DROPOUT: 0.0
    ENC_LAYERS: 0
    ENFORCE_INPUT_PROJ: false
    EVAL_FLAG: 1
    GIOU_WEIGHT: 2.0
    HIDDEN_DIM: 256
    IMPORTANCE_SAMPLE_RATIO: 0.75
    # Quoted so that the value stays the string 'no' instead of being read as
    # a boolean by YAML 1.1 parsers.
    INITIALIZE_BOX_TYPE: 'no'
    INITIAL_PRED: true
    LEARN_TGT: false
    MASK_WEIGHT: 5.0
    NHEADS: 8
    NO_OBJECT_WEIGHT: 0.1
    NUM_OBJECT_QUERIES: 300
    OVERSAMPLE_RATIO: 3.0
    PANO_BOX_LOSS: false
    PRED_CONV: false
    PRE_NORM: false
    SEMANTIC_CE_LOSS: false
    SIZE_DIVISIBILITY: 32
    # Of the three *_ON switches below only INSTANCE_ON is enabled.
    # NOTE(review): TEST_FOUCUS_ON_BOX spelling kept verbatim — presumably the
    # name the consuming code reads; confirm before renaming.
    TEST:
      INSTANCE_ON: true
      OBJECT_MASK_THRESHOLD: 0.25
      OVERLAP_THRESHOLD: 0.8
      PANOPTIC_ON: false
      PANO_TEMPERATURE: 0.06
      PANO_TRANSFORM_EVAL: true
      SEMANTIC_ON: false
      SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false
      TEST_FOUCUS_ON_BOX: false
    TRAIN_NUM_POINTS: 12544
    TRANSFORMER_DECODER_NAME: MaskDINODecoder
    TWO_STAGE: true
  # OMNI_TEST switch is off.
  OMNI_TEST: false
  # Panoptic FPN settings; the COMBINE sub-section is enabled with both
  # thresholds at 0.5.
  PANOPTIC_FPN:
    COMBINE:
      ENABLED: true
      INSTANCES_CONFIDENCE_THRESH: 0.5
      OVERLAP_THRESH: 0.5
      STUFF_AREA_LIMIT: 4096
    INSTANCE_LOSS_WEIGHT: 1.0
  # Three values each (presumably one per input channel, in the order given
  # by INPUT.FORMAT — confirm).
  PIXEL_MEAN: [123.675, 116.28, 103.53]
  PIXEL_STD: [58.395, 57.12, 57.375]
  # Proposal generator selection: name plus MIN_SIZE.
  PROPOSAL_GENERATOR:
    MIN_SIZE: 0
    NAME: RPN
  # Model-level boolean switches, both off.
  PSEUDO_VIDEO: false
  REGISTER: false
  # ResNet backbone settings. DEFORM_ON_PER_STAGE has four entries, all
  # false; OUT_FEATURES lists res2 through res5.
  RESNETS:
    DEFORM_MODULATED: false
    DEFORM_NUM_GROUPS: 1
    DEFORM_ON_PER_STAGE: [false, false, false, false]
    DEPTH: 50
    NORM: FrozenBN
    NUM_GROUPS: 1
    OUT_FEATURES: [res2, res3, res4, res5]
    RES2_OUT_CHANNELS: 256
    RES4_DILATION: 1
    RES5_DILATION: 1
    RES5_MULTI_GRID: [1, 1, 1]
    STEM_OUT_CHANNELS: 64
    STEM_TYPE: basic
    STRIDE_IN_1X1: false
    WIDTH_PER_GROUP: 64
  # RetinaNet head settings.
  RETINANET:
    BBOX_REG_LOSS_TYPE: smooth_l1
    # The &id002 anchor is retained so that any alias to this list elsewhere
    # in the document keeps resolving.
    BBOX_REG_WEIGHTS: &id002 [1.0, 1.0, 1.0, 1.0]
    FOCAL_LOSS_ALPHA: 0.25
    FOCAL_LOSS_GAMMA: 2.0
    IN_FEATURES: [p3, p4, p5, p6, p7]
    # Three labels against two thresholds.
    IOU_LABELS: [0, -1, 1]
    IOU_THRESHOLDS: [0.4, 0.5]
    NMS_THRESH_TEST: 0.5
    NORM: ''
    NUM_CLASSES: 80
    NUM_CONVS: 4
    PRIOR_PROB: 0.01
    SCORE_THRESH_TEST: 0.05
    SMOOTH_L1_LOSS_BETA: 0.1
    TOPK_CANDIDATES_TEST: 1000
  # REVERSE switch is off.
  REVERSE: false
  # Three BBOX_REG_WEIGHTS rows and three IOUS entries (presumably one per
  # cascade stage — confirm). Rows 2 and 3 are 2x and 3x row 1.
  ROI_BOX_CASCADE_HEAD:
    BBOX_REG_WEIGHTS:
    # The &id001 anchor stays on the first row so that any alias to it
    # elsewhere in the document keeps resolving.
    - &id001 [10.0, 10.0, 5.0, 5.0]
    - [20.0, 20.0, 10.0, 10.0]
    - [30.0, 30.0, 15.0, 15.0]
    IOUS: [0.5, 0.6, 0.7]
  # ROI box head settings. NAME and NORM are empty strings; NUM_CONV and
  # NUM_FC are both 0.
  ROI_BOX_HEAD:
    BBOX_REG_LOSS_TYPE: smooth_l1
    BBOX_REG_LOSS_WEIGHT: 1.0
    # Spelled out instead of aliasing the first row of
    # ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS, so that editing one section
    # cannot silently change the other. The loaded value is unchanged.
    BBOX_REG_WEIGHTS: [10.0, 10.0, 5.0, 5.0]
    CLS_AGNOSTIC_BBOX_REG: false
    CONV_DIM: 256
    FC_DIM: 1024
    NAME: ''
    NORM: ''
    NUM_CONV: 0
    NUM_FC: 0
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
    SMOOTH_L1_BETA: 0.0
    TRAIN_ON_PRED_BOXES: false
  # ROI heads settings; a single input feature (res4) and a single IoU
  # threshold separating the two IOU_LABELS.
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 512
    IN_FEATURES: [res4]
    IOU_LABELS: [0, 1]
    IOU_THRESHOLDS: [0.5]
    NAME: Res5ROIHeads
    NMS_THRESH_TEST: 0.5
    NUM_CLASSES: 80
    POSITIVE_FRACTION: 0.25
    PROPOSAL_APPEND_GT: true
    SCORE_THRESH_TEST: 0.05
  # ROI keypoint head settings.
  ROI_KEYPOINT_HEAD:
    # Eight identical entries.
    CONV_DIMS: [512, 512, 512, 512, 512, 512, 512, 512]
    LOSS_WEIGHT: 1.0
    MIN_KEYPOINTS_PER_IMAGE: 1
    NAME: KRCNNConvDeconvUpsampleHead
    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
    NUM_KEYPOINTS: 17
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  # ROI mask head settings; NORM is the empty string and NUM_CONV is 0.
  ROI_MASK_HEAD:
    CLS_AGNOSTIC_MASK: false
    CONV_DIM: 256
    NAME: MaskRCNNConvUpsampleHead
    NORM: ''
    NUM_CONV: 0
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  # RPN settings; a single input feature (res4).
  RPN:
    BATCH_SIZE_PER_IMAGE: 256
    BBOX_REG_LOSS_TYPE: smooth_l1
    BBOX_REG_LOSS_WEIGHT: 1.0
    # Spelled out instead of aliasing RETINANET.BBOX_REG_WEIGHTS, so that
    # editing one section cannot silently change the other. The loaded value
    # is unchanged.
    BBOX_REG_WEIGHTS: [1.0, 1.0, 1.0, 1.0]
    BOUNDARY_THRESH: -1
    CONV_DIMS: [-1]
    HEAD_NAME: StandardRPNHead
    IN_FEATURES: [res4]
    # Three labels against two thresholds.
    IOU_LABELS: [0, -1, 1]
    IOU_THRESHOLDS: [0.3, 0.7]
    LOSS_WEIGHT: 1.0
    NMS_THRESH: 0.7
    POSITIVE_FRACTION: 0.5
    POST_NMS_TOPK_TEST: 1000
    POST_NMS_TOPK_TRAIN: 2000
    PRE_NMS_TOPK_TEST: 6000
    PRE_NMS_TOPK_TRAIN: 12000
    SMOOTH_L1_BETA: 0.0
  # Segmentation head / pixel decoder settings. NUM_FEATURE_LEVELS (3) equals
  # the number of DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES entries and
  # TOTAL_NUM_FEATURE_LEVELS (4) equals the number of IN_FEATURES entries.
  SEM_SEG_HEAD:
    ASPP_CHANNELS: 256
    ASPP_DILATIONS: [6, 12, 18]
    ASPP_DROPOUT: 0.1
    COMMON_STRIDE: 4
    CONVS_DIM: 256
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: [res3, res4, res5]
    DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS: 8
    DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS: 4
    DIM_FEEDFORWARD: 2048
    FEATURE_ORDER: low2high
    IGNORE_VALUE: 255
    IN_FEATURES: [res2, res3, res4, res5]
    LOSS_TYPE: hard_pixel_mining
    LOSS_WEIGHT: 1.0
    MASK_DIM: 256
    NAME: MaskDINOHead
    NORM: GN
    NUM_CLASSES: 80
    NUM_FEATURE_LEVELS: 3
    PIXEL_DECODER_NAME: MaskDINOEncoder
    PROJECT_CHANNELS: [48]
    PROJECT_FEATURES: [res2]
    TOTAL_NUM_FEATURE_LEVELS: 4
    TRANSFORMER_ENC_LAYERS: 6
    USE_DEPTHWISE_SEPARABLE_CONV: false
  # Swin backbone hyper-parameters. DEPTHS, NUM_HEADS and OUT_FEATURES each
  # have four entries; NUM_HEADS doubles from one entry to the next.
  SWIN:
    APE: false
    ATTN_DROP_RATE: 0.0
    DEPTHS: [2, 2, 6, 2]
    DROP_PATH_RATE: 0.3
    DROP_RATE: 0.0
    EMBED_DIM: 96
    MLP_RATIO: 4.0
    NUM_HEADS: [3, 6, 12, 24]
    OUT_FEATURES: [res2, res3, res4, res5]
    PATCH_NORM: true
    PATCH_SIZE: 4
    PRETRAINED_WEIGHT: null
    PRETRAIN_IMG_SIZE: 224
    QKV_BIAS: true
    QK_SCALE: null
    USE_CHECKPOINT: false
    WINDOW_SIZE: 7
  # TAU switch is off.
  TAU: false
  # Text encoder settings.
  # NOTE(review): AUTOGRESSIVE spelling kept verbatim — presumably the name
  # the consuming code reads; confirm before renaming.
  TEXT:
    ARCH: clip_teacher
    AUTOGRESSIVE: true
    CONTEXT_LENGTH: 77
    HEADS: 8
    LAYERS: 12
    NAME: transformer
    TOKENIZER: clip
    WIDTH: 512
  # Remaining model-level scalars.
  TRACK_VERSION: v3
  VG_SYN_NUM: 100
  VIDEO_WINDOW_SIZE: 10
  VISUAL_PROMPT: false
  # Relative path to the weights file.
  WEIGHTS: weights/converted_maskdino_r50_withoutclip.pth
# Relative output directory for this experiment.
OUTPUT_DIR: ./exp/GLEE_clipteacher_basecoco_4scale
# NOTE(review): -1 presumably means "no fixed seed" — confirm against the
# trainer before relying on reproducibility.
SEED: -1
# Optimiser and learning-rate schedule. The two STEPS values sit at 88% and
# 96% of MAX_ITER; CHECKPOINT_PERIOD equals TEST.EVAL_PERIOD (3700).
SOLVER:
  AMP:
    ENABLED: true
  BACKBONE_MULTIPLIER: 0.1
  BASE_LR: 0.0001
  BASE_LR_END: 0.0
  BIAS_LR_FACTOR: 1.0
  CHECKPOINT_PERIOD: 3700
  CLIP_GRADIENTS:
    CLIP_TYPE: full_model
    CLIP_VALUE: 0.01
    ENABLED: true
    NORM_TYPE: 2.0
  GAMMA: 0.1
  IMS_PER_BATCH: 16
  LR_DECAY_RATE: null
  LR_DECAY_RATE_NUM_LAYERS: null
  LR_SCHEDULER_NAME: WarmupMultiStepLR
  MAX_ITER: 90000
  MOMENTUM: 0.9
  NESTEROV: false
  OPTIMIZER: ADAMW
  POLY_LR_CONSTANT_ENDING: 0.0
  POLY_LR_POWER: 0.9
  REFERENCE_WORLD_SIZE: 0
  STEPS: [79200, 86400]
  TEXTENCODER_MULTIPLIER: 1.0
  WARMUP_FACTOR: 0.01
  WARMUP_ITERS: 100
  WARMUP_METHOD: linear
  WEIGHT_DECAY: 0.05
  WEIGHT_DECAY_BIAS: null
  WEIGHT_DECAY_NORM: 0.0
# Evaluation settings. The AUG and PRECISE_BN sub-sections are both disabled.
TEST:
  AUG:
    ENABLED: false
    FLIP: true
    MAX_SIZE: 4000
    # 400 through 1200 in steps of 100.
    MIN_SIZES: [400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
  DETECTIONS_PER_IMAGE: 100
  EVAL_PERIOD: 3700
  EXPECTED_RESULTS: []
  KEYPOINT_OKS_SIGMAS: []
  PRECISE_BN:
    ENABLED: false
    NUM_ITER: 200
# Config schema version and visualisation period (0).
VERSION: 2
VIS_PERIOD: 0
# Run name and project name (presumably for Weights & Biases logging —
# confirm against the trainer).
WANDB_NAME: test
WANDB_PROJECT: vg
