# Experiment config: "ProposalClipClassifier" meta-architecture with a "pomp"
# prompt learner on CLIP ViT-B/16 (see MODEL below).
# Values in this file are layered on top of the file named by _BASE_
# (assumes the loader follows the fvcore/detectron2 `_BASE_` convention —
# TODO confirm).
_BASE_: ../coco-stuff-164k-171/maskformer_R50_bs32_60k.yaml
# NOTE(review): ORACLE is not explained in this file; presumably it makes the
# pipeline use ground-truth masks as proposals — confirm against the code
# that reads cfg.ORACLE.
ORACLE: True
# Model selection and CLIP prompt-learner settings.
MODEL:
  # Empty string: this file supplies no checkpoint path for the whole model.
  WEIGHTS: ""
  META_ARCHITECTURE: "ProposalClipClassifier"
  MASK_ON: True
  CLIP_ADAPTER:
    # Prompt-learner variant; its weights come from PROMPT_CHECKPOINT below.
    PROMPT_LEARNER: "pomp"
    # Settings for the learnable prompt.
    PROMPT_DIM: 512
    # YAML parses this as the plain string "(4, 0)"; assumes the config loader
    # evaluates it into a tuple — TODO confirm.
    # NOTE(review): presumably (prefix tokens, suffix tokens); the checkpoint
    # path below mentions "nctx4" and "ctpend" — verify against the consumer.
    PROMPT_SHAPE: (4, 0)
    CLIP_MODEL_NAME: "ViT-B/16"
    # NOTE(review): absolute, machine-specific path (/home/ubuntu/efs/...).
    # The config is not portable as written; override it per environment.
    PROMPT_CHECKPOINT: /home/ubuntu/efs/multimodal-prompt-learning/output/imagenet_21k_o/GPT_tutorial_sampled_softmax/vit_b16_ep20_randaug2_unc1000_16shots/nctx4_cscFalse_ctpend/seed42/prompt_learner/model-best.pth.tar
# Dataset names used for training and evaluation.
DATASETS:
  # Both values are one-element tuple literals (note the trailing comma) that
  # YAML reads as plain strings. Training uses a "base" dataset name, while
  # the test dataset name carries no "base" marker.
  TRAIN: ("coco_2017_train_stuff_base_sem_seg_classification",)
  TEST: ("coco_2017_test_stuff_sem_seg_classification",)
  # NOTE(review): presumably the number of training samples drawn per class —
  # confirm against the dataset-building code.
  SAMPLE_PER_CLASS: 32
# Input resizing, colour format, and dataset mapper selection.
INPUT:
  # NOTE(review): 244 may be a typo for 224 (MIN_SIZE_TEST is 224). Confirm
  # whether two distinct training sizes are really intended.
  MIN_SIZE_TRAIN: (224,244)
  MIN_SIZE_TEST: 224
  MAX_SIZE_TEST: 2560
  # NOTE(review): -1 presumably disables padding to a size multiple — confirm.
  SIZE_DIVISIBILITY: -1
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "mask_former_binary_semantic"
# Optimizer, learning-rate schedule, batch sizes, and run length.
SOLVER:
  OPTIMIZER: "SGD"
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0005
  LR_SCHEDULER_NAME: "WarmupCosineLR"
  # Warmup settings: method, factor, and length in iterations.
  WARMUP_METHOD: "constant"
  WARMUP_FACTOR: 0.005
  WARMUP_ITERS: 100
  # Separate batch sizes are given for training and for testing.
  IMS_PER_BATCH: 32
  TEST_IMS_PER_BATCH: 4
  MAX_ITER: 16000
  # Same interval as TEST.EVAL_PERIOD (1000).
  CHECKPOINT_PERIOD: 1000
# Evaluation interval; 1000 matches SOLVER.CHECKPOINT_PERIOD.
TEST:
  EVAL_PERIOD: 1000
# NOTE(review): the directory name says "10k" while SOLVER.MAX_ITER is 16000,
# and "164k-156" while _BASE_ lives under "coco-stuff-164k-171" — confirm the
# naming is intentional so results are not filed under a misleading path.
OUTPUT_DIR: output/coco-stuff-164k-156/zero_shot_proposal_classification_pomp_bsz32_10k/