# Model definition: meta-architecture name, input normalization constants,
# backbone, embedding head and loss settings.
MODEL:
  META_ARCHITECTURE: Baseline
  # Three-entry normalization constants.
  # NOTE(review): 127.5 / 127.5 presumably rescales 0-255 pixel values to
  # roughly [-1, 1] — confirm against the preprocessing code.
  PIXEL_MEAN: [127.5, 127.5, 127.5]
  PIXEL_STD: [127.5, 127.5, 127.5]
  # Explicit null: no value is set for WEIGHTS in this file.
  WEIGHTS: null

  BACKBONE:
    NAME: build_gvit_backbone
    DEPTH: base
    FEAT_DIM: 768
    PRETRAIN: True
    # NOTE(review): presumably the file read when PRETRAIN is True — confirm.
    PRETRAIN_PATH: models/pretrained/vit_base_p16.pth
    # Parentheses are not YAML sequence syntax, so this value is a plain
    # string; the config consumer is expected to convert it — verify.
    STRIDE_SIZE: (16, 16)
    DROP_PATH_RATIO: 0.1
    DROP_RATIO: 0.0
    ATT_DROP_RATE: 0.0

  HEADS:
    NAME: EmbeddingHead
    NORM: BN
    WITH_BNNECK: True
    POOL_LAYER: Identity
    NECK_FEAT: before
    CLS_LAYER: Linear

  LOSSES:
    # Plain string holding a tuple literal (see STRIDE_SIZE); two loss names.
    NAME: ("CrossEntropyLoss", "TripletLoss",)

    # NOTE(review): CE / TRI presumably configure the two losses named
    # above, in that order — confirm against the loss-building code.
    CE:
      EPSILON: 0.1
      SCALE: 1.0

    TRI:
      MARGIN: 0.0
      HARD_MINING: True
      NORM_FEAT: False
      SCALE: 1.0

# Input settings: train/test sizes plus the REA, FLIP and PADDING toggles.
INPUT:
  # Two-element sizes, identical for train and test.
  # NOTE(review): presumably [height, width] — confirm.
  SIZE_TRAIN: [256, 128]
  SIZE_TEST: [256, 128]

  # NOTE(review): REA presumably means random-erasing augmentation, with
  # PROB as its application probability — confirm.
  REA:
    ENABLED: True
    PROB: 0.5

  FLIP:
    ENABLED: True

  PADDING:
    ENABLED: True

# Data loading for training: sampler name, NUM_INSTANCE and worker count.
DATALOADER:
  SAMPLER_TRAIN: NaiveIdentitySampler
  # NOTE(review): presumably the number of images drawn per identity; with
  # SOLVER.IMS_PER_BATCH: 8 that would give 2 identities per batch — confirm.
  NUM_INSTANCE: 4
  NUM_WORKERS: 2

# Optimization settings: AMP toggle, optimizer and learning rate, schedule,
# warmup, gradient clipping and checkpoint period.
SOLVER:
  AMP:
    ENABLED: False

  OPT: SGD
  MAX_EPOCH: 400
  BASE_LR: 0.002
  WEIGHT_DECAY: 0.0001
  IMS_PER_BATCH: 8

  SCHED: CosineAnnealingLR
  # 0.00016 is 8% of BASE_LR (0.002).
  ETA_MIN_LR: 0.00016

  WARMUP_FACTOR: 0.01
  WARMUP_ITERS: 30

  CLIP_GRADIENTS:
    ENABLED: True

  # Same value as TEST.EVAL_PERIOD.
  # NOTE(review): unit (epochs vs. iterations) is not visible here — confirm.
  CHECKPOINT_PERIOD: 5

# Evaluation settings: evaluation period and test batch size.
TEST:
  # Same value as SOLVER.CHECKPOINT_PERIOD.
  # NOTE(review): unit (epochs vs. iterations) is not visible here — confirm.
  EVAL_PERIOD: 5
  IMS_PER_BATCH: 8

# Top-level boolean flag, enabled.
# NOTE(review): presumably forwarded to the cuDNN benchmark (autotune)
# switch of the training framework — confirm.
CUDNN_BENCHMARK: True

# Dataset selection. Both values are plain YAML strings holding a
# one-element tuple literal (parentheses are not YAML sequence syntax); the
# config consumer is expected to convert them — verify.
DATASETS:
  # NOTE(review): NAMES presumably lists training sets and TESTS evaluation
  # sets; both name the same dataset here — confirm.
  NAMES: ("CSG",)
  TESTS: ("CSG",)

# Output directory for this configuration (relative path).
OUTPUT_DIR: logs/CSG
