# Run-level settings: output path, loader/logging knobs and the AMP toggle.
# NOTE(review): key meanings below are inferred from the key names; confirm
# against the code that consumes this config.
# Relative path with a trailing slash.
OUTPUT_DIR: 'OUTPUT/'
# presumably the number of data-loading workers; TODO confirm
WORKERS: 6
# presumably the logging interval in iterations; TODO confirm
PRINT_FREQ: 500
# presumably automatic mixed precision; TODO confirm
AMP:
  ENABLED: true

# Model definition: architecture name plus the per-stage SPEC.
# Every list under SPEC has four entries, matching NUM_STAGES.
# NOTE(review): the meaning of each key is inferred from its name; confirm
# against the model code that reads this config.
MODEL:
  NAME: cvt_v4_transformer
  SPEC:
    INIT: 'trunc_norm'
    NUM_STAGES: 4
    # Booleans are written as canonical lowercase true/false, the same
    # spelling the AMP, CUDNN and TRAIN sections of this file use.
    REL_POS_EMBED: false
    SHIFT: [false, false, false, false]
    DROP_PATH_RATE: 0.1
    # presumably kernel size / stride / padding of each stage's patch
    # embedding; TODO confirm
    PATCH_SIZE: [7, 3, 3, 3]
    PATCH_STRIDE: [4, 2, 2, 2]
    PATCH_PADDING: [2, 1, 1, 1]
    WINDOW_SIZE: [7, 7, 7, 7]
    # DIM_EMBED divided by NUM_HEADS is 64 at every stage.
    DIM_EMBED: [64, 192, 384, 768]
    NUM_HEADS: [1, 3, 6, 12]
    DEPTH: [2, 2, 6, 2]
    MLP_RATIO: [4.0, 4.0, 4.0, 4.0]
    QKV_BIAS: [true, true, true, true]
    # presumably kernel size / padding of the convolutional QKV projection;
    # TODO confirm
    KERNEL_QKV: [3, 3, 3, 3]
    PADDING_QKV: [1, 1, 1, 1]
# Data-augmentation settings.
# NOTE(review): key meanings are inferred from the key names; confirm
# against the data pipeline that reads this config.
AUG:
  MIXUP_PROB: 1.0
  MIXUP: 0.8
  MIXCUT: 1.0
  # presumably options forwarded to timm's loader/transform factory;
  # TODO confirm
  TIMM_AUG:
    USE_LOADER: true
    # RE_* presumably configure random erasing; TODO confirm
    RE_COUNT: 1
    RE_MODE: 'pixel'
    RE_SPLIT: false
    RE_PROB: 0.25
    AUTO_AUGMENT: 'rand-m9-mstd0.5-inc1'
    HFLIP: 0.5
    VFLIP: 0.0
    COLOR_JITTER: 0.4
    # Named interpolation mode; TEST.INTERPOLATION uses an integer code.
    INTERPOLATION: 'bicubic'
# Loss settings.
LOSS:
  # presumably the label-smoothing factor of the classification loss;
  # TODO confirm
  LABEL_SMOOTHING: 0.1
# cuDNN flags: benchmark mode on, deterministic mode off, backend enabled.
# NOTE(review): presumably copied onto torch.backends.cudnn; confirm against
# the training entry point.
CUDNN:
  BENCHMARK: true
  DETERMINISTIC: false
  ENABLED: true
# Dataset selection: dataset name, storage format, root path and split names.
DATASET:
  DATASET: 'imagenet'
  DATA_FORMAT: 'tsv'
  # Relative path. NOTE(review): presumably resolved against the working
  # directory; confirm.
  ROOT: 'DATASET/imagenet-tsv/'
  TEST_SET: 'val'
  TRAIN_SET: 'train'
# Evaluation settings.
TEST:
  BATCH_SIZE_PER_GPU: 32
  # Same size as TRAIN.IMAGE_SIZE.
  IMAGE_SIZE: [224, 224]
  # Empty string: no model file is specified in this config.
  MODEL_FILE: ''
  # NOTE(review): integer code, whereas AUG.TIMM_AUG.INTERPOLATION uses the
  # name `bicubic`; 3 is presumably the PIL resampling code for bicubic.
  # Confirm the consumer maps both to the same filter.
  INTERPOLATION: 3
# Training settings: batch size, learning-rate schedule, optimizer and
# regularisation.
# NOTE(review): key meanings are inferred from the key names; confirm
# against the training loop that reads this config.
TRAIN:
  BATCH_SIZE_PER_GPU: 256
  LR: 0.00025
  # Same size as TEST.IMAGE_SIZE.
  IMAGE_SIZE: [224, 224]
  BEGIN_EPOCH: 0
  END_EPOCH: 300
  LR_SCHEDULER:
    METHOD: 'timm'
    # ARGS keys are lowercase, unlike the rest of this file; presumably they
    # are passed through unchanged to timm's scheduler factory. TODO confirm
    ARGS:
      sched: 'cosine'
      warmup_epochs: 5
      warmup_lr: 0.000001
      min_lr: 0.00001
      cooldown_epochs: 10
      decay_rate: 0.1
  OPTIMIZER: 'adamW'
  WD: 0.05
  # presumably name fragments of parameters exempt from weight decay;
  # TODO confirm
  WITHOUT_WD_LIST: ['bn', 'bias', 'ln']
  # NOTE(review): MOMENTUM and NESTEROV look like SGD-style options; verify
  # whether they have any effect when OPTIMIZER is adamW.
  MOMENTUM: 0.9
  NESTEROV: true
  SHUFFLE: true
# Debug settings; the debug switch is off in this config.
DEBUG:
  DEBUG: false
