# Top-level run settings. The notes below are inferred from the key names;
# confirm them against the code that loads this config.
OUTPUT_DIR: 'OUTPUT/'  # relative path, trailing slash kept as written
WORKERS: 6  # presumably the data-loader worker count — TODO confirm
PRINT_FREQ: 500  # presumably a logging interval (unit not shown here) — TODO confirm
# AMP presumably stands for automatic mixed precision — TODO confirm.
AMP:
  ENABLED: true

# Model selection (NAME) and architecture hyper-parameters (SPEC).
# Booleans use the canonical lowercase form, matching every other section of
# this file; the parsed values are unchanged.
MODEL:
  NAME: 'cls_vit'
  SPEC:
    PATCH_SIZE: 16
    EMBED_DIM: 384
    NUM_HEADS: 6
    DEPTH: 12
    # NOTE(review): this is the only list-valued entry in SPEC (a
    # single-element list); confirm the consumer expects a list, not a scalar.
    MLP_RATIO: [4.0]
    ATTN_DROP_RATE: 0.0
    DROP_RATE: 0.0
    DROP_PATH_RATE: 0.1
    QKV_BIAS: true
    USE_CLS_TOKEN: true
    NORM_EMBED: true
    # Axial-attention options; the sub-keys only matter to the consumer when
    # ENABLED is true (presumably — verify against the model code).
    USE_AXIAL_ATTN:
      ENABLED: true
      SHARE_QKV: false
      WITH_INTER_PROJ: true
      USE_FULL_ATTN_FOR_CLS: false
# Data-augmentation settings. String values are single-quoted, following the
# quoting convention used for strings elsewhere in this file.
AUG:
  # Mixing-style augmentation knobs; exact semantics are defined by the
  # consumer (presumably mixup / cutmix strength and probability — TODO confirm).
  MIXUP_PROB: 1.0
  MIXUP: 0.8
  MIXCUT: 1.0
  # The section name suggests these are forwarded to timm's data-loading and
  # transform utilities — verify against the loader code.
  TIMM_AUG:
    USE_LOADER: true
    # RE_* presumably configure random erasing — TODO confirm.
    RE_COUNT: 1
    RE_MODE: 'pixel'
    RE_SPLIT: false
    RE_PROB: 0.25
    AUTO_AUGMENT: 'rand-m9-mstd0.5-inc1'
    HFLIP: 0.5
    VFLIP: 0.0
    COLOR_JITTER: 0.4
    INTERPOLATION: 'bicubic'
# Loss settings.
LOSS:
  LABEL_SMOOTHING: 0.1  # 0.0 would presumably disable smoothing — TODO confirm
# cuDNN backend flags (presumably applied to torch.backends.cudnn — verify
# against the training entry point).
CUDNN:
  BENCHMARK: true
  DETERMINISTIC: false
  ENABLED: true
# Dataset selection and location.
DATASET:
  DATASET: 'imagenet'
  DATA_FORMAT: 'tsv'
  ROOT: 'DATASET/imagenet-tsv/'  # relative path; base directory is not shown here
  # Split names; how they map to files under ROOT is up to the consumer.
  TEST_SET: 'val'
  TRAIN_SET: 'train'
# Evaluation settings.
TEST:
  BATCH_SIZE_PER_GPU: 32
  IMAGE_SIZE: [224, 224]  # same value as TRAIN.IMAGE_SIZE
  MODEL_FILE: ''  # empty string here; presumably means "no checkpoint given" — TODO confirm
# Training schedule and optimizer settings.
TRAIN:
  BATCH_SIZE_PER_GPU: 256
  # NOTE(review): whether LR is used as-is or scaled by the global batch size
  # is decided by the consumer — confirm before changing BATCH_SIZE_PER_GPU.
  LR: 0.00025
  IMAGE_SIZE: [224, 224]
  BEGIN_EPOCH: 0
  END_EPOCH: 300
  LR_SCHEDULER:
    METHOD: 'timm'
    # ARGS keys are lowercase, unlike the rest of this file; presumably they
    # are passed through to timm's scheduler factory — TODO confirm.
    ARGS:
      sched: 'cosine'
      warmup_epochs: 5
      warmup_lr: 0.000001
      min_lr: 0.00001
      cooldown_epochs: 10
      # NOTE(review): decay_rate is set alongside sched 'cosine'; confirm
      # whether the cosine schedule reads it.
      decay_rate: 0.1
  OPTIMIZER: 'adamW'
  WD: 0.05
  # Presumably name patterns of parameters excluded from weight decay — verify
  # against the optimizer-building code.
  WITHOUT_WD_LIST: ['bn', 'bias']
  # NOTE(review): MOMENTUM and NESTEROV look like SGD-style options; confirm
  # whether the 'adamW' path reads them.
  MOMENTUM: 0.9
  NESTEROV: true
  SHUFFLE: true
# Debug switch; disabled in this config.
DEBUG:
  DEBUG: false
