# Training-phase settings.
TRAIN:
  ENABLE: true
  DATASET: imagenet
  BATCH_SIZE: 256
  # The two periods below are presumably counted in epochs -- confirm against
  # the training loop that consumes them.
  EVAL_PERIOD: 10
  CHECKPOINT_PERIOD: 1
  AUTO_RESUME: true
# Input data settings.
DATA:
  # Placeholder, intentionally left commented out: uncomment it and replace
  # the value to set the dataset location in this file.
  # PATH_TO_DATA_DIR: path-to-imagenet-dir
  # Three-element normalization statistics; presumably one value per colour
  # channel (INPUT_CHANNEL_NUM below is 3) -- confirm channel order.
  MEAN:
    - 0.485
    - 0.456
    - 0.406
  STD:
    - 0.229
    - 0.224
    - 0.225
  # A single frame per sample, with identical crop sizes for train and test.
  NUM_FRAMES: 1
  TRAIN_CROP_SIZE: 224
  TEST_CROP_SIZE: 224
  INPUT_CHANNEL_NUM: [3]
# Settings under the MVIT key.
MVIT:
  PATCH_2D: true
  ZERO_DECAY_POS_CLS: false
  MODE: conv
  CLS_EMBED_ON: true
  # Two-element patch settings (PATCH_2D is enabled above).
  PATCH_KERNEL: [7, 7]
  PATCH_STRIDE: [4, 4]
  PATCH_PADDING: [3, 3]
  EMBED_DIM: 96
  NUM_HEADS: 1
  MLP_RATIO: 4.0
  QKV_BIAS: true
  DROPPATH_RATE: 0.1
  DEPTH: 16
  NORM: layernorm
  # DIM_MUL and HEAD_MUL hold the same three pairs. The leading value of each
  # pair (1, 3, 14) also leads the POOL_Q_STRIDE entries; presumably it is a
  # block index within the DEPTH-16 stack -- TODO confirm with the model code.
  DIM_MUL:
    - [1, 2.0]
    - [3, 2.0]
    - [14, 2.0]
  HEAD_MUL:
    - [1, 2.0]
    - [3, 2.0]
    - [14, 2.0]
  POOL_KVQ_KERNEL: [1, 3, 3]
  POOL_KV_STRIDE_ADAPTIVE: [1, 4, 4]
  POOL_Q_STRIDE:
    - [1, 1, 2, 2]
    - [3, 1, 2, 2]
    - [14, 1, 2, 2]
# Data-augmentation settings.
AUG:
  ENABLE: true
  COLOR_JITTER: 0.4
  AA_TYPE: rand-m9-n6-mstd0.5-inc1
  INTERPOLATION: bicubic
  # RE_* keys: presumably random-erasing options -- confirm with the code
  # that reads them.
  RE_PROB: 0.25
  RE_MODE: pixel
  RE_COUNT: 1
  RE_SPLIT: false
# Mixup / CutMix settings.
MIXUP:
  ENABLE: true
  # Alpha values for the two mixing modes.
  ALPHA: 0.8
  CUTMIX_ALPHA: 1.0
  # Probabilities; exact semantics are defined by the consumer of this
  # section -- TODO confirm.
  PROB: 1.0
  SWITCH_PROB: 0.5
  LABEL_SMOOTH_VALUE: 0.1
# Optimizer and learning-rate schedule settings.
SOLVER:
  BASE_LR_SCALE_NUM_SHARDS: true
  BASE_LR: 0.00025
  LR_POLICY: cosine
  MAX_EPOCH: 300
  MOMENTUM: 0.9
  WEIGHT_DECAY: 0.05
  WARMUP_EPOCHS: 70.0
  # Exponent-notation values carry an explicit decimal point ("1.0e-8", not
  # "1e-8"). YAML 1.1 resolvers such as PyYAML only recognise a float when the
  # mantissa contains a ".", and would otherwise load the value as a string.
  WARMUP_START_LR: 1.0e-8
  OPTIMIZING_METHOD: adamw
  COSINE_AFTER_WARMUP: true
  COSINE_END_LR: 1.0e-6
  ZERO_WD_1D_PARAM: true
  CLIP_GRAD_L2NORM: 1.0
# Model selection and classification-head settings.
MODEL:
  # Which model to build.
  ARCH: mvit
  MODEL_NAME: MViT
  # Output size, loss and dropout.
  NUM_CLASSES: 1000
  LOSS_FUNC: soft_cross_entropy
  DROPOUT_RATE: 0.0
# Test-phase settings; the phase is switched off in this configuration.
TEST:
  ENABLE: false
  DATASET: imagenet
  BATCH_SIZE: 256
# Data-loader settings.
DATA_LOADER:
  NUM_WORKERS: 8
  PIN_MEMORY: true
# Top-level run settings.
NUM_GPUS: 8
NUM_SHARDS: 1
RNG_SEED: 0
# A single dot: presumably resolved as the current working directory by the
# consumer -- confirm.
OUTPUT_DIR: '.'
