# Training phase: dataset, batch size, and evaluation/checkpoint cadence.
TRAIN:
  DATASET: kinetics
  ENABLE: True
  AUTO_RESUME: True
  BATCH_SIZE: 128
  # NOTE(review): both periods are presumably counted in epochs — confirm
  # against the training loop that consumes them.
  CHECKPOINT_PERIOD: 2
  EVAL_PERIOD: 5
# Input data: dataset location, temporal sampling, and spatial crop sizes.
DATA:
  # NOTE(review): `xxx/` looks like a placeholder — point this at the real
  # dataset directory before launching a run.
  PATH_TO_DATA_DIR: xxx/k400/
  INPUT_CHANNEL_NUM: [3]
  # Temporal sampling.
  NUM_FRAMES: 8
  SAMPLING_RATE: 32
  # Spatial preprocessing; train and test crops are both 224.
  TRAIN_JITTER_SCALES: [256, 320]
  TRAIN_CROP_SIZE: 224
  TEST_CROP_SIZE: 224
# Attention variant used by the model.
TIMESFORMER:
  # Plain scalar: the value contains no YAML specials, so quotes are optional.
  ATTENTION_TYPE: divided_space_time
# Optimizer and learning-rate schedule.
SOLVER:
  # Learning rates are written with an explicit decimal point in the mantissa
  # (1.0e-4, not 1e-4): YAML 1.1 resolvers such as PyYAML load the dot-less
  # form as a string instead of a float.
  BASE_LR: 1.0e-4
  LR_POLICY: cosine
  # NOTE(review): STEPS/LRS describe a step-wise schedule and are presumably
  # ignored while LR_POLICY is `cosine` — confirm, then drop them if unused.
  STEPS: [0, 11, 14]
  LRS: [1, 0.1, 0.01]
  MAX_EPOCH: 15
  # NOTE(review): MOMENTUM may have no effect with `adamw` — confirm against
  # the optimizer construction code.
  MOMENTUM: 0.9
  WEIGHT_DECAY: 0.05
  OPTIMIZING_METHOD: adamw
  # Warm-up covers the first 3 of the 15 epochs, starting from WARMUP_START_LR.
  WARMUP_EPOCHS: 3.0
  WARMUP_START_LR: 1.0e-6
# Model selection, classification head size, and regularization.
MODEL:
  MODEL_NAME: vit_base_patch16_224_ndrope
  NUM_CLASSES: 400
  ARCH: vit
  LOSS_FUNC: cross_entropy
  # Dropout is disabled (0.0); DROP_PATH_RATE is the only non-zero rate here.
  DROPOUT_RATE: 0.0
  DROP_PATH_RATE: 0.2
# Test phase: dataset, batch size, and number of views/crops per video.
TEST:
  DATASET: kinetics
  ENABLE: True
  BATCH_SIZE: 128
  # NOTE(review): presumably 1 temporal view x 3 spatial crops per video —
  # confirm against the test loader.
  NUM_ENSEMBLE_VIEWS: 1
  NUM_SPATIAL_CROPS: 3
# Mixup / CutMix augmentation settings.
MIXUP:
  # NOTE(review): this section uses ENABLED while TRAIN and TEST use ENABLE —
  # confirm the key name matches the config schema.
  ENABLED: True
  MODE: batch
  ALPHA: 0.8
  CUTMIX_ALPHA: 1.0
  PROB: 1.0
  SWITCH_PROB: 0.5
# Data loader settings.
DATA_LOADER:
  PIN_MEMORY: True
  NUM_WORKERS: 16
# Run-level settings: output location, seed, and hardware layout.
OUTPUT_DIR: ./check_ndrope_adamw
RNG_SEED: 0
NUM_GPUS: 4
NUM_SHARDS: 1

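# Alternative configuration (SGD with a step-wise LR schedule, no mixup),
# kept commented out for reference. It is not loaded.
# TRAIN: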
#   ENABLE: True
#   DATASET: kinetics
#   BATCH_SIZE: 128
#   EVAL_PERIOD: 5
#   CHECKPOINT_PERIOD: 5
#   AUTO_RESUME: True
# DATA:
#   PATH_TO_DATA_DIR: xxx/k400/
#   NUM_FRAMES: 8
#   SAMPLING_RATE: 32
#   TRAIN_JITTER_SCALES: [256, 320]
#   TRAIN_CROP_SIZE: 224
#   TEST_CROP_SIZE: 224
#   INPUT_CHANNEL_NUM: [3]
# TIMESFORMER:
#   ATTENTION_TYPE: 'divided_space_time'
# SOLVER:
#   BASE_LR: 0.005
#   LR_POLICY: steps_with_relative_lrs
#   STEPS: [0, 11, 14]
#   LRS: [1, 0.1, 0.01]
#   MAX_EPOCH: 15
#   MOMENTUM: 0.9
#   WEIGHT_DECAY: 1e-4
#   OPTIMIZING_METHOD: sgd
# MODEL:
#   MODEL_NAME: vit_base_patch16_224_ndrope
#   NUM_CLASSES: 400
#   ARCH: vit
#   LOSS_FUNC: cross_entropy
#   DROPOUT_RATE: 0.5
# TEST:
#   ENABLE: True
#   DATASET: kinetics
#   BATCH_SIZE: 128
#   NUM_ENSEMBLE_VIEWS: 1
#   NUM_SPATIAL_CROPS: 3
# DATA_LOADER:
#   NUM_WORKERS: 8
#   PIN_MEMORY: True
# NUM_GPUS: 4
# NUM_SHARDS: 1
# RNG_SEED: 0
# OUTPUT_DIR: ./check_ndrope
