# Run identity: project, labels and run name.
# NOTE(review): presumably consumed by the experiment logger/tracker —
# confirm against the config loader.
EXPERIMENT:
  PROJECT: "vit-baselines"
  # A single string holding three comma-separated labels.
  TAG: "vitkd-official,vit-large,vit-tiny"
  # Left empty here.
  # NOTE(review): presumably a name is derived elsewhere when blank — verify.
  NAME: ""
# Dataset selection and input settings.
DATASET:
  TYPE: "imagenet"
  # NOTE(review): presumably toggles training on a reduced split — confirm.
  SUBSET: false
  # NOTE(review): presumably the number of data-loading workers — confirm.
  NUM_WORKERS: 32
  # Two-element size. NOTE(review): presumably [height, width] — confirm.
  INPUT_SIZE: [224, 224]
  TEST:
    # Set independently of SOLVER.BATCH_SIZE.
    BATCH_SIZE: 128
# Distillation setup: which models are paired and which method joins them.
# NOTE(review): all three values are identifiers presumably resolved by
# registries elsewhere in the project — confirm the exact spellings there.
DISTILLER:
  TEACHER: "vit_large"
  STUDENT: "vit_tiny"
  TYPE: "VITKD_O"
# Optimisation settings: optimiser choice, run length, learning rate and
# its schedule.
SOLVER:
  # Optimiser selector, kept next to the matching per-optimiser section.
  TYPE: "SGD"
  SGD:
    MOMENTUM: 0.9
  BATCH_SIZE: 512
  EPOCHS: 100
  # NOTE(review): confirm 0.2 is intended for a ViT student at batch 512.
  LR: 0.2
  WEIGHT_DECAY: 0.0001
  SCHEDULE:
    MULTISTEP:
      # All three stages fall inside the 100-epoch run.
      # NOTE(review): presumably LR is scaled by RATE at each listed
      # epoch — confirm against the scheduler implementation.
      STAGES: [30, 60, 90]
      RATE: 0.1
# Logging and checkpoint cadence.
# NOTE(review): the units are not visible in this file (presumably epochs
# for checkpoints and iterations for TensorBoard) — confirm in the trainer.
LOG:
  SAVE_CHECKPOINT_FREQ: 10
  TENSORBOARD_FREQ: 50
# NOTE(review): this section is named FITNET while DISTILLER.TYPE is
# "VITKD_O". Confirm the selected distiller actually reads this value;
# if it does not, the override is dead and can be dropped.
FITNET:
  LOSS:
    # Zero weight. NOTE(review): presumably disables the cross-entropy
    # term of the loss — verify this is intended.
    CE_WEIGHT: 0.0
# Method-specific settings for the ViT distillation loss.
VITKD:
  # NOTE(review): the meaning of this flag is not visible in this file —
  # document it once confirmed against the distiller code.
  REF_AMD: false
  HPARAMS:
    # BETA is one tenth of ALPHA.
    # NOTE(review): presumably weights for two separate loss terms —
    # confirm which term each one scales.
    ALPHA: 0.00003
    BETA: 0.000003
  # NOTE(review): presumably the fraction of tokens masked — confirm.
  MASKING_RATIO: 0.5
