# Experiment bookkeeping: run name, tags and project label.
EXPERIMENT:
  # Empty string here; presumably filled in or derived by the training
  # script — confirm against the consumer.
  NAME: ""
  # Comma-separated tags: method, teacher, student and a layer marker.
  # NOTE(review): the tag says "layer17" while VITKD.LAYERS in this file is
  # [0, 2] — confirm the tag matches this configuration.
  TAG: "vitkd,vit-large,vit-tiny,layer17"
  PROJECT: "vit-baselines"
# Dataset selection and data-loading settings.
DATASET:
  TYPE: "imagenet"
  # Presumably the number of data-loader worker processes — confirm against
  # the loader code.
  NUM_WORKERS: 32
  # Two-element input size (224 x 224); the axis order is not visible here.
  INPUT_SIZE: [224, 224]
  # Test-time overrides; only the batch size is set in this file.
  TEST:
    BATCH_SIZE: 128
# Distillation method and the teacher/student model pair.
DISTILLER:
  # A top-level section with the same name (VITKD) appears later in this
  # file; presumably it carries this method's options — confirm.
  TYPE: "VITKD"
  # Model identifiers; presumably resolved by a model registry in the
  # training code — verify the exact names there.
  TEACHER: "vit_large"
  STUDENT: "vit_tiny"
# Optimizer and learning-rate schedule settings for training.
SOLVER:
  BATCH_SIZE: 512
  EPOCHS: 100
  # Initial learning rate.
  LR: 0.2
  # Options specific to the SGD optimizer selected by TYPE below.
  SGD:
    MOMENTUM: 0.9
  SCHEDULE:
    # Multi-step schedule. Presumably LR is multiplied by RATE at each epoch
    # listed in STAGES — confirm against the trainer.
    MULTISTEP:
      STAGES: [30, 60, 90]
      RATE: 0.1
  WEIGHT_DECAY: 0.0001
  TYPE: "SGD"
# Logging and checkpointing cadence.
# NOTE(review): the units of both frequencies (iterations vs. epochs) are
# not visible in this file — confirm against the trainer.
LOG:
  TENSORBOARD_FREQ: 50
  SAVE_CHECKPOINT_FREQ: 10
# FITNET loss settings.
# NOTE(review): DISTILLER.TYPE in this file is "VITKD"; confirm whether the
# VITKD distiller actually reads this FITNET override or whether it is a
# leftover from another config.
FITNET:
  LOSS:
    # Cross-entropy weight set to zero here.
    CE_WEIGHT: 0.0
# Options for the VITKD section (DISTILLER.TYPE in this file is "VITKD").
VITKD:
  # Boolean flag, disabled here. Written in canonical lowercase form; it
  # parses to the same boolean value as before.
  REF_AMD: false
  # Presumably the fraction of tokens masked — confirm against the distiller.
  MASKING_RATIO: 0.5
  # Explicitly empty list (not null).
  M_LAYERS: []
  # Layer indices used by this method; exact meaning is defined by the
  # distiller code — verify there.
  LAYERS: [0, 2]
