# Model definition: a `vit` model described by three sub-sections -- the
# transformer encoder, the patch embedding and the positional encoding.
model:
  name: vit
  transformer:
    # 192 / 3 = 64 (per-head width if embed_dim is split evenly across heads
    # -- TODO confirm against the model code).
    embed_dim: 192
    num_encoder_layers: 12
    num_heads: 3
    # 768 = 4 x embed_dim.
    dim_feedforward: 768
    dropout: 0.1
    activation: gelu
    # Canonical lowercase boolean; capitalized `True` is only a boolean under
    # some schemas/loaders.
    final_norm: true
    # Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
    # PyYAML only resolve this form as a float (`1e-6` would load as a string).
    norm_eps: 1.0e-6
  patch_embed:
    name: vit_like
    # 224 / 16 = 14 patches per side (196 total), assuming square,
    # non-overlapping patches -- TODO confirm.
    img_size: 224
    patch_size: 16
    image_channels: 3
  pos_encoding:
    name: learnable
    # Explicit canonical null. Presumably this disables dropout on the
    # positional encoding -- verify against the consumer.
    dropout: null

# Checkpoint to resume from, keyed by dataset name.
# NOTE(review): `iter-xxxx` looks like a placeholder for the iteration number;
# presumably it must be replaced with a real checkpoint file name before use.
resume:
  cifar_10: 'run/cifar_10/vanilla/deit_tiny/ckpt/iter-xxxx.pth'
  cifar_100: 'run/cifar_100/vanilla/deit_tiny/ckpt/iter-xxxx.pth'
  caltech_101: 'run/caltech_101/vanilla/deit_tiny/ckpt/iter-xxxx.pth'
