# ---- Dataset and model shape ----
# Dataset identifier and the number of target classes.
dataset: imagenet
num_classes: 1000
# Input resolution; presumably the side length (pixels) of a square input.
# TODO confirm against the data loader.
img_size: 224
# Model hyperparameters. The key names suggest a ViT-style network
# (embedding width, patch side, attention heads, MLP expansion ratio);
# verify against the model constructor that consumes them.
dim: 384
patch_size: 16
num_heads: 8
mlp_ratio: 4
# ---- Input normalization and resizing ----
# Per-channel normalization statistics; these are the values commonly used
# for ImageNet RGB inputs.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
# NOTE(review): crop_pct 1.0 with scale [1.0, 1.0] presumably means no
# center-crop margin and no random-resized-crop scale jitter; confirm in
# the data pipeline.
crop_pct: 1.0
scale: [1.0, 1.0]
# Resampling filters; assumed to apply to evaluation and training
# respectively, going by the key names.
interpolation: bicubic
train_interpolation: bicubic
# ---- Training-time augmentation ----
# Auto-augment policy string. The format resembles a timm RandAugment spec;
# TODO confirm which parser consumes it.
aa: rand-m9-mstd0.5-inc1
# Mixup / CutMix settings (semantics assumed from the key names; verify
# against the mixup implementation in use).
mixup: 0.8
mixup_off_epoch: 0
mixup_prob: 1.0
mixup_mode: batch
mixup_switch_prob: 0.5
cutmix: 1.0
# Random-erasing settings (presumably probability and fill mode; confirm).
reprob: 0.25
remode: const
# ---- Optimization and schedule ----
# Mixed-precision switch, written as a canonical lowercase boolean.
amp: true
# NOTE(review): the training batch size (4) is much smaller than the
# validation batch size (48); confirm this is intentional.
batch_size: 4
val_batch_size: 48
# Exponent-notation floats carry an explicit decimal point: YAML 1.1 loaders
# such as PyYAML resolve a bare `5e-4` as the string "5e-4", not a float.
lr: 5.0e-4
min_lr: 0.00001
sched: cosine
weight_decay: 5.0e-2
# Schedule lengths, in epochs.
epochs: 300
cooldown_epochs: 10
warmup_epochs: 20
warmup_lr: 0.000001
opt: adamw
# Presumably the label-smoothing factor; confirm against the loss setup.
smoothing: 0.1
# Presumably the number of data-loading worker processes; confirm.
workers: 8
# Presumably the number of stacked blocks in the student model; confirm
# against the model constructor.
depths: 8
# ---- Knowledge distillation ----
# Master switch, written as a canonical lowercase boolean.
distill: true
distill_type: wsld
distill_weight: 1.0
model_teacher_name: vit-b
# Embedding widths of teacher and student; student_emb has the same value
# as `dim` (384).
teacher_emb: 768
student_emb: 384
# Explicit decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve the value
# as a float; a bare `7e-5` is resolved as the string "7e-5" there.
alpha_mgd: 7.0e-5
lambda_mgd: 0.15
# Checkpoint path used for resuming.
# NOTE(review): absolute, machine-specific path; it must exist on the host
# running the job or be overridden.
resume: '/root/SNN/code/Spikingformer-CML/imagenet/output/train/20230914-170841-vitsnn-small-vit_b-wsld-224/last.pth.tar'
