#!/usr/bin/env bash
#
# Launches train.py through torchrun for the QKFormer_10_768_td model, passing
# --eval together with --resume pointing at a saved checkpoint, with one worker
# process per visible GPU.
#
# Usage: run from the directory that contains train.py; every path below is
# relative to the current working directory.
#
# NOTE(review): the meaning of each train.py flag is defined in train.py, which
# is not part of this script. Presumably --eval makes this an evaluation-only
# run of the checkpoint given by --resume; confirm against train.py.

set -euo pipefail

# Single source of truth for the GPUs to use. The worker count is derived from
# this list so that CUDA_VISIBLE_DEVICES and --nproc_per_node cannot drift
# apart when the list is edited.
readonly gpu_ids="0,1,2,3,4,5,6,7"

gpu_list=()
IFS=',' read -r -a gpu_list <<< "${gpu_ids}"
readonly num_procs="${#gpu_list[@]}"

# Arguments forwarded verbatim to train.py. Kept in an array so each flag stays
# on its own line without fragile backslash continuations.
train_args=(
  --data_path ./dataset/imagenet
  --model QKFormer_10_768_td
  --td
  --eval
  --input_size 224
  --time_step 4
  --batch_size 60
  --accum_iter 1
  --resume ./122Qkformer/output/511_NO6/checkpoint-23.pth
)

# The environment assignment applies to this torchrun invocation only.
CUDA_VISIBLE_DEVICES="${gpu_ids}" torchrun \
  --nproc_per_node="${num_procs}" \
  train.py "${train_args[@]}"