# Stage 1: launch run_beitv2_pretraining.py with torch.distributed.launch,
# 8 processes on GPUs 0-7.
#
# Inputs that must already exist:
#   - training images under /workspace/sync/imagenet-1k/train
#   - the tokenizer checkpoint passed via --tokenizer_weight
#     (presumably produced by a separate tokenizer-training run -- confirm).
#
# Checkpoints and logs are both written to .../beit2/pretrained. The
# fine-tuning command later in this file loads checkpoint-299.pth from that
# directory, so --epochs and --output_dir here must stay in sync with its
# --finetune path (assumes checkpoints are named by zero-based epoch index
# -- TODO confirm against the training script).
#
# If the launcher exits non-zero the script stops here with the same status,
# so fine-tuning is never started on a missing or stale checkpoint.
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -m torch.distributed.launch --nproc_per_node=8 run_beitv2_pretraining.py \
    --data_set image_folder \
    --data_path /workspace/sync/imagenet-1k/train \
    --output_dir /workspace/sync/Feature-Distillation/results/deit/beit2/pretrained \
    --log_dir /workspace/sync/Feature-Distillation/results/deit/beit2/pretrained \
    --model beit_base_patch16_224_8k_vocab \
    --shared_lm_head True \
    --early_layers 9 \
    --head_layers 2 \
    --num_mask_patches 75 \
    --second_input_size 224 \
    --second_interpolation bicubic \
    --min_crop_scale 0.2 \
    --tokenizer_model vqkd_encoder_base_decoder_1x768x12_deit \
    --tokenizer_weight /workspace/sync/Feature-Distillation/results/deit/beit2/tokenizer/checkpoint-99.pth \
    --batch_size 128 \
    --accum_iter 2 \
    --lr 1.5e-3 \
    --warmup_epochs 10 \
    --clip_grad 3.0 \
    --drop_path 0.0 \
    --layer_scale_init_value 0.1 \
    --imagenet_default_mean_and_std \
    --opt_betas 0.98 0.999 \
    --opt_eps 1e-8 \
    --epochs 300 \
    --save_ckpt_freq 20 || {
    pretrain_status=$?
    echo "BEiT v2 pre-training failed (exit ${pretrain_status}); not starting fine-tuning." >&2
    exit "${pretrain_status}"
}

# Stage 2: launch run_class_finetuning.py with torch.distributed.launch,
# 8 processes, starting from the checkpoint given by --finetune
# (.../beit2/pretrained/checkpoint-299.pth). Trains on imagenet-1k/train and
# evaluates on imagenet-1k/val; checkpoints and logs go to
# .../beit2/pretrained/finetuned.
#
# NOTE: the last option must NOT end with a line-continuation backslash.
# A trailing "\" would splice any line appended after this command into its
# argument list instead of running that line as a separate statement.
python -m torch.distributed.launch --nproc_per_node=8 run_class_finetuning.py \
    --data_path /workspace/sync/imagenet-1k/train \
    --eval_data_path /workspace/sync/imagenet-1k/val \
    --nb_classes 1000 \
    --data_set image_folder \
    --output_dir /workspace/sync/Feature-Distillation/results/deit/beit2/pretrained/finetuned \
    --log_dir /workspace/sync/Feature-Distillation/results/deit/beit2/pretrained/finetuned \
    --model beit_base_patch16_224 \
    --weight_decay 0.05 \
    --finetune /workspace/sync/Feature-Distillation/results/deit/beit2/pretrained/checkpoint-299.pth \
    --batch_size 128 \
    --lr 5e-5 \
    --update_freq 1 \
    --warmup_epochs 10 \
    --epochs 50 \
    --layer_decay 0.75 \
    --drop_path 0.0 \
    --mixup 0. \
    --cutmix 0. \
    --imagenet_default_mean_and_std \
    --dist_eval \
    --save_ckpt_freq 20