#!/usr/bin/env bash
# Launches the `training.main` module through torchrun with 8 processes per
# node, passing the options collected in `train_args` below.
#
# Usage: bash <this-script>
# All paths are relative, so run it from the directory that contains data/,
# metadata/ and checkpoints/ (presumably the repository root; confirm).
#
# The options are held in a bash array, one word per element, instead of a
# chain of backslash-continued lines. With continuations, a missing space
# before a trailing backslash silently glues the last word of one line to the
# first word of the next (this used to fuse the --image-caption-path value with
# the --embed-path flag). With an array, word boundaries never depend on
# trailing whitespace.

train_args=(
  # Flags are grouped as they appeared on the original command line; their
  # meaning is defined by training.main's argument parser, not by this script.
  --batch-size=32
  --lr=1e-5
  --wd=0.1
  --epochs=6
  --workers=4

  --model ViT-B-16
  --pretrained laion2b_s34b_b88k
  --warmup 1000
  --zeroshot-frequency 1
  --dataset-type proposals_distill_new

  --test-type coco_panoptic
  --train-data data/coco/coco_proposals.json
  --max-boxes 20

  --val-data data/coco/annotations/panoptic_val2017.json
  --image-caption-path ./data/pretrain_4m/cc3m_region_caption.json

  --embed-path metadata/coco_panoptic_clip_hand_craft_laion2b_ViTB16.npy
  --train-image-root data/coco/train2017

  --val-image-root data/coco/val2017
  --cache-dir checkpoints/
  --log-every-n-steps 50

  --save-frequency 1
  --extract-type=v2
  --loss-type=clipself,only_itc,region_roi
  --force-image-size 224

  --name openai
  --downsample-factor 16
  --det-image-size 384

  --alpha 1
)

# HF_ENDPOINT is set for this one command only (it is not exported to the rest
# of the shell). NOTE(review): presumably it redirects Hugging Face downloads
# to the hf-mirror.com mirror; confirm it is still wanted in your environment.
# This is the last command, so its exit status becomes the script's status.
HF_ENDPOINT=https://hf-mirror.com torchrun --nproc_per_node 8 -m training.main "${train_args[@]}"