# Run region_features/extract_features.py on the LLaVA-Pretrain image set,
# selecting the CLIP backend (ViT-L/14@336px) with fp32 dtype.
#
# Every path below is relative, so launch this from the directory that
# contains both region_features/ and playground/.

# Shared path prefixes, factored out so each dataset location is spelled once.
data_root=./playground/data
pretrain_dir="${data_root}/LLaVA/LLaVA-Pretrain"

image_dir="${pretrain_dir}/images/"
data_path="${pretrain_dir}/blip_laion_cc_sbu_558k.json"
feature_dir="${data_root}/regions/LLaVA-Pretrain/features"

# NOTE(review): --multiple 14 presumably matches the ViT-L/14 patch size;
# confirm against extract_features.py before changing either value.
python region_features/extract_features.py \
  --image_dir "${image_dir}" \
  --data_path "${data_path}" \
  --feature_dir "${feature_dir}" \
  --model clip \
  --clip_model 'ViT-L/14@336px' \
  --dtype fp32 \
  --multiple 14