# Launch main.py through torchrun on a single node.
#
# Eight GPU ids (0-7) are made visible to the launched processes and torchrun
# is asked for eight processes on this node. The two values are declared next
# to each other so they are easy to keep in sync when the GPU count changes.
visible_gpus=0,1,2,3,4,5,6,7
procs_per_node=8

# Everything after main.py is passed on the command line as written; what each
# flag means is defined by main.py, which is not visible from this file.
CUDA_VISIBLE_DEVICES="${visible_gpus}" \
  torchrun --nproc_per_node="${procs_per_node}" main.py \
    --batch_size 1 \
    --output_prefix llava_1_5_w_vig_value_ \
    --cache_preprocessed --cache_dir data/llava/vig/.cache_preprocessed \
    --shard_dir ./vig_shards --image_folder data/llava \
    --num_workers 4 --loss_dtype fp32
