#!/usr/bin/env bash
# Drives data/IC/capsfusion_vllm_multi_gpu.py for the "laion-coco-aesthetic"
# dataset on a single node, in four stages:
#   1. prepare dataset            (shard index 0, GPU 0, foreground)
#   2. generate captions          (one background worker per GPU/shard)
#   3. merge data                 (shard index -1, GPU 0)
#   4. merge into original dataset (shard index -1, GPU 0)
#
# Replace YOUR_ROOT_PATH with the real checkout location before running.
# The script deliberately avoids bash-only syntax so `sh script.sh` still works.

date

# Every python path below is relative to this directory, so a failed cd must
# stop the script instead of running the pipeline from the wrong place.
cd YOUR_ROOT_PATH/MLLM || { echo "ERROR: cannot cd to YOUR_ROOT_PATH/MLLM" >&2; exit 1; }

RUN_NAME="LCA_capsfusion"
LOG_PATH="YOUR_ROOT_PATH/MLLM/logs/data/$RUN_NAME.log"
NUM_GPUS=8

echo "RUN_NAME: $RUN_NAME, LOG_PATH: $LOG_PATH, NUM_GPUS: $NUM_GPUS"

# Print an error on stderr and abort the whole pipeline.
die() {
    echo "ERROR: $*" >&2
    exit 1
}

# Run the captioning script once.
#   $1 - GPU id exported as CUDA_VISIBLE_DEVICES
#   $2 - value passed to --dataset_shard_index
# Output is not redirected here; each caller picks its own destination.
# Returns the exit status of the python process.
run_capsfusion() {
    CUDA_VISIBLE_DEVICES="$1" python data/IC/capsfusion_vllm_multi_gpu.py \
        --num_gpu "$NUM_GPUS" \
        --dataset_name "laion-coco-aesthetic" \
        --dataset_shard_index "$2" \
        --skip_special_tokens \
        --spaces_between_special_tokens \
        --use_beam_search
}

# The redirections below fail (and the command is never started) if the log
# directory does not exist yet.
mkdir -p "$(dirname "$LOG_PATH")" || die "cannot create log directory for $LOG_PATH"

# prepare dataset (truncates any previous log)
run_capsfusion 0 0 > "$LOG_PATH" 2>&1 || die "prepare step failed, see $LOG_PATH"

# generate captions in one node: one background worker per GPU.
# Only shard 0 appends to the log; the other shards discard their output.
worker_pids=""
i=0
while [ "$i" -lt "$NUM_GPUS" ]
do
    if [ "$i" -eq 0 ]
    then
        run_capsfusion "$i" "$i" >> "$LOG_PATH" 2>&1 &
    else
        run_capsfusion "$i" "$i" > /dev/null 2>&1 &
    fi
    worker_pids="$worker_pids $!"
    i=$((i + 1))
done

# A bare `wait` always succeeds; wait on each PID so a crashed shard is
# noticed before the merge steps consume incomplete results.
failed_workers=0
for pid in $worker_pids
do
    wait "$pid" || failed_workers=$((failed_workers + 1))
done
[ "$failed_workers" -eq 0 ] || die "$failed_workers of $NUM_GPUS caption workers failed"

# merge data
run_capsfusion 0 -1 >> "$LOG_PATH" 2>&1 || die "merge step failed, see $LOG_PATH"

# merge into original dataset
# NOTE(review): this invocation is identical to the "merge data" step above;
# presumably the python script picks the next stage from on-disk state - confirm.
run_capsfusion 0 -1 >> "$LOG_PATH" 2>&1 || die "merge-into-original step failed, see $LOG_PATH"

date