#!/usr/bin/env bash
#
# Per-node launcher: runs data/IC/capsfusion_vllm_multi_gpu.py once per local
# GPU, each worker on its own dataset shard. It is meant to be started on every
# node listed in the hostname table further down. Start and end times are
# printed with `date`.

# Start timestamp.
date

# The worker script is invoked through a relative path, so a failed cd must
# abort here instead of launching every worker from the wrong directory.
cd YOUR_ROOT_PATH/MLLM || {
    echo "ERROR: cannot change directory to YOUR_ROOT_PATH/MLLM" >&2
    exit 1
}

# Run-wide settings shared by every node.
# RUN_NAME is the stem of all log file names for this run.
RUN_NAME="Merged_capsfusion_multi_node"
# Log file for the single-node steps (currently commented out further down).
BASE_LOG_PATH="YOUR_ROOT_PATH/MLLM/logs/data/${RUN_NAME}.log"
# Cluster shape: number of nodes and GPUs used on each node.
NNODES=5
GPUS_PER_NODE=8
# Total number of workers across all nodes; passed to the Python script as
# --num_gpu. Shell arithmetic is used instead of forking `expr`.
NUM_PROCESSES=$((NNODES * GPUS_PER_NODE))

# Map this node's hostname to its rank in the run. The rank selects which
# dataset shards the node processes and which log file it writes to.
# Falls back to `hostname` when the shell does not provide $HOSTNAME.
case "${HOSTNAME:-$(hostname)}" in
    gpu03) MACHINE_RANK=0 ;;
    gpu04) MACHINE_RANK=1 ;;
    gpu05) MACHINE_RANK=2 ;;
    gpu06) MACHINE_RANK=3 ;;
    gpu07) MACHINE_RANK=4 ;;
    *)
        # An unlisted host has no rank; continuing would compute invalid shard
        # indices and redirect to an empty log path, so stop immediately.
        echo "ERROR: host '${HOSTNAME:-$(hostname)}' is not one of gpu03..gpu07; cannot determine MACHINE_RANK" >&2
        exit 1
        ;;
esac

# One log file per node, suffixed with the node's rank.
LOG_PATH="YOUR_ROOT_PATH/MLLM/logs/data/${RUN_NAME}-${MACHINE_RANK}.log"

# Print the resolved configuration for this node.
echo "RUN_NAME: $RUN_NAME, LOG_PATH: $LOG_PATH, NNODES: $NNODES, GPUS_PER_NODE: $GPUS_PER_NODE, NUM_PROCESSES: $NUM_PROCESSES, MACHINE_RANK: $MACHINE_RANK"

# # prepare the dataset on a single node
# CUDA_VISIBLE_DEVICES=0 python data/IC/capsfusion_vllm_multi_gpu.py --num_gpu $NUM_PROCESSES --dataset_shard_index 0 --skip_special_tokens --spaces_between_special_tokens --use_beam_search > $BASE_LOG_PATH 2>&1

# Generate captions on this node: start one background worker per local GPU.
# Worker i is pinned to GPU i via CUDA_VISIBLE_DEVICES and receives the shard
# index MACHINE_RANK * GPUS_PER_NODE + i.

# Without a rank or a log path the shard indices and the log redirect would be
# wrong, so refuse to start any worker.
if [ -z "${MACHINE_RANK:-}" ] || [ -z "${LOG_PATH:-}" ]; then
    echo "ERROR: MACHINE_RANK and LOG_PATH must be set before launching workers" >&2
    exit 1
fi

# Space-separated "<pid>:<shard index>" entries, one per started worker.
WORKER_JOBS=""
i=0
while [ "$i" -lt "$GPUS_PER_NODE" ]; do
    DATASET_SHARD_INDEX=$((MACHINE_RANK * GPUS_PER_NODE + i))
    echo "DATASET_SHARD_INDEX: $DATASET_SHARD_INDEX"

    # Only the GPU-0 worker's output is kept (appended to the node log); the
    # output of the other workers is discarded.
    if [ "$i" -eq 0 ]; then
        WORKER_LOG=$LOG_PATH
    else
        WORKER_LOG=/dev/null
    fi

    CUDA_VISIBLE_DEVICES=$i python data/IC/capsfusion_vllm_multi_gpu.py \
        --num_gpu "$NUM_PROCESSES" \
        --dataset_shard_index "$DATASET_SHARD_INDEX" \
        --skip_special_tokens \
        --spaces_between_special_tokens \
        --use_beam_search >> "$WORKER_LOG" 2>&1 &

    WORKER_JOBS="$WORKER_JOBS $!:$DATASET_SHARD_INDEX"
    i=$((i + 1))
done

# Wait for each worker individually so its exit status is not lost; most
# workers write to /dev/null, so this message is the only trace of a failure.
# The script keeps going afterwards so the remaining steps still run.
for JOB in $WORKER_JOBS; do
    if ! wait "${JOB%%:*}"; then
        echo "ERROR: worker for dataset shard ${JOB#*:} exited with a non-zero status" >&2
    fi
done

# # merge the generated data on a single node
# CUDA_VISIBLE_DEVICES=0 python data/IC/capsfusion_vllm_multi_gpu.py --num_gpu $NUM_PROCESSES --dataset_shard_index -1 --skip_special_tokens --spaces_between_special_tokens --use_beam_search >> $BASE_LOG_PATH 2>&1

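# # merge into the original dataset on a single node
# # NOTE(review): the command below is identical to the "merge data" command above — confirm the intended arguments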
# CUDA_VISIBLE_DEVICES=0 python data/IC/capsfusion_vllm_multi_gpu.py --num_gpu $NUM_PROCESSES --dataset_shard_index -1 --skip_special_tokens --spaces_between_special_tokens --use_beam_search >> $BASE_LOG_PATH 2>&1

# End timestamp; together with the `date` call at the top of the script it
# brackets the run.
date