
# Abort the whole script as soon as any command fails.
set -e

# ---- Hardware -----------------------------------------------------------------------------------
# NUM_GPUS selects the single-GPU path (1) or the sharded multi-GPU path (anything else);
# CUDA_DEVICES is the comma-separated list the per-shard GPU ids are taken from.
NUM_GPUS=4
CUDA_DEVICES='0,1,2,3'

# ---- Dataset ------------------------------------------------------------------------------------
ANNO_PATH='/home/wangb/lwt/dataset/VLN/instruction_filter_data/rxrV2/annotations.json'
IMAGE_FOLDER='/home/wangb/lwt/dataset/VLN/instruction_filter_data/rxrV2/images'
NUM_TOTAL_SAMPLES=50000   # forwarded as --total_samples

# ---- Model and output ---------------------------------------------------------------------------
MODEL_PATH='/home/wangb/lwt/xzh_wp/model_zoo/Qwen/Qwen2.5-VL-3B-Instruct'
# Optional "_multi_turn" suffix can be added to the file name for multi-turn runs.
# NOTE(review): the file name says "7B" while MODEL_PATH points at a 3B checkpoint — confirm.
OUTPUT_PATH='./results/instructions/qwen2vl_7B.json'
LORA_PATH=''              # empty => no --lora_path argument is passed

# =====================================================================================================
# ---- Evaluation mode ----------------------------------------------------------------------------
# "online" passes --model_path/--image_folder; "offline" passes --predictions_file instead.
EVAL_MODE='online'
PREDICTIONS_FILE='/path/to/predictions.json'

# ---- Generation / scoring parameters forwarded to the evaluation script -------------------------
MAX_NEW_TOKENS=1024
MAX_SEQ_LENGTH=2048
FUZZY_THRESHOLD=0.9

# Set to "true" to append --multi_turn to the evaluation command.
MULTI_TURN=false

#######################################
# Evaluate one shard of the dataset on one GPU by invoking
# instruction_eval_qwen2vl.py with the shard-specific arguments.
# Globals (read):
#   EVAL_MODE, ANNO_PATH, OUTPUT_PATH, MAX_NEW_TOKENS, FUZZY_THRESHOLD,
#   MAX_SEQ_LENGTH, NUM_TOTAL_SAMPLES, PREDICTIONS_FILE, MODEL_PATH,
#   IMAGE_FOLDER, LORA_PATH, MULTI_TURN
# Arguments:
#   $1 - GPU id, exposed to the worker through CUDA_VISIBLE_DEVICES
#   $2 - shard index, forwarded as --shard_id
#   $3 - total number of shards, forwarded as --num_shards
# Outputs:
#   Progress lines on stdout; the worker writes to
#   "<OUTPUT_PATH without .json>_shard<RANK>.json".
# Returns:
#   0 on success, otherwise the exit status of the python worker.
#######################################
run_single_gpu() {
    local GPU_ID=$1
    local RANK=$2
    local WORLD_SIZE=$3
    local OUTPUT_SHARD="${OUTPUT_PATH%.json}_shard${RANK}.json"
    echo "[GPU ${GPU_ID}] Starting shard ${RANK}/${WORLD_SIZE}..."

    # Build the command as an array (not a string passed to eval) so that
    # every value stays a single argument even if it contains spaces or
    # shell metacharacters.
    local -a cmd=(
        python instruction_eval_qwen2vl.py
        --mode "${EVAL_MODE}"
        --anno_path "${ANNO_PATH}"
        --output_path "${OUTPUT_SHARD}"
        --max_new_tokens "${MAX_NEW_TOKENS}"
        --fuzzy_threshold "${FUZZY_THRESHOLD}"
        --max_seq_length "${MAX_SEQ_LENGTH}"
        --shard_id "${RANK}"
        --num_shards "${WORLD_SIZE}"
        --total_samples "${NUM_TOTAL_SAMPLES}"
    )

    # Mode-specific inputs: offline scores an existing predictions file,
    # online needs the model (optionally with a LoRA adapter) and the images.
    if [ "${EVAL_MODE}" == "offline" ]; then
        cmd+=(--predictions_file "${PREDICTIONS_FILE}")
    elif [ "${EVAL_MODE}" == "online" ]; then
        cmd+=(--model_path "${MODEL_PATH}" --image_folder "${IMAGE_FOLDER}")

        if [ -n "${LORA_PATH}" ]; then
            cmd+=(--lora_path "${LORA_PATH}")
        fi
    fi

    if [ "${MULTI_TURN}" == "true" ]; then
        cmd+=(--multi_turn)
    fi

    # The assignment prefix scopes CUDA_VISIBLE_DEVICES to this one command.
    # Propagate a worker failure explicitly instead of relying on `set -e`,
    # so "completed" is never printed for a failed shard.
    CUDA_VISIBLE_DEVICES="${GPU_ID}" "${cmd[@]}" || return
    echo "[GPU ${GPU_ID}] Shard ${RANK} completed."
}

# Print a summary of the effective configuration before any work starts.
printf '%s\n' \
    "Mode: ${EVAL_MODE}" \
    "Num GPUs: ${NUM_GPUS}" \
    "CUDA Devices: ${CUDA_DEVICES}" \
    "Anno Path: ${ANNO_PATH}"

# Offline runs only need the predictions file; every other mode reports the
# model, the optional LoRA adapter and the image folder.
case "${EVAL_MODE}" in
    offline)
        printf '%s\n' "Predictions: ${PREDICTIONS_FILE}"
        ;;
    *)
        printf '%s\n' "Model: ${MODEL_PATH}"
        if [[ -n "${LORA_PATH}" ]]; then
            printf '%s\n' "LoRA: ${LORA_PATH}"
        fi
        printf '%s\n' "Images: ${IMAGE_FOLDER}"
        ;;
esac

printf '%s\n' \
    "Output: ${OUTPUT_PATH}" \
    "Max New Tokens: ${MAX_NEW_TOKENS}" \
    "Multi-turn: ${MULTI_TURN}" \
    "=============================================="

# Split the comma-separated device list into an array of GPU ids.
IFS=',' read -ra GPU_ARRAY <<< "${CUDA_DEVICES}"

# Fail early on an inconsistent GPU configuration: a NUM_GPUS larger than the
# device list would hand an empty GPU id to a shard.
if ! [ "${NUM_GPUS}" -ge 1 ] 2>/dev/null; then
    echo "Error: NUM_GPUS must be a positive integer, got '${NUM_GPUS}'." >&2
    exit 1
fi
if [ "${NUM_GPUS}" -gt "${#GPU_ARRAY[@]}" ]; then
    echo "Error: NUM_GPUS=${NUM_GPUS} but CUDA_DEVICES='${CUDA_DEVICES}' lists only ${#GPU_ARRAY[@]} device(s)." >&2
    exit 1
fi

if [ "${NUM_GPUS}" -eq 1 ]; then
    echo "Running single GPU evaluation..."
    export CUDA_VISIBLE_DEVICES="${GPU_ARRAY[0]}"

    # Build the command as an array so each value stays one argument even
    # when it contains spaces; no eval is needed to run it.
    EVAL_CMD=(
        python instruction_eval_qwen2vl.py
        --mode "${EVAL_MODE}"
        --anno_path "${ANNO_PATH}"
        --output_path "${OUTPUT_PATH}"
        --max_new_tokens "${MAX_NEW_TOKENS}"
        --fuzzy_threshold "${FUZZY_THRESHOLD}"
        --max_seq_length "${MAX_SEQ_LENGTH}"
        --total_samples "${NUM_TOTAL_SAMPLES}"
    )

    if [ "${EVAL_MODE}" == "offline" ]; then
        EVAL_CMD+=(--predictions_file "${PREDICTIONS_FILE}")
    elif [ "${EVAL_MODE}" == "online" ]; then
        EVAL_CMD+=(--model_path "${MODEL_PATH}" --image_folder "${IMAGE_FOLDER}")

        if [ -n "${LORA_PATH}" ]; then
            EVAL_CMD+=(--lora_path "${LORA_PATH}")
        fi
    fi

    if [ "${MULTI_TURN}" == "true" ]; then
        EVAL_CMD+=(--multi_turn)
    fi

    "${EVAL_CMD[@]}"
else
    echo "Running multi-GPU parallel evaluation with ${NUM_GPUS} GPUs..."

    # Common prefix of every per-shard result file.
    SHARD_PREFIX="${OUTPUT_PATH%.json}_shard"

    # Remove shard files left over from a previous run so the merge step can
    # never pick up stale results.
    rm -f -- "${SHARD_PREFIX}"*.json

    # Launch one background worker per GPU and remember its PID.
    PIDS=()
    for (( i=0; i<NUM_GPUS; i++ )); do
        run_single_gpu "${GPU_ARRAY[i]}" "${i}" "${NUM_GPUS}" &
        PIDS+=("$!")
    done

    # Wait for every worker individually: `set -e` does not react to a failing
    # background job, so each exit status is checked explicitly.
    echo "Waiting for all GPU tasks to complete..."
    FAILED=0
    for pid in "${PIDS[@]}"; do
        if ! wait "${pid}"; then
            echo "Error: Process $pid failed"
            FAILED=1
        fi
    done

    if [ "${FAILED}" -eq 1 ]; then
        echo "Some GPU tasks failed. Check the logs above."
        exit 1
    fi

    echo "All GPU tasks completed. Merging results..."

    # Collect the shard paths in rank order for the merge step.
    SHARD_FILES=()
    for (( i=0; i<NUM_GPUS; i++ )); do
        SHARD_FILES+=("${SHARD_PREFIX}${i}.json")
    done

    python instruction_eval_qwen2vl.py \
        --mode merge \
        --output_path "${OUTPUT_PATH}" \
        --shard_files "${SHARD_FILES[@]}" \
        --fuzzy_threshold "${FUZZY_THRESHOLD}"

    # Shards are deleted after a successful merge by default; run with
    # KEEP_SHARDS=true in the environment to keep them for debugging.
    if [ "${KEEP_SHARDS:-false}" == "true" ]; then
        echo "Shard files preserved for debugging. Run 'rm ${OUTPUT_PATH%.json}_shard*.json' to clean up."
    else
        rm -f -- "${SHARD_PREFIX}"*.json
        echo "Shard files removed after merging."
    fi
fi

echo "Evaluation completed!"