# Root of this repository checkout. Replace the placeholder before running.
BASE_PATH="[PATH_to_THIS_REPO]"
# 1. LongBench v2 data file; handed to pred_batch.py as --input_file.
#    (Previously expanded the undefined name BASE_PTH, which silently
#    produced a path rooted at "/".)
INPUT_DATA_PATH="${BASE_PATH}/evaluation/eval_by_LongBenchV2/LongBench/data.json"
# 2. Directory containing the YaRN model checkpoints; every entry of
#    MODEL_NAMES is looked up as a sub-directory of this path.
MODEL_DIR_PATH="${BASE_PATH}/output_yarn/qwen_yarn"

# Work from the LongBench v2 evaluation directory: the output, log and script
# paths used below (./results, ./logs, ./pred_batch.py) are all relative to it.
# Abort if the directory cannot be entered rather than running everything
# against whatever the caller's current directory happens to be.
cd "${BASE_PATH}/evaluation/eval_by_LongBenchV2" || {
    echo "Cannot cd to ${BASE_PATH}/evaluation/eval_by_LongBenchV2" >&2
    exit 1
}

# ---- Environment exported to the vLLM server process ----
VLLM_WORKER_MULTIPROC_METHOD="spawn"
CUDA_VISIBLE_DEVICES="0,1,2,3"
export VLLM_WORKER_MULTIPROC_METHOD CUDA_VISIBLE_DEVICES

# ---- Server settings ----
# PORT is used both by the server (--port) and by pred_batch.py (--port).
PORT=8092
# Passed as --tensor-parallel-size; keep in sync with CUDA_VISIBLE_DEVICES.
NUM_GPUS=4
# Passed as --max-model-len.
MAX_MODEL_LEN=128000

# ---- Evaluation run settings ----
# Forwarded to pred_batch.py as --run_time and embedded in the log file name.
RUN_TIME="5"
# Prediction output directory (pred_batch.py --output_dir); created up front.
OUTPUT_PATH="./results"
mkdir -p "${OUTPUT_PATH}"
# Evaluate every model listed in MODEL_NAMES. For each model:
#   1. start a vLLM OpenAI-compatible API server in the background,
#   2. poll its log until "Application startup complete." appears,
#   3. run pred_batch.py against the server,
#   4. tear the server down so the next model can reuse the port and GPUs.
MODEL_NAMES=("[NAME_OF_THE_MODEL_W_YARN]")

# Maximum number of seconds to wait for the server to finish starting.
TIMEOUT=1800
# Seconds the server is given to exit after SIGTERM before it is force-killed.
VLLM_STOP_GRACE=30

mkdir -p ./logs

#######################################
# Block until the vLLM server reports that startup is complete.
# Globals:   TIMEOUT (read)
# Arguments: $1 - PID of the background server process
#            $2 - path of the server log file
# Outputs:   progress and the tail of the log on stdout; failures on stderr
# Returns:   0 once the log contains the startup marker,
#            1 if the process exits first or TIMEOUT seconds elapse
#            (on timeout the server is sent SIGTERM).
#######################################
wait_for_vllm_server() {
    local pid=$1
    local log_file=$2
    local start_time
    start_time=$(date +%s)

    while true; do
        if grep -qF "Application startup complete." "${log_file}"; then
            echo "vllm serve LAUNCHED"
            return 0
        fi

        # Without this check a server that crashed on startup would be
        # polled until the full TIMEOUT expired.
        if ! kill -0 "${pid}" 2>/dev/null; then
            echo "The vllm serve process (PID ${pid}) exited during startup. The last 20 lines of logs are as follows:" >&2
            tail -n 20 "${log_file}" >&2
            return 1
        fi

        echo "${pid}"
        echo "vllm serve has not been started yet. The last 20 lines of logs are as follows:"
        tail -n 20 "${log_file}"

        if (( $(date +%s) - start_time > TIMEOUT )); then
            echo "The vllm serve has timed out during startup" >&2
            kill "${pid}"
            return 1
        fi

        sleep 5
    done
}

#######################################
# Stop the vLLM server and release its GPUs.
# Globals:   VLLM_STOP_GRACE (read)
# Arguments: $1 - PID of the background server process
#######################################
stop_vllm_server() {
    local pid=$1
    local waited=0

    # Ask politely first and give the server a chance to shut down cleanly.
    kill "${pid}" 2>/dev/null
    while kill -0 "${pid}" 2>/dev/null && (( waited < VLLM_STOP_GRACE )); do
        sleep 1
        waited=$(( waited + 1 ))
    done

    # Force-kill whatever is left. pkill never matches itself, and the
    # pattern is restricted to the server entrypoint so that unrelated
    # processes that merely mention "vllm" (including this script) survive.
    kill -9 "${pid}" 2>/dev/null
    pkill -9 -f 'vllm\.entrypoints\.openai\.api_server'
    # NOTE(review): this kills EVERY process holding an NVIDIA device node,
    # not only this server's workers. Kept because the original relied on it
    # to free the GPUs; do not run this script on a machine shared with
    # other GPU jobs.
    fuser -k -9 /dev/nvidia*

    # Reap the server so its port is released before the next launch.
    wait "${pid}" 2>/dev/null
}

for MODEL_NAME in "${MODEL_NAMES[@]}"; do
    MODEL_PATH="${MODEL_DIR_PATH}/${MODEL_NAME}"
    LOG_FILE="./logs/vllm_${MODEL_NAME}_${RUN_TIME}.log"
    # Start from an empty log that already exists when polling begins, so a
    # stale startup marker from a previous run cannot be matched.
    : > "${LOG_FILE}"
    python -m vllm.entrypoints.openai.api_server \
        --served-model-name "${MODEL_NAME}" \
        --model "${MODEL_PATH}" \
        --tensor-parallel-size="${NUM_GPUS}" \
        --port "${PORT}" \
        --trust-remote-code --max-model-len "${MAX_MODEL_LEN}" \
        --disable-log-requests > "${LOG_FILE}" 2>&1 &
    VLLM_PID=$!

    if ! wait_for_vllm_server "${VLLM_PID}" "${LOG_FILE}"; then
        echo "The vllm serve failed to operate" >&2
        exit 1
    fi

    echo "vllm serve runs successfully and begins to execute pred_batch.py"
    # A failed prediction run is reported but does not abort the remaining
    # models; the server is torn down either way.
    if ! python ./pred_batch.py \
        --input_file "${INPUT_DATA_PATH}" \
        --tokenizer_path "${MODEL_PATH}" \
        --output_dir "${OUTPUT_PATH}" \
        --model_name "${MODEL_NAME}" \
        --port "${PORT}" \
        --run_time "${RUN_TIME}"; then
        echo "pred_batch.py failed for model ${MODEL_NAME}" >&2
    fi

    stop_vllm_server "${VLLM_PID}"

done

# Runs once, after the per-model loop has finished, from the current
# working directory.
# NOTE(review): presumably summarizes the predictions written under the
# results directory — confirm against result_diy_parallel.py.
python ./result_diy_parallel.py