# Manually dispatched evaluation workflow; the inputs below select which
# repository/ref is built and which backends and phases are exercised.
name: mllm_api_eval

on:
  workflow_dispatch:
    inputs:
      # Repository and ref handed to actions/checkout in the build and staging jobs.
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      # Parsed with fromJSON() to build the test matrix, so the value must be
      # a JSON array of strings (double-quoted elements).
      backend:
        required: true
        description: 'Set backend filter as a JSON list. Default is ["turbomind", "pytorch"]'
        type: string
        default: '["turbomind", "pytorch"]'
      # Selects which pytest marker groups ("infer", "eval") the test job runs.
      execution_mode:
        required: false
        description: 'Select execution mode: infer, eval, or both. Default is "both"'
        type: choice
        options:
          - both
          - infer
          - eval
        default: 'both'
      # Forwarded to pytest as --run_id; an empty value falls back to github.run_id.
      run_id:
        required: false
        description: 'Set custom run ID. If not provided, github.run_id will be used'
        type: string
        default: ''


# Workflow-level environment, exported to every step of every job.
env:
  # Host-side locations.
  HOST_PIP_CACHE_DIR: '/nvme/github-actions/pip-cache'
  HOST_LOCALTIME: '/usr/share/zoneinfo/Asia/Shanghai'
  # Runner switch; value is delivered to steps as the string "true".
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: 'true'
  # Per-run allure results directory (also holds status.txt written by the jobs).
  REPORT_DIR: '/nvme/qa_test_models/mllm_evaluation_report/allure_report/${{ github.run_id }}'
  COV_PARAM: '--cov /opt/py3/lib/python3.10/site-packages/lmdeploy'
  FAIL_CONFIG: '--lf'
  # Per-run staging directory shared between jobs (repository checkout + built wheel).
  TEST_CODE_PATH: '/nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}'
  # Pre-provisioned offline resources on the shared volume.
  OFFLINE_CODE_PATH: '/nvme/qa_test_models/offline_pkg/lmdeploy'
  OFFLINE_REQUIREMENTS: '/nvme/qa_test_models/offline_pkg/requirements.txt'
  DEEPSEEK_VL: '/nvme/qa_test_models/offline_pkg/DeepSeek-VL'
  # Not referenced by any step in this file; presumably read by the
  # evaluation tooling from the environment -- TODO confirm.
  LMUData: '/nvme/qa_test_models/LMUData'
  LOCAL_LLM: 'Qwen2.5-32B-Instruct'
  OPENAI_API_KEY: 'sk-empty'
  # Hugging Face offline/cache settings; values reach steps as strings.
  HF_DATASETS_OFFLINE: '1'
  HF_DATASETS_CACHE: '/nvme/qa_test_models/hf_datasets'
  HF_HUB_OFFLINE: '1'
  HF_EVALUATE_OFFLINE: '1'

jobs:
  # Builds the lmdeploy wheel on a GitHub-hosted runner and publishes it as a
  # short-lived artifact that the download_pkgs job consumes.
  linux-build:
    if: ${{ !cancelled() }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda12.8
      OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
    steps:
      - name: Free disk space
        # Pinned to a release tag instead of the moving `main` branch so the
        # code executed by this third-party action cannot change silently.
        uses: jlumbroso/free-disk-space@v1.3.1
        with:
          # This might remove tools that are actually needed, if set to "true" but frees about 6 GB
          tool-cache: false
          docker-images: false
          # All of these default to true, but feel free to set to "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false
      - name: Checkout repository
        # v4 runs on a supported Node runtime on the hosted runner.
        uses: actions/checkout@v4
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          # Log the build parameters for easier debugging.
          echo "${PYTHON_VERSION}"
          echo "${PLAT_NAME}"
          echo "${DOCKER_TAG}"
          echo "${OUTPUT_FOLDER}"
          echo "${GITHUB_RUN_ID}"
          # remove -it: no TTY is attached in CI, so the interactive flags are stripped
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh "${PYTHON_VERSION}" "${PLAT_NAME}" "${DOCKER_TAG}" "${OUTPUT_FOLDER}"
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          # Fail the job when the build produced nothing to upload.
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          # The name must match the one used by the download step in download_pkgs.
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Stages the repository checkout and the freshly built wheel under
  # TEST_CODE_PATH on the shared volume so the test jobs can copy them.
  #
  # NOTE(review): the step conditions below reference `inputs.offline_mode`,
  # but no `offline_mode` input is declared under `on.workflow_dispatch.inputs`,
  # and `schedule` is not a trigger of this workflow. As written, the
  # "- offline" steps never run and the online steps always run. Either declare
  # the input or drop the offline branches.
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Clone repository
        # NOTE(review): older major version than the checkout used in linux-build;
        # left unchanged because it runs inside the job container -- confirm
        # before upgrading.
        uses: actions/checkout@v2
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        # Recreate the per-run staging directory from scratch; `-p` also
        # creates the parent directory when it does not exist yet.
        run: |
          rm -rf "${{env.TEST_CODE_PATH}}"
          mkdir -p "${{env.TEST_CODE_PATH}}"
          chmod 777 "${{env.TEST_CODE_PATH}}"
          cp -r . "${{env.TEST_CODE_PATH}}"
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: |
          rm -rf "${{env.TEST_CODE_PATH}}"
          mkdir -p "${{env.TEST_CODE_PATH}}"
          chmod 777 "${{env.TEST_CODE_PATH}}"
          cp -r "${{env.OFFLINE_CODE_PATH}}"/. "${{env.TEST_CODE_PATH}}"
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          # Must match the artifact name uploaded by linux-build (pyver py310).
          name: my-artifact-${{ github.run_id }}-py310
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        # Drop any wheel that came along with the repository copy, then stage
        # the wheel downloaded into the workspace.
        run: |
          rm -f "${{env.TEST_CODE_PATH}}"/lmdeploy-*.whl
          cp lmdeploy-*.whl "${{env.TEST_CODE_PATH}}"
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: |
          rm -f "${{env.TEST_CODE_PATH}}"/lmdeploy-*.whl
          cp "${{env.OFFLINE_CODE_PATH}}"/lmdeploy-*.whl "${{env.TEST_CODE_PATH}}"
      - name: Mark as start
        run: |
          chmod -R 777 "${{env.TEST_CODE_PATH}}"
          mkdir -p "${{env.REPORT_DIR}}"
          # Truncates status.txt and records the start time as epoch seconds.
          echo "starttime=$(date +%s)" > "${{env.REPORT_DIR}}/status.txt"

  # Runs the evaluation test suite once per (backend, gpu_num) combination
  # inside the lmdeploy container, using the code and wheel staged under
  # TEST_CODE_PATH by download_pkgs.
  test_evaluation:
    needs: download_pkgs
    if: ${{ !cancelled() }}
    runs-on: [self-hosted, test-140]
    timeout-minutes: 2400
    strategy:
      fail-fast: false
      matrix:
        # `backend` comes from the dispatch input (a JSON list of strings).
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
        gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
        # `n` is attached per gpu_num and passed to pytest `-n` in the infer phase.
        include:
          - n: 8
            gpu_num: gpu_num_1
          - n: 4
            gpu_num: gpu_num_2
          - n: 2
            gpu_num: gpu_num_4
          - n: 1
            gpu_num: gpu_num_8
    container:
      image: openmmlab/lmdeploy:latest-cu12.8
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /nvme/huggingface_hub:/nvme/huggingface_hub
        - /mnt/121:/mnt/121
        - /mnt/104:/mnt/104
        - /mnt/bigdisk:/mnt/bigdisk
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Copy repository and Artifacts
        # NOTE(review): REPORT_DIR is shared by all matrix jobs of a run, and
        # `>` truncates status.txt, so each job overwrites whatever earlier
        # jobs recorded -- confirm this is what the report consumer expects.
        run: |
          cp -r "${{env.TEST_CODE_PATH}}"/. .
          mkdir -p "${{env.REPORT_DIR}}"
          echo "starttime=$(date +%s)" > "${{env.REPORT_DIR}}/status.txt"
      - name: Install lmdeploy - dependency
        run: |
          python3 -m pip install -r "${{env.OFFLINE_REQUIREMENTS}}"
      - name: Install lmdeploy
        run: |
          python3 -m pip uninstall lmdeploy -y && python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      - name: Install vlmeval
        run: |
          python3 -m pip install pandas datasets scikit-learn pylatexenc math_verify
          # apt-get is the script-stable front end; plain `apt` is meant for interactive use.
          apt-get update && apt-get install -y libgl1 libglib2.0-0
          cp -r /nvme/qa_test_models/offline_pkg/VLMEvalKit .
          # Install through the same interpreter as every other pip call in this job.
          cd VLMEvalKit && python3 -m pip install .
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
          mkdir -p "${{env.REPORT_DIR}}"
          echo "starttime=$(date +%s)" > "${{env.REPORT_DIR}}/status.txt"
      - name: Setup paths for evaluation
        # The backend value originates from a free-form input; only the two
        # known backends are ever substituted into the script below.
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind')
        env:
          # User-supplied dispatch inputs are passed through the environment
          # rather than interpolated into the script text, so their content
          # cannot be interpreted as shell syntax.
          DISPATCH_EXECUTION_MODE: ${{ github.event.inputs.execution_mode || 'both' }}
          DISPATCH_RUN_ID: ${{ github.event.inputs.run_id || github.run_id }}
        run: |
          unset HTTP_PROXY;unset HTTPS_PROXY;unset http_proxy;unset https_proxy;
          cd VLMEvalKit && cp -r ../autotest .
          # Start from success; a failing phase records its exit code but does
          # not prevent the other phase from running.
          overall_exit=0
          if [ "${DISPATCH_EXECUTION_MODE}" = "both" ] || [ "${DISPATCH_EXECUTION_MODE}" = "infer" ]; then
            pytest autotest/evaluate/test_mllm_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" -n ${{matrix.n}} --run_id "${DISPATCH_RUN_ID}" --alluredir="${{env.REPORT_DIR}}" || overall_exit=$?
          fi
          if [ "${DISPATCH_EXECUTION_MODE}" = "both" ] || [ "${DISPATCH_EXECUTION_MODE}" = "eval" ]; then
            pytest autotest/evaluate/test_mllm_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --run_id "${DISPATCH_RUN_ID}" --alluredir="${{env.REPORT_DIR}}" || overall_exit=$?
          fi
          exit "${overall_exit}"
      - name: Clear workspace
        if: always()
        run: |
          # The report directory may be missing when an early step failed;
          # create it so the status write cannot abort the cleanup below.
          mkdir -p "${{env.REPORT_DIR}}"
          echo "status=done" >> "${{env.REPORT_DIR}}/status.txt"
          workdir="$(pwd)"
          # `:?` aborts instead of expanding to `/*` if workdir is ever empty.
          rm -rf "${workdir:?}"/*
