name: pr_run_test

on:
  pull_request:
    paths-ignore:
      - 'README.md'
      - 'README_zh-CN.md'
      - 'docs/**'
      - 'configs/**'
      - 'tools/**'
  workflow_dispatch:
  schedule:
    - cron:  '56 22 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

env:
  CONDA_ENV: pr_test
  HF_DATASETS_OFFLINE: 1
  HF_EVALUATE_OFFLINE: 1
  TRANSFORMERS_OFFLINE: 1
  VLLM_USE_MODELSCOPE: false
  LMDEPLOY_USE_MODELSCOPE: false
  HF_HUB_OFFLINE: 1
  CONDA_PATH: /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/miniconda3
  REPORT_ROOT: /mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/eval_report/prtest
  COMPASS_DATA_CACHE: /mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache
  HF_DATASETS_CACHE: /mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/hf_cache
  HF_HUB_CACHE: /mnt/shared-storage-user/large-model-center-share-weights/hf_hub
  KUBEBRAIN_CLUSTER_ENTRY: https://h.pjlab.org.cn
  KUBEBRAIN_NAMESPACE: ailab-opencompass

jobs:
  pr_run_test:
    runs-on: yidian_cu12
    timeout-minutes: 45
    steps:
      - name: Checkout repository
        uses: actions/checkout@v2
      - name: Prepare - Install opencompass
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          python3 -m pip uninstall opencompass -y
          python3 -m pip install .[full] -i https://pkg.pjlab.org.cn/repository/pypi-proxy/simple/ --trusted-host pkg.pjlab.org.cn --no-cache-dir
          conda info --envs
      - name: conda env
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          pip list
          lmdeploy check_env
      - name: Run test
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          conda info --envs
          rjob submit --name=pr-test-${{ github.run_id }}-${{ github.run_attempt }} --charged-group=opencompass_gpu --private-machine=group --group=opencompass_gpu --gpu=2 --cpu=32 --memory=32568 --private-machine=group --image=registry.h.pjlab.org.cn/ailab-puyu/xpuyu:torch-2.6.0-45d96d5f-0607 --env=COMPASS_DATA_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/compass_data_cache --env=TIKTOKEN_CACHE_DIR=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/share_tiktoken --env=HF_HUB_CACHE=/mnt/shared-storage-user/large-model-center-share-weights/hf_hub --env=HF_ENDPOINT=https://hf-mirror.com --env=HF_DATASETS_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/llmeval/hf_cache --env=HF_HUB_CACHE=/mnt/shared-storage-user/large-model-center-share-weights/hf_hub --env=CUDA_MODULE_LOADING=EAGER --env=HF_DATASETS_OFFLINE=1 --env=TRANSFORMERS_OFFLINE=1 --env=HF_EVALUATE_OFFLINE=1 --env=HF_HUB_OFFLINE=1 --mount=gpfs://gpfs1/opencompass-shared:/mnt/shared-storage-user/opencompass-shared --mount=gpfs://gpfs1/auto-eval-pipeline:/mnt/shared-storage-user/auto-eval-pipeline --mount=gpfs://gpfs1/large-model-center-share-weights:/mnt/shared-storage-user/large-model-center-share-weights --host-network=True -- bash -exc '/mnt/shared-storage-user/opencompass-shared/qa-llm-cicd/pr_test.sh ${{env.REPORT_ROOT}}/${{ github.run_id }}'

          for i in {1..300}; do
            current_status=$(rjob get pr-test-${{ github.run_id }}-${{ github.run_attempt }} | grep -oP 'rjob [^:]+: \K[^ ]+')
            if [[ $current_status == "Succeeded" || $current_status == "Failed" || $current_status == "Stopped" ]]; then
                echo "Current status: $current_status, stop checking"
                break
            fi
            sleep 6
          done
      - name:  Get result
        run: |
          score=$(sed -n '$p' ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result1/*/summary/*.csv | awk -F ',' '{print $NF}')
          if (( ${score%.*} >= 75 && ${score%.*} <= 80 )); then
             echo "score is $score between 75 and 80"
          else
             echo "score is $score not between 75 and 80"
             exit 1
          fi
          score=$(sed -n '$p' ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result2/*/summary/*.csv | awk -F ',' '{print $NF}')
          if (( ${score%.*} >= 75 && ${score%.*} <= 80 )); then
             echo "score is $score between 75 and 80"
          else
             echo "score is $score not between 75 and 80"
             exit 1
          fi
          score=$(sed -n '$p' ${{env.REPORT_ROOT}}/${{ github.run_id }}/regression_result3/*/summary/*.csv | awk -F ',' '{print $NF}')
          if (( ${score%.*} >= 75 && ${score%.*} <= 80 )); then
             echo "score is $score between 75 and 80"
          else
             echo "score is $score not between 75 and 80"
             exit 1
          fi
      - name:  Uninstall opencompass
        if: always()
        run: |
          . ${{env.CONDA_PATH}}/bin/activate
          conda activate ${{env.CONDA_ENV}}
          python3 -m pip uninstall opencompass -y
          conda info --envs

  notify_to_feishu:
    if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
    needs: [pr_run_test]
    timeout-minutes: 5
    runs-on: self-hosted
    steps:
      - name: notify
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}'  ${{ secrets.WEBHOOK_URL }}
