name: Build cmake
description: 'Set up a conda toolchain, build all targets with CMake, and run the C++ and Python tests.'
# All feature switches are passed straight to CMake as -DFAISS_ENABLE_*=<value>
# and are compared against the literal strings 'ON' / 'OFF' in step conditions.
inputs:
  opt_level:
    description: 'Compile options / optimization level.'
    required: false
    default: generic
  gpu:
    description: 'Enable GPU support.'
    required: false
    # Quoted: a bare OFF is boolean false for YAML 1.1 loaders, which would
    # no longer equal the string 'OFF' the steps test for.
    default: 'OFF'
  cuvs:
    description: 'Enable cuVS support.'
    required: false
    default: 'OFF'
  rocm:
    description: 'Enable ROCm support.'
    required: false
    default: 'OFF'
runs:
  using: composite
  steps:
    # Installs a Miniforge-based conda with Python 3.11 for all later steps.
    - name: Setup miniconda
      uses: conda-incubator/setup-miniconda@v3
      with:
        # Miniforge installer, so conda-forge is the channel in use.
        miniforge-version: latest
        channels: conda-forge
        conda-remove-defaults: 'true'
        python-version: '3.11'
        # Miniforge publishes its ARM build under the name "aarch64" rather
        # than "ARM64" (same architecture, different label), so translate the
        # runner's value; every other architecture is passed through as-is.
        architecture: ${{ runner.arch == 'ARM64' && 'aarch64' || runner.arch }}
    # Installs the conda packages needed to build and test: common build
    # tools, an architecture-specific compiler/BLAS, the CUDA stack selected
    # by the gpu/cuvs/rocm inputs, and PyTorch for the torch tests.
    - name: Configure build environment
      shell: bash
      run: |
        # initialize Conda
        conda config --set solver libmamba
        # Ensure starting packages are from conda-forge.
        conda list --show-channel-urls
        conda update -y -q conda
        # Expose conda's bin directory to all subsequent steps. The target
        # file path is quoted so it survives word splitting.
        echo "$CONDA/bin" >> "$GITHUB_PATH"

        # Build and test tooling shared by every configuration.
        conda install -y -q python=3.11 cmake=3.30.4 make=4.2 swig=4.0 "numpy<2" scipy=1.14 pytest=7.4 gflags=2.2

        # install base packages for ARM64
        if [ "${{ runner.arch }}" = "ARM64" ]; then
          conda install -y -q -c conda-forge openblas=0.3.29 gxx_linux-aarch64=14.2 sysroot_linux-aarch64=2.17
        fi

        # install base packages for X86_64
        if [ "${{ runner.arch }}" = "X64" ]; then
          # TODO: merge this with ARM64
          conda install -y -q -c conda-forge gxx_linux-64=14.2 sysroot_linux-64=2.17
          conda install -y -q mkl=2022.2.1 mkl-devel=2022.2.1
        fi

        # no CUDA needed for ROCm so skip this
        if [ "${{ inputs.rocm }}" = "ON" ]; then
          :
        # regular CUDA for GPU builds
        elif [ "${{ inputs.gpu }}" = "ON" ] && [ "${{ inputs.cuvs }}" = "OFF" ]; then
          conda install -y -q cuda-toolkit=12.4 -c "nvidia/label/cuda-12.4.0"
        # and CUDA from cuVS channel for cuVS builds
        # (this also installs gxx_linux-64=12.4, replacing the 14.2 compiler above)
        elif [ "${{ inputs.cuvs }}" = "ON" ]; then
          conda install -y -q libcuvs=25.04 'cuda-version>=12.0,<=12.5' cuda-toolkit=12.4.1 gxx_linux-64=12.4 -c rapidsai -c rapidsai-nightly -c conda-forge
        fi

        # install test packages
        if [ "${{ inputs.rocm }}" = "ON" ]; then
          : # skip torch install via conda, we need to install via pip to get
            #  ROCm-enabled version until it's supported in conda by PyTorch
        elif [ "${{ inputs.gpu }}" = "ON" ]; then
          conda install -y -q "pytorch<2.5" pytorch-cuda=12.4 -c pytorch -c "nvidia/label/cuda-12.4.0"
        else
          conda install -y -q "pytorch<2.5" -c pytorch
        fi
    # Installs the ROCm development packages from AMD's apt repository.
    # Only runs for ROCm builds.
    - name: ROCm - Install dependencies
      if: inputs.rocm == 'ON'
      shell: bash
      run: |
        # Update repos and install kmod, wget, gpg
        sudo apt-get -qq update >/dev/null
        sudo apt-get -qq install -y kmod wget gpg >/dev/null

        # Get UBUNTU version name (the codename used in the apt source line)
        UBUNTU_VERSION_NAME="$(grep UBUNTU_CODENAME /etc/os-release | awk -F= '{print $2}')"

        # Set ROCm version
        ROCM_VERSION="6.2"

        # Download, prepare, and install the package signing key.
        # /etc/apt is root-owned, so the keyring directory needs sudo too.
        sudo mkdir --parents --mode=0755 /etc/apt/keyrings
        wget -qO - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

        # Add rocm repository over HTTPS and bind it to the key installed
        # above via signed-by, so apt verifies the packages instead of relying
        # on the deprecated apt-key store or on unauthenticated installs.
        rocm_baseurl="https://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
        echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" | sudo tee /etc/apt/sources.list.d/rocm.list
        sudo apt-get -qq update >/dev/null
        sudo apt-get -qq install -y \
            "rocm-dev${ROCM_VERSION}" "rocm-utils${ROCM_VERSION}" \
            "rocm-libs${ROCM_VERSION}" >/dev/null

        # Fake presence of MI200-class accelerators
        echo "gfx90a" | sudo tee /opt/rocm/bin/target.lst

        # Cleanup
        sudo apt-get -qq autoclean >/dev/null
        sudo apt-get -qq clean >/dev/null
        sudo rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
    # Exposes libc / libpthread under /lib64 and /usr/lib64 for ROCm builds.
    - name: Symlink system dependencies
      if: inputs.rocm == 'ON'
      shell: bash
      run: |
        # symlink system libraries for HIP compiler
        sudo ln -s /lib/x86_64-linux-gnu/libc.so.6 /lib64/libc.so.6
        sudo ln -s /lib/x86_64-linux-gnu/libc_nonshared.a /usr/lib64/libc_nonshared.a
        sudo ln -s /usr/lib/x86_64-linux-gnu/libpthread.so.0 /lib64/libpthread.so.0
        # NOTE(review): assumes conda was installed under $HOME/miniconda3 and
        # that the sysroot package from the configure step is present — confirm.
        sudo ln -s "$HOME/miniconda3/x86_64-conda-linux-gnu/sysroot/usr/lib64/libpthread_nonshared.a" /usr/lib64/libpthread_nonshared.a
    # Configures the project into ./build with CMake and compiles everything.
    - name: Build all targets
      shell: bash
      run: |
        # Load conda's bash integration, then activate the default environment.
        eval "$(conda shell.bash hook)"
        conda activate
        # Feature switches come from the action inputs. The BLAS vendor and
        # the CUDA -gencode flags are only given a value on X64 runners and
        # are passed empty elsewhere.
        cmake -S . -B build \
              -DCMAKE_BUILD_TYPE=Release \
              -DBUILD_SHARED_LIBS=ON \
              -DBUILD_TESTING=ON \
              -DFAISS_ENABLE_C_API=ON \
              -DFAISS_OPT_LEVEL=${{ inputs.opt_level }} \
              -DFAISS_ENABLE_GPU=${{ inputs.gpu }} \
              -DFAISS_ENABLE_CUVS=${{ inputs.cuvs }} \
              -DFAISS_ENABLE_ROCM=${{ inputs.rocm }} \
              -DPYTHON_EXECUTABLE=$CONDA/bin/python \
              -DBLA_VENDOR=${{ runner.arch == 'X64' && 'Intel10_64_dyn' || '' }} \
              -DCMAKE_CUDA_FLAGS=${{ runner.arch == 'X64' && '"-gencode arch=compute_75,code=sm_75"' || '' }}
        # One job per core; -k keeps building other targets after an error.
        make -k -C build -j"$(nproc)"
    # Runs the CMake "test" target; GoogleTest writes its XML reports under
    # test-results/googletest/ (absolute path, resolved from the workspace).
    - name: C++ tests
      shell: bash
      run: |
        GTEST_OUTPUT="xml:$(realpath .)/test-results/googletest/" make -C build test
    # Runs every executable named bench* under build/perf_tests with -v.
    # Skipped for ROCm builds.
    - name: C++ perf benchmarks
      if: inputs.rocm == 'OFF'
      shell: bash
      run: |
        # NOTE(review): with `-exec ... \;` a failing benchmark does not change
        # find's exit status, so failures here look best-effort — confirm intent.
        find ./build/perf_tests/ -type f -executable -name "bench*" -exec '{}' -v \;
    # Installs the Python bindings from the build tree into conda's Python.
    - name: Install Python extension
      working-directory: build/faiss/python
      shell: bash
      run: |
        "$CONDA/bin/python" setup.py install
    # ROCm builds get PyTorch from pip, using PyTorch's ROCm wheel index.
    - name: ROCm - install ROCm-enabled torch via pip
      shell: bash
      if: inputs.rocm == 'ON'
      run: |
        # NOTE(review): the wheel index targets rocm6.1 while the apt step
        # installs ROCm 6.2 packages — confirm this pairing is intended.
        pip3 install --index-url https://download.pytorch.org/whl/rocm6.1 torch torchvision torchaudio
    # CPU builds: run the core and torch Python test files, each writing a
    # JUnit XML report under test-results/pytest/.
    - name: Python tests (CPU only)
      shell: bash
      if: inputs.gpu == 'OFF'
      run: |
        # Core tests first, then the torch tests.
        pytest tests/test_*.py --junitxml=test-results/pytest/results.xml
        pytest tests/torch_*.py --junitxml=test-results/pytest/results-torch.xml
    # GPU builds: run the CPU test files, then the GPU test files, each
    # writing a JUnit XML report under test-results/pytest/.
    - name: Python tests (CPU + GPU)
      shell: bash
      if: inputs.gpu == 'ON'
      run: |
        # CPU-side tests.
        pytest tests/test_*.py --junitxml=test-results/pytest/results.xml
        pytest tests/torch_*.py --junitxml=test-results/pytest/results-torch.xml
        # Copy the shared test helper next to the GPU tests before running them.
        cp tests/common_faiss_tests.py faiss/gpu/test
        pytest faiss/gpu/test/test_*.py --junitxml=test-results/pytest/results-gpu.xml
        pytest faiss/gpu/test/torch_*.py --junitxml=test-results/pytest/results-gpu-torch.xml
    # avx2 builds only: use the dynamic loader's debug output to check which
    # faiss shared library gets loaded on import.
    - name: Test avx2 loading
      shell: bash
      if: inputs.opt_level == 'avx2'
      run: |
        py="$CONDA/bin/python"
        # With AVX2 disabled through the environment, expect faiss.so.
        FAISS_DISABLE_CPU_FEATURES=AVX2 LD_DEBUG=libs "$py" -c "import faiss" 2>&1 | grep faiss.so
        # With default settings, expect faiss_avx2.so.
        LD_DEBUG=libs "$py" -c "import faiss" 2>&1 | grep faiss_avx2.so
    # Uploads the test-results directory even when an earlier step failed.
    # The artifact name encodes the runner architecture and all four inputs.
    - name: Upload test results
      uses: actions/upload-artifact@v4
      if: always()
      with:
        path: test-results
        name: test-results-arch=${{ runner.arch }}-opt=${{ inputs.opt_level }}-gpu=${{ inputs.gpu }}-cuvs=${{ inputs.cuvs }}-rocm=${{ inputs.rocm }}
    # Prints the final package list so the job log records where each
    # package was installed from.
    - name: Check installed packages channel
      shell: bash
      run: |
        # List every installed package with its channel URL. Earlier steps may
        # also install from the nvidia, pytorch and rapidsai channels, so this
        # output is what shows which packages did not come from conda-forge.
        conda list --show-channel-urls
