name: Fast GPU Tests on main

# Triggered manually, or by a push to `main` that touches a Python file under
# the library, examples, or tests trees.
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - "src/diffusers/**.py"
      - "examples/**.py"
      - "tests/**.py"

# Workflow-level environment, applied to every job in this file.
# Every value is quoted: environment variables are always strings, and plain
# scalars such as `yes` or `8` are open to boolean/integer typing by YAML
# tooling.
env:
  DIFFUSERS_IS_CI: "yes"
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  HF_XET_HIGH_PERFORMANCE: "1"
  PYTEST_TIMEOUT: "600"
  PIPELINE_USAGE_CUTOFF: "50000"

jobs:
  # Computes the pipeline test matrix on a CPU container and exposes it as the
  # `pipeline_test_matrix` job output.
  setup_torch_cuda_pipeline_matrix:
    name: Setup Torch Pipelines CUDA Slow Tests Matrix
    runs-on:
      group: aws-general-8-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
    outputs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      # checkout@v4 runs on Node 20, the same runtime as upload-artifact@v4
      # used at the end of this job.
      - name: Checkout diffusers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
      - name: Environment
        run: |
          python utils/print_env.py
      # Captures the script's stdout, logs it, and publishes it as a step
      # output. Expansions are quoted so the shell neither word-splits nor
      # glob-expands the value.
      - name: Fetch Pipeline Matrix
        id: fetch_pipeline_matrix
        run: |
          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
          echo "$matrix"
          echo "pipeline_test_matrix=$matrix" >> "$GITHUB_OUTPUT"
      # Uploaded even when an earlier step failed.
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: test-pipelines.json
          path: reports

  # Runs tests/pipelines/<module> on a GPU runner, one matrix entry per module
  # listed in the output of the setup_torch_cuda_pipeline_matrix job.
  torch_pipelines_cuda_tests:
    name: Torch Pipelines CUDA Tests
    needs: setup_torch_cuda_pipeline_matrix
    strategy:
      # Let the remaining modules finish when one of them fails.
      fail-fast: false
      max-parallel: 8
      matrix:
        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    steps:
      # checkout@v4 runs on Node 20, the same runtime as upload-artifact@v4
      # used at the end of this job.
      - name: Checkout diffusers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      # Installs the package in editable mode, swaps accelerate for a build
      # from its Git repository, and pins transformers to 4.57.1.
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
      - name: Environment
        run: |
          python utils/print_env.py
      - name: PyTorch CUDA checkpoint tests on Ubuntu
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          # Quoted: the value starts with a colon, which is easy to misread
          # (or mis-parse) as YAML mapping syntax.
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -k "not Flax and not Onnx" \
            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
            tests/pipelines/${{ matrix.module }}
      # Prints the summary reports into the job log when a step has failed.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt
      # Uploaded regardless of the test outcome; the artifact name is unique
      # per matrix entry.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports

  # Runs tests/<module> on a GPU runner for each module in the fixed matrix
  # below.
  torch_cuda_tests:
    name: Torch CUDA Tests
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    defaults:
      run:
        shell: bash
    strategy:
      # Let the remaining modules finish when one of them fails.
      fail-fast: false
      max-parallel: 2
      matrix:
        module: [models, schedulers, lora, others, single_file]
    steps:
      # checkout@v4 runs on Node 20, the same runtime as upload-artifact@v4
      # used at the end of this job.
      - name: Checkout diffusers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      # Installs the package in editable mode, adds peft and accelerate from
      # their Git repositories, and pins transformers to 4.57.1.
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install peft@git+https://github.com/huggingface/peft.git
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1

      - name: Environment
        run: |
          python utils/print_env.py

      - name: Run PyTorch CUDA tests
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          # Quoted: the value starts with a colon, which is easy to misread
          # (or mis-parse) as YAML mapping syntax.
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -k "not Flax and not Onnx" \
            --make-reports=tests_torch_cuda_${{ matrix.module }} \
            tests/${{ matrix.module }}

      # Prints the summary reports into the job log when a step has failed.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_torch_cuda_${{ matrix.module }}_stats.txt
          cat reports/tests_torch_cuda_${{ matrix.module }}_failures_short.txt

      # Uploaded regardless of the test outcome; the artifact name is unique
      # per matrix entry.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_cuda_test_reports_${{ matrix.module }}
          path: reports

  # Runs the tests under tests/ whose names match "compile" on a GPU runner,
  # with RUN_COMPILE set in the test step's environment.
  run_torch_compile_tests:
    name: PyTorch Compile CUDA tests

    runs-on:
      group: aws-g4dn-2xlarge

    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --gpus all --shm-size "16gb" --ipc host

    steps:
      # checkout@v4 runs on Node 20, the same runtime as upload-artifact@v4
      # used at the end of this job.
      - name: Checkout diffusers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      # Installs the package in editable mode with the quality and training
      # extras, then pins transformers to 4.57.1.
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality,training]"
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
      - name: Environment
        run: |
          python utils/print_env.py
      # Step label describes what is actually selected here: `-k "compile"`.
      - name: Run torch compile tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # Quoted so the value stays the literal string "yes".
          RUN_COMPILE: "yes"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/
      # Prints the same pair of summary reports as the other test jobs in this
      # workflow when a step has failed.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_torch_compile_cuda_stats.txt
          cat reports/tests_torch_compile_cuda_failures_short.txt

      # Uploaded regardless of the test outcome.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_compile_test_reports
          path: reports

  # Runs the tests under tests/ whose names match "xformers" on a GPU runner,
  # inside the xformers-enabled container image.
  run_xformers_tests:
    name: PyTorch xformers CUDA tests

    runs-on:
      group: aws-g4dn-2xlarge

    container:
      image: diffusers/diffusers-pytorch-xformers-cuda
      options: --gpus all --shm-size "16gb" --ipc host

    steps:
      # checkout@v4 runs on Node 20, the same runtime as upload-artifact@v4
      # used at the end of this job.
      - name: Checkout diffusers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality,training]"
      - name: Environment
        run: |
          python utils/print_env.py
      # Step label describes what is actually selected here: `-k "xformers"`.
      - name: Run xformers tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "xformers" --make-reports=tests_torch_xformers_cuda tests/
      # Prints the same pair of summary reports as the other test jobs in this
      # workflow when a step has failed.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_torch_xformers_cuda_stats.txt
          cat reports/tests_torch_xformers_cuda_failures_short.txt

      # Uploaded regardless of the test outcome.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_xformers_test_reports
          path: reports

  # Runs the test suite under examples/ on a GPU runner.
  run_examples_tests:
    name: Examples PyTorch CUDA tests on Ubuntu

    runs-on:
      group: aws-g4dn-2xlarge

    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --gpus all --shm-size "16gb" --ipc host
    steps:
      # checkout@v4 runs on Node 20, the same runtime as upload-artifact@v4
      # used at the end of this job.
      - name: Checkout diffusers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: |
          nvidia-smi
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality,training]"

      - name: Environment
        run: |
          python utils/print_env.py

      # NOTE(review): the "Install dependencies" step already installs the
      # `training` extra in editable mode, so the install below looks
      # redundant - confirm before removing it.
      - name: Run example tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
        run: |
          uv pip install ".[training]"
          pytest -n 1 --max-worker-restart=0 --dist=loadfile --make-reports=examples_torch_cuda examples/

      # Prints the summary reports into the job log when a step has failed.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/examples_torch_cuda_stats.txt
          cat reports/examples_torch_cuda_failures_short.txt

      # Uploaded regardless of the test outcome.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: examples_test_reports
          path: reports
