# Nightly CI workflow: started either by the daily schedule or by hand.
name: Nightly and release tests on main/release branch

on:
  # Daily run at 00:00; GitHub Actions evaluates cron expressions in UTC.
  schedule:
    - cron: "0 0 * * *"
  # Manual trigger (Actions tab / API).
  workflow_dispatch:

# Workflow-level environment: available to every job and step below.
# Scalar values are quoted so they are always passed through as literal strings.
env:
  # Switches presumably consumed by the test suite and helper scripts —
  # confirm the exact readers before changing any of them.
  DIFFUSERS_IS_CI: "yes"
  RUN_SLOW: "yes"
  RUN_NIGHTLY: "yes"
  HF_XET_HIGH_PERFORMANCE: "1"
  PIPELINE_USAGE_CUTOFF: "0"
  # Thread caps for OpenMP / MKL.
  OMP_NUM_THREADS: "8"
  MKL_NUM_THREADS: "8"
  # Presumably a per-test timeout in seconds — TODO confirm against the pytest setup.
  PYTEST_TIMEOUT: "600"
  # Slack bot credential taken from the repository secrets.
  SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
  # File that generate_consolidated_report writes, prints to the step summary
  # and uploads as an artifact.
  CONSOLIDATED_REPORT_PATH: consolidated_test_report.md

jobs:
  # Computes the list of pipeline test modules and publishes it as the
  # `pipeline_test_matrix` job output, which the nightly pipeline job parses
  # with fromJson() to build its matrix.
  setup_torch_cuda_pipeline_matrix:
    name: Setup Torch Pipelines CUDA Slow Tests Matrix
    runs-on:
      group: aws-general-8-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
    outputs:
      pipeline_test_matrix: ${{ steps.fetch_pipeline_matrix.outputs.pipeline_test_matrix }}
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: Install dependencies
        run: |
          # Quote the extras spec so the shell can never treat `[test]` as a glob.
          pip install -e ".[test]"
          pip install huggingface_hub
      - name: Fetch Pipeline Matrix
        id: fetch_pipeline_matrix
        run: |
          matrix=$(python utils/fetch_torch_cuda_pipeline_test_matrix.py)
          # Quote every expansion: the value is JSON (brackets, quotes, spaces),
          # so it must not be word-split or glob-expanded when logged or stored.
          echo "$matrix"
          echo "pipeline_test_matrix=$matrix" >> "$GITHUB_OUTPUT"

      # Upload whatever ended up in `reports`, even if an earlier step failed.
      - name: Pipeline Tests Artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: test-pipelines.json
          path: reports

  # GPU job with one matrix entry per pipeline module published by
  # setup_torch_cuda_pipeline_matrix; each entry runs tests/pipelines/<module>.
  run_nightly_tests_for_torch_pipelines:
    name: Nightly Torch Pipelines CUDA Tests
    needs: setup_torch_cuda_pipeline_matrix
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    strategy:
      # Keep the remaining modules running when one fails; at most 8 in parallel.
      fail-fast: false
      max-parallel: 8
      matrix:
        module: ${{ fromJson(needs.setup_torch_cuda_pipeline_matrix.outputs.pipeline_test_matrix) }}
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: nvidia-smi

      # accelerate is installed from its GitHub repository; transformers is
      # pinned to 4.57.1 (the git-based install is kept below, commented out).
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
          uv pip install pytest-reportlog

      - name: Environment
        run: python utils/print_env.py

      # Runs the module's tests, skipping anything whose name matches Flax or Onnx.
      - name: Pipeline CUDA Test
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -k "not Flax and not Onnx" \
            --make-reports=tests_pipeline_${{ matrix.module }}_cuda \
            --report-log=tests_pipeline_${{ matrix.module }}_cuda.log \
            tests/pipelines/${{ matrix.module }}

      # On failure only: print the stats and the short failure summary to the log.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_stats.txt
          cat reports/tests_pipeline_${{ matrix.module }}_cuda_failures_short.txt

      # Always publish the reports directory for the consolidated report job.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: pipeline_${{ matrix.module }}_test_reports
          path: reports

  # GPU job covering the non-pipeline test folders plus the examples.
  # Every matrix entry except `examples` runs tests/<module>; the `examples`
  # entry runs the examples/ directory and writes its reports under a different
  # prefix (`examples_torch_cuda`).
  run_nightly_tests_for_other_torch_modules:
    name: Nightly Torch CUDA Tests
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    defaults:
      run:
        shell: bash
    strategy:
      fail-fast: false
      max-parallel: 2
      matrix:
        module: [models, schedulers, lora, others, single_file, examples]
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      # peft and accelerate are installed from their GitHub repositories;
      # transformers is pinned to 4.57.1.
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install peft@git+https://github.com/huggingface/peft.git
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
          uv pip install pytest-reportlog
      - name: Environment
        run: python utils/print_env.py

      - name: Run nightly PyTorch CUDA tests for non-pipeline modules
        if: ${{ matrix.module != 'examples'}}
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -k "not Flax and not Onnx" \
            --make-reports=tests_torch_${{ matrix.module }}_cuda \
            --report-log=tests_torch_${{ matrix.module }}_cuda.log \
            tests/${{ matrix.module }}

      - name: Run nightly example tests with Torch
        if: ${{ matrix.module == 'examples' }}
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            --make-reports=examples_torch_cuda \
            --report-log=examples_torch_cuda.log \
            examples/

      # The report prefix must match the --make-reports value of the step that
      # actually ran: `examples_torch_cuda` for the examples entry,
      # `tests_torch_<module>_cuda` for every other module. Using the module
      # pattern for `examples` would cat files that were never written.
      - name: Failure short reports
        if: ${{ failure() }}
        env:
          REPORT_PREFIX: ${{ matrix.module == 'examples' && 'examples_torch_cuda' || format('tests_torch_{0}_cuda', matrix.module) }}
        run: |
          cat "reports/${REPORT_PREFIX}_stats.txt"
          cat "reports/${REPORT_PREFIX}_failures_short.txt"

      # Always publish the reports directory for the consolidated report job.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_${{ matrix.module }}_cuda_test_reports
          path: reports

  # Single GPU job that runs only the tests selected by `-k "compile"` across tests/.
  run_torch_compile_tests:
    name: PyTorch Compile CUDA tests
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --gpus all --shm-size "16gb" --ipc host
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: nvidia-smi

      # transformers is pinned to 4.57.1 (git-based install kept below, commented out).
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality,training]"
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1

      - name: Environment
        run: python utils/print_env.py

      - name: Run torch compile tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # Presumably opts in to the compile-gated tests — confirm in the test utilities.
          RUN_COMPILE: "yes"
        run: pytest -n 1 --max-worker-restart=0 --dist=loadfile -k "compile" --make-reports=tests_torch_compile_cuda tests/

      # On failure only: print the short failure summary to the log.
      - name: Failure short reports
        if: ${{ failure() }}
        run: cat reports/tests_torch_compile_cuda_failures_short.txt

      # Always publish the reports directory for the consolidated report job.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_compile_test_reports
          path: reports

  # Runs the tests carrying the `big_accelerator` pytest marker on the larger
  # GPU runner group.
  run_big_gpu_torch_tests:
    name: Torch tests on big GPU
    runs-on:
      group: aws-g6e-xlarge-plus
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    # NOTE(review): no `matrix` is declared for this job, so these strategy
    # settings have nothing to fan out over — confirm whether they are needed.
    strategy:
      fail-fast: false
      max-parallel: 2
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: nvidia-smi

      # peft and accelerate are installed from their GitHub repositories;
      # transformers is pinned to 4.57.1.
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install peft@git+https://github.com/huggingface/peft.git
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
          uv pip install pytest-reportlog

      - name: Environment
        run: python utils/print_env.py

      - name: Selected Torch CUDA Test on big GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
          # Presumably a memory threshold read by the big-accelerator tests — TODO confirm unit.
          BIG_GPU_MEMORY: "40"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -m "big_accelerator" \
            --make-reports=tests_big_gpu_torch_cuda \
            --report-log=tests_big_gpu_torch_cuda.log \
            tests/

      # On failure only: print the stats and the short failure summary to the log.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_big_gpu_torch_cuda_stats.txt
          cat reports/tests_big_gpu_torch_cuda_failures_short.txt

      # Always publish the reports directory for the consolidated report job.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_cuda_big_gpu_test_reports
          path: reports

  # Runs a fixed list of core test files inside the
  # `diffusers-pytorch-minimum-cuda` container image.
  torch_minimum_version_cuda_tests:
    name: Torch Minimum Version CUDA Tests
    defaults:
      run:
        shell: bash
    runs-on:
      group: aws-g4dn-2xlarge
    container:
      image: diffusers/diffusers-pytorch-minimum-cuda
      options: --shm-size "16gb" --ipc host --gpus all
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      # peft and accelerate are installed from their GitHub repositories;
      # transformers is pinned to 4.57.1.
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install peft@git+https://github.com/huggingface/peft.git
          uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1

      - name: Environment
        run: python utils/print_env.py

      # Explicit file list rather than whole folders; Flax/Onnx-named tests are skipped.
      - name: Run PyTorch CUDA tests
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            -k "not Flax and not Onnx" \
            --make-reports=tests_torch_minimum_version_cuda \
            tests/models/test_modeling_common.py \
            tests/pipelines/test_pipelines_common.py \
            tests/pipelines/test_pipeline_utils.py \
            tests/pipelines/test_pipelines.py \
            tests/pipelines/test_pipelines_auto.py \
            tests/schedulers/test_schedulers.py \
            tests/others

      # On failure only: print the stats and the short failure summary to the log.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_torch_minimum_version_cuda_stats.txt
          cat reports/tests_torch_minimum_version_cuda_failures_short.txt

      # Always publish the reports directory for the consolidated report job.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_minimum_version_cuda_test_reports
          path: reports

  # One matrix entry per quantization backend. For each entry:
  #   backend         - package installed with `uv pip install -U`, also used in report names
  #   test_location   - sub-folder of tests/quantization that is run
  #   additional_deps - extra packages installed only when the list is non-empty
  run_nightly_quantization_tests:
    name: Torch quantization nightly tests
    runs-on:
      group: aws-g6e-xlarge-plus
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "20gb" --ipc host --gpus all
    strategy:
      fail-fast: false
      max-parallel: 2
      matrix:
        config:
          - backend: bitsandbytes
            test_location: bnb
            additional_deps:
              - peft
          - backend: gguf
            test_location: gguf
            additional_deps:
              - peft
              - kernels
          - backend: torchao
            test_location: torchao
            additional_deps: []
          - backend: optimum_quanto
            test_location: quanto
            additional_deps: []
          - backend: nvidia_modelopt
            test_location: modelopt
            additional_deps: []
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: NVIDIA-SMI
        run: nvidia-smi

      # Installs the backend under test, then its optional extras, and finally
      # pins transformers to 4.57.1.
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install -U ${{ matrix.config.backend }}
          if [ "${{ join(matrix.config.additional_deps, ' ') }}" != "" ]; then
              uv pip install ${{ join(matrix.config.additional_deps, ' ') }}
          fi
          uv pip install pytest-reportlog
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1

      - name: Environment
        run: python utils/print_env.py

      - name: ${{ matrix.config.backend }} quantization tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
          BIG_GPU_MEMORY: "40"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            --make-reports=tests_${{ matrix.config.backend }}_torch_cuda \
            --report-log=tests_${{ matrix.config.backend }}_torch_cuda.log \
            tests/quantization/${{ matrix.config.test_location }}

      # On failure only: print the stats and the short failure summary to the log.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_${{ matrix.config.backend }}_torch_cuda_stats.txt
          cat reports/tests_${{ matrix.config.backend }}_torch_cuda_failures_short.txt

      # Always publish the reports directory for the consolidated report job.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_cuda_${{ matrix.config.backend }}_reports
          path: reports
          
  # Runs tests/quantization/test_pipeline_level_quantization.py with the
  # bitsandbytes and optimum_quanto backends installed.
  run_nightly_pipeline_level_quantization_tests:
    # Distinct display name: the per-backend job above is already called
    # "Torch quantization nightly tests", and sharing that label made the two
    # jobs hard to tell apart in the Actions UI.
    name: Torch pipeline-level quantization nightly tests
    # NOTE(review): no `matrix` is declared for this job, so these strategy
    # settings have nothing to fan out over — confirm whether they are needed.
    strategy:
      fail-fast: false
      max-parallel: 2
    runs-on:
      group: aws-g6e-xlarge-plus
    container:
      image: diffusers/diffusers-pytorch-cuda
      options: --shm-size "20gb" --ipc host --gpus all
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2
      - name: NVIDIA-SMI
        run: nvidia-smi
      # transformers is pinned to 4.57.1 (git-based install kept below, commented out).
      - name: Install dependencies
        run: |
          uv pip install -e ".[quality]"
          uv pip install -U bitsandbytes optimum_quanto
          #uv pip uninstall transformers huggingface_hub && uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git
          uv pip uninstall transformers huggingface_hub && uv pip install transformers==4.57.1
          uv pip install pytest-reportlog
      - name: Environment
        run: |
          python utils/print_env.py
      - name: Pipeline-level quantization tests on GPU
        env:
          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
          # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms
          CUBLAS_WORKSPACE_CONFIG: ":16:8"
          BIG_GPU_MEMORY: "40"
        run: |
          pytest -n 1 --max-worker-restart=0 --dist=loadfile \
            --make-reports=tests_pipeline_level_quant_torch_cuda \
            --report-log=tests_pipeline_level_quant_torch_cuda.log \
            tests/quantization/test_pipeline_level_quantization.py
      # On failure only: print the stats and the short failure summary to the log.
      - name: Failure short reports
        if: ${{ failure() }}
        run: |
          cat reports/tests_pipeline_level_quant_torch_cuda_stats.txt
          cat reports/tests_pipeline_level_quant_torch_cuda_failures_short.txt
      # Always publish the reports directory for the consolidated report job.
      - name: Test suite reports artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: torch_cuda_pipeline_level_quant_reports
          path: reports

  # Final job: waits for every test job, gathers the *.txt report files from
  # their uploaded artifacts, and builds one consolidated report from them.
  generate_consolidated_report:
    name: Generate Consolidated Test Report
    # Runs even when upstream jobs failed, so failures still get reported.
    if: always()
    needs:
      - run_nightly_tests_for_torch_pipelines
      - run_nightly_tests_for_other_torch_modules
      - run_torch_compile_tests
      - run_big_gpu_torch_tests
      - run_nightly_quantization_tests
      - run_nightly_pipeline_level_quantization_tests
      # - run_nightly_onnx_tests
      - torch_minimum_version_cuda_tests
      # - run_flax_tpu_tests
    runs-on:
      group: aws-general-8-plus
    container:
      image: diffusers/diffusers-pytorch-cpu
    steps:
      - name: Checkout diffusers
        uses: actions/checkout@v3
        with:
          fetch-depth: 2

      - name: Create reports directory
        run: mkdir -p combined_reports

      # No artifact name is given, so every artifact of this run is fetched
      # into its own sub-folder of `artifacts`.
      - name: Download all test reports
        uses: actions/download-artifact@v4
        with:
          path: artifacts

      - name: Prepare reports
        run: |
          # Move all report files to a single directory for processing
          find artifacts -name "*.txt" -exec cp {} combined_reports/ \;

      - name: Install dependencies
        run: |
          pip install -e .[test]
          pip install slack_sdk tabulate

      # Output file name comes from the workflow-level CONSOLIDATED_REPORT_PATH variable.
      - name: Generate consolidated report
        run: |
          python utils/consolidated_test_report.py \
            --reports_dir combined_reports \
            --output_file $CONSOLIDATED_REPORT_PATH \
            --slack_channel_name diffusers-ci-nightly

      # Append the report to the run's summary page.
      - name: Show consolidated report
        run: cat $CONSOLIDATED_REPORT_PATH >> $GITHUB_STEP_SUMMARY

      - name: Upload consolidated report
        uses: actions/upload-artifact@v4
        with:
          name: consolidated_test_report
          path: ${{ env.CONSOLIDATED_REPORT_PATH }}

# M1 runner currently not well supported
# TODO: (Dhruv) add these back when we setup better testing for Apple Silicon
#  run_nightly_tests_apple_m1:
#    name: Nightly PyTorch MPS tests on MacOS
#    runs-on: [ self-hosted, apple-m1 ]
#    if: github.event_name == 'schedule'
#
#    steps:
#      - name: Checkout diffusers
#        uses: actions/checkout@v3
#        with:
#          fetch-depth: 2
#
#      - name: Clean checkout
#        shell: arch -arch arm64 bash {0}
#        run: |
#          git clean -fxd
#      - name: Setup miniconda
#        uses: ./.github/actions/setup-miniconda
#        with:
#          python-version: 3.9
#
#      - name: Install dependencies
#        shell: arch -arch arm64 bash {0}
#        run: |
#          ${CONDA_RUN} pip install --upgrade pip uv
#          ${CONDA_RUN} uv pip install -e ".[quality]"
#          ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
#          ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate
#          ${CONDA_RUN} uv pip install pytest-reportlog
#      - name: Environment
#        shell: arch -arch arm64 bash {0}
#        run: |
#          ${CONDA_RUN} python utils/print_env.py
#      - name: Run nightly PyTorch tests on M1 (MPS)
#        shell: arch -arch arm64 bash {0}
#        env:
#          HF_HOME: /System/Volumes/Data/mnt/cache
#          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
#        run: |
#          ${CONDA_RUN} pytest -n 1  --make-reports=tests_torch_mps \
#            --report-log=tests_torch_mps.log \
#            tests/
#      - name: Failure short reports
#        if: ${{ failure() }}
#        run: cat reports/tests_torch_mps_failures_short.txt
#
#      - name: Test suite reports artifacts
#        if: ${{ always() }}
#        uses: actions/upload-artifact@v4
#        with:
#          name: torch_mps_test_reports
#          path: reports
#
#      - name: Generate Report and Notify Channel
#        if: always()
#        run: |
#          pip install slack_sdk tabulate
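#          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
#
# NOTE: the commented-out job below is a verbatim second copy of the one above
# (the two pastes had been fused onto a single line); keep only one copy when re-enabling.
#  run_nightly_tests_apple_m1: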
#    name: Nightly PyTorch MPS tests on MacOS
#    runs-on: [ self-hosted, apple-m1 ]
#    if: github.event_name == 'schedule'
#
#    steps:
#      - name: Checkout diffusers
#        uses: actions/checkout@v3
#        with:
#          fetch-depth: 2
#
#      - name: Clean checkout
#        shell: arch -arch arm64 bash {0}
#        run: |
#          git clean -fxd
#      - name: Setup miniconda
#        uses: ./.github/actions/setup-miniconda
#        with:
#          python-version: 3.9
#
#      - name: Install dependencies
#        shell: arch -arch arm64 bash {0}
#        run: |
#          ${CONDA_RUN} pip install --upgrade pip uv
#          ${CONDA_RUN} uv pip install -e ".[quality]"
#          ${CONDA_RUN} uv pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
#          ${CONDA_RUN} uv pip install accelerate@git+https://github.com/huggingface/accelerate
#          ${CONDA_RUN} uv pip install pytest-reportlog
#      - name: Environment
#        shell: arch -arch arm64 bash {0}
#        run: |
#          ${CONDA_RUN} python utils/print_env.py
#      - name: Run nightly PyTorch tests on M1 (MPS)
#        shell: arch -arch arm64 bash {0}
#        env:
#          HF_HOME: /System/Volumes/Data/mnt/cache
#          HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }}
#        run: |
#          ${CONDA_RUN} pytest -n 1  --make-reports=tests_torch_mps \
#            --report-log=tests_torch_mps.log \
#            tests/
#      - name: Failure short reports
#        if: ${{ failure() }}
#        run: cat reports/tests_torch_mps_failures_short.txt
#
#      - name: Test suite reports artifacts
#        if: ${{ always() }}
#        uses: actions/upload-artifact@v4
#        with:
#          name: torch_mps_test_reports
#          path: reports
#
#      - name: Generate Report and Notify Channel
#        if: always()
#        run: |
#          pip install slack_sdk tabulate
#          python utils/log_reports.py >> $GITHUB_STEP_SUMMARY
