<!DOCTYPE html>
<!-- lang declares the page language so screen readers and translation tools
     pick the right pronunciation rules and dictionaries. -->
<html lang="en">
<head>
  <!-- Document metadata. -->
  <meta charset="utf-8">
  <meta name="description"
        content="Weakly-Supervised Audio-Visual Segmentation.">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Weakly-Supervised Audio-Visual Segmentation</title>

  <!-- Global site tag (gtag.js) - Google Analytics -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script>
  <script>
    // Reuse an existing dataLayer queue if one is already present on window.
    window.dataLayer = window.dataLayer || [];

    // Pushes its raw arguments object onto the dataLayer queue.
    function gtag() {
      dataLayer.push(arguments);
    }

    gtag('js', new Date());

    gtag('config', 'G-PYVRSFMDRL');
  </script>

  <!-- Web fonts. -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
        rel="stylesheet">

  <!-- Stylesheets: Bulma and its carousel/slider add-ons, icon fonts, then
       the page's own rules last. -->
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">

  <!-- Scripts: jQuery is loaded first, then the Bulma plugins, then the page
       script. Only the Font Awesome script is deferred. -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>
</head>
<body>


<!-- Title banner: paper title followed by the anonymized author lines. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">

          <!-- Paper title. -->
          <h1 class="title is-1 publication-title">
            Weakly-Supervised Audio-Visual Segmentation
          </h1>

          <!-- Author line (anonymous submission). -->
          <div class="is-size-5 publication-authors">
            <span class="author-block">Anonymous NeurIPS 2023 Submission,</span>
          </div>

          <!-- Submission identifier. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              Paper ID 1363
            </span>
          </div>

        </div>
      </div>
    </div>
  </div>
</section>


<!-- Teaser: framework figure followed by a one-sentence summary. -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <!-- alt gives the figure a text alternative for screen readers and for
           the case where the image fails to load. -->
      <img src="./resources/framework.png" id="teaser" height="100%"
           alt="Overview of the WS-AVS framework.">
      <h2 class="subtitle has-text-centered">
        WS-AVS predicts sound source masks given both audio and image as input without pixel-level annotations.
      </h2>
    </div>
  </div>
</section>

<!-- Abstract section: heading plus four justified paragraphs. -->
<section class="section">
  <div class="container is-max-desktop">
    <!-- Abstract. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <!-- Task definition. -->
          <p>
            Audio-visual segmentation is a challenging task that aims to predict pixel-level masks for sound sources in a video.
          </p>
          <!-- Limitation of prior work. -->
          <p>
            Previous work applied a comprehensive manually designed architecture with countless pixel-wise accurate masks as supervision.
            However, these pixel-level masks are expensive and not available in all cases.
          </p>
          <!-- Proposed problem setting and framework. -->
          <p>
            In this work, we investigate a novel weakly-supervised multi-modal problem,
            by simplifying the supervision as the instance-level annotation,
            <em>i.e.</em>, weakly-supervised audio-visual segmentation.
            We present a novel framework for Weakly-Supervised Audio-Visual Segmentation,
            namely WS-AVS, that can predict pixel-wise masks of high quality
            for sounding objects without pixel-level annotations.
          </p>
          <!-- Experimental summary. -->
          <p>
            We conduct extensive experiments on the AVSBench dataset.
            The results demonstrate that the proposed WS-AVS can achieve
            state-of-the-art weakly-supervised audio-visual segmentation performance.
          </p>
        </div>
      </div>
    </div>
    <!--/ Abstract. -->

  </div>
</section>


<!-- Qualitative results: one row of method labels, then seven example rows.
     Each example row holds a muted, looping video and a separate audio
     player for the matching .wav file. -->
<section class="section">
  <div class="container is-max-desktop">

    <h2 class="title">Qualitative Results</h2>

    <!-- Column labels. -->
    <div class="columns is-centered has-text-centered">
      <div class="column">
        <div class="content">
          Raw Video
        </div>
      </div>
      <div class="column">
        <div class="content">
          AVS(ws)
        </div>
      </div>
      <div class="column">
        <div class="content">
          CAM
        </div>
      </div>
      <div class="column">
        <div class="content">
          CCAM
        </div>
      </div>
      <div class="column">
        <div class="content">
          EZ-VSL
        </div>
      </div>
      <div class="column">
        <div class="content">
          WS-AVS (ours)
        </div>
      </div>
    </div>
    <!--/ Column labels. -->

    <!-- The video ids below are numbered per example so each id is unique
         within the document; the first example keeps the id instance_1. -->

    <!-- Video 1. -->
    <div class="columns is-centered">
      <div class="column">
        <div class="content">
          <video id="instance_1" autoplay muted loop playsinline height="100%" controls>
            <source src="resources/mp4_wav/1.mp4"
                    type="video/mp4">
          </video>
          <audio controls>
            <source src="resources/mp4_wav/1.wav" type="audio/wav">
          Your browser does not support the audio element.
          </audio>
        </div>
      </div>
    </div>
    <!--/ Video 1. -->

    <!-- Video 2. -->
    <div class="columns is-centered">
      <div class="column">
        <div class="content">
          <video id="instance_2" autoplay muted loop playsinline height="100%" controls>
            <source src="resources/mp4_wav/2.mp4"
                    type="video/mp4">
          </video>
          <audio controls>
            <source src="resources/mp4_wav/2.wav" type="audio/wav">
          Your browser does not support the audio element.
          </audio>
        </div>
      </div>
    </div>
    <!--/ Video 2. -->

    <!-- Video 3. -->
    <div class="columns is-centered">
      <div class="column">
        <div class="content">
          <video id="instance_3" autoplay muted loop playsinline height="100%" controls>
            <source src="resources/mp4_wav/3.mp4"
                    type="video/mp4">
          </video>
          <audio controls>
            <source src="resources/mp4_wav/3.wav" type="audio/wav">
          Your browser does not support the audio element.
          </audio>
        </div>
      </div>
    </div>
    <!--/ Video 3. -->

    <!-- Video 4. -->
    <div class="columns is-centered">
      <div class="column">
        <div class="content">
          <video id="instance_4" autoplay muted loop playsinline height="100%" controls>
            <source src="resources/mp4_wav/4.mp4"
                    type="video/mp4">
          </video>
          <audio controls>
            <source src="resources/mp4_wav/4.wav" type="audio/wav">
          Your browser does not support the audio element.
          </audio>
        </div>
      </div>
    </div>
    <!--/ Video 4. -->

    <!-- Video 5. -->
    <div class="columns is-centered">
      <div class="column">
        <div class="content">
          <video id="instance_5" autoplay muted loop playsinline height="100%" controls>
            <source src="resources/mp4_wav/5.mp4"
                    type="video/mp4">
          </video>
          <audio controls>
            <source src="resources/mp4_wav/5.wav" type="audio/wav">
          Your browser does not support the audio element.
          </audio>
        </div>
      </div>
    </div>
    <!--/ Video 5. -->

    <!-- Video 6. -->
    <div class="columns is-centered">
      <div class="column">
        <div class="content">
          <video id="instance_6" autoplay muted loop playsinline height="100%" controls>
            <source src="resources/mp4_wav/6.mp4"
                    type="video/mp4">
          </video>
          <audio controls>
            <source src="resources/mp4_wav/6.wav" type="audio/wav">
          Your browser does not support the audio element.
          </audio>
        </div>
      </div>
    </div>
    <!--/ Video 6. -->

    <!-- Video 7. -->
    <div class="columns is-centered">
      <div class="column">
        <div class="content">
          <video id="instance_7" autoplay muted loop playsinline height="100%" controls>
            <source src="resources/mp4_wav/7.mp4"
                    type="video/mp4">
          </video>
          <audio controls>
            <source src="resources/mp4_wav/7.wav" type="audio/wav">
          Your browser does not support the audio element.
          </audio>
        </div>
      </div>
    </div>
    <!--/ Video 7. -->

  </div>
</section>


</body>
</html>
