<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- Metadata used by search engines and social-media link previews (the page's "business card"). -->
  <meta name="description" content="RSCC is a large-scale remote sensing change caption dataset of 62,351 pre-event and post-event image pairs with detailed change captions for disaster events.">
  <meta property="og:title" content="RSCC: A Large-Scale Remote Sensing Change Caption Dataset for Disaster Events">
  <meta property="og:description" content="A large-scale dataset of 62,351 pre-event and post-event remote sensing image pairs with detailed change captions, plus a change caption benchmark for temporal MLLMs.">
  <!-- TODO: replace with the public URL this page is deployed at. -->
  <meta property="og:url" content="URL OF THE WEBSITE">
  <!-- TODO: add the banner image at the path below. Optimal dimensions are 1200x630. -->
  <meta property="og:image" content="static/images/your_banner_image.png">
  <meta property="og:image:width" content="1200">
  <meta property="og:image:height" content="630">


  <meta name="twitter:title" content="RSCC: A Large-Scale Remote Sensing Change Caption Dataset for Disaster Events">
  <meta name="twitter:description" content="A large-scale dataset of 62,351 pre-event and post-event remote sensing image pairs with detailed change captions, plus a change caption benchmark for temporal MLLMs.">
  <!-- TODO: add the Twitter banner image at the path below. Optimal dimensions are 1200x600. -->
  <meta name="twitter:image" content="static/images/your_twitter_banner_image.png">
  <meta name="twitter:card" content="summary_large_image">
  <!-- Keywords the paper should be indexed by. -->
  <meta name="keywords" content="remote sensing, change captioning, disaster monitoring, vision-language models, MLLM, dataset, benchmark">
  <meta name="viewport" content="width=device-width, initial-scale=1">


  <title>RSCC: A Large-Scale Remote Sensing Change Caption Dataset for Disaster Events</title>
  <link rel="icon" type="image/x-icon" href="static/images/favicon.ico">
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
  rel="stylesheet">

  <!-- Local stylesheets: Bulma framework, its carousel/slider extensions, icon fonts and page-specific rules. -->
  <link rel="stylesheet" href="static/css/bulma.min.css">
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
  href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="static/css/index.css">

  <!-- Script order is preserved: jQuery is loaded before the local scripts. -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>
<body>


  <!-- Hero: paper title, authors, affiliation and resource links. -->
  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <h1 class="title is-1 publication-title">RSCC: A Large-Scale Remote Sensing Change Caption Dataset for Disaster Events</h1>
            <div class="is-size-5 publication-authors">
              <!-- Paper authors -->
              <span class="author-block">Zhenyuan Chen,</span>
              <span class="author-block">Chenxi Wang,</span>
              <span class="author-block">Ningyu Zhang,</span>
              <span class="author-block">Feng Zhang</span>
            </div>

            <div class="is-size-5 publication-authors">
              <span class="author-block">Zhejiang University<br>under review 2025</span>
              <!-- <span class="eql-cntrb"><small><br><sup>*</sup>Indicates Equal Contribution</small></span> -->
            </div>

            <div class="column has-text-centered">
              <!-- The wrapper below was previously opened inside a comment, which left
                   one closing </div> of this section without a matching opening tag. -->
              <div class="publication-links">
                <!-- Paper PDF link (disabled) -->
                <!-- <span class="link-block">
                  <a href="https://arxiv.org/pdf/<ARXIV PAPER ID>.pdf" target="_blank"
                  class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                    </span>
                    <span>Paper</span>
                  </a>
                </span> -->

                <!-- Supplementary PDF link (disabled) -->
                <!-- <span class="link-block">
                  <a href="static/pdfs/supplementary_material.pdf" target="_blank"
                  class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-file-pdf"></i>
                    </span>
                    <span>Supplementary</span>
                  </a>
                </span> -->

                <!-- Github link -->
                <span class="link-block">
                  <a href="https://github.com/Bili-Sakura/RSCC" target="_blank" rel="noopener"
                  class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fab fa-github"></i>
                    </span>
                    <span>Code</span>
                  </a>
                </span>

                <!-- HuggingFace model link -->
                <span class="link-block">
                  <a href="https://huggingface.co/BiliSakura/RSCCM" target="_blank" rel="noopener"
                  class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-database"></i>
                    </span>
                    <span>Model</span>
                  </a>
                </span>

                <!-- ArXiv abstract link (disabled) -->
                <!-- <span class="link-block">
                  <a href="https://arxiv.org/abs/<ARXIV PAPER ID>" target="_blank"
                  class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="ai ai-arxiv"></i>
                    </span>
                    <span>arXiv</span>
                  </a>
                </span> -->

                <!-- HuggingFace Datasets link -->
                <span class="link-block">
                  <a href="https://huggingface.co/datasets/BiliSakura/RSCC" target="_blank" rel="noopener"
                  class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="fas fa-database"></i>
                    </span>
                    <span>Dataset</span>
                  </a>
                </span>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>


<!-- Teaser video-->
<!-- <section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <video poster="" id="tree" autoplay controls muted loop height="100%">
        <source src="static/videos/banner_video.mp4"
        type="video/mp4">
      </video>
      <h2 class="subtitle has-text-centered">
        Aliquam vitae elit ullamcorper tellus egestas pellentesque. Ut lacus tellus, maximus vel lectus at, placerat pretium mi. Maecenas dignissim tincidunt vestibulum. Sed consequat hendrerit nisl ut maximus. 
      </h2>
    </div>
  </div>
</section> -->
<!-- End teaser video -->

<!-- Abstract: light hero band holding the paper summary in a centered four-fifths column -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <!-- Body text is justified; source is laid out one sentence per line (whitespace collapses when rendered). -->
        <div class="content has-text-justified">
          <p>
            Remote sensing is critical for disaster monitoring, yet existing datasets lack temporal image pairs and detailed textual annotations.
            While single-snapshot imagery dominates current resources, it fails to capture dynamic disaster impacts  over time.
            To address this gap, we introduce the Remote Sensing Change Caption (RSCC) dataset, a large-scale dataset comprising 62,351 pre-event and post-event remote sensing image pairs (spanning earthquakes, floods, wildfires, and more) paired with detailed change captions.
            Based on RSCC dataset, we develop a change caption benchmark and evaluate the performance of several state-of-the-art temporal MLLMs.
            Given the quantitative and qualitative results, we demonstrate the limitations of models' capability in complex temporal remote sensing image understanding.
            Our work aims to facilitate the training and evaluation of vision-language models on temporal remote sensing image understanding tasks.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End of abstract -->

<section class="section">
  <div class="content">
    <!-- RSCC Example: image is 800px wide but capped at the container width so it does not overflow narrow viewports -->
    <div class="has-text-centered">
      <img src="static/images/rscc_overview2.png" alt="RSCC Example" style="width: 800px; max-width: 100%; height: auto;">
      <div class="figure-caption" style="margin-top: 1rem; font-size: 1.1rem; color: #333; max-width: 800px; margin-left: auto; margin-right: auto;">
        An example of RSCC.
      </div>
    </div>

    <!-- Construction Pipeline: image is 800px wide but capped at the container width so it does not overflow narrow viewports -->
    <div class="has-text-centered">
      <img src="static/images/pipeline.png" alt="Construction Pipeline" style="width: 800px; max-width: 100%; height: auto;">
      <div class="figure-caption" style="margin-top: 1rem; font-size: 1.1rem; color: #333; max-width: 800px; margin-left: auto; margin-right: auto;">
        Construction pipeline.
      </div>
    </div>

    <!-- Model Performance Table
         Each model has three rows: the baseline, "+ Textual Prompt", and "+ Visual Prompt".
         Values in parentheses are deltas relative to the row directly above.
         The visible caption is linked to the table via aria-labelledby, and header
         cells carry scope="col" so assistive technology can associate data cells
         with the two-level header. -->
    <div class="has-text-centered">
      <div id="model-performance-caption" class="figure-caption" style="margin-top: 1rem; font-size: 1.1rem; color: #333; max-width: 1000px; margin-left: auto; margin-right: auto;">
        Model Performance Comparison</div>
      <!-- Horizontal scroll container: the table has a fixed 1000px width. -->
      <div style="overflow-x: auto; max-width: 100%;">
        <table class="table is-bordered is-striped is-hoverable is-fullwidth" aria-labelledby="model-performance-caption" style="font-size: 0.85rem; table-layout: fixed; width: 1000px; margin-left: auto; margin-right: auto;">
          <colgroup><col style="width: 15%;"><col style="width: 10%;"><col style="width: 10%;"><col style="width: 12%;"><col style="width: 12%;"><col style="width: 8%;"></colgroup>
          <thead>
            <!-- Header row 1: metric families -->
            <tr>
              <th scope="col" style="font-size: 0.9rem; text-align: center;">Model</th>
              <th scope="col" colspan="2" style="font-size: 0.9rem; text-align: center;">N-Gram</th>
              <th scope="col" colspan="2" style="font-size: 0.9rem; text-align: center;">Contextual</th>
              <th scope="col" style="font-size: 0.9rem; text-align: center;">Avg_L</th>
            </tr>
            <!-- Header row 2: individual metrics and units -->
            <tr>
              <th scope="col" style="font-size: 0.8rem; text-align: center;">(#Activate Params)</th>
              <th scope="col" style="font-size: 0.8rem; text-align: center;">ROUGE(%)↑</th>
              <th scope="col" style="font-size: 0.8rem; text-align: center;">METEOR(%)↑</th>
              <th scope="col" style="font-size: 0.8rem; text-align: center;">BERT(%)↑</th>
              <th scope="col" style="font-size: 0.8rem; text-align: center;">ST5-SCS(%)↑</th>
              <th scope="col" style="font-size: 0.8rem; text-align: center;">(#Words)</th>
            </tr>
          </thead>
          <tbody style="font-size: 0.8rem; text-align: left;">
            <tr><td>BLIP-3 (3B)</td><td>4.53</td><td>10.85</td><td>98.83</td><td>44.05</td><td><span style="color:red;">*456</span></td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>10.07 (<span style="color:green;">+5.54↑</span>)</td><td>20.69 (<span style="color:green;">+9.84↑</span>)</td><td>98.95 (<span style="color:green;">+0.12↑</span>)</td><td>63.67 (<span style="color:green;">+19.62↑</span>)</td><td><span style="color:red;">*302</span></td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>8.45 (<span style="color:red;">-1.62↓</span>)</td><td>19.18 (<span style="color:red;">-1.51↓</span>)</td><td>99.01 (<span style="color:green;">+0.06↑</span>)</td><td>68.34 (<span style="color:green;">+4.67↑</span>)</td><td><span style="color:red;">*354</span></td></tr>
            <tr><td>Kimi-VL (3B)</td><td>12.47</td><td>16.95</td><td>98.83</td><td>51.35</td><td>87</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>16.83 (<span style="color:green;">+4.36↑</span>)</td><td>25.47 (<span style="color:green;">+8.52↑</span>)</td><td>99.22 (<span style="color:green;">+0.39↑</span>)</td><td>70.75 (<span style="color:green;">+19.40↑</span>)</td><td>108</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>16.83 (+0.00)</td><td>25.39 (<span style="color:red;">-0.08↓</span>)</td><td>99.30 (<span style="color:green;">+0.08↑</span>)</td><td>69.97 (<span style="color:red;">-0.78↓</span>)</td><td>109</td></tr>
            <tr><td>Phi-4-Multimodal (4B)</td><td>4.09</td><td>1.45</td><td>98.60</td><td>34.55</td><td>7</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>17.08 (<span style="color:green;">+13.00↑</span>)</td><td>19.70 (<span style="color:green;">+18.25↑</span>)</td><td>98.93 (<span style="color:green;">+0.33↑</span>)</td><td>67.62 (<span style="color:green;">+33.07↑</span>)</td><td>75</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>17.05 (<span style="color:red;">-0.03↓</span>)</td><td>19.09 (<span style="color:red;">-0.61↓</span>)</td><td>98.90 (<span style="color:red;">-0.03↓</span>)</td><td>66.69 (<span style="color:red;">-0.93↓</span>)</td><td>70</td></tr>
            <tr><td>Qwen2-VL (7B)</td><td>11.02</td><td>9.95</td><td>99.11</td><td>45.55</td><td>42</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>19.04 (<span style="color:green;">+8.02↑</span>)</td><td>25.20 (<span style="color:green;">+15.25↑</span>)</td><td>99.01 (<span style="color:red;">-0.10↓</span>)</td><td>72.65 (<span style="color:green;">+27.10↑</span>)</td><td>84</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>18.43 (<span style="color:red;">-0.61↓</span>)</td><td>25.03 (<span style="color:red;">-0.17↓</span>)</td><td>99.03 (<span style="color:green;">+0.02↑</span>)</td><td>72.89 (<span style="color:green;">+0.24↑</span>)</td><td>88</td></tr>
            <tr><td>LLaVA-NeXT-Interleave (8B)</td><td>12.51</td><td>13.29</td><td>99.11</td><td>46.99</td><td>57</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>16.09 (<span style="color:green;">+3.58↑</span>)</td><td>20.73 (<span style="color:green;">+7.44↑</span>)</td><td>99.22 (<span style="color:green;">+0.11↑</span>)</td><td>62.60 (<span style="color:green;">+15.61↑</span>)</td><td>75</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>15.76 (<span style="color:red;">-0.33↓</span>)</td><td>21.17 (<span style="color:green;">+0.44↑</span>)</td><td>99.24 (<span style="color:green;">+0.02↑</span>)</td><td>65.75 (<span style="color:green;">+3.15↑</span>)</td><td>88</td></tr>
            <tr><td>LLaVA-OneVision (8B)</td><td>8.40</td><td>10.97</td><td>98.64</td><td>46.15</td><td><span style="color:red;">*221</span></td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>11.15 (<span style="color:green;">+2.75↑</span>)</td><td>19.09 (<span style="color:green;">+8.12↑</span>)</td><td>98.85 (<span style="color:green;">+0.21↑</span>)</td><td>70.08 (<span style="color:green;">+23.93↑</span>)</td><td><span style="color:red;">*285</span></td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>10.68 (<span style="color:red;">-0.47↓</span>)</td><td>18.27 (<span style="color:red;">-0.82↓</span>)</td><td>98.79 (<span style="color:red;">-0.06↓</span>)</td><td>69.34 (<span style="color:red;">-0.74↓</span>)</td><td><span style="color:red;">*290</span></td></tr>
            <tr><td>InternVL 3 (8B)</td><td>12.76</td><td>15.77</td><td>99.31</td><td>51.84</td><td>64</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td><em>19.81</em> (<span style="color:green;">+7.05↑</span>)</td><td><em>28.51</em> (<span style="color:green;">+12.74↑</span>)</td><td><strong>99.55</strong> (<span style="color:green;">+0.24↑</span>)</td><td>78.57 (<span style="color:green;">+26.73↑</span>)</td><td>81</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>19.70 (<span style="color:red;">-0.11↓</span>)</td><td>28.46 (<span style="color:red;">-0.05↓</span>)</td><td>99.51 (<span style="color:red;">-0.04↓</span>)</td><td><strong>79.18</strong> (<span style="color:green;">+0.61↑</span>)</td><td>84</td></tr>
            <tr><td>Pixtral (12B)</td><td>12.34</td><td>15.94</td><td>99.34</td><td>49.36</td><td>70</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td><strong>19.87</strong> (<span style="color:green;">+7.53↑</span>)</td><td><strong>29.01</strong> (<span style="color:green;">+13.07↑</span>)</td><td>99.51 (<span style="color:green;">+0.17↑</span>)</td><td><em>79.07</em> (<span style="color:green;">+29.71↑</span>)</td><td>97</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>19.03 (<span style="color:red;">-0.84↓</span>)</td><td>28.44 (<span style="color:red;">-0.57↓</span>)</td><td><em>99.52</em> (<span style="color:green;">+0.01↑</span>)</td><td>78.71 (<span style="color:red;">-0.36↓</span>)</td><td>102</td></tr>
            <tr><td>CCExpert (7B)</td><td>7.61</td><td>4.32</td><td>99.17</td><td>40.81</td><td>12</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>8.71 (<span style="color:green;">+1.10↑</span>)</td><td>5.35 (<span style="color:green;">+1.03↑</span>)</td><td>99.23 (<span style="color:green;">+0.06↑</span>)</td><td>47.13 (<span style="color:green;">+6.32↑</span>)</td><td>14</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>8.84 (<span style="color:green;">+0.13↑</span>)</td><td>5.41 (<span style="color:green;">+0.06↑</span>)</td><td>99.23 (+0.00)</td><td>46.58 (<span style="color:red;">-0.55↓</span>)</td><td>14</td></tr>
            <tr><td>TEOChat (7B)</td><td>7.86</td><td>5.77</td><td>98.99</td><td>52.64</td><td>15</td></tr>
            <tr><td>&nbsp;&nbsp;+ Textual Prompt</td><td>11.81 (<span style="color:green;">+3.95↑</span>)</td><td>10.24 (<span style="color:green;">+4.47↑</span>)</td><td>99.12 (<span style="color:green;">+0.13↑</span>)</td><td>61.73 (<span style="color:green;">+9.09↑</span>)</td><td>22</td></tr>
            <tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;+ Visual Prompt</td><td>11.55 (<span style="color:red;">-0.26↓</span>)</td><td>10.04 (<span style="color:red;">-0.20↓</span>)</td><td>99.09 (<span style="color:red;">-0.03↓</span>)</td><td>62.53 (<span style="color:green;">+0.80↑</span>)</td><td>22</td></tr>
          </tbody>
        </table>
      </div>
    </div>

    <!-- Qualitative Results 1: image is 800px wide but capped at the container width so it does not overflow narrow viewports -->
    <div class="has-text-centered">
      <img src="static/images/qualitative_results1.png" alt="Qualitative Results 1" style="width: 800px; max-width: 100%; height: auto;">
      <div class="figure-caption" style="margin-top: 1rem; font-size: 1.1rem; color: #333; max-width: 800px; margin-left: auto; margin-right: auto;">
        Visualization of qualitative results. Critical descriptions are colored in <span style="color:green">green</span> while incorrect and hallucinated sentences/words are <span style="color:red">red</span>.
      </div>
    </div>

    <!-- Qualitative Results 2: image is 800px wide but capped at the container width so it does not overflow narrow viewports -->
    <div class="has-text-centered">
      <img src="static/images/qualitative_results2.png" alt="Qualitative Results 2" style="width: 800px; max-width: 100%; height: auto;">
      <div class="figure-caption" style="margin-top: 1rem; font-size: 1.1rem; color: #333; max-width: 800px; margin-left: auto; margin-right: auto;">
        Visualization of qualitative results. Critical descriptions are colored in <span style="color:green">green</span> while incorrect and hallucinated sentences/words are <span style="color:red">red</span>.
      </div>
    </div>

    <!-- Win Rate Plot: image is 800px wide but capped at the container width so it does not overflow narrow viewports -->
    <div class="has-text-centered">
      <img src="static/images/win_rate_plot.png" alt="Win Rate Plot" style="width: 800px; max-width: 100%; height: auto;">
      <div class="figure-caption" style="margin-top: 1rem; font-size: 1.1rem; color: #333; max-width: 800px; margin-left: auto; margin-right: auto;">
        Win-rate from QvQ-Max (ground truth) to all baseline models on RSCC subset.
      </div>
    </div>
  </div>
</section>



<!-- Youtube video -->
<!-- <section class="hero is-small is-light">
  <div class="hero-body">
    <div class="container">
      <h2 class="title is-3">Video Presentation</h2>
      <div class="columns is-centered has-text-centered">
        <div class="column is-four-fifths">
          
          <div class="publication-video">
            <iframe src="https://www.youtube.com/embed/JkaxUblCGz0" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
          </div>
        </div>
      </div>
    </div>
  </div>
</section> -->
<!-- End youtube video -->


<!-- Video carousel -->
<!-- <section class="hero is-small">
  <div class="hero-body">
    <div class="container">
      <h2 class="title is-3">Another Carousel</h2>
      <div id="results-carousel" class="carousel results-carousel">
        <div class="item item-video1">
          <video poster="" id="video1" autoplay controls muted loop height="100%">
            <source src="static/videos/carousel1.mp4"
            type="video/mp4">
          </video>
        </div>
        <div class="item item-video2">
          <video poster="" id="video2" autoplay controls muted loop height="100%">
            <source src="static/videos/carousel2.mp4"
            type="video/mp4">
          </video>
        </div>
        <div class="item item-video3">
          <video poster="" id="video3" autoplay controls muted loop height="100%">
            <source src="static/videos/carousel3.mp4"
            type="video/mp4">
          </video>
        </div>
      </div>
    </div>
  </div>
</section> -->
<!-- End video carousel -->






<!-- Paper poster -->
<!-- <section class="hero is-small is-light">
  <div class="hero-body">
    <div class="container">
      <h2 class="title">Poster</h2>

      <iframe  src="static/pdfs/sample.pdf" width="100%" height="550">
          </iframe>
        
      </div>
    </div>
  </section> -->
<!--End paper poster -->


<!-- BibTeX citation -->
  <section class="section" id="BibTeX">
    <div class="container is-max-desktop content">
      <h2 class="title">BibTeX</h2>
      <!-- The entry sits inside <pre>, so its line breaks and leading spaces are rendered verbatim.
           BibTeX requires author names to be separated by "and", not commas. -->
      <pre><code>@misc{rscc_chen_2025,
  title = {RSCC: A Large-Scale Remote Sensing Change Caption Dataset for Disaster Events},
  author = {Zhenyuan Chen and Chenxi Wang and Ningyu Zhang and Feng Zhang},
  howpublished = {\url{https://github.com/Bili-Sakura/RSCC}},
  year = {2025}
}</code></pre>
    </div>
  </section>
<!-- End BibTeX citation -->


  <!-- Footer: template attribution and content license. -->
  <footer class="footer">
    <div class="container">
      <div class="columns is-centered">
        <div class="column is-8">
          <div class="content">

            <p>
              This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank" rel="noopener">Academic Project Page Template</a> which was adopted from the <a href="https://nerfies.github.io" target="_blank" rel="noopener">Nerfies</a> project page.
              You are free to borrow the source code of this website, we just ask that you link back to this page in the footer. <br> This website is licensed under a <a rel="license noopener" href="https://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative
              Commons Attribution-ShareAlike 4.0 International License</a>.
            </p>

          </div>
        </div>
      </div>
    </div>
  </footer>

<!-- Statcounter tracking code -->
  
<!-- You can add a tracker to track page visits by creating an account at statcounter.com -->

    <!-- End of Statcounter Code -->

  </body>
  </html>
