<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="utf-8">
    <meta name="description" content="Unified Generative PDE Solving via Video Inpainting Diffusion Models">
    <meta name="keywords" content="VideoPDE, PDE, Diffusion">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>VideoPDE: Unified Generative PDE Solving via Video Inpainting Diffusion Models</title>

    <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
    <link rel="stylesheet" href="./static/css/bulma.min.css">

    <script defer src="./static/js/fontawesome.all.min.js"></script>

    <style>
        /* Base typography: Noto Sans is requested by the Google Fonts <link> in <head>. */
        body {
            font-family: 'Noto Sans', sans-serif;
        }


        /* Horizontal strip of media tiles; tiles are spaced evenly and wrap when they do not fit. */
        .media-row {
            display: flex;
            justify-content: space-evenly;
            flex-wrap: wrap;
            margin-top: 10px;
        }

        /* One tile: media stacked above its caption. At 16% width, up to six tiles fit on a row. */
        .media-container {
            display: flex;
            flex-direction: column;
            align-items: center;
            width: 16%;
            margin-bottom: 10px;
        }

        /* Applies to every <video> and <img> on the page (element selectors, not only tiles):
           stretch to the parent's width, with black shown wherever the media does not paint. */
        video,
        img {
            width: 100%;
            background-color: black;
            object-fit: cover;
        }

        /* Label shown under each media tile. */
        .caption {
            margin-top: 0px;
            font-size: 0.9em;
            text-align: center;
        }

        /* NOTE(review): no element in this file's markup uses .link-block, .teaser or
           .publication-authors; these rules look like leftovers from the page template.
           Confirm before removing. */
        .link-block a {
            margin-top: 5px;
            margin-bottom: 5px;
        }

        .teaser .hero-body {
            padding-top: 0;
            padding-bottom: 3rem;
        }

        /* Font for the page title (<h1 class="publication-title">). */
        .publication-title {
            font-family: 'Google Sans', sans-serif;
        }

        .publication-authors {
            font-family: 'Google Sans', sans-serif;
        }


        .publication-authors a {
            color: hsl(204, 86%, 53%) !important;
        }

        .publication-authors a:hover {
            text-decoration: underline;
        }

        /* Zebra striping: <section> siblings alternate between light grey and white. */
        section:nth-of-type(even) {
            background-color: #f8f8f8;
        }

        section:nth-of-type(odd) {
            background-color: #ffffff;
        }

        /* Wrapper around the "More Results" call to action at the end of the results section. */
        #more-results {
            text-align: center;
            margin-top: 2em;
        }

        /* NOTE(review): #more-results currently contains an <a class="button">, not a <button>,
           so the two rules below match nothing in this file's markup. */
        #more-results button {
            padding: 0.75em 2em;
            font-size: 1rem;
            font-weight: 500;
            color: white;
            background-color: #2e2e2e;
            border: none;
            border-radius: 9999px;
            cursor: pointer;
            font-family: 'Google Sans', sans-serif;
            transition: background-color 0.2s;
        }

        #more-results button:hover {
            background-color: #1f1f1f;
        }

        /* Anything with this class is not rendered.
           NOTE(review): the first video row in <body> (masked / PINO / Ours / GT) carries
           .extra-results, so it is hidden while the nearby text still refers to it;
           confirm that hiding it is intended. */
        .extra-results {
            display: none;
        }

        /* Tight spacing utility classes */
        /* NOTE(review): on <section class="section tight-section"> the later `section.section`
           rule is also !important and has higher specificity, so its 1.5rem padding wins over
           the 1rem declared here. */
        .tight-section {
            padding-top: 1rem !important;
            padding-bottom: 1rem !important;
        }

        /* Removes vertical padding and margin from the element it is applied to. */
        .no-padding {
            padding-top: 0 !important;
            padding-bottom: 0 !important;
            margin-top: 0 !important;
            margin-bottom: 0 !important;
        }

        /* Vertical padding for every <section class="section">. */
        section.section {
            padding-top: 1.5rem !important;
            padding-bottom: 1.5rem !important;
        }

        /* Uniform vertical rhythm inside sections.
           NOTE(review): because these are !important, they also override the inline
           `style="margin: 0;"` / `style="margin-top: 0.5rem;"` attributes used on <p> and
           .content elements inside .section. */
        .section h2,
        .section p,
        .section .content {
            margin-top: 0 !important;
            margin-bottom: 1rem !important;
        }
    </style>
</head>

<script>
    /**
     * Show or hide the collapsible abstract.
     *
     * Looks up the element with id "abstract" and flips its inline `display`
     * style: "none" becomes "block", and any other value becomes "none".
     * Wired to the "View Abstract" button through its onclick attribute.
     */
    function toggleAbstract() {
        const abstractPanel = document.getElementById('abstract');
        const isHidden = abstractPanel.style.display === 'none';

        if (isHidden) {
            abstractPanel.style.display = 'block';
        } else {
            abstractPanel.style.display = 'none';
        }
    }
</script>

<!-- d -->

<body>
    <section class="hero">
        <div class="hero-body">
            <div class="container is-max-desktop">
                <div class="columns is-centered">
                    <div class="column has-text-centered">
                        <h1 class="title is-1 publication-title">
                            VideoPDE: Unified Generative PDE Solving via Video Inpainting Diffusion Models
                        </h1>
                    </div>
                </div>
            </div>
        </div>
    </section>

    <section class="section">
        <div class="container is-max-desktop">
            <div class="columns is-centered has-text-centered">
                <div class="column is-four-fifths">
                    <!-- <h2 class="title is-3">TL;DR</h2> -->
                    <div class="content has-text-justified">
                        <p>
                            <strong>TL;DR:</strong> We present a unified framework for predicting
                            forward/inverse/partial
                            PDE solutions using a video inpainting diffusion model.
                        </p>
                    </div>
                </div>
            </div>
            <div class="columns is-centered extra-results">
                <div class="column is-full-width">
                    <div class="media-row">
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/kf_0.01/masked.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Input Video (1% pixels)</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/kf_0.01/pino.mp4" type="video/mp4">
                            </video>
                            <div class="caption">PINO</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/kf_0.01/ours.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Ours</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/kf_0.01/gt.mp4" type="video/mp4">
                            </video>
                            <div class="caption">GT</div>
                        </div>
                    </div>
                </div>
            </div>
            <div class="columns is-centered has-text-centered">
                <div class="column is-four-fifths">
                    <!-- <h2 class="title is-3">TL;DR</h2> -->
                    <div class="content has-text-justified">
                        <p>
                            <strong> Flexible PDE solution predictions. </strong>From sparse spatiotemporal observations
                            (left), our method can predict future/past and reconstruct the full field solutions more
                            flexibly and accurately compared to existing state-of-the-art methods, e.g., PINO (Li et
                            al.).
                        </p>
                    </div>
                </div>
            </div>

        </div>
    </section>

    <section class="section tight-section">
        <div class="container is-max-desktop no-padding">
            <div class="hero-body no-padding">
                <img src="./static/images/teaser.png" alt="Overview of the VideoPDE pipeline" style="margin-bottom: 0.5rem;">
                <div class="content has-text-justified" style="margin-top: 0.5rem;">
                    <p style="margin: 0;">
                        <strong>VideoPDE pipeline.</strong>
                        We cast PDE solving as a video inpainting task. Our Hierarchical Video Diffusion Transformer
                        (HV-DiT) denoises initial noise into a full video, conditioned on pixel-level sparse
                        measurements. Its ability to handle arbitrary input patterns enables flexible application to
                        diverse PDE scenarios, including forward, inverse, and continuous measurement tasks.
                    </p>
                </div>


                <!-- Toggle Button -->
                <div style="text-align: center; margin-top: 0.5rem;">
                    <button class="button is-small is-dark is-rounded" onclick="toggleAbstract()">
                        View Abstract
                    </button>
                </div>

                <!-- Collapsible Abstract -->
                <div style="margin-top: 1.25rem;">
                    <div id="abstract" class="content has-text-justified" style="display: none;">
                        <p style="margin: 0;">
                            We present a unified framework for solving partial differential equations (PDEs) using
                            video-inpainting diffusion transformer models. Unlike existing methods that devise
                            specialized strategies for either forward or inverse problems under full or partial
                            observation, our
                            approach unifies these tasks under a single, flexible generative framework. Specifically, we
                            recast PDE-solving as a generalized inpainting problem, e.g., treating forward prediction as
                            inferring missing spatiotemporal information of future states from initial conditions. To
                            this end, we design a transformer-based architecture that conditions on arbitrary patterns
                            of known data to infer missing values across time and space. Our method proposes pixel-space
                            video diffusion models for fine-grained, high-fidelity inpainting and conditioning, while
                            enhancing computational efficiency through hierarchical modeling. Extensive experiments show
                            that our video inpainting-based diffusion model offers an accurate and versatile solution
                            across a wide range of PDEs and problem setups, outperforming state-of-the-art baselines.
                        </p>
                    </div>
                </div>
            </div>
        </div>
    </section>

    <!--     <section class="section tight-section">
        <div class="container is-max-desktop no-padding">
            <div class="hero-body no-padding">
                <img src="./static/images/teaser.png" style="margin-bottom: 0.5rem;">
                <div class="content has-text-justified" style="margin-top: 0.5rem;">
                    <p style="margin: 0;">
                        <strong>VideoPDE pipeline.</strong>
                        We cast PDE solving as a video inpainting task. Our Hierarchical Video Diffusion Transformer
                        (HV-DiT) denoises initial noise into a full video, conditioned on pixel-level sparse
                        measurements. Its ability to handle arbitrary input patterns enables flexible application to
                        diverse PDE scenarios, including forward, inverse, and continuous measurement tasks.
                    </p>
                </div>
            </div>
        </div>
    </section>
 -->
    <section class="section tight-section">
        <div class="container is-max-desktop no-padding">
            <div class="hero-body no-padding">
                <img src="./static/images/table-1.png" alt="Conceptual comparison of PDE-solving methods" style="margin-bottom: 0.5rem;">
                <div class="content has-text-justified" style="margin-top: 0.5rem;">
                    <p style="margin: 0;">
                        <strong>Conceptual comparison of PDE-solving methods.</strong>
                        Neural operator methods struggle with partial inputs. Only PINN and VideoPDE handle forward,
                        inverse, and continuous measurements flexibly. Generative baselines focus on reconstructing one
                        or two frames (instead of dense temporal frames) and are often not designed for forward
                        prediction, where VideoPDE excels. The forward error is measured on the Navier-Stokes dataset.
                    </p>
                </div>
            </div>
        </div>
    </section>

    <section class="section">
        <div class="container is-max-desktop">
            <div class="columns is-centered">
                <div class="column is-full-width">
                    <h2 class="title is-4">Kolmogorov Flow Forward Prediction</h2>
                    <div class="content has-text-justified" style="margin-top: 0.5rem;">
                        Using our video inpainting framework, we can predict the future frames from the first frame
                        initial condition. For the complex Kolmogorov Flow, VideoPDE performs noticeably better than
                        prior ML-based methods.
                    </div>
                    <div class="media-row">
                        <div class="media-container">
                            <img src="./static/videos/100frames/input.png" alt="First image">
                            <div class="caption">Input (First Frame)</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/100frames/deeponet.mp4" type="video/mp4">
                            </video>
                            <div class="caption">DeepONet</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/100frames/fno.mp4" type="video/mp4">
                            </video>
                            <div class="caption">FNO</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/100frames/pino.mp4" type="video/mp4">
                            </video>
                            <div class="caption">PINO</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/100frames/ours.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Ours</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/100frames/gt.mp4" type="video/mp4">
                            </video>
                            <div class="caption">GT</div>
                        </div>
                    </div>
                </div>
            </div>

            <div class="columns is-centered">
                <div class="column is-full-width">
                    <h2 class="title is-4">Kolmogorov Flow Forward Prediction from 3% Observation</h2>
                    <div class="content has-text-justified" style="margin-top: 0.5rem;">
                        Thanks to our flexible video inpainting framework, VideoPDE can predict the full-field future
                        frames from a partial set of pixels of the initial condition frame, 3% shown here. The SOTA
                        in forward modeling, PINO, is given the interpolated first frame and performs significantly
                        worse than our generative approach.
                    </div>
                    <div class="media-row">
                        <div class="media-container">
                            <img src="./static/videos/kf_0.03_fwd_100frame/input.png" alt="First image">
                            <div class="caption">Input (First Frame)</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/kf_0.03_fwd_100frame/pino.mp4" type="video/mp4">
                            </video>
                            <div class="caption">PINO </div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/kf_0.03_fwd_100frame/ours.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Ours</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/kf_0.03_fwd_100frame/gt.mp4" type="video/mp4">
                            </video>
                            <div class="caption">GT</div>
                        </div>
                    </div>
                </div>
            </div>

            <div class="columns is-centered">
                <div class="column is-full-width">
                    <h2 class="title is-4">Wave-Equation Inverse Modeling</h2>
                    <div class="content has-text-justified" style="margin-top: 0.5rem;">
                        Similarly, our unified framework allows for inverse prediction, where we predict the past from
                        the future observations. Here, from the last frame, we accurately predict the previous frames.
                    </div>
                    <div class="media-row">
                        <div class="media-container">
                            <img src="./static/videos/wave-layer-inverse/input.png" alt="Input last frame">
                            <div class="caption">Input (Last Frame)</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/wave-layer-inverse/deeponet.mp4" type="video/mp4">
                            </video>
                            <div class="caption">DeepONet</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/wave-layer-inverse/fno.mp4" type="video/mp4">
                            </video>
                            <div class="caption">FNO</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/wave-layer-inverse/pino.mp4" type="video/mp4">
                            </video>
                            <div class="caption">PINO</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/wave-layer-inverse/ours.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Ours</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/wave-layer-inverse/gt.mp4" type="video/mp4">
                            </video>
                            <div class="caption">GT</div>
                        </div>
                    </div>
                </div>
            </div>

            <div class="columns is-centered">
                <div class="column is-full-width">
                    <h2 class="title is-4">Navier–Stokes Continuous 1% observations</h2>
                    <div class="content has-text-justified" style="margin-top: 0.5rem;">
                        In this Navier-Stokes experiment, similar to the teaser videos, 1% of the pixels provide
                        continuous sensor readings, from which VideoPDE almost perfectly reconstructs the full field
                        solution, noticeably better than SOTA methods for this task.
                    </div>

                    <div class="media-row">
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/ns-0.99/masked.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Input Video (1% pixels)</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/ns-0.99/dpde.mp4" type="video/mp4">
                            </video>
                            <div class="caption">DiffusionPDE (Ext.)</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/ns-0.99/shu.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Shu et al.</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/ns-0.99/zhuang.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Zhuang et al.</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/ns-0.99/ours.mp4" type="video/mp4">
                            </video>
                            <div class="caption">Ours</div>
                        </div>
                        <div class="media-container">
                            <video controls loop autoplay muted>
                                <source src="./static/videos/ns-0.99/gt.mp4" type="video/mp4">
                            </video>
                            <div class="caption">GT</div>
                        </div>
                    </div>
                </div>
            </div>


            <div id="more-results">
                <a href="more_results.html" class="external-link button is-rounded is-dark">
                    More Results
                </a>
            </div>

        </div>
    </section>

    <footer class="footer">
        <div class="container">
            <div class="columns is-centered">
                <div class="column is-8">
                    <div class="content">
                        <p>
                            This website is adapted from the <a
                                href="https://github.com/nerfies/nerfies.github.io">Nerfies
                                website</a>.
                        </p>
                    </div>
                </div>
            </div>
        </div>
    </footer>

    <script>
        // Once the DOM is parsed, swap each <video>'s native looping for a loop that
        // holds on the last frame, rewinds, holds on the first frame, then replays.
        document.addEventListener('DOMContentLoaded', function () {
            const HOLD_ON_FIRST_FRAME_MS = 2000;
            const HOLD_ON_LAST_FRAME_MS = 4000;

            // Wait on the final frame, rewind, wait on the first frame, then play again.
            const restartWithPauses = (clip) => {
                setTimeout(() => {
                    clip.currentTime = 0;
                    setTimeout(() => {
                        clip.play();
                    }, HOLD_ON_FIRST_FRAME_MS);
                }, HOLD_ON_LAST_FRAME_MS);
            };

            for (const clip of document.querySelectorAll('video')) {
                // Override the `controls` and `loop` attributes from the markup;
                // `loop` must be off or the 'ended' event never fires.
                clip.controls = false;
                clip.loop = false;
                clip.playbackRate = 0.75;

                clip.addEventListener('ended', () => {
                    restartWithPauses(clip);
                });
            }
        });
    </script>
</body>

</html>