<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">

    <script>
        // Nothing earlier in this document defines `google`, so the bare
        // google.load(...) call threw a ReferenceError. Only call the loader
        // when it is actually present.
        if (window.google && typeof window.google.load === "function") {
            window.google.load("jquery", "1.3.2");
        }
    </script>

    <!-- Preconnect hints come first so the connections are already warming
         up when the font stylesheet below is requested. -->
    <link rel="preconnect" href="https://fonts.googleapis.com/">
    <link rel="preconnect" href="https://fonts.gstatic.com/" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Titillium+Web:wght@300;400;600;700&amp;display=swap"
        rel="stylesheet">

    <title>Rethinking Prompt Design for Inference-time Scaling in Text-to-visual Generation</title>
    <style>
        /* Animate jumps triggered by the in-page anchor links. */
        html {
            scroll-behavior: smooth;
            /* Reserve room for the fixed .global-header so a heading reached
               through an anchor link is not hidden underneath it. */
            scroll-padding-top: 5.5rem;
        }

        /* Pill-shaped control used for the header links and the refresh button. */
        .nav-button {
            /* box */
            padding: 0.4rem 0.8rem;
            border: none;
            border-radius: 6px;
            background: #f5f5f5;

            /* text */
            color: #333;
            font-size: 0.9rem;
            font-weight: 600;
            text-decoration: none;

            /* interaction */
            cursor: pointer;
            transition: background 0.2s ease;
        }

        .nav-button:hover {
            background: #ddd;
        }

        /* Refresh variant: slightly darker and bolder than the plain links. */
        .refresh-button {
            font-weight: 700;
            background: #e5e5e5;
        }

        /* Declared after .nav-button:hover so the darker hover shade wins. */
        .refresh-button:hover {
            background: #ccc;
        }


        /* --- Sticky Global Header --- */
        /* Translucent bar pinned to the top of the viewport; title on the
           left, navigation on the right. */
        .global-header {
            position: fixed;
            top: 0;
            left: 0;
            right: 0;

            display: flex;
            align-items: center;
            justify-content: space-between;
            gap: 1rem;

            padding: 1.2rem 1.5rem;
            /* ↑ larger top/bottom padding makes the header taller */
            z-index: 1000;
            /* Prefixed form first: Safari releases before 18 only recognise
               -webkit-backdrop-filter, so without it the blur is lost there. */
            -webkit-backdrop-filter: blur(8px);
            backdrop-filter: blur(8px);
            background: rgba(245, 200, 200, 0.75);
            box-shadow: 0 6px 20px rgba(0, 0, 0, 0.12);
            border-bottom: 1px solid rgba(0, 0, 0, 0.06);
        }

        /* Left side of the header; min-width: 0 lets the title shrink and truncate. */
        .header-left {
            display: flex;
            gap: 0.75rem;
            align-items: center;
            min-width: 0;
        }

        /* Single-line title that is cut off with an ellipsis when space runs out. */
        .header-title {
            color: #1a202c;
            font-size: 1.25rem;
            font-weight: 700;
            overflow: hidden;
            text-overflow: ellipsis;
            white-space: nowrap;
        }

        /* Row of QR code tiles. */
        .header-qrs {
            display: flex;
            gap: 1rem;
            align-items: center;
        }

        /* One QR tile: image stacked above its label. */
        .qr {
            display: flex;
            flex-direction: column;
            gap: 0.5rem;
            align-items: center;
            text-align: center;
        }

        .qr img {
            width: 120px;
            height: 120px;
            object-fit: contain;
            background: #fff;
            border-radius: 8px;
            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
        }

        /* Thin vertical rule between neighbouring QR tiles. */
        .qr-divider {
            width: 2px;
            height: 80px;
            background-color: rgba(0, 0, 0, 0.1);
        }

        .qr-label {
            color: #2d3748;
            font-size: 0.9rem;
            font-weight: 600;
        }

        /* Push the page content down so it starts below the fixed header. */
        body.with-fixed-header {
            padding-top: 72px;
        }

        /* Narrow screens: smaller offset, smaller QR images, no QR labels. */
        @media (max-width: 768px) {
            body.with-fixed-header {
                padding-top: 64px;
            }

            .qr img {
                width: 40px;
                height: 40px;
            }

            .header-qrs .qr .qr-label {
                display: none;
            }
        }


        /* Base typography for the whole page. */
        body {
            margin-left: auto;
            margin-right: auto;
            font-family: "Titillium Web", "HelveticaNeue-Light", "Helvetica Neue Light", "Helvetica Neue", Helvetica, Arial, "Lucida Grande", sans-serif;
            font-size: 17px;
            font-weight: 300;
        }


        /* Centered content column. */
        .container {
            margin: 0 auto;
            max-width: 1200px;
            padding: 2rem;
        }

        /* Title block at the top of the content column. */
        .header {
            margin-bottom: 3rem;
            color: #000;
            text-align: center;
        }

        .header h1 {
            margin-bottom: 0.5rem;
            font-size: 3.2rem;
            font-weight: 500;
            line-height: 1.2;
        }

        /* Grey tagline under the title. */
        .header h3 {
            margin-bottom: 0.5rem;
            color: #6d6c6c;
            font-size: 1.5rem;
            font-weight: 300;
            line-height: 1.2;
        }

        .header p {
            margin: 0 auto;
            max-width: 700px;
            font-size: 1.15rem;
            opacity: 0.9;
        }

        /* Top-level page section; unstyled wrapper with bottom spacing. */
        .section {
            background: transparent;
            padding: 0;
            margin-bottom: 2rem;
            list-style: none;
        }

        /* White card that groups one set of comparison figures.
           .subsection used to be declared twice in this sheet with
           conflicting background, border-radius, padding and margin values.
           The later rule always won, so only those effective values are kept
           here; list-style is carried over from the earlier rule. */
        .subsection {
            background: #fff;
            border-radius: 16px;
            padding: 1.25rem;
            margin: 1rem 0 1.25rem;
            box-shadow: 0 10px 28px rgba(0, 0, 0, 0.12);
            list-style: none;
        }

        /* Underlined heading of a top-level section. */
        .section-title {
            font-size: 2rem;
            font-weight: 700;
            margin: 0 0 0.75rem 0;
            color: #111;
            padding-bottom: 0.25rem;
            border-bottom: 1px solid #4d4d4d;
        }


        /* Body copy inside a section. */
        .section p {
            font-size: 1.2rem;
            line-height: 1.6;
            color: #000000;
            margin-bottom: 1rem;
        }




        /* Centered heading, sized to its text, placed above a card. */
        .subsection-title {
            display: block;
            position: relative;
            width: fit-content;
            margin: 1rem auto;
            padding-bottom: 0.35rem;

            color: #222;
            font-size: 2rem;
            font-weight: 600;
            text-align: center;
        }

        /* Hairline under the heading, 10% wider than the text on each side. */
        .subsection-title::after {
            content: "";
            position: absolute;
            bottom: 0;
            left: -10%;
            width: 120%;
            height: 0.5px;
            border-radius: 1.5px;
            background: linear-gradient(to right, #636363, #b5b5b5, #636363);
        }

        /* Paragraphs inside a card; placed after `.section p`, so these win. */
        .subsection p {
            margin: 0.5rem 0;
            color: #333;
            font-size: 1rem;
            line-height: 1.6;
        }


        /* Spacing between two cards that directly follow each other. */
        .subsection+.subsection {
            margin-top: 1rem;
        }

        /* Coloured banner shown on top of a media tile. */
        .media-label {
            padding: 0.6rem 0.8rem;
            border-radius: 8px 8px 0 0;
            color: #fff;
            font-size: 1.2rem;
            font-weight: 700;
            text-align: center;
        }

        /* Banner colours: orange for the baseline, blue for our method. */
        .baseline {
            background: linear-gradient(135deg, #ffa94d, #f9844a);
        }

        .ours {
            background: linear-gradient(135deg, #6c8efb, #5a7bca);
        }

        /* Full-width image, capped in height, never cropped. */
        .static-image {
            width: 100%;
            max-height: 600px;
            border-radius: 10px;
            object-fit: contain;
        }

        /* Two equal columns of videos. */
        .video-comparison {
            display: grid;
            gap: 2rem;
            grid-template-columns: 1fr 1fr;
        }

        /* Card around one video; lifts and grows slightly on hover. */
        .video-wrapper {
            overflow: hidden;
            background: #fff;
            border-radius: 15px;
            box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15);
            transition: transform 0.3s ease;
        }

        .video-wrapper:hover {
            transform: translateY(-6px) scale(1.02);
        }


        .video-frame {
            width: 100%;
            height: 300px;
            border: none;
            object-fit: cover;
        }

        /* Column wrapper around one image. */
        .image-wrapper {
            display: flex;
            position: relative;
            flex-direction: column;
            align-items: center;
            padding-top: 2.5rem;
        }

        /* Two equal columns of images, aligned to the top of each row. */
        .image-comparison {
            display: grid;
            gap: 1.5rem 2rem;
            grid-template-columns: 1fr 1fr;
            align-items: start;
        }

        /* Caption holding the text prompt shown above a row of results. */
        .prompt-caption {
            grid-column: span 2;
            text-align: center;
            margin-bottom: 0;
        }

        /* The prompt itself: large, light, italic.
           line-height is deliberately not set here. The former value `1.` is
           not a valid CSS number (a decimal point must be followed by a
           digit), so browsers discarded the declaration and another rule's
           line-height applied. Removing it keeps the rendering unchanged.
           NOTE(review): if tighter captions were intended, set an explicit
           valid value (e.g. 1.1, as on the h3 below). */
        .prompt-caption p {
            font-size: 1.5rem;
            font-weight: 300;
            color: #000205;
            margin: 0;
            font-style: italic;
        }

        .prompt-caption h3 {
            font-size: 1rem;
            font-weight: 500;
            color: #2d3748;
            margin: 0;
            line-height: 1.1;
        }

        /* Static image inside an image wrapper: natural height plus a soft shadow. */
        .image-wrapper img.static-image {
            width: 100%;
            height: auto;
            border-radius: 10px;
            box-shadow: 0 10px 25px rgba(0, 0, 0, 0.1);
        }

        /* Two images side by side, half width each. */
        .image-pair {
            display: flex;
            justify-content: center;
            gap: 1rem;
        }

        .image-pair .static-image {
            width: 50%;
        }

        /* Result row: image, content-sized separator column, then two more images. */
        .three-image-layout {
            display: grid;
            grid-template-columns: 1fr auto 1fr 1fr;
            column-gap: 0.1rem;
            align-items: center;
        }

        /* Let grid children shrink below their content width. */
        .three-image-layout>* {
            min-width: 0;
        }

        /* Arrow placed between the first image and the other two. */
        .separator {
            display: flex;
            justify-content: center;
            align-items: center;
            margin-left: -0.5rem;
            color: #888;
            font-size: 2rem;
        }

        /* Result image: fills its grid cell and keeps its aspect ratio. */
        .img {
            display: block;
            width: 100%;
            height: auto;
            max-height: none;
            border-radius: 8px;
            object-fit: contain;
        }

        /* Figure with the image stacked above its caption. */
        .image-with-caption {
            display: flex;
            flex-direction: column;
            gap: 0.3rem;
            align-items: center;
        }

        .image-with-caption figcaption {
            color: #555;
            font-size: 1.5rem;
            text-align: center;
        }

        /* .section,
        .container {
            overflow-x: hidden;
        } */

        /* Narrow screens: collapse the two-column layouts and shrink the title. */
        @media (max-width: 768px) {
            .header h1 {
                font-size: 2rem;
            }

            .section {
                padding: 1.5rem;
            }

            /* Both comparison grids become a single column. */
            .video-comparison {
                grid-template-columns: 1fr;
            }

            .image-comparison {
                grid-template-columns: 1fr;
            }

            /* Paired images stack vertically. */
            .image-pair {
                flex-direction: column;
            }
        }
    </style>
</head>

<body class="with-fixed-header">

    <!-- Fixed page header: paper title on the left, in-page navigation on the right. -->
    <div class="global-header">
        <div class="header-left">
            <span class="header-title">Rethinking Prompt Design for Inference-time Scaling
                in Text-to-visual Generation</span>
        </div>

        <!-- Exposed as a navigation landmark so assistive technology can jump to it. -->
        <nav class="header-right" aria-label="Page sections">
            <a href="#scaling-behaviors" class="nav-button">Scaling Behaviors</a>
            &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            <a href="#text-to-image" class="nav-button">Text-to-image</a>
            &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            <a href="#text-to-video" class="nav-button">Text-to-video</a>
            &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
            <!-- NOTE(review): refreshPage() is not defined in this part of the
                 file; presumably a script further down provides it. -->
            <button type="button" class="nav-button refresh-button" onclick="refreshPage()">⟳ Refresh</button>

        </nav>
    </div>

    <div class="container">
        <div class="header">
            <h1>Rethinking Prompt Design for Inference-time Scaling <br>in Text-to-visual Generation</h1>

            <h3>TL;DR: Scaling visuals alone plateaus — revise prompts alongside visuals to unlock the best results.</h3>
        </div>


        <!-- Scaling behaviors: one quantitative and one qualitative figure,
             each introduced by a short paragraph. -->
        <div class="section">
            <h2 id="scaling-behaviors" class="section-title">Scaling Behaviors</h2>
            <p> <strong>Scaling visuals with prompts redesigned corresponding to the scaled visuals → break the
                    plateau.</strong><br>
                Simply scaling visuals with a fixed prompt quickly hits a performance ceiling - outputs keep missing
                parts of the prompt even as compute grows. By redesigning the prompt corresponding to the scaled
                visuals, we break through this plateau, achieving steadily improving generations and much higher
                prompt-adherence for both seen and unseen rewards as compute scales.</p>
            <img src="resources/scaling_behaviors/nfe_quan.png"
                alt="Quantitative scaling-behavior comparison between a fixed prompt and redesigned prompts as compute grows"
                class="static-image"
                style="width: 80%; max-height: 400px; display:block; margin:0 auto;">
            <br><br>
            <p>
                <strong> Compute scales, but prompts must too - that’s how we get the shoelace-free shoe.</strong><br>
                No matter how much compute we scale, a fixed prompt still cannot generate <em>a shoe with no laces</em>.
                By redesigning the prompt to explicitly address the missing visual patterns at the new scale and
                emphasize how "no laces" should be realized, we overcome this limitation and produce faithful
                generations.
            </p>
            <!-- The paragraph above is closed before the figure, matching the
                 first paragraph/figure pair in this section. -->
            <img src="resources/scaling_behaviors/nfe_qual.png"
                alt="Qualitative scaling-behavior comparison for the prompt “a shoe with no laces”: fixed prompt versus redesigned prompt"
                class="static-image"
                style="width: 80%; max-height: 400px; display:block; margin:0 auto;">

        </div>

        <br><br>

        <div class="section">
            <h2 id="text-to-image" class="section-title">Scaling in Text-to-image Generation</h2>

            <h3 class="subsection-title">Effect of Prompt Redesign on Flux.1-dev</h3>
            <!-- Each entry is a prompt caption followed by a row of three
                 results: Flux.1-dev, Best-of-N baseline, PRIS. The arrow is
                 decorative and hidden from assistive technology. -->
            <div class="subsection">
                <div class="prompt-caption">
                    <p>"A mother teaches her two children; the one without a hat looks more frustrated."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/base6.png" alt="Flux.1-dev generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/bon6.png" alt="Best-of-N baseline generation for the prompt above" class="img">
                        <figcaption>Best-of-N (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/ours6.png" alt="PRIS generation for the prompt above" class="img">
                        <figcaption>PRIS (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A boy looks at an aquarium with no fish."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/base5.png" alt="Flux.1-dev generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/bon5.png" alt="Best-of-N baseline generation for the prompt above" class="img">
                        <figcaption>Best-of-N (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/ours5.png" alt="PRIS generation for the prompt above" class="img">
                        <figcaption>PRIS (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A pencil holder with more pens than pencils."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/base1.png" alt="Flux.1-dev generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/bon1.png" alt="Best-of-N baseline generation for the prompt above" class="img">
                        <figcaption>Best-of-N (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/ours1.png" alt="PRIS generation for the prompt above" class="img">
                        <figcaption>PRIS (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A bookshelf with no books, only picture frames."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/base2.png" alt="Flux.1-dev generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/bon2.png" alt="Best-of-N baseline generation for the prompt above" class="img">
                        <figcaption>Best-of-N (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/ours2.png" alt="PRIS generation for the prompt above" class="img">
                        <figcaption>PRIS (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"In a bright bedroom, there are no yellow pillows on the bed."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/base3.png" alt="Flux.1-dev generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/bon3.png" alt="Best-of-N baseline generation for the prompt above" class="img">
                        <figcaption>Best-of-N (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/ours3.png" alt="PRIS generation for the prompt above" class="img">
                        <figcaption>PRIS (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"In a room, all the chairs are occupied except one."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/base4.png" alt="Flux.1-dev generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/bon4.png" alt="Best-of-N baseline generation for the prompt above" class="img">
                        <figcaption>Best-of-N (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_base/ours4.png" alt="PRIS generation for the prompt above" class="img">
                        <figcaption>PRIS (Ours)</figcaption>
                    </figure>
                </div>

            </div>

            <br><br>
            <h3 class="subsection-title">Redesigned Prompts &gt; Standard Prompt Expansion</h3>
            <p>We compare our prompt redesign - which analyzes the scaled visuals and updates the prompt accordingly -
                against standard prompt expansion that simply lengthens the original prompt.
                <strong style="color: rgb(200, 0, 0);">The * symbol denotes results using standard prompt expansion where scaling begins from the expanded
                prompt.</strong>
            </p>
            <!-- Each entry is a prompt caption followed by a row of three
                 results, all starred (prompt expansion): Flux.1-dev*,
                 Best-of-N*, PRIS*. The arrow is decorative and hidden from
                 assistive technology. -->
            <div class="subsection">

                <div class="prompt-caption">
                    <p>"The balls on the table have a greater variety of colors than the ones on the floor."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/base5.png" alt="Flux.1-dev (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev*</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/bon5.png" alt="Best-of-N baseline (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Best-of-N* (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/ours5.png" alt="PRIS (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>PRIS* (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"Two excited elephants to the right of a lost giraffe."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/base1.png" alt="Flux.1-dev (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev*</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/bon1.png" alt="Best-of-N baseline (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Best-of-N* (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/ours1.png" alt="PRIS (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>PRIS* (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A monkey with a backpack is jumping from one smaller tree to another larger tree."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/base2.png" alt="Flux.1-dev (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev*</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/bon2.png" alt="Best-of-N baseline (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Best-of-N* (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/ours2.png" alt="PRIS (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>PRIS* (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A farm with a barn that does not shelter any sheep."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/base3.png" alt="Flux.1-dev (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev*</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/bon3.png" alt="Best-of-N baseline (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Best-of-N* (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/ours3.png" alt="PRIS (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>PRIS* (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A bed without the usual cat sleeping on it."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/base4.png" alt="Flux.1-dev (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev*</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/bon4.png" alt="Best-of-N baseline (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Best-of-N* (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/ours4.png" alt="PRIS (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>PRIS* (Ours)</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"The two lay in bed, the long-haired one asleep, the short-haired one still awake."</p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/base6.png" alt="Flux.1-dev (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Flux.1-dev*</figcaption>
                    </figure>

                    <div class="separator" aria-hidden="true">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/bon6.png" alt="Best-of-N baseline (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>Best-of-N* (Baseline)</figcaption>
                    </figure>

                    <figure class="image-with-caption">
                        <img src="resources/t2i_expand/ours6.png" alt="PRIS (expanded prompt) generation for the prompt above" class="img">
                        <figcaption>PRIS* (Ours)</figcaption>
                    </figure>
                </div>

            </div>
            
            <br><br>
            <h3 class="subsection-title">PRIS + T2I Inference-Time Scalings: Superior Results at the
                Same NFE</h3>
            <p>
                Our prompt redesign (PRIS) complements other inference-time scaling methods (e.g., SMC, RBF)
                that expand the visual search space with a fixed prompt, further improving generation quality, including
                both prompt adherence and aesthetics, under the same NFE budget.
            </p>

            <div class="subsection">
                <div class="prompt-caption">
                    <p>"A woman in a wheelchair is taller than the boy next to her."
                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/bon6.png" alt="Main Image" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/smc6.png" alt="Side Image 1" class="img">
                        <figcaption>SMC
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/ours6.png" alt="Side Image 2" class="img">
                        <figcaption>SMC + PRIS</figcaption>
                    </figure>
                </div>


                <div class="prompt-caption">
                    <p>"A child not building a sandcastle at the beach."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/bon5.png" alt="Main Image" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/smc5.png" alt="Side Image 1" class="img">
                        <figcaption>SMC
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/ours5.png" alt="Side Image 2" class="img">
                        <figcaption>SMC + PRIS</figcaption>
                    </figure>
                </div>


                <div class="prompt-caption">
                    <p>"A kitchen with a larger quantity of milk than juice."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/bon1.png" alt="Main Image" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/smc1.png" alt="Side Image 1" class="img">
                        <figcaption>SMC
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/ours1.png" alt="Side Image 2" class="img">
                        <figcaption>SMC + PRIS</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A tissue pack shows two cartoon characters: one in a red dress on the left, one without on the
                        right."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/bon2.png" alt="Main Image" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/smc2.png" alt="Side Image 1" class="img">
                        <figcaption>SMC
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/ours2.png" alt="Side Image 2" class="img">
                        <figcaption>SMC + PRIS</figcaption>
                    </figure>
                </div>

                <!-- Comparison group: the prompt, then its Best-of-N, SMC and SMC + PRIS results. -->
                <div class="prompt-caption">
                    <p>"Four cupcakes with sprinkles on a plate with two forks."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/bon3.png" alt="Best-of-N result for the four cupcakes prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/smc3.png" alt="SMC result for the four cupcakes prompt" class="img">
                        <figcaption>SMC
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/ours3.png" alt="SMC + PRIS result for the four cupcakes prompt" class="img">
                        <figcaption>SMC + PRIS</figcaption>
                    </figure>
                </div>


                <!-- Comparison group: the prompt, then its Best-of-N, SMC and SMC + PRIS results. -->
                <div class="prompt-caption">
                    <p>"In an early morning park, a man in a grey and white tracksuit is not running."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/bon4.png" alt="Best-of-N result for the man in a tracksuit prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/smc4.png" alt="SMC result for the man in a tracksuit prompt" class="img">
                        <figcaption>SMC
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/SMC/ours4.png" alt="SMC + PRIS result for the man in a tracksuit prompt" class="img">
                        <figcaption>SMC + PRIS</figcaption>
                    </figure>
                </div>

            </div>

            <div class="subsection">
                <!-- Six comparison groups. Each is a prompt followed by three results for it:
                     Best-of-N, RBF, and RBF combined with the page's own method. -->
                <div class="prompt-caption">
                    <p>"A teddy dog and a Persian cat watch a burning table, with the teddy dog at a farther distance."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/bon6.png" alt="Best-of-N result for the teddy dog and Persian cat prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/rbf6.png" alt="RBF result for the teddy dog and Persian cat prompt" class="img">
                        <figcaption>RBF
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/ours6.png" alt="RBF + Ours result for the teddy dog and Persian cat prompt" class="img">
                        <figcaption>RBF + Ours</figcaption>
                    </figure>
                </div>


                <div class="prompt-caption">
                    <p>"Four roses in a clear glass vase, all of which are red, and all of which are not open."


                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/bon5.png" alt="Best-of-N result for the four roses prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/rbf5.png" alt="RBF result for the four roses prompt" class="img">
                        <figcaption>RBF
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/ours5.png" alt="RBF + Ours result for the four roses prompt" class="img">
                        <figcaption>RBF + Ours</figcaption>
                    </figure>
                </div>


                <div class="prompt-caption">
                    <p>"A clock with no hands to tell the time."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/bon1.png" alt="Best-of-N result for the clock with no hands prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/rbf1.png" alt="RBF result for the clock with no hands prompt" class="img">
                        <figcaption>RBF
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/ours1.png" alt="RBF + Ours result for the clock with no hands prompt" class="img">
                        <figcaption>RBF + Ours</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"A shoe rack without any red pairs of shoes on it."


                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/bon2.png" alt="Best-of-N result for the shoe rack prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/rbf2.png" alt="RBF result for the shoe rack prompt" class="img">
                        <figcaption>RBF
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/ours2.png" alt="RBF + Ours result for the shoe rack prompt" class="img">
                        <figcaption>RBF + Ours</figcaption>
                    </figure>
                </div>

                <div class="prompt-caption">
                    <p>"There is a large fish aquarium in the center of the luxurious living room, but there are no fish
                        in it."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/bon3.png" alt="Best-of-N result for the fish aquarium prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/rbf3.png" alt="RBF result for the fish aquarium prompt" class="img">
                        <figcaption>RBF
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/ours3.png" alt="RBF + Ours result for the fish aquarium prompt" class="img">
                        <figcaption>RBF + Ours</figcaption>
                    </figure>
                </div>


                <div class="prompt-caption">
                    <p>"Two frogs on a lotus leaf in a pond, and the one who is drinking is in front of the one who is
                        not."

                    </p>
                </div>
                <div class="three-image-layout">
                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/bon4.png" alt="Best-of-N result for the two frogs prompt" class="img">
                        <figcaption>Best-of-N</figcaption>
                    </figure>

                    <div class="separator">→</div>

                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/rbf4.png" alt="RBF result for the two frogs prompt" class="img">
                        <figcaption>RBF
                        </figcaption>
                    </figure>


                    <figure class="image-with-caption">
                        <img src="resources/t2i/RBF/ours4.png" alt="RBF + Ours result for the two frogs prompt" class="img">
                        <figcaption>RBF + Ours</figcaption>
                    </figure>
                </div>

            </div>

        </div>

        <br><br>
        <div class="section">
            <h2 id="text-to-video" class="section-title">Scaling in Text-to-video Generation</h2>


            <h3 class="subsection-title">Prompt Redesign on Wan2.1-1.3B: Better Prompt Adherence</h3>
            <div class="subsection">
                <!-- Each prompt is followed by a Best-of-N video and a PRIS video.
                     The ids s_video1 to s_video4 are looked up by the synchronisation script at the
                     end of the page, which enumerates a fixed count per id prefix; keep the two in
                     step when adding or removing videos. -->
                <div class="video-comparison">

                    <div class="prompt-caption">
                        <p>"Garden, pan-left."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">Best-of-N</div>
                        <video class="video-frame" id="s_video1" autoplay muted playsinline
                            aria-label="Best-of-N video for the prompt: Garden, pan-left.">
                            <source src="resources/t2v/wan_small/va_1.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">PRIS (Ours)</div>
                        <video class="video-frame" id="s_video2" autoplay muted playsinline
                            aria-label="PRIS video for the prompt: Garden, pan-left.">
                            <source src="resources/t2v/wan_small/ours_1.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="prompt-caption">
                        <p>"The glass car window changed into a wooden car window."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">Best-of-N</div>
                        <video class="video-frame" id="s_video3" autoplay muted playsinline
                            aria-label="Best-of-N video for the prompt: The glass car window changed into a wooden car window.">
                            <source src="resources/t2v/wan_small/va_2.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">PRIS (Ours)</div>
                        <video class="video-frame" id="s_video4" autoplay muted playsinline
                            aria-label="PRIS video for the prompt: The glass car window changed into a wooden car window.">
                            <source src="resources/t2v/wan_small/ours_2.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                </div>
            </div>

            <br><br>

            <h3 class="subsection-title">Prompt Redesign on Wan2.1-14B: Better Prompt Adherence</h3>
            <div class="subsection">
                <!-- Each prompt is followed by a Best-of-N video and a PRIS video.
                     The ids video1 to video10 are looked up by the synchronisation script at the
                     end of the page, which enumerates a fixed count per id prefix; keep the two in
                     step when adding or removing videos. -->
                <div class="video-comparison">
                    <div class="prompt-caption">
                        <p>"A person is working on a project, and then suddenly starts cooking dinner."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">Best-of-N</div>
                        <video class="video-frame" id="video1" autoplay muted playsinline
                            aria-label="Best-of-N video for the prompt: A person is working on a project, and then suddenly starts cooking dinner.">
                            <source src="resources/t2v/wan_large/va_0.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">PRIS (Ours)</div>
                        <video class="video-frame" id="video2" autoplay muted playsinline
                            aria-label="PRIS video for the prompt: A person is working on a project, and then suddenly starts cooking dinner.">
                            <source src="resources/t2v/wan_large/ours_0.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="prompt-caption">
                        <p>"A car changes from black to white."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">Best-of-N</div>
                        <video class="video-frame" id="video3" autoplay muted playsinline
                            aria-label="Best-of-N video for the prompt: A car changes from black to white.">
                            <source src="resources/t2v/wan_large/va_1.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">PRIS (Ours)</div>
                        <video class="video-frame" id="video4" autoplay muted playsinline
                            aria-label="PRIS video for the prompt: A car changes from black to white.">
                            <source src="resources/t2v/wan_large/ours_1.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="prompt-caption">
                        <p>"A person is turning on the desk lamp."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">Best-of-N</div>
                        <video class="video-frame" id="video5" autoplay muted playsinline
                            aria-label="Best-of-N video for the prompt: A person is turning on the desk lamp.">
                            <source src="resources/t2v/wan_large/va_2.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">PRIS (Ours)</div>
                        <video class="video-frame" id="video6" autoplay muted playsinline
                            aria-label="PRIS video for the prompt: A person is turning on the desk lamp.">
                            <source src="resources/t2v/wan_large/ours_2.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="prompt-caption">
                        <p>"A person is breaking a chocolate bar into pieces."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">Best-of-N</div>
                        <video class="video-frame" id="video7" autoplay muted playsinline
                            aria-label="Best-of-N video for the prompt: A person is breaking a chocolate bar into pieces.">
                            <source src="resources/t2v/wan_large/va_4.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">PRIS (Ours)</div>
                        <video class="video-frame" id="video8" autoplay muted playsinline
                            aria-label="PRIS video for the prompt: A person is breaking a chocolate bar into pieces.">
                            <source src="resources/t2v/wan_large/ours_4.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="prompt-caption">
                        <p>"The moon changes from silver to yellow."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">Best-of-N</div>
                        <video class="video-frame" id="video9" autoplay muted playsinline
                            aria-label="Best-of-N video for the prompt: The moon changes from silver to yellow.">
                            <source src="resources/t2v/wan_large/va_5.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">PRIS (Ours)</div>
                        <video class="video-frame" id="video10" autoplay muted playsinline
                            aria-label="PRIS video for the prompt: The moon changes from silver to yellow.">
                            <source src="resources/t2v/wan_large/ours_5.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>


                </div>
            </div>

            <br><br>
            <h3 class="subsection-title">PRIS + T2V Inference-Time Scalings: Superior Results at
                the Same NFE</h3>
            <p>
                Our prompt redesign (PRIS) complements other text-to-video (T2V) inference-time scaling methods (e.g.,
                EvoSearch),
                boosting prompt adherence and overall generation quality under the same NFE budget.
            </p>
            <div class="subsection">
                <!-- Each prompt is followed by an EvoSearch video and an EvoSearch + PRIS video.
                     The ids evo_video1 to evo_video4 are looked up by the synchronisation script at
                     the end of the page, which enumerates a fixed count per id prefix; keep the two
                     in step when adding or removing videos. -->
                <div class="video-comparison">
                    <div class="prompt-caption">
                        <p>"A butterfly’s wing change from yellow to white."
                        </p>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label baseline">EvoSearch</div>
                        <video class="video-frame" id="evo_video1" autoplay muted playsinline
                            aria-label="EvoSearch video for the prompt: A butterfly’s wing change from yellow to white.">
                            <source src="resources/t2v/evo/evo_0.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">EvoSearch + PRIS</div>
                        <video class="video-frame" id="evo_video2" autoplay muted playsinline
                            aria-label="EvoSearch + PRIS video for the prompt: A butterfly’s wing change from yellow to white.">
                            <source src="resources/t2v/evo/ours_0.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>


                    <div class="prompt-caption">
                        <p>"A person is opening the window."
                        </p>
                    </div>


                    <div class="video-wrapper">
                        <div class="media-label baseline">EvoSearch</div>
                        <video class="video-frame" id="evo_video3" autoplay muted playsinline
                            aria-label="EvoSearch video for the prompt: A person is opening the window.">
                            <source src="resources/t2v/evo/evo_1.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                    <div class="video-wrapper">
                        <div class="media-label ours">EvoSearch + PRIS</div>
                        <video class="video-frame" id="evo_video4" autoplay muted playsinline
                            aria-label="EvoSearch + PRIS video for the prompt: A person is opening the window.">
                            <source src="resources/t2v/evo/ours_1.mp4" type="video/mp4" />
                            Your browser does not support the video tag.
                        </video>
                    </div>

                </div>
            </div>
        </div>


        <script>
            /**
             * Pushes a history entry whose URL is the current path plus query string
             * (so any #fragment is left out), then smooth-scrolls the window to the top.
             * Presumably wired to the header's refresh button; the caller is not
             * visible in this part of the file.
             */
            function refreshPage() {
                const { pathname, search } = window.location;
                const urlWithoutFragment = pathname + search;

                history.pushState("", document.title, urlWithoutFragment);
                window.scrollTo({ top: 0, behavior: 'smooth' });
            }

            // Comparison videos to keep in lock-step, collected by id prefix and count:
            // video1..video10, s_video1..s_video4, evo_video1..evo_video4.
            // Ids that are not present in the document are dropped so that a markup
            // change cannot crash the code below with a null dereference.
            const videos = [
                ...Array.from({
                    length: 10
                }, (_, i) => document.getElementById(`video${i + 1}`)),
                ...Array.from({
                    length: 4
                }, (_, i) => document.getElementById(`s_video${i + 1}`)),
                ...Array.from({
                    length: 4
                }, (_, i) => document.getElementById(`evo_video${i + 1}`))
            ].filter(v => v !== null);

            // Once every video is able to play, restart them all together.
            Promise.all(
                videos.map(v => new Promise(res => {
                    // This script runs after the markup, so a cached video may already be
                    // playable; waiting for 'canplay' alone would then never resolve.
                    if (v.readyState >= HTMLMediaElement.HAVE_FUTURE_DATA) {
                        res();
                        return;
                    }
                    v.addEventListener('canplay', res, { once: true });
                }))
            ).then(() => {
                syncAndPlay();
            });

            /**
             * Rewinds every collected video to the start and plays it.
             * play() returns a promise in current browsers; a rejection (for example
             * when autoplay is blocked) is swallowed so it does not surface as an
             * unhandled rejection.
             */
            function syncAndPlay() {
                videos.forEach(v => {
                    v.currentTime = 0;
                    const playback = v.play();
                    if (playback !== undefined) {
                        playback.catch(() => {});
                    }
                });
            }

            // When any one video reaches its end, restart the whole set together.
            videos.forEach(v => v.addEventListener('ended', syncAndPlay));

            // Drift correction, once per second: if any loaded video is more than 0.1 s
            // away from the first loaded one, snap all loaded videos to their average.
            setInterval(() => {
                // A video without metadata (still loading, or failed to load) reports
                // position 0 and would pull the average back on every tick.
                const loaded = videos.filter(v => v.readyState >= HTMLMediaElement.HAVE_METADATA);
                if (loaded.length < 2) {
                    return;
                }

                const referenceTime = loaded[0].currentTime;
                const maxDiff = Math.max(...loaded.map(v => Math.abs(v.currentTime - referenceTime)));

                if (maxDiff > 0.1) {
                    const avg = loaded.reduce((sum, v) => sum + v.currentTime, 0) / loaded.length;
                    loaded.forEach(v => v.currentTime = avg);
                }
            }, 1000);
        </script>


</body>

</html>