<!DOCTYPE html>
<!--
  The doctype must be the very first thing in the file. Any markup placed
  before it (the font <link>/<style> used to sit here) makes the parser drop
  into quirks mode and ignore the doctype. The page relies on HTML5 elements
  (<video>, <section>), so the HTML5 doctype is used.
-->
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
    <!-- Declare the encoding up front so the browser never has to guess it. -->
    <meta charset="utf-8">
    <title>Supplementary Material #1249</title>

    <!--
      Chalkduster web font, requested once. The previous extra @import of the
      same URL was redundant.
      NOTE(review): no rule in this file sets font-family to Chalkduster;
      confirm the font is still needed.
    -->
    <link href="https://fonts.cdnfonts.com/css/chalkduster" rel="stylesheet">

    <style>
        /* Every table is a fixed 800px grid centred on the page, with no gaps between cells. */
        table {
            margin: auto;
            border-collapse: collapse;
            border-spacing: 0;
            width: 800px;
        }

        /* Cells add no spacing of their own, so videos sit flush against each other. */
        th, td {
            padding: 0;
            margin: 0;
        }

        /* Videos are centred blocks. */
        video {
            display: block; /* turn off inline-level whitespace under the tag */
            margin: auto;
        }
    </style>
</head>


<body>

<div class="page-container">
    <!-- Page title block. Centring is done with CSS instead of the obsolete align attribute. -->
    <h1 style="text-align: center">Taming Diffusion Transformer for Efficient Mobile Video Generation in Seconds</h1>
    <h2 style="text-align: center">Paper ID #1249 </h2>
    <h2 style="text-align: center">Supplementary Material</h2>

    <p style="text-align: center">&nbsp;</p>

    <!-- In-page table of contents. Each link targets the id of a section heading further down. -->
    <nav aria-label="Contents">
        <ul>
            <li><a href="#comparison_container" style="font-size:x-large">Comparisons</a></li>
            <li><a href="#more_results_container" style="font-size:x-large">More Results</a></li>
            <li><a href="#demo_container" style="font-size:x-large">Mobile Demo</a></li>
        </ul>
    </nav>

    <!------------------ BEGIN SECTION ------------------>
    <p>&nbsp;</p>
    <p>&nbsp;</p>
    <p>&nbsp;</p>
    <hr>
    <!--
      "Comparisons" section heading. The h2 id is the target of the table of
      contents link; the empty anchor keeps the older #comparison_results
      fragment working. Centred with CSS rather than the obsolete align attribute.
    -->
    <h2 id="comparison_container" style="text-align: center"><a name="comparison_results" id="comparison_results"></a>Comparisons</h2>

    <table>
        <tr>
            <td colspan="4" style="text-align: left; padding-bottom: 30px">
                We provide video comparison results of our method with other methods in the paper. <br>
                For LTX-Video, we follow the diffuser example to generate samples using the same prompt.
            </td>
        </tr>
        <!--
          Column headers for the comparison grid: one column per method. Each
          bracketed number links to the matching entry in the References list.
          scope="col" tells assistive technology that a header labels its column.
        -->
        <tr>
            <th scope="col">LTX-Video<a href="#ref-ltx-video">[1]</a></th>
            <th scope="col">CogVideoX-2B<a href="#ref-cogvideox">[2]</a></th>
            <th scope="col">Wan2.1-1.3B<a href="#ref-wan2.1">[3]</a></th>
            <th scope="col"><p style="color:red;">Ours</p></th>
        </tr>
        <!--
          Comparison 1: the same prompt rendered by four methods, in the column
          order LTX-Video, CogVideoX-2B, Wan2.1-1.3B, Ours. The video cells are
          data cells, so they use td rather than th.
          The first video carries id m_comp_1 and the other three point at it
          through data-master-id.
          NOTE(review): no script in this file reads data-master-id; presumably an
          external playback-sync script does. Confirm.
        -->
        <tr>
            <td>
                <video id="m_comp_1" height="200"
                       src="assets/comparison/ltx/3D%20animation%20of%20a%20small,%20round,%20fluffy%20creature%20with%20big,%20expressive%20eyes%20explores%20a%20vibrant,%20enchanted%20forest.%20The%20creature,%20a%20whimsical%20blend%20of%20a%20rabbit%20and%20a%20squirrel,%20has%20soft%20blue%20fur%20and%20a%20bush-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_1" height="200"
                       src="assets/comparison/cogvideo-2b/3D%20animation%20of%20a%20small,%20round,%20fluffy%20creature%20with%20big,%20expressive%20eyes%20explores%20a%20vibrant,%20enchanted%20forest.%20The%20creature,%20a%20whimsical%20blend%20of%20a%20rabbit%20and%20a%20squirrel,%20has%20soft%20blue%20fur%20and%20a%20bush-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_1" height="200"
                       src="assets/comparison/wan/3D%20animation%20of%20a%20small,%20round,%20fluffy%20creature%20with%20big,%20expressive%20eyes%20explores%20a%20vibrant,%20enchanted%20forest.%20The%20creature,%20a%20whimsical%20blend%20of%20a%20rabbit%20and%20a%20squirrel,%20has%20soft%20blue%20fur%20and%20a%20bush-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_1" height="200"
                       src="assets/comparison/ours/3D%20animation%20of%20a%20small,%20round,%20fluffy%20creature%20with%20big,%20expressive%20eyes%20explores%20a%20vibrant,%20enchanted%20forest.%20The%20creature,%20a%20whimsical%20blend%20of%20a%20rabbit%20and%20a%20squirrel,%20has%20soft%20blue%20fur%20and%20a%20bush-0.mp4"
                       autoplay loop muted></video>
            </td>
        </tr>
        <!--
          Prompt caption spanning all four columns.
          NOTE(review): the text ends at "a bush.", which matches the truncated
          asset file names; the original prompt was presumably longer. Confirm.
        -->
        <tr>
            <td colspan="4" style="text-align: left; padding-bottom: 30px">
                <div style="display: inline-block">
                    <b>Prompt:</b>
                    3D animation of a small, round, fluffy creature with big, expressive eyes explores a vibrant,
                    enchanted forest. The creature, a whimsical blend of a rabbit and a squirrel, has soft blue fur and
                    a bush.
                </div>
            </td>
        </tr>

        <!--
          Comparison 2: the same prompt rendered by four methods, in the column
          order LTX-Video, CogVideoX-2B, Wan2.1-1.3B, Ours. The video cells are
          data cells, so they use td rather than th.
          The first video carries id m_comp_2 and the other three point at it
          through data-master-id.
          NOTE(review): no script in this file reads data-master-id; confirm an
          external script uses it.
        -->
        <tr>
            <td>
                <video id="m_comp_2" height="200"
                       src="assets/comparison/ltx/A%20cat%20sitting%20at%20a%20grand%20piano,%20elegantly%20playing%20a%20classical%20piece%20with%20its%20paws.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_2" height="200"
                       src="assets/comparison/cogvideo-2b/A%20cat%20sitting%20at%20a%20grand%20piano,%20elegantly%20playing%20a%20classical%20piece%20with%20its%20paws.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_2" height="200"
                       src="assets/comparison/wan/A%20cat%20sitting%20at%20a%20grand%20piano,%20elegantly%20playing%20a%20classical%20piece%20with%20its%20paws.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_2" height="200"
                       src="assets/comparison/ours/A%20cat%20sitting%20at%20a%20grand%20piano,%20elegantly%20playing%20a%20classical%20piece%20with%20its%20paws.-0.mp4"
                       autoplay loop muted></video>
            </td>
        </tr>
        <!-- Prompt caption spanning all four columns. -->
        <tr>
            <td colspan="4" style="text-align: left; padding-bottom: 30px">
                <div style="display: inline-block">
                    <b>Prompt:</b>
                    A cat sitting at a grand piano, elegantly playing a classical piece with its paws.
                </div>
            </td>
        </tr>

        <!--
          Comparison 3: the same prompt rendered by four methods, in the column
          order LTX-Video, CogVideoX-2B, Wan2.1-1.3B, Ours. The video cells are
          data cells, so they use td rather than th.
          The first video carries id m_comp_3 and the other three point at it
          through data-master-id.
          NOTE(review): no script in this file reads data-master-id; confirm an
          external script uses it.
        -->
        <tr>
            <td>
                <video id="m_comp_3" height="200"
                       src="assets/comparison/ltx/A%20corgi%20vlogging%20itself%20in%20tropical%20Maui.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_3" height="200"
                       src="assets/comparison/cogvideo-2b/A%20corgi%20vlogging%20itself%20in%20tropical%20Maui.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_3" height="200"
                       src="assets/comparison/wan/A%20corgi%20vlogging%20itself%20in%20tropical%20Maui.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_3" height="200"
                       src="assets/comparison/ours/A%20corgi%20vlogging%20itself%20in%20tropical%20Maui.-0.mp4"
                       autoplay loop muted></video>
            </td>
        </tr>
        <!-- Prompt caption spanning all four columns. -->
        <tr>
            <td colspan="4" style="text-align: left; padding-bottom: 30px">
                <div style="display: inline-block">
                    <b>Prompt:</b>
                    A corgi vlogging itself in tropical Maui.
                </div>
            </td>
        </tr>


        <!--
          Comparison 4: the same prompt rendered by four methods, in the column
          order LTX-Video, CogVideoX-2B, Wan2.1-1.3B, Ours. The video cells are
          data cells, so they use td rather than th.
          The first video carries id m_comp_4 and the other three point at it
          through data-master-id.
          NOTE(review): no script in this file reads data-master-id; confirm an
          external script uses it.
          NOTE(review): the Wan file name spells the prompt "30-year-old spaceman"
          while the other three use "30 year old space man". Left untouched because
          the asset names cannot be checked from here; confirm the Wan file exists
          under this exact name.
        -->
        <tr>
            <td>
                <video id="m_comp_4" height="200"
                       src="assets/comparison/ltx/A%20movie%20trailer%20featuring%20the%20adventures%20of%20the%2030%20year%20old%20space%20man%20wearing%20a%20red%20wool%20knitted%20motorcycle%20helmet,%20blue%20sky,%20salt%20desert,%20cinematic%20style,%20shot%20on%2035mm%20film,%20vivid%20colors.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_4" height="200"
                       src="assets/comparison/cogvideo-2b/A%20movie%20trailer%20featuring%20the%20adventures%20of%20the%2030%20year%20old%20space%20man%20wearing%20a%20red%20wool%20knitted%20motorcycle%20helmet,%20blue%20sky,%20salt%20desert,%20cinematic%20style,%20shot%20on%2035mm%20film,%20vivid%20colors.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_4" height="200"
                       src="assets/comparison/wan/A%20movie%20trailer%20featuring%20the%20adventures%20of%20the%2030-year-old%20spaceman%20wearing%20a%20red%20wool%20knitted%20motorcycle%20helmet,%20blue%20sky,%20salt%20desert,%20cinematic%20style,%20shot%20on%2035mm%20film,%20vivid%20colors.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_4" height="200"
                       src="assets/comparison/ours/A%20movie%20trailer%20featuring%20the%20adventures%20of%20the%2030%20year%20old%20space%20man%20wearing%20a%20red%20wool%20knitted%20motorcycle%20helmet,%20blue%20sky,%20salt%20desert,%20cinematic%20style,%20shot%20on%2035mm%20film,%20vivid%20colors.-0.mp4"
                       autoplay loop muted></video>
            </td>
        </tr>
        <!-- Prompt caption spanning all four columns. -->
        <tr>
            <td colspan="4" style="text-align: left; padding-bottom: 30px">
                <div style="display: inline-block">
                    <b>Prompt:</b>
                    A movie trailer featuring the adventures of the 30-year-old spaceman wearing a red wool knitted
                    motorcycle helmet, blue sky, salt desert, cinematic style, shot on 35mm film, vivid colors.
                </div>
            </td>
        </tr>


        <!--
          Comparison 5: the same prompt rendered by four methods, in the column
          order LTX-Video, CogVideoX-2B, Wan2.1-1.3B, Ours. The video cells are
          data cells, so they use td rather than th.
          The first video carries id m_comp_5 and the other three point at it
          through data-master-id.
          NOTE(review): no script in this file reads data-master-id; confirm an
          external script uses it.
        -->
        <tr>
            <td>
                <video id="m_comp_5" height="200"
                       src="assets/comparison/ltx/A%20pair%20of%20lovebirds%20preening%20each%20other's%20feathers.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_5" height="200"
                       src="assets/comparison/cogvideo-2b/A%20pair%20of%20lovebirds%20preening%20each%20other's%20feathers.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_5" height="200"
                       src="assets/comparison/wan/A%20pair%20of%20lovebirds%20preening%20each%20other's%20feathers.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_5" height="200"
                       src="assets/comparison/ours/A%20pair%20of%20lovebirds%20preening%20each%20other's%20feathers.-0.mp4"
                       autoplay loop muted></video>
            </td>
        </tr>
        <!-- Prompt caption spanning all four columns. -->
        <tr>
            <td colspan="4" style="text-align: left; padding-bottom: 30px">
                <div style="display: inline-block">
                    <b>Prompt:</b>
                    A pair of lovebirds preening each other's feathers.
                </div>
            </td>
        </tr>


        <!--
          Comparison 6: the same prompt rendered by four methods, in the column
          order LTX-Video, CogVideoX-2B, Wan2.1-1.3B, Ours. The video cells are
          data cells, so they use td rather than th.
          The first video carries id m_comp_6 and the other three point at it
          through data-master-id.
          NOTE(review): no script in this file reads data-master-id; confirm an
          external script uses it.
        -->
        <tr>
            <td>
                <video id="m_comp_6" height="200"
                       src="assets/comparison/ltx/A%20skeleton%20wearing%20a%20flower%20hat%20and%20sunglasses%20dances%20in%20the%20wild%20at%20sunset.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_6" height="200"
                       src="assets/comparison/cogvideo-2b/A%20skeleton%20wearing%20a%20flower%20hat%20and%20sunglasses%20dances%20in%20the%20wild%20at%20sunset.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_6" height="200"
                       src="assets/comparison/wan/A%20skeleton%20wearing%20a%20flower%20hat%20and%20sunglasses%20dances%20in%20the%20wild%20at%20sunset.-0.mp4"
                       autoplay loop muted></video>
            </td>
            <td>
                <video data-master-id="m_comp_6" height="200"
                       src="assets/comparison/ours/A%20skeleton%20wearing%20a%20flower%20hat%20and%20sunglasses%20dances%20in%20the%20wild%20at%20sunset.-0.mp4"
                       autoplay loop muted></video>
            </td>
        </tr>
        <!-- Prompt caption spanning all four columns. -->
        <tr>
            <td colspan="4" style="text-align: left; padding-bottom: 30px">
                <div style="display: inline-block">
                    <b>Prompt:</b>
                    A skeleton wearing a flower hat and sunglasses dances in the wild at sunset.
                </div>
            </td>
        </tr>

    </table>

    <!------------------ END SECTION ------------------>

    <!------------------ BEGIN SECTION ------------------>
    <p>&nbsp;</p>
    <p>&nbsp;</p>
    <p>&nbsp;</p>
    <hr>
    <!--
      "More Results" section heading. The h2 id is the target of the table of
      contents link; the empty anchor keeps the older #more_results fragment
      working. Centred with CSS rather than the obsolete align attribute.
    -->
    <h2 id="more_results_container" style="text-align: center"><a name="more_results" id="more_results"></a>More Results</h2>

    <!--
      Gallery of 27 landscape samples (sample_01 to sample_27), three per row,
      each shown at a height of 200px. The videos are data, not headers, so the
      cells are td rather than th.
    -->
    <table>
        <tr>
            <td colspan="3">
                In this section, we provide more results of our model. <br>
            </td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_01.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_02.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_03.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_04.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_05.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_06.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_07.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_08.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_09.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_10.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_11.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_12.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_13.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_14.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_15.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_16.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_17.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_18.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_19.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_20.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_21.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_22.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_23.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_24.mp4" autoplay loop muted></video></td>
        </tr>

        <tr>
            <td><video height="200" src="assets/ours/horizontal/sample_25.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_26.mp4" autoplay loop muted></video></td>
            <td><video height="200" src="assets/ours/horizontal/sample_27.mp4" autoplay loop muted></video></td>
        </tr>
    </table>

    <!--
      Gallery of 15 portrait samples (sample_01 to sample_15) from the mobile
      model, five per row, each shown at a height of 240px. The videos are data,
      not headers, so the cells are td rather than th.
    -->
    <table>
        <tr>
            <td colspan="5">
                Below, we provide more results of our mobile model. <br>
            </td>
        </tr>

        <tr>
            <td><video height="240" src="assets/ours/vertical/sample_01.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_02.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_03.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_04.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_05.mp4" autoplay loop muted></video></td>
        </tr>
        <tr>
            <td><video height="240" src="assets/ours/vertical/sample_06.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_07.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_08.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_09.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_10.mp4" autoplay loop muted></video></td>
        </tr>
        <tr>
            <td><video height="240" src="assets/ours/vertical/sample_11.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_12.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_13.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_14.mp4" autoplay loop muted></video></td>
            <td><video height="240" src="assets/ours/vertical/sample_15.mp4" autoplay loop muted></video></td>
        </tr>
    </table>

    <!--  <h3 align="center">Vertical Results</h3>-->
    <!--  <table>-->

    <!--    <tr>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_01.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_02.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_03.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_04.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_05.mp4" autoplay loop muted/></th>-->
    <!--      </tr>-->

    <!--      <tr>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_06.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_07.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_08.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_09.mp4" autoplay loop muted/></th>-->
    <!--        <th><video width="200" src="assets/ours/vertical/sample_10.mp4" autoplay loop muted/></th>-->
    <!--      </tr>-->

    <!--  </table>-->

    <!------------------ END SECTION ------------------>

    <p>&nbsp;</p>
    <p>&nbsp;</p>
    <p>&nbsp;</p>
    <hr>
    <!--
      Mobile demo: a single screen recording with native playback controls.
      The heading id is the target of the "Mobile Demo" table of contents link.
      NOTE(review): the class names on the wrappers (section, hero, columns, ...)
      are not styled by any rule in this file; confirm whether an external
      stylesheet is expected.
    -->
    <section class="section hero is-light">
        <div class="hero-body">
            <div class="container is-max-desktop">
                <div class="columns is-centered has-text-centered">
                    <div class="column is-center">
                        <h2 id="demo_container" class="title is-3" style="text-align: center">Mobile Demo on iPhone 16
                            Pro Max</h2>
                        <table>
                            <tbody>
                            <tr>
                                <!-- Centring moved from the obsolete tbody align attribute to CSS. -->
                                <td style="text-align: center">
                                    <!--
                                      The 30% width is expressed in CSS: the width
                                      attribute of video only accepts a plain pixel
                                      integer. The download button is hidden where
                                      the browser honours controlsList.
                                    -->
                                    <video style="width: 30%" src="assets/demo/demo.mp4" preload="auto"
                                           playsinline controls loop muted autoplay controlsList="nodownload"></video>
                                </td>
                            </tr>
                            </tbody>
                        </table>

                    </div>
                </div>
            </div>
        </div>
    </section>
    <p>&nbsp;</p>
    <p>&nbsp;</p>
    <p>&nbsp;</p>

    <!--
      Reference list. Each empty anchor is the target of a bracketed citation
      link in the comparison table header. The arXiv URLs are real links so
      readers can follow them.
    -->
    <h2 id="ref" style="text-align: left">References</h2>

    <p>
        <a name="ref-ltx-video" id="ref-ltx-video"></a>
        [1] Yoav HaCohen et al. "LTX-Video: Realtime Video Latent Diffusion." <a href="https://arxiv.org/abs/2501.00103">https://arxiv.org/abs/2501.00103</a> (2024).
    </p>

    <p>
        <a name="ref-cogvideox" id="ref-cogvideox"></a>
        [2] Yang, Zhuoyi, et al. "CogVideoX: Text-to-Video Diffusion Models with An Expert Transformer." ICLR (2025).
    </p>

    <p>
        <a name="ref-wan2.1" id="ref-wan2.1"></a>
        [3] Team Wan, et al. "Wan: Open and Advanced Large-Scale Video Generative Models." <a href="https://arxiv.org/abs/2503.20314">https://arxiv.org/abs/2503.20314</a> (2025).
    </p>


</div>

</body>
</html>