<!DOCTYPE html>
<!-- The page text is English; declaring it lets assistive technology and translation tools pick the right language. -->
<html lang="en">

<head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>Frame-Level Captions for Long Video Generation with Complex Multi Scenes</title>

    <!-- Page description plus the Open Graph title/description properties. -->
    <meta name="description" content="Frame-Level Captions for Long Video Generation with Complex Multi Scenes">
    <meta property="og:title" content="Frame-Level Captions for Long Video Generation with Complex Multi Scenes">
    <meta property="og:description" content="Frame-Level Captions for Long Video Generation with Complex Multi Scenes">

    <!-- Web fonts served by Google Fonts. -->
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro">
    <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Playfair+Display:ital,wght@1,400&display=swap">

    <!-- Stylesheets; the order is preserved because later sheets win ties in the cascade. -->
    <link rel="stylesheet" href="static/css/bulma.min.css">
    <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
    <link rel="stylesheet" href="static/css/bulma-slider.min.css">
    <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
    <link rel="stylesheet" href="static/css/index.css">
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/dreampulse/computer-modern-web-font@master/fonts.css">






    <style>
        /* Row of side-by-side demo videos, centred with a small gutter. */
        .video-container {
            display: flex;
            justify-content: center;
            gap: 4px;
        }

        /* Italic display text. The generic `serif` fallback keeps a sensible
           typeface if the Playfair Display web font fails to load. */
        .italic {
            font-family: 'Playfair Display', serif;
            font-style: italic;
        }

        /* Centred page column capped at 1200px.
           NOTE(review): `.container` is also a Bulma class name used throughout
           this page, so this rule presumably overrides Bulma's sizing; confirm. */
        .container {
            max-width: 1200px;
            margin: 0 auto;
            padding: 0 20px;
        }

        /* Links keep the same colour whether or not they have been visited. */
        a {
            color: #ddd;
            text-decoration: underline;
        }

        a:visited {
            color: #ddd;
        }

        /* Media containers: flex layout for images/videos with captions.
           NOTE(review): none of these classes is used by live (uncommented)
           markup visible in this file; confirm they are still needed. */

        /* Rounded, clipped box that fills its flex parent, at least 300px tall. */
        .vimeo-container {
            border-radius: 8px;
            overflow: hidden;
            flex: 1 1 auto;
            min-height: 300px;
            width: 100%;
        }

        /* Single-row flex layout with 20px gaps; items stretch to equal height. */
        .media-grid {
            display: flex;
            gap: 20px;
            margin: 40px 0;
            align-items: stretch;
            flex-wrap: nowrap;
        }

        /* Opt-in modifier that lets the grid wrap onto several rows. */
        .media-grid.wrap {
            flex-wrap: wrap;
        }

        /* One grid cell: media stacked above its caption, horizontally centred. */
        .media-item {
            display: flex;
            flex-direction: column;
            align-items: center;
            width: 100%;
        }

        /* Equal-width cells; min-width: 0 lets a cell shrink below its content width. */
        .media-grid .media-item {
            flex: 1 1 0;
            min-width: 0;
        }

        /* A lone cell takes the whole row. */
        .media-grid>.media-item:only-child {
            flex: 1 0 100%;
            max-width: 100%;
        }

        /* Centres its content on both axes and clips anything that overflows. */
        .image-container {
            height: 100%;
            display: flex;
            align-items: center;
            justify-content: center;
            overflow: hidden;
            width: 100%;
        }

        /* Media never exceeds its cell and keeps its aspect ratio (object-fit: contain). */
        .media-item img,
        .media-item video {
            max-height: 100%;
            max-width: 100%;
            object-fit: contain;
            border-radius: 8px;
            display: block;
        }

        /* Centred caption text shown under a media element. */
        .media-caption {
            margin-top: 8px;
            font-size: 16px;
            color: #ddd;
            text-align: center;
            width: 100%;
        }

        /* Flex column grid: full-width row of columns separated by 20px gaps. */
        .flex-column-grid {
            display: flex;
            gap: 20px;
            margin: 40px 0;
            width: 100%;
        }

        /* A column is itself a vertical flex container with centred children.
           NOTE(review): `.column` is also the class on the Bulma column wrappers in
           this page's title and abstract markup, so this rule applies to them too;
           confirm that is intended. */
        .column {
            flex: 1;
            display: flex;
            flex-direction: column;
            align-items: center;
        }

        /* Dark, rounded, clipped frame around a column's media. */
        .column .image-container {
            width: 100%;
            position: relative;
            border-radius: 8px;
            overflow: hidden;
            background-color: #0f1112;
        }

        /* Media fills the column width; height follows the aspect ratio. */
        .column video,
        .column img {
            width: 100%;
            height: auto;
            display: block;
            border-radius: 8px;
        }

        /* Slightly larger gap above captions inside a column (10px instead of 8px). */
        .column .media-caption {
            margin-top: 10px;
        }

        /* Video player */

        /* Vertical stack that holds the player and the blocks placed under it. */
        .scene-video-container {
            width: 100%;
            margin-bottom: 15px;
            border-radius: 8px;
            display: flex;
            flex-direction: column;
            gap: 20px;
        }

        /* Shared frame for the native player and the Vimeo embed:
           full width, rounded corners, overflow clipped. */
        .video-player-section,
        .vimeo-embed {
            position: relative;
            width: 100%;
            overflow: hidden;
            border-radius: 8px;
        }

        /* Black backdrop behind the native player only. */
        .video-player-section {
            background-color: black;
        }

        .video-player-section video {
            display: block;
            width: 100%;
            border-radius: 8px;
        }

        /* Controls are pulled up 10px towards the block above them. */
        .video-controls {
            position: relative;
            margin-top: -10px;
        }

        /* Clickable track; the progress bar is absolutely positioned inside it. */
        .video-timeline {
            position: relative;
            width: 100%;
            height: 6px;
            border-radius: 3px;
            background-color: rgba(255, 255, 255, 0.2);
            cursor: pointer;
        }

        /* Starts at zero width; width changes are animated. */
        .video-progress {
            position: absolute;
            width: 0%;
            height: 100%;
            border-radius: 3px;
            background-color: white;
            transition: width 0.1s ease;
        }

        /* Right-aligned time readout. */
        .video-time {
            margin-top: 4px;
            font-size: 16px;
            color: white;
            text-align: right;
        }

        /* Scenes list: a fixed-height, horizontally scrolling strip of scene cards. */
        .scenes-list {
            width: 100%;
            height: 90px;
            overflow-x: auto;
            overflow-y: hidden;
            background-color: rgba(30, 30, 30, 0.9);
            border-radius: 8px;
            display: flex;
            flex-direction: row;
            margin-top: -10px;
            padding: 5px;
        }

        /* One card: fixed 100px width (min and max are equal), content top-left aligned. */
        .scene-item {
            display: flex;
            flex-direction: column;
            padding: 5px;
            border-right: 1px solid #2c3237;
            cursor: pointer;
            transition: background-color 0.2s;
            min-width: 100px;
            max-width: 100px;
            border-radius: 4px;
            margin: 0 2px;
            align-items: flex-start;
            justify-content: flex-start;
        }

        /* Hovered and active cards share the same highlight colour. */
        .scene-item:hover,
        .scene-item.active {
            background-color: #2c3237;
        }

        /* 80x45 thumbnail box (16:9) that centres and clips its content. */
        .scene-thumbnail {
            /* Holds a static image or video thumbnail. */
            width: 80px;
            height: 45px;
            display: flex;
            align-items: center;
            justify-content: center;
            margin: 0 0 4px 0;
            border-radius: 4px;
            overflow: hidden;
            background-color: #0f1112;
        }

        /* Video thumbnail fills and crops to the box; pointer events are disabled on it. */
        .thumbnail-video {
            width: 100%;
            height: 100%;
            object-fit: cover;
            pointer-events: none;
            -webkit-touch-callout: none;
        }

        /* Image thumbnail is letterboxed inside the box; border-radius is reset to 0. */
        .scene-thumbnail img {
            max-width: 100%;
            max-height: 100%;
            object-fit: contain;
            display: block;
            border-radius: 0;
        }

        /* Left-aligned text area under the thumbnail. */
        .scene-info {
            width: 100%;
            text-align: left;
        }

        /* Single-line labels, truncated with an ellipsis when too long. */
        .scene-title,
        .scene-description {
            font-size: 10px;
            white-space: nowrap;
            overflow: hidden;
            text-overflow: ellipsis;
        }

        /* The description is rendered in a muted grey. */
        .scene-description {
            color: #a0a0a0;
        }
    </style>
</head>

<body>
    <!-- Title banner: the paper title as a centred h1; no author list is rendered here. -->
    <section class="hero">
        <div class="hero-body">
            <div class="container is-max-desktop">
                <div class="columns is-centered">
                    <div class="column has-text-centered">
                        <h1 class="title is-2 publication-title">
                            Frame-Level Captions for Long Video Generation with Complex Multi Scenes
                        </h1>
                    </div>
                </div>
            </div>
        </div>
    </section>

    <!-- Abstract section: the paper summary as a single justified paragraph. -->
    <section class="section hero is-light">
        <div class="container is-max-desktop">
            <div class="columns is-centered has-text-centered">
                <div class="column is-four-fifths">
                    <h2 class="title is-3">Abstract</h2>
                    <div class="content has-text-justified">
                        <p>
                            Generating long videos that can show complex stories, like movie scenes from scripts, has
                            great promise and offers much more than short clips.
                            However, current methods that use autoregression with diffusion models often struggle
                            because their step-by-step process naturally leads to a serious error accumulation (drift).
                            Also, many existing ways to make long videos focus on single, continuous scenes, making them
                            less useful for stories with many events and changes.
                            This paper introduces a new approach to solve these problems. First, we propose a novel way
                            to annotate datasets at the frame-level,
                            providing detailed text guidance needed for making complex, multi-scene long videos. This
                            detailed guidance works with a Frame-Level Attention Mechanism to make sure text and video
                            match precisely.
                            In inference, we develop Parallel Multi-Window Denoising, a new method that handles
                            a long video as multiple overlapping windows.
                            These windows are processed in parallel, and the noise prediction in overlapping areas is
                            averaged, which allows bidirectional information interaction and introduces no error
                            accumulation.
                            A key feature is that each part (frame) within these windows can be guided by its own
                            distinct text prompt. Our training uses Diffusion Forcing to provide the model with the
                            ability to handle time flexibly.
                            We tested our approach on difficult VBench 2.0 benchmarks ("Complex Plots" and "Complex
                            Landscapes") based on the WanX2.1-T2V-1.3B model. The results show our method is better at
                            following instructions in complex, changing scenes and creates high-quality long videos.
                            We plan to share our dataset annotation methods and trained models with the research
                            community.
                        </p>
                    </div>
                </div>
            </div>
        </div>
    </section>


    <!-- Section A: keyframe generation demos (three looping clips followed by one figure). -->
    <section class="section hero">
        <div class="container has-text-centered">
            <h2 class="title is-3">A. Keyframe Generation; Quick changes of multiple scenes (up to 6 scenes) in 5s short
                video</h2>
            <!-- The clips are muted so autoplay is permitted; playsinline lets them
                 start in place on iOS Safari instead of needing fullscreen playback. -->
            <div class="video-container">
                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source src="static/vis/key_frame_1.mp4" type="video/mp4">
                    </video>
                </div>
                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source src="static/vis/key_frame_2.mp4" type="video/mp4">
                    </video>
                </div>
                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source src="static/vis/key_frame_3.mp4" type="video/mp4">
                    </video>
                </div>
            </div>
            <br>
            <div class="image-wrapper">
                <!-- NOTE(review): alt text is inferred from the file name and section title; refine it to describe the figure. -->
                <img src="static/picture/key_frame.jpeg" width="100%" alt="Keyframe generation figure">
            </div>
            <br>
        </div>
    </section>
    <br><br><br><br><br>



    <!-- Section B: first-frame/last-frame-to-video demos (two looping clips). -->
    <section class="section hero">
        <div class="container has-text-centered">
            <h2 class="title is-3">B. First-Frame-Last-Frame-to-Video (5s short video)</h2>
            <!-- The clips are muted so autoplay is permitted; playsinline lets them
                 start in place on iOS Safari instead of needing fullscreen playback. -->
            <div class="video-container">
                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source src="static/vis/first_frame_last_frame_to_video_1.mp4" type="video/mp4">
                    </video>
                </div>

                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source src="static/vis/first_frame_last_frame_to_video_2.mp4" type="video/mp4">
                    </video>
                </div>
            </div>

            <br>
        </div>
    </section>
    <br><br><br><br><br>

    <!-- Section C: 5s video-level prompt vs 30s video-level prompt vs 30s frame-level prompt. The commented-out markup that follows is a disabled scene-player prototype. -->
    <!-- <div class="container">
        <div class="media-grid">
            <div class="media-item">
                <div class="image-container">
                    <div class="scene-video-container">
                        <div class="video-player-section">
                            <video id="sceneVideoPlayer" loop muted playsinline controls preload="auto"
                                src="static\vis/3_Complex_Plot_13_1A young adventurer set off in search of five mysterious trial towers. He first arrived at the fire tower.mp4"
                                type="video/mp4"></video>
                        </div>

                        <div class="scenes-list">
                            <div class="scene-item active" data-frame="0">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">
                                    <div class="scene-description">0</div>
                                </div>
                            </div>

                            <div class="scene-item active" data-frame="17">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">
                                    <div class="scene-description">17</div>
                                </div>
                            </div>



                            <div class="scene-item" data-frame="37">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">37</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="57">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">57</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="77">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">77</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="97">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">97</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="117">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">117</div>
                                </div>
                            </div>

                            <div class="scene-item" data-frame="137">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">137</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="157">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">157</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="177">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">177</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="197">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">197</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="217">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">217</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="237">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">237</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="257">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">257</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="277">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">277</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="297">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">297</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="317">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">317</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="337">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">337</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="357">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">357</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="377">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">377</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="397">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">397</div>
                                </div>
                            </div>


                            <div class="scene-item" data-frame="481">
                                <div class="scene-thumbnail">
                                    <img src="static\picture/1.png" alt="Scene 1">
                                </div>
                                <div class="scene-info">

                                    <div class="scene-description">481</div>
                                </div>
                            </div>

                        </div>

                        <div class="video-controls">
                            <div class="video-timeline">
                                <div class="video-progress"></div>
                            </div>
                            <div class="video-time">0:00 / 0:00</div>
                        </div>

                        <h2 class="content has-text-justified">
                            A young adventurer set off in search of five mysterious trial towers. He first arrived at
                            the fire tower, solved the riddle of the fire giant, and earned the flame symbol. He then
                            crossed the icy lands, faced the ice queen, and earned the frost symbol. Next, he entered an
                            ancient temple, defeated the necromancer, and obtained the soul symbol. He then ventured
                            into the thunder mountain range, underwent the lightning god's trial, and earned the thunder
                            symbol. Finally, he reached the tower of light, faced the mysterious celestial being, and
                            earned the light symbol. The adventurer combined the five symbols, uncovered a hidden world,
                            and restored the lost glory.
                        </h2>


                    </div>
                </div>
            </div>
        </div>
    </div> -->

    <section class="section hero">
        <div class="container has-text-centered">
            <!-- Section heading is an h2, matching sections A and B, so the page title stays the only h1. -->
            <h2 class="title is-3">C. Long Video in Complex Settings (5s video-level prompt Vs. 30s video-level prompt Vs.
                ours 30s frame-level prompt)</h2>
            <div class="content has-text-justified">
                Dynamic prompts, particularly frame-level prompts, offer significant convenience during inference because each latent unit directly maps to a corresponding prompt. This characteristic makes them highly suitable for methods such as FIFO, which involves single-latent window slides per denoising step, and for chunk-level auto-regressive approaches that support variable chunk sizes. Our proposed Parallel Multi-Window Denoising (PMWD) method also leverages this: for generating very long sequences, each latent within every parallel denoising window aligns easily with its specific prompt, facilitating effective information exchange in overlapping regions.

                While these three approaches (FIFO, chunk-level auto-regression, and PMWD) are based on fixed-length sliding windows, this can present limitations.
                For instance, maintaining ID consistency can be challenging when the historical frames within the window are insufficient, or when objects disappear or are occluded in complex scenes.
                However, the primary focus of this paper is the flexibility and potential offered by frame-level prompts across the entire pipeline—encompassing dataset collection and construction, through to training and inference stages.
                Thus, we leave this issue for future work.
                Should enhanced ID preservation be a priority, our frame-level prompt system can be readily augmented with techniques that expand the historical context, such as those employed by FramePack or KV caching methods.
            </div>
            <div class="image-wrapper">
                <!-- NOTE(review): alt text is inferred from the file name and the paragraph above; refine it to describe the figure. -->
                <img src="static/picture/three_inference_mode.jpeg" width="100%" alt="Diagram of the three inference modes">
            </div>

            <br><br><br><br><br><br><br><br><br><br>

            <!-- Each example: the text prompt, then three clips side by side in the order given in the
                 heading (5s video-level, 30s video-level, 30s frame-level). The clips are muted so
                 autoplay is permitted; playsinline lets them start in place on iOS Safari. -->
            <div class="content has-text-justified">
                1. The camera enters a golden autumn forest, where the leaves have turned brilliant shades from gold to
                orange-red. A few leaves drift down with the wind. Sunlight filters through the gaps in the trees,
                casting dappled spots of light on the forest floor. The scene shifts to a field outside the forest,
                where ripe rice stalks sway in the breeze, their golden heads bowing under the weight of the grain.
                A few wild rabbits dart between the rows, occasionally pausing to nibble on the grass. The camera
                moves again to a winding stream, its water crystal clear, with a few fallen leaves floating gently on the
                surface. A soft breeze ripples the water, creating subtle waves. Continuing onward, the scene shifts
                to a hillside, where the distant mountain range is bathed in the warm light of autumn, and a village
                can be seen nestled at the foot of the mountains, with smoke curling from chimneys.
            </div>
            <div class="video-container">
                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source
                            src="static/vis/1_Complex_Landscape_7_1_The camera enters a golden autumn forest, where the leaves have turned brilliant shades from gold to orange-red. A few leaves drift down with the wind. Sunlight filte.mp4"
                            type="video/mp4">
                    </video>
                </div>

                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source
                            src="static/vis/2_Complex_Landscape_7_2_The camera enters a golden autumn forest, where the leaves have turned brilliant shades from gold to orange-red. A few leaves drift down with the wind. Sunlight filte.mp4"
                            type="video/mp4">
                    </video>
                </div>

                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source
                            src="static/vis/3_Complex_Landscape_7_1_The camera enters a golden autumn forest, where the leaves have turned brilliant shades from gold to orange-red. A few leaves drift down with the wind. Sunlight filte.mp4"
                            type="video/mp4">
                    </video>
                </div>
            </div>
            <br><br><br>

            <div class="content has-text-justified">
                2. A young adventurer set off in search of five mysterious trial towers. He first arrived at the fire
                tower, solved the riddle of the fire giant, and earned the flame symbol. He then crossed the icy
                lands, faced the ice queen, and earned the frost symbol. Next, he entered an ancient temple, defeated the
                necromancer, and obtained the soul symbol. He then ventured into the thunder mountain range,
                underwent the lightning god's trial, and earned the thunder symbol. Finally, he reached the tower of light,
                faced the mysterious celestial being, and earned the light symbol. The adventurer combined the five
                symbols, uncovered a hidden world, and restored the lost glory.
            </div>
            <div class="video-container">
                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source
                            src="static/vis/1_Complex_Plot_13_0_A young adventurer set off in search of five mysterious trial towers. He first arrived at the fire tower.mp4"
                            type="video/mp4">
                    </video>
                </div>

                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source
                            src="static/vis/2_Complex_Plot_13_0_A young adventurer set off in search of five mysterious trial towers. He first arrived at the fire tower.mp4"
                            type="video/mp4">
                    </video>
                </div>

                <div>
                    <video autoplay controls muted loop playsinline width="100%">
                        <source
                            src="static/vis/3_Complex_Plot_13_1A young adventurer set off in search of five mysterious trial towers. He first arrived at the fire tower.mp4"
                            type="video/mp4">
                    </video>
                </div>
            </div>
            <br><br><br>
        </div>
    </section>




    <!-- script!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
    <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
    <script defer src="static/js/fontawesome.all.min.js"></script>
    <script src="static/js/bulma-carousel.min.js"></script>
    <script src="static/js/bulma-slider.min.js"></script>
    <script src="static/js/index.js"></script>

    <!-- MathJax v2 picks up text/x-mathjax-config blocks when MathJax.js starts up.
         The loader is async and may run as soon as it is fetched, so the configuration
         block is placed before it to guarantee the inline-math delimiters are registered. -->
    <script type="text/x-mathjax-config">
        MathJax.Hub.Config({
            tex2jax: {
                inlineMath: [['$','$'], ['\\(','\\)']]
            }
        });
    </script>
    <script type="text/javascript" async
        src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_SVG"></script>

    <script>
        // Wires up the scene browser around #sceneVideoPlayer: clickable scene items that
        // seek the main video, a custom progress bar / timeline, a time read-out, and
        // highlighting of the scene that contains the current playback position.
        document.addEventListener('DOMContentLoaded', function () {
            // Frame rate used to convert every scene's `data-frame` index into seconds.
            // Shared by click-to-seek, thumbnail setup and active-scene tracking so the
            // three can never disagree about units.
            const FRAME_RATE = 16;

            const videoPlayer = document.getElementById('sceneVideoPlayer');
            if (!videoPlayer) {
                // No scene player on this page: nothing to wire up (avoids a TypeError below).
                return;
            }

            const videoProgress = document.querySelector('.video-progress');
            const videoTimeline = document.querySelector('.video-timeline');
            const videoTime = document.querySelector('.video-time');
            const sceneItems = document.querySelectorAll('.scene-item');
            const scenesList = document.querySelector('.scenes-list');

            let videoDuration = 0;

            // Cache the duration and refresh the time read-out.
            function syncDuration() {
                videoDuration = videoPlayer.duration;
                updateTimeDisplay(videoPlayer.currentTime, videoDuration);
            }

            // Get video duration once metadata is loaded. The metadata may already be
            // available by the time DOMContentLoaded fires (the event would then never
            // reach this listener), so also sync immediately in that case.
            videoPlayer.addEventListener('loadedmetadata', syncDuration);
            if (videoPlayer.readyState >= HTMLMediaElement.HAVE_METADATA) {
                syncDuration();
            }

            // Start time of a scene in seconds, derived from its `data-frame` attribute.
            // Returns NaN when the attribute is missing or not a number.
            function sceneStartTime(item) {
                return parseInt(item.dataset.frame, 10) / FRAME_RATE;
            }

            // Scroll the horizontal scenes list so `item` is centred, but only when the
            // item is at least partly outside the visible area of the list.
            function scrollSceneIntoView(item) {
                const itemRect = item.getBoundingClientRect();
                const listRect = scenesList.getBoundingClientRect();

                if (itemRect.left < listRect.left || itemRect.right > listRect.right) {
                    scenesList.scrollTo({
                        left: item.offsetLeft - (listRect.width - itemRect.width) / 2,
                        behavior: 'smooth'
                    });
                }
            }

            // Handle scene item clicks to jump to specific timestamps
            sceneItems.forEach(item => {
                item.addEventListener('click', function () {
                    const jumpTime = sceneStartTime(this);
                    if (isNaN(jumpTime)) {
                        console.error('无效的帧数值:', this.dataset.frame);
                        return;
                    }

                    if (jumpTime > videoPlayer.duration) {
                        console.warn(`超出视频时长（最大${videoPlayer.duration.toFixed(1)}秒）`);
                        return;
                    }

                    // Seek and hold on the first frame of the chosen scene.
                    videoPlayer.currentTime = jumpTime;
                    videoPlayer.pause();

                    // Update active scene
                    sceneItems.forEach(scene => {
                        scene.classList.remove('active');
                    });
                    this.classList.add('active');

                    // Scroll to make sure active scene is visible
                    scrollSceneIntoView(this);
                });
            });

            // ================= Thumbnail frame-freeze initialisation =================
            // FrameFreeze is defined outside this script; it is handed the thumbnail
            // video, the target frame, the frame rate and a completion callback.
            sceneItems.forEach(item => {
                const video = item.querySelector('.thumbnail-video');
                const targetFrame = parseInt(item.dataset.frame, 10);

                new FrameFreeze(
                    video,          // thumbnail video element
                    targetFrame,    // target frame number
                    FRAME_RATE,     // frame rate (must match the main video)
                    () => {         // completion callback
                        item.style.opacity = 1; // reveal the finished thumbnail
                    }
                );
            });

            // Update progress bar and time display as video plays
            videoPlayer.addEventListener('timeupdate', function () {
                const currentTime = videoPlayer.currentTime;
                // Guard against an unknown (0) duration, which would otherwise yield an
                // invalid "Infinity%" / "NaN%" width.
                const progressPercent = videoDuration > 0
                    ? (currentTime / videoDuration) * 100
                    : 0;

                videoProgress.style.width = `${progressPercent}%`;
                updateTimeDisplay(currentTime, videoDuration);

                updateActiveScene(currentTime);
            });

            // Handle timeline clicks to seek video
            videoTimeline.addEventListener('click', function (e) {
                const rect = this.getBoundingClientRect();
                const position = (e.clientX - rect.left) / rect.width;
                const seekTime = position * videoDuration;

                videoPlayer.currentTime = seekTime;
                videoPlayer.play();
            });

            // Format a number of seconds as "m:ss"
            function formatTime(seconds) {
                const minutes = Math.floor(seconds / 60);
                const remainingSeconds = Math.floor(seconds % 60);
                return `${minutes}:${remainingSeconds.toString().padStart(2, '0')}`;
            }

            // Update time display ("current / total")
            function updateTimeDisplay(currentTime, duration) {
                videoTime.textContent = `${formatTime(currentTime)} / ${formatTime(duration)}`;
            }

            // Update active scene based on current playback time (in seconds).
            // A scene is active from its own start time up to the next scene's start
            // time; the last scene is open-ended so it stays active at the very end of
            // the video instead of falling back to the first scene.
            function updateActiveScene(currentTime) {
                let activeSceneFound = false;

                sceneItems.forEach((item, index) => {
                    // `data-frame` holds a frame index, so convert it to seconds before
                    // comparing with the playback position.
                    const startTime = sceneStartTime(item);
                    const endTime = index < sceneItems.length - 1
                        ? sceneStartTime(sceneItems[index + 1])
                        : Infinity;

                    if (currentTime >= startTime && currentTime < endTime) {
                        item.classList.add('active');
                        activeSceneFound = true;

                        // Auto-scroll scenes list to keep active scene visible
                        scrollSceneIntoView(item);
                    } else {
                        item.classList.remove('active');
                    }
                });

                // Default to first scene if none found active
                if (!activeSceneFound && sceneItems.length > 0) {
                    sceneItems[0].classList.add('active');
                }
            }
        });
    </script>

    <script>
        /**
         * Configure the <video> element with the given id so that it plays a single
         * frame range of `videoSrc`.
         *
         * Frame numbers are converted to seconds by dividing by `fps`. Once metadata
         * is loaded the playhead is moved to the segment start; when playback reaches
         * the segment end it either jumps back to the start (`shouldLoop`) or pauses.
         * Invalid arguments are reported through console.error and the element is
         * left untouched.
         *
         * @param {string} videoElementId  id of an existing <video> element.
         * @param {string} videoSrc        URL assigned to the element's `src`.
         * @param {number} fps             frames per second; must be greater than 0.
         * @param {number} startFrameNum   first frame of the segment; must be >= 0.
         * @param {number} endFrameNum     frame at which the segment ends; must be > 0.
         * @param {boolean} [shouldLoop=false] restart at the segment start instead of pausing.
         */
        function setupVideoSegment(videoElementId, videoSrc, fps, startFrameNum, endFrameNum, shouldLoop = false) {
            const player = document.getElementById(videoElementId);

            // ---- argument validation (each failure logs and bails out) ----
            if (!player) {
                console.error(`Video element with ID "${videoElementId}" not found.`);
                return;
            }
            if (!videoSrc) {
                console.error(`Missing video source for ${videoElementId}.`);
                return;
            }
            if (isNaN(fps) || fps <= 0) {
                console.error(`Invalid FPS value (${fps}) for ${videoElementId}.`);
                return;
            }
            const startFrameInvalid = isNaN(startFrameNum) || startFrameNum < 0;
            const endFrameInvalid = isNaN(endFrameNum) || endFrameNum <= 0;
            if (startFrameInvalid || endFrameInvalid) {
                console.error(`Invalid start/end frame numbers for ${videoElementId}. Start: ${startFrameNum}, End: ${endFrameNum}`);
                return;
            }

            // ---- frame numbers -> seconds ----
            const segmentStart = startFrameNum / fps;
            const segmentEnd = endFrameNum / fps;
            if (segmentStart >= segmentEnd) {
                console.error(`For ${videoElementId}, start time (${segmentStart.toFixed(2)}s) must be less than end time (${segmentEnd.toFixed(2)}s).`);
                return;
            }

            // ---- element setup ----
            player.src = videoSrc;
            player.controls = true;
            player.preload = 'metadata';
            Object.assign(player.style, {
                width: '100%',
                maxWidth: '48%',          // lets two videos sit side by side if the parent is a flex container
                display: 'inline-block',  // inline-block so the videos can share a row
                marginBottom: '10px',
                marginRight: '2%'         // a little spacing on the right
            });

            // Segment end clamped to the media duration, or null while the duration
            // is still unknown (NaN).
            const boundedEnd = () => {
                const total = player.duration;
                return isNaN(total) ? null : Math.min(segmentEnd, total);
            };

            // Once the duration is known: sanity-check the range, then seek to the start.
            player.addEventListener('loadedmetadata', () => {
                const total = player.duration;
                if (segmentEnd > total) {
                    console.warn(`For ${videoElementId}, specified end time (${segmentEnd.toFixed(2)}s) exceeds video duration (${total.toFixed(2)}s). Segment will play until video end.`);
                }
                if (segmentStart >= total) {
                    console.error(`For ${videoElementId}, specified start time (${segmentStart.toFixed(2)}s) is at or beyond video duration (${total.toFixed(2)}s).`);
                    player.controls = false;
                    return;
                }
                player.currentTime = segmentStart;
            });

            // Stop (or loop) as soon as the playhead reaches the end of the segment.
            player.addEventListener('timeupdate', () => {
                const stopAt = boundedEnd();
                if (stopAt === null || !(player.currentTime >= stopAt)) {
                    return;
                }

                if (!shouldLoop) {
                    player.pause();
                    // Snap back if playback overshot the segment end.
                    if (player.currentTime > stopAt) {
                        player.currentTime = stopAt;
                    }
                    return;
                }

                player.currentTime = segmentStart;
                player.play().catch(e => console.warn("Play interrupted during loop for " + videoElementId, e.message));
            });

            // When playback starts outside the segment, rewind to the segment start.
            player.addEventListener('play', () => {
                const stopAt = boundedEnd();
                if (stopAt === null) {
                    return;
                }

                const now = player.currentTime;
                const outsideSegment = now < segmentStart || now >= stopAt;
                // A looping player that is within 0.5s after the start is left alone.
                const justRestarted = shouldLoop && now >= segmentStart && now < segmentStart + 0.5;
                if (outsideSegment && !justRestarted) {
                    player.currentTime = segmentStart;
                }
            });

            // Log load failures together with the reason reported by the element.
            player.addEventListener('error', (e) => {
                let errorMsg = '视频加载失败。';
                const failure = player.error;
                if (failure) {
                    const reasons = new Map([
                        [MediaError.MEDIA_ERR_ABORTED, ' 用户中止。'],
                        [MediaError.MEDIA_ERR_NETWORK, ' 网络错误。'],
                        [MediaError.MEDIA_ERR_DECODE, ' 解码错误。'],
                        [MediaError.MEDIA_ERR_SRC_NOT_SUPPORTED, ' 格式不支持。']
                    ]);
                    errorMsg += reasons.has(failure.code) ? reasons.get(failure.code) : ' 未知错误。';
                }
                console.error(`Error for ${videoElementId} (URL: ${videoSrc}): ${errorMsg}`, e);
            });
        }

        /**
         * Build the segment players for every `.dynamic-video-segment-block` on the page.
         *
         * Inside each block, every `.video-data` element (inside `.video-data-definitions`)
         * supplies `data-video-url` and `data-fps`, and each of its `.segment-data` children
         * supplies `data-start-frame`, `data-end-frame`, and optionally `data-loop` and
         * `data-id-suffix`. One <video> is created per segment, appended to the block's
         * `.segmented-video-output-container` (or to the block itself when that container
         * is absent), and then configured through setupVideoSegment.
         */
        function renderAllVideoPlayers() {
            const blocks = document.querySelectorAll('.dynamic-video-segment-block');
            if (blocks.length === 0) {
                console.warn("No '.dynamic-video-segment-block' elements found to render videos into.");
                return;
            }

            blocks.forEach((block, blockIndex) => {
                const definitions = block.querySelector('.video-data-definitions');
                const dedicatedOutput = block.querySelector('.segmented-video-output-container');

                if (!definitions) {
                    console.error(`Required element '.video-data-definitions' not found in block ${blockIndex + 1}. Skipping this block.`);
                    return;
                }
                if (!dedicatedOutput) {
                    console.warn(`'.segmented-video-output-container' not found in block ${blockIndex + 1}. Creating one or appending to block.`);
                }
                // Without a dedicated output container the videos are appended to the block itself.
                const output = dedicatedOutput || block;

                definitions.querySelectorAll('.video-data').forEach((videoDataEl, videoDataIndex) => {
                    const { videoUrl, fps: fpsString } = videoDataEl.dataset;
                    if (!videoUrl) {
                        console.warn(`Skipping video data in block ${blockIndex + 1} due to missing 'data-video-url'.`);
                        return;
                    }

                    const fps = parseFloat(fpsString);
                    if (isNaN(fps) || fps <= 0) {
                        console.warn(`Skipping video data in block ${blockIndex + 1} for URL '${videoUrl}' due to invalid 'data-fps': ${fpsString}.`);
                        return;
                    }

                    // Pass 1: create and attach one <video> per segment, remembering its settings.
                    const pending = Array.from(
                        videoDataEl.querySelectorAll('.segment-data'),
                        (segmentEl, segmentIndex) => {
                            const { idSuffix, startFrame, endFrame, loop } = segmentEl.dataset;

                            // Combine block index, video index and the user-supplied suffix so
                            // the id is more likely to be unique across the whole page.
                            const suffix = idSuffix || `seg${segmentIndex}`;
                            const elementId = `video_block${blockIndex}_data${videoDataIndex}_${suffix}`;

                            const videoElement = document.createElement('video');
                            videoElement.id = elementId;
                            output.appendChild(videoElement);

                            return {
                                elementId,
                                startFrame: parseInt(startFrame),
                                endFrame: parseInt(endFrame),
                                loop: loop === 'true'
                            };
                        }
                    );

                    // Pass 2: configure every player that was just attached.
                    for (const { elementId, startFrame, endFrame, loop } of pending) {
                        setupVideoSegment(elementId, videoUrl, fps, startFrame, endFrame, loop);
                    }
                });
            });
        }
        // Render the segment players as soon as the DOM has been parsed.
        document.addEventListener('DOMContentLoaded', renderAllVideoPlayers);
    </script>


</body>

</html>