<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8" />
    <title>Video + JSON Viewer</title>
    <style>
        body {
            margin: 0;
            padding: 0;
            font-family: sans-serif;
            background-color: #f5f5f5;
        }
        .container {
            display: flex;
            width: 100%;
            height: 100vh;
            box-sizing: border-box;
        }
        .left-panel {
            flex: 1;
            padding: 15px;
            box-sizing: border-box;
            background-color: white;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
            margin-right: 10px;
            border-radius: 5px;
            overflow-y: auto;
        }
        .right-panel {
            flex: 1;
            padding: 15px;
            box-sizing: border-box;
            background-color: white;
            box-shadow: 0 0 10px rgba(0,0,0,0.1);
            border-radius: 5px;
            display: flex;
            flex-direction: column;
        }
        .video-wrapper {
            margin-bottom: 15px;
        }
        .panel-title {
            font-size: 18px;
            font-weight: bold;
            margin-bottom: 10px;
            color: #333;
            border-bottom: 1px solid #eee;
            padding-bottom: 5px;
        }
        video {
            max-width: 100%;
            height: 300px;
            width: auto;
            display: block;
            border-radius: 5px;
            background-color: #000;
        }
        .filename {
            font-weight: bold;
            margin-top: 5px;
            color: #555;
        }
        .controls {
            margin: 15px 0;
            display: flex;
            align-items: center;
        }
        .controls button {
            margin-right: 10px;
            padding: 8px 15px;
            background-color: #4a90e2;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-weight: bold;
        }
        .controls button:hover {
            background-color: #3a80d2;
        }
        .index-display {
            margin: 0 15px;
            font-weight: bold;
            color: #555;
        }
        textarea {
            width: 100%;
            height: 150px;
            box-sizing: border-box;
            font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
            padding: 15px;
            border: 1px solid #ddd;
            border-radius: 8px;
            resize: vertical;
            background-color: #f8f9fa;
            line-height: 1.4;
            font-size: 13px;
            box-shadow: inset 0 1px 3px rgba(0,0,0,0.1);
        }
        textarea:focus {
            outline: none;
            border-color: #4a90e2;
            box-shadow: inset 0 1px 3px rgba(0,0,0,0.1), 0 0 0 2px rgba(74,144,226,0.2);
        }
        .qa-section {
            margin-top: 15px;
            max-height: 400px;
            overflow-y: auto;
        }
        .qa-title {
            font-weight: bold;
            margin-bottom: 10px;
            color: #333;
            font-size: 16px;
            border-bottom: 2px solid #4a90e2;
            padding-bottom: 5px;
        }
        .qa-item {
            background-color: #f9f9f9;
            margin-bottom: 15px;
            border-radius: 8px;
            border: 1px solid #e0e0e0;
            overflow: hidden;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .qa-question {
            background-color: #4a90e2;
            color: white;
            padding: 12px 15px;
            font-weight: bold;
            font-size: 14px;
        }
        .qa-options {
            padding: 15px;
        }
        .qa-option {
            margin-bottom: 8px;
            padding: 8px 12px;
            background-color: #f5f5f5;
            border-radius: 4px;
            border-left: 3px solid #ddd;
            font-size: 13px;
        }
        .qa-option.correct {
            background-color: #e8f5e8;
            border-left-color: #28a745;
            font-weight: bold;
        }
        .qa-option-label {
            font-weight: bold;
            color: #666;
            margin-right: 8px;
        }
        .qa-counter {
            background-color: #6c757d;
            color: white;
            padding: 4px 8px;
            border-radius: 12px;
            font-size: 12px;
            font-weight: bold;
            margin-left: 10px;
        }
        .annotation-section {
            margin-top: 15px;
            border-top: 1px solid #eee;
            padding-top: 15px;
        }
        .annotation-checkbox {
            margin-bottom: 10px;
            display: flex;
            align-items: center;
        }
        .annotation-checkbox input {
            margin-right: 8px;
            transform: scale(1.2);
        }
        .annotation-checkbox label {
            font-weight: bold;
            color: #333;
        }
        .annotation-comment {
            width: 100%;
            height: 80px;
            margin-bottom: 10px;
            box-sizing: border-box;
            font-family: sans-serif;
            padding: 10px;
            border: 1px solid #ddd;
            border-radius: 4px;
        }
        .save-note {
            margin-top: 15px;
            color: #666;
            font-size: 12px;
            background-color: #f9f9f9;
            padding: 10px;
            border-radius: 4px;
            border-left: 3px solid #4a90e2;
        }
        .error-marker {
            background-color: #ffeeee;
            border-left: 3px solid #ff5555 !important;
        }
        .json-caption-section {
            flex: 1;
            display: flex;
            flex-direction: column;
            overflow: hidden;
        }
        .caption-container {
            margin-top: 15px;
            flex: 1;
            overflow-y: auto;
            background-color: #f8f9fa;
            border-radius: 8px;
            padding: 15px;
            border: 1px solid #e9ecef;
        }
        #saveAnnotationBtn {
            background-color: #5cb85c;
        }
        #saveAnnotationBtn:hover {
            background-color: #4ca84c;
        }
    </style>
</head>
<body>

<div class="container">
    <div class="left-panel">
        <div class="panel-title">Main Video</div>
        <div class="video-wrapper">
            <div id="videoName" class="filename"></div>
            <video id="videoPlayer" controls autoplay muted loop></video>
        </div>
        <div class="controls">
            <button id="prevBtn">Prev</button>
            <span class="index-display" id="indexDisplay"></span>
            <button id="nextBtn">Next</button>
            <button id="saveBtn">Save JSON</button>
        </div>
        <div class="qa-section">
            <div class="qa-title">Questions & Answers <span id="qaCounter" class="qa-counter">0</span></div>
            <div id="qaContainer"></div>
        </div>
        <div class="annotation-section">
            <div class="annotation-checkbox">
                <input type="checkbox" id="errorCheckbox">
                <label for="errorCheckbox">Mark this example as having an error</label>
            </div>
            <div>
                <label for="commentTextarea" class="qa-title">Add comment:</label>
                <textarea id="commentTextarea" class="annotation-comment" placeholder="Add your comment here..."></textarea>
            </div>
            <button id="saveAnnotationBtn">Save Annotation</button>
            <div id="annotationStatus" style="margin-top: 10px; color: green; font-weight: bold;"></div>
        </div>
        <div class="save-note">
            Clicking "Save JSON" will download the updated JSON. 
            Place it in <strong>video_QAs_updated</strong> to load the updated version next time.
            Annotations are automatically saved to <strong>video_annotations/user_annotations.json</strong>
        </div>
    </div>
    <div class="right-panel">
        <div class="panel-title">Processed Video</div>
        <div class="video-wrapper">
            <video id="bodyhandsVideoPlayer" controls autoplay muted loop></video>
        </div>
        <div class="json-caption-section">
            <div class="panel-title">Caption</div>
            <div class="caption-container">
                <div id="jsonName" class="filename" style="margin-bottom: 10px; color: #4a90e2; font-weight: bold;"></div>
                <textarea id="jsonEditor" placeholder="Caption content will appear here..."></textarea>
            </div>
        </div>
    </div>
</div>

<script>
window.FILE_DATA = [{"videoFile": "Videos_crop/0800.mp4", "videoName": "0800.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/0800.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/0800.json", "updatedJSONPath": "video_QAs_updated/0800.json", "jsonName": "0800.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a person applying a facial mask while engaging in expressive hand gestures, showcasing a beauty routine.\\n\\n**Action Sequence and Recognition:**\\n- The person begins by holding a bottle in their right hand while applying the mask with their left hand.\\n- They then gesture with both hands, explaining the application process.\\n- The person raises their hands to emphasize points, moving them in a circular motion.\\n\\n**Object-Action Associations:**\\n- The person uses their left hand to apply the mask to their face.\\n- The bottle in the right hand is likely the source of the mask being applied.\\n- The left hand is also involved in gesturing and demonstrating the application technique.\\n\\n**Location-based Motion Analysis:**\\n- The application of the mask occurs in the center of the frame, where the person's face is prominently visible.\\n- Hand gestures are made in the central and upper areas of the frame, drawing attention to the facial mask application.\\n\\n**Repetition and Frequency Details:**\\n- The person gestures with their hands multiple times, indicating the application process and possibly explaining steps.\\n- Hand movements are repeated in a rhythmic manner, emphasizing the importance of the application technique.\\n\\n**Traditional Motion Analysis:**\\n- The right hand moves in a downward motion to hold the bottle, then transitions to a gesturing position, moving back and forth.\\n- The left hand performs a circular motion while applying the mask, covering a small area on the face.\\n- The overall speed of the hand movements is moderate, allowing for clear communication of the process.\\n\\n**Spatial Relationships:**\\n- The bottle is held in the right hand, positioned slightly to the right of the person's face.\\n- The left hand is actively engaged in applying the mask, moving close to the face.\\n- As the person gestures, their hands move in a way that creates a dynamic interaction between the face and the objects being held.\\n\\nThis comprehensive analysis captures the actions, object interactions, and spatial dynamics throughout the video, highlighting the beauty routine being performed.\"\n}", "questions": [{"question": "What action is the person performing with their left hand?", "A": "The person is wiping their face with their left hand.", "B": "The person is holding a bottle with their left hand.", "C": "The person is gesturing with their left hand.", "D": "The person is applying a facial mask to their face with their left hand.", "correct_answer": "D"}, {"question": "Which action happens first in the video?", "A": "The person gestures with both hands after applying the mask.", "B": "The person holds the bottle in their right hand before applying the mask.", "C": "The person wipes their face after applying the mask.", "D": "The person raises their hands to emphasize points after applying the mask.", "correct_answer": "B"}, {"question": "What object performs the action of applying the mask?", "A": "The person's left hand performs the action of applying the mask.", "B": "The right hand performs the action of holding the bottle.", "C": "The bottle in the right hand performs the action of applying the mask.", "D": "The person's face performs the action of receiving the mask.", "correct_answer": "A"}, {"question": "Where in the scene does the application of the mask take place?", "A": "The application of the mask occurs in the upper-right area of the frame.", "B": "The application of the mask occurs in the center of the frame.", "C": "The application of the mask occurs off-screen.", "D": "The application of the mask occurs in the lower-left area of the frame.", "correct_answer": "B"}, {"question": "How many times does the person gesture with their hands?", "A": "The person gestures with their hands three times.", "B": "The person gestures with their hands only once.", "C": "The person does not gesture with their hands at all.", "D": "The person gestures with their hands multiple times throughout the video.", "correct_answer": "D"}, {"question": "In which direction does the person's right hand move?", "A": "The right hand moves upward to gesture.", "B": "The right hand moves to the right to pick up an object.", "C": "The right hand moves downward to hold the bottle.", "D": "The right hand moves to the left to apply the mask.", "correct_answer": "C"}, {"question": "How far does the left hand move while applying the mask?", "A": "The left hand moves across the entire face.", "B": "The left hand does not move at all during the application.", "C": "The left hand covers a small area on the face while applying the mask.", "D": "The left hand moves to the right side of the face.", "correct_answer": "C"}, {"question": "What type of trajectory does the left hand follow while applying the mask?", "A": "The left hand does not follow any specific trajectory.", "B": "The left hand follows a circular motion while applying the mask.", "C": "The left hand follows a zigzag pattern while applying the mask.", "D": "The left hand follows a straight line toward the face.", "correct_answer": "B"}, {"question": "Where is the bottle positioned relative to the person's face?", "A": "The bottle is held directly in front of the person's face.", "B": "The bottle is held behind the person's head.", "C": "The bottle is held to the left of the person's face.", "D": "The bottle is held slightly to the right of the person's face.", "correct_answer": "D"}, {"question": "How does the spatial relationship between the left hand and the face change during the video?", "A": "The left hand moves away from the face after applying the mask.", "B": "The left hand moves to the right side of the face.", "C": "The left hand moves closer to the face while applying the mask.", "D": "The left hand remains at a constant distance from the face.", "correct_answer": "C"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/2.mp4", "videoName": "2.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/2.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/2.json", "updatedJSONPath": "video_QAs_updated/2.json", "jsonName": "2.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a person seated at a table, engaging in various hand movements while interacting with objects, specifically a pen and a red object.\\n\\n**Action Sequence and Recognition:**\\n- The person first sits calmly, with both hands resting on their lap.\\n- They then reach for the red object with their right hand, lifting it slightly.\\n- Following this, they manipulate the red object, moving it back and forth.\\n- The person alternates between their right and left hands, occasionally raising their left hand while the right hand continues to interact with the red object.\\n- At one point, they place the red object down on the table and use their right hand to touch the surface.\\n\\n**Object-Action Associations:**\\n- The person's right hand is primarily responsible for reaching and manipulating the red object.\\n- The left hand is occasionally raised, possibly for balance or to assist in the manipulation of the red object.\\n- The pen remains stationary on the table, not actively involved in the actions.\\n\\n**Location-based Motion Analysis:**\\n- The actions occur primarily in the center of the frame, where the person is seated at the table.\\n- The red object is moved back and forth across the table's surface, while the pen remains on the left side of the frame.\\n\\n**Repetition and Frequency Details:**\\n- The person raises their left hand multiple times, approximately 4 times throughout the video.\\n- The manipulation of the red object involves back-and-forth movements, repeated around 6 times.\\n\\n**Traditional Motion Analysis:**\\n- The red object is moved in a horizontal direction across the table, covering a distance of about 15 cm at a moderate speed.\\n- The person's right hand approaches the red object from a resting position, moving downward and then back up in a fluid motion.\\n- The left hand's movements are more sporadic, primarily serving as a counterbalance.\\n\\n**Spatial Relationships:**\\n- Initially, the red object is positioned in the center of the table, with the pen located to the left.\\n- As the person manipulates the red object, it remains close to the center, while the pen stays stationary, maintaining a distance of about 20 cm from the red object.\\n- The final arrangement shows the red object placed back on the table, with the person's hands positioned on either side, creating a balanced spatial organization.\"\n}", "questions": [{"question": "What action is the person performing with their right hand?", "A": "The person is raising their left hand.", "B": "The person is writing with a pen using their left hand.", "C": "The person is manipulating the red object with their right hand.", "D": "The person is resting both hands on their lap.", "correct_answer": "C"}, {"question": "Which action happens first in the video?", "A": "The person places the red object down on the table.", "B": "The person reaches for the pen.", "C": "The person sits calmly with both hands resting on their lap.", "D": "The person raises their left hand.", "correct_answer": "C"}, {"question": "What object performs the manipulation motion?", "A": "The floor is involved in the actions.", "B": "The left hand is used to balance the red object.", "C": "The pen is used to write on the table.", "D": "The red object is manipulated by the person's right hand.", "correct_answer": "D"}, {"question": "Where in the scene does the manipulation of the red object take place?", "A": "The manipulation occurs on the left side of the frame.", "B": "The manipulation occurs near the window.", "C": "The manipulation occurs in the center of the table.", "D": "The manipulation occurs on the floor.", "correct_answer": "C"}, {"question": "How many times does the person raise their left hand?", "A": "The person does not raise their left hand at all.", "B": "The person raises their left hand approximately 4 times.", "C": "The person raises their left hand 10 times.", "D": "The person raises their left hand once.", "correct_answer": "B"}, {"question": "In which direction does the red object move?", "A": "The red object moves downward to the floor.", "B": "The red object moves upward toward the ceiling.", "C": "The red object moves back and forth horizontally across the table.", "D": "The red object moves diagonally to the left.", "correct_answer": "C"}, {"question": "How far does the red object travel during the manipulation?", "A": "The red object does not move at all.", "B": "The red object travels 30 cm to the right.", "C": "The red object travels about 15 cm across the table.", "D": "The red object travels 5 cm to the left.", "correct_answer": "C"}, {"question": "What type of trajectory does the red object follow?", "A": "The red object follows a circular motion.", "B": "The red object follows a zigzag pattern.", "C": "The red object follows a back-and-forth horizontal motion.", "D": "The red object follows a vertical path.", "correct_answer": "C"}, {"question": "Where is the pen positioned relative to the red object?", "A": "The pen is directly above the red object.", "B": "The pen is on the floor beneath the red object.", "C": "The pen is 10 cm to the right of the red object.", "D": "The pen is located 20 cm to the left of the red object.", "correct_answer": "D"}, {"question": "How does the spatial relationship between the red object and the person's hands change?", "A": "The red object is thrown across the room.", "B": "The red object is placed back on the table with hands positioned on either side.", "C": "The red object is lifted above the person's head.", "D": "The red object moves away from the person's hands completely.", "correct_answer": "B"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/4.mp4", "videoName": "4.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/4.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/4.json", "updatedJSONPath": "video_QAs_updated/4.json", "jsonName": "4.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a scene where two individuals are engaged in a close interaction, likely involving a beauty or grooming activity, with one person applying something to the other.\\n\\n**Action Sequence and Recognition:**\\n- Initially, the person on the left is seen smiling and interacting with the person on the right.\\n- The person on the right uses their left hand to apply an object to the face of the person on the left.\\n- The right hand of the person on the right is also involved, possibly holding or manipulating the object being applied.\\n\\n**Object-Action Associations:**\\n- The person on the right uses their left hand to perform the application action on the person on the left.\\n- The object being applied is likely a cosmetic or grooming tool, held in the right hand of the person on the right.\\n- The person on the left is the recipient of the action, sitting still while the application occurs.\\n\\n**Location-based Motion Analysis:**\\n- The interaction takes place in the center of the frame, with both individuals positioned closely together.\\n- The left hand of the person on the right is primarily active in the central area, near the face of the person on the left.\\n- The right hand is also engaged in the same central area, indicating a focused activity.\\n\\n**Repetition and Frequency Details:**\\n- The application action appears to be repeated multiple times, as the person on the right adjusts their hand movements.\\n- The left hand's application motion is likely performed in a rhythmic manner, suggesting a consistent technique.\\n\\n**Traditional Motion Analysis:**\\n- The left hand of the person on the right moves in a downward and upward motion towards the face of the person on the left, covering a small distance of approximately 10-15 cm.\\n- The right hand also performs similar motions, possibly with slight variations in speed and direction.\\n- The trajectory of both hands is primarily vertical, with some lateral adjustments as they navigate around the face.\\n\\n**Spatial Relationships:**\\n- The person on the left is positioned directly in front of the person on the right, with minimal distance between them, indicating a close interaction.\\n- The right hand and left hand of the person on the right are positioned close to the face of the person on the left, emphasizing the intimate nature of the action.\\n- Throughout the video, the spatial arrangement remains consistent, with both individuals maintaining their positions relative to each other.\\n\\nOverall, the video captures a detailed interaction focused on a grooming or beauty application, highlighting the motions, actions, and spatial dynamics between the two individuals.\"\n}", "questions": [{"question": "What action is the person on the right performing with their left hand?", "A": "The person on the right is applying an object to the face of the person on the left.", "B": "The person on the right is brushing their hair.", "C": "The person on the right is waving their hand.", "D": "The person on the right is holding a mirror.", "correct_answer": "A"}, {"question": "Which action happens first in the video?", "A": "The person on the right applies the object to the face.", "B": "The person on the left sits still.", "C": "The person on the left is smiling and interacting with the person on the right.", "D": "The person on the right adjusts their hand movements.", "correct_answer": "C"}, {"question": "Which object performs the application action?", "A": "The left hand of the person on the right performs the application action.", "B": "The right hand of the person on the left performs the action.", "C": "The right hand of the person on the left performs the action.", "D": "The object being applied is on the table.", "correct_answer": "A"}, {"question": "Where in the scene does the application action take place?", "A": "The application action takes place at the desk.", "B": "The application action takes place on the floor.", "C": "The application action takes place in the center of the frame.", "D": "The application action takes place near the window.", "correct_answer": "C"}, {"question": "How many times does the application action appear to be repeated?", "A": "The application action appears to be repeated multiple times.", "B": "The application action occurs only once.", "C": "The application action occurs continuously without pause.", "D": "The application action occurs three times.", "correct_answer": "A"}, {"question": "In which direction does the left hand of the person on the right move?", "A": "The left hand moves in a downward and upward motion.", "B": "The left hand moves diagonally across the face.", "C": "The left hand moves to the left side.", "D": "The left hand moves in a circular motion.", "correct_answer": "A"}, {"question": "How far does the left hand move towards the face?", "A": "The left hand moves 5 cm towards the face.", "B": "The left hand moves 30 cm towards the face.", "C": "The left hand covers a small distance of approximately 10-15 cm.", "D": "The left hand moves 20 cm away from the face.", "correct_answer": "C"}, {"question": "What path does the right hand follow during the application?", "A": "The right hand follows a vertical trajectory with lateral adjustments.", "B": "The right hand moves in a straight line to the left.", "C": "The right hand moves in a circular path around the face.", "D": "The right hand moves horizontally across the table.", "correct_answer": "A"}, {"question": "Where is the person on the left positioned relative to the person on the right?", "A": "The person on the left is positioned directly in front of the person on the right.", "B": "The person on the left is positioned to the right of the person on the right.", "C": "The person on the left is positioned to the left of the person on the right.", "D": "The person on the left is positioned behind the person on the right.", "correct_answer": "A"}, {"question": "How does the spatial relationship between the two individuals change during the video?", "A": "The distance between them increases significantly.", "B": "The person on the left moves away from the person on the right.", "C": "The person on the right moves closer to the wall.", "D": "The spatial arrangement remains consistent with minimal distance between them.", "correct_answer": "D"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/522dz-TAljQ.mp4", "videoName": "522dz-TAljQ.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/522dz-TAljQ.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/522dz-TAljQ.json", "updatedJSONPath": "video_QAs_updated/522dz-TAljQ.json", "jsonName": "522dz-TAljQ.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video features a series of frames showcasing two bull sculptures facing each other, accompanied by text and a logo, emphasizing a financial theme.\\n\\n**Action Sequence and Recognition:**\\n- The bulls remain stationary throughout the video, maintaining their positions facing each other.\\n- The text \\\"FORMATION TRUTH INVEST\\\" is consistently displayed, reinforcing the theme of investment.\\n\\n**Object-Action Associations:**\\n- The two bull sculptures are the primary objects, symbolizing strength and bullish market trends.\\n- The text serves as a branding element, promoting the investment theme.\\n\\n**Location-based Motion Analysis:**\\n- The bulls are positioned centrally in the frame, creating a focal point for the viewer.\\n- The logo appears in the upper-left corner, while the text is prominently displayed at the bottom.\\n\\n**Repetition and Frequency Details:**\\n- The same visual elements are repeated across multiple frames, with no changes in the positioning or actions of the bulls.\\n- The text appears consistently in each frame, emphasizing its importance.\\n\\n**Traditional Motion Analysis:**\\n- The bulls do not exhibit any movement; they are static throughout the video.\\n- The overall visual remains unchanged, maintaining a consistent presentation.\\n\\n**Spatial Relationships:**\\n- The two bulls are positioned closely together, facing each other, creating a sense of confrontation or dialogue.\\n- The logo and text maintain their respective positions relative to the bulls, ensuring a balanced composition in each frame.\\n\\nOverall, the video effectively communicates a financial message through the use of symbolic imagery and consistent branding elements.\"\n}", "questions": [{"question": "What action are the bull sculptures performing?", "A": "The bull sculptures are jumping up and down.", "B": "The bull sculptures are turning their heads.", "C": "The bull sculptures are remaining stationary, facing each other.", "D": "The bull sculptures are moving apart from each other.", "correct_answer": "C"}, {"question": "Which action happens first in the video?", "A": "The bulls are positioned facing each other before the text appears.", "B": "The text appears before the bulls are positioned.", "C": "The logo appears after the bulls are positioned.", "D": "The bulls move before the text appears.", "correct_answer": "A"}, {"question": "What do the bull sculptures symbolize in the video?", "A": "The bull sculptures symbolize peace and tranquility.", "B": "The bull sculptures symbolize chaos and disorder.", "C": "The bull sculptures symbolize strength and bullish market trends.", "D": "The bull sculptures symbolize weakness and bearish market trends.", "correct_answer": "C"}, {"question": "Where in the scene is the logo positioned?", "A": "The logo is positioned at the bottom of the frame.", "B": "The logo is positioned in the center of the frame.", "C": "The logo is positioned in the upper-left corner of the frame.", "D": "The logo is positioned in the lower-right corner of the frame.", "correct_answer": "C"}, {"question": "How many times does the text 'FORMATION TRUTH INVEST' appear in the video?", "A": "The text appears three times in the video.", "B": "The text appears only once in the video.", "C": "The text appears twice in the video.", "D": "The text appears consistently in each frame throughout the video.", "correct_answer": "D"}, {"question": "In which direction are the bull sculptures facing?", "A": "The bull sculptures are facing upward.", "B": "The bull sculptures are facing each other directly.", "C": "The bull sculptures are facing downward.", "D": "The bull sculptures are facing away from each other.", "correct_answer": "B"}, {"question": "How far apart are the bull sculptures positioned?", "A": "The bull sculptures are positioned at a diagonal distance.", "B": "The bull sculptures are positioned in different corners of the frame.", "C": "The bull sculptures are positioned far apart from each other.", "D": "The bull sculptures are positioned closely together, facing each other.", "correct_answer": "D"}, {"question": "What type of trajectory do the bull sculptures follow?", "A": "The bull sculptures move in a straight line.", "B": "The bull sculptures move in a zigzag pattern.", "C": "The bull sculptures maintain a static position without any trajectory.", "D": "The bull sculptures move in a circular path.", "correct_answer": "C"}, {"question": "What is the speed of the bull sculptures' movement?", "A": "The bull sculptures move at a moderate pace.", "B": "The bull sculptures are static and do not exhibit any movement.", "C": "The bull sculptures move slowly across the frame.", "D": "The bull sculptures move rapidly toward each other.", "correct_answer": "B"}, {"question": "How does the spatial relationship between the bulls change throughout the video?", "A": "The bulls turn away from each other during the video.", "B": "The spatial relationship remains unchanged as the bulls stay facing each other.", "C": "The bulls move further apart as the video progresses.", "D": "The bulls rotate around each other in the video.", "correct_answer": "B"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/9.mp4", "videoName": "9.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/9.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/9.json", "updatedJSONPath": "video_QAs_updated/9.json", "jsonName": "9.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video features a speaker presenting on stage, engaging with the audience while holding objects in both hands.\\n\\n**Action Sequence and Recognition:**\\n- The speaker begins by standing confidently in the center of the stage, holding an object in their right hand.\\n- They gesture with their left hand, occasionally raising it to emphasize points during the presentation.\\n- The speaker shifts the object between their hands, demonstrating engagement with the audience.\\n\\n**Object-Action Associations:**\\n- The speaker's right hand holds an object, likely a microphone or a remote, which they use to facilitate their presentation.\\n- The left hand is used for gesturing, enhancing the verbal communication with visual emphasis.\\n\\n**Location-based Motion Analysis:**\\n- The speaker remains primarily in the center of the stage, with actions occurring in the middle of the frame.\\n- Gestures are made in the upper area of the frame, while the object in the right hand is consistently visible.\\n\\n**Repetition and Frequency Details:**\\n- The speaker raises their left hand for emphasis multiple times, approximately 5 times throughout the presentation.\\n- The object in the right hand is shifted between hands at least 3 times, indicating a dynamic interaction with the audience.\\n\\n**Traditional Motion Analysis:**\\n- The speaker's left hand moves upward and outward in a diagonal direction, emphasizing points at a moderate speed.\\n- The right hand remains relatively stable while holding the object, with slight movements to adjust its position.\\n\\n**Spatial Relationships:**\\n- The speaker is centrally positioned in relation to the background signage, which includes the \\\"OaksChristianSchool\\\" sign prominently displayed behind them.\\n- The distance between the speaker and the background remains consistent, maintaining a clear focus on the speaker's actions and gestures. \\n\\nOverall, the video captures a dynamic presentation with effective use of hand gestures and object manipulation to engage the audience.\"\n}", "questions": [{"question": "What action is the speaker performing with their left hand?", "A": "The speaker is raising their left hand to emphasize points during the presentation.", "B": "The speaker is pointing to the audience with their left hand.", "C": "The speaker is holding an object in their left hand.", "D": "The speaker is resting their left hand at their side.", "correct_answer": "A"}, {"question": "Which action happens first in the video?", "A": "The speaker shifts the object between their hands.", "B": "The speaker stands confidently in the center of the stage.", "C": "The speaker gestures with their right hand.", "D": "The speaker raises their left hand for emphasis.", "correct_answer": "B"}, {"question": "What object performs the action of holding during the presentation?", "A": "The speaker's right hand holds an object, likely a microphone or remote.", "B": "The speaker's right hand holds a glass of water.", "C": "The speaker's left hand holds a phone.", "D": "The speaker's left hand holds a notepad.", "correct_answer": "A"}, {"question": "Where in the scene does the speaker's gesture take place?", "A": "The gestures are made off to the side of the stage.", "B": "The gestures are made in the lower area of the frame.", "C": "The gestures are made behind the speaker.", "D": "The gestures are made in the upper area of the frame.", "correct_answer": "D"}, {"question": "How many times does the speaker raise their left hand?", "A": "The speaker raises their left hand 10 times.", "B": "The speaker raises their left hand 2 times.", "C": "The speaker raises their left hand approximately 5 times.", "D": "The speaker raises their left hand once.", "correct_answer": "C"}, {"question": "In which direction does the speaker's left hand move?", "A": "The left hand moves upward and outward in a diagonal direction.", "B": "The left hand moves straight to the right.", "C": "The left hand moves downward and inward.", "D": "The left hand moves straight to the left.", "correct_answer": "A"}, {"question": "How far does the speaker move their right hand during the presentation?", "A": "The right hand remains relatively stable with slight movements.", "B": "The right hand moves back and forth 50 cm.", "C": "The right hand moves up and down 30 cm.", "D": "The right hand moves across the stage about 1 meter.", "correct_answer": "A"}, {"question": "What type of trajectory does the speaker's left hand follow?", "A": "The left hand follows a straight downward trajectory.", "B": "The left hand follows a circular motion.", "C": "The left hand follows a zigzag pattern.", "D": "The left hand follows a diagonal upward trajectory.", "correct_answer": "D"}, {"question": "Where is the speaker positioned relative to the background signage?", "A": "The speaker is positioned behind the background signage.", "B": "The speaker is positioned to the left of the background signage.", "C": "The speaker is centrally positioned in relation to the background signage.", "D": "The speaker is positioned to the right of the background signage.", "correct_answer": "C"}, {"question": "How does the spatial relationship between the speaker and the background change?", "A": "The distance between the speaker and the background remains consistent.", "B": "The speaker moves further away from the background during the presentation.", "C": "The speaker moves closer to the background as the presentation progresses.", "D": "The background moves closer to the speaker.", "correct_answer": "A"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/CMnbrZTo4tY.mp4", "videoName": "CMnbrZTo4tY.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/CMnbrZTo4tY.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/CMnbrZTo4tY.json", "updatedJSONPath": "video_QAs_updated/CMnbrZTo4tY.json", "jsonName": "CMnbrZTo4tY.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a fantastical scene featuring a dragon and various objects, including a person, headphones, and a microphone, set against a backdrop of rocks and trees.\\n\\n**Action Sequence and Recognition:**\\n- The dragon is seen flapping its wings, creating a powerful motion.\\n- The person appears to be interacting with the microphone, possibly speaking or singing.\\n- The person raises their left hand, gesturing towards the dragon.\\n\\n**Object-Action Associations:**\\n- The dragon performs wing flapping actions, showcasing its size and power.\\n- The person uses the microphone, indicating a performance or communication.\\n- The left hand of the person is raised, possibly to emphasize a point or signal.\\n\\n**Location-based Motion Analysis:**\\n- The dragon's actions occur in the center of the frame, dominating the scene.\\n- The person is positioned on the right side, interacting with the microphone.\\n- The rocks and trees are located in the lower and upper parts of the frame, respectively.\\n\\n**Repetition and Frequency Details:**\\n- The dragon flaps its wings multiple times, creating a rhythmic motion.\\n- The person raises their hand once during the interaction with the microphone.\\n\\n**Traditional Motion Analysis:**\\n- The dragon moves its wings in a downward and upward motion, covering a significant vertical distance with each flap.\\n- The person\\u2019s hand moves upward approximately 20cm in a quick gesture.\\n- The dragon's wing flapping occurs at a moderate speed, creating a strong visual impact.\\n\\n**Spatial Relationships:**\\n- The dragon is centrally located, with the person positioned to the right, creating a dynamic interaction.\\n- The rocks are scattered in the foreground, while the trees provide a backdrop, enhancing the depth of the scene.\\n- The distance between the person and the dragon varies, with the dragon being the focal point of the action.\"\n}", "questions": [{"question": "What action is the dragon performing?", "A": "The dragon is breathing fire.", "B": "The dragon is flapping its wings.", "C": "The dragon is walking on the ground.", "D": "The dragon is sitting still.", "correct_answer": "B"}, {"question": "Which action happens first in the video?", "A": "The person interacts with the microphone before the dragon flaps its wings.", "B": "The person raises their hand before the dragon flaps its wings.", "C": "The dragon roars before the person raises their hand.", "D": "The dragon flaps its wings before the person raises their hand.", "correct_answer": "D"}, {"question": "Which object performs the wing flapping motion?", "A": "The headphones perform the wing flapping motion.", "B": "The microphone performs the wing flapping motion.", "C": "The person performs the wing flapping motion.", "D": "The dragon performs the wing flapping motion.", "correct_answer": "D"}, {"question": "Where in the scene does the person interact with the microphone?", "A": "The person interacts with the microphone at the bottom of the frame.", "B": "The person interacts with the microphone on the left side of the frame.", "C": "The person interacts with the microphone on the right side of the frame.", "D": "The person interacts with the microphone in the center of the frame.", "correct_answer": "C"}, {"question": "How many times does the dragon flap its wings?", "A": "The dragon does not flap its wings.", "B": "The dragon flaps its wings multiple times.", "C": "The dragon flaps its wings three times.", "D": "The dragon flaps its wings once.", "correct_answer": "B"}, {"question": "In which direction does the dragon's wings move?", "A": "The dragon's wings move left and right.", "B": "The dragon's wings remain stationary.", "C": "The dragon's wings move in a circular motion.", "D": "The dragon's wings move downward and upward.", "correct_answer": "D"}, {"question": "How far does the person's hand move when raised?", "A": "The person's hand moves 30cm to the left.", "B": "The person's hand moves approximately 20cm upward.", "C": "The person's hand does not move.", "D": "The person's hand moves 10cm downward.", "correct_answer": "B"}, {"question": "Where is the dragon positioned relative to the person?", "A": "The dragon is centrally located with the person on the right.", "B": "The dragon is above the person.", "C": "The dragon is on the left side with the person on the right.", "D": "The dragon is behind the person.", "correct_answer": "A"}, {"question": "How does the spatial relationship between the dragon and the person change?", "A": "The distance between the person and the dragon varies as the dragon flaps its wings.", "B": "The dragon moves away from the person without any interaction.", "C": "The person and the dragon remain at a fixed distance.", "D": "The person moves closer to the dragon without any change in distance.", "correct_answer": "A"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/Dr3zpWjHsfg.mp4", "videoName": "Dr3zpWjHsfg.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/Dr3zpWjHsfg.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/Dr3zpWjHsfg.json", "updatedJSONPath": "video_QAs_updated/Dr3zpWjHsfg.json", "jsonName": "Dr3zpWjHsfg.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts an action-packed scene where a character engages in combat, utilizing a sword and shield against an enemy.\\n\\n**Action Sequence and Recognition:**\\n- The character first raises their sword in the right hand, preparing for an attack.\\n- Then, they perform a slashing motion with the sword, aimed at an unseen enemy.\\n- Following the attack, the character holds the shield up in the left hand for defense.\\n- The scene culminates with the text \\\"ENEMY FELLED\\\" appearing, indicating a successful strike.\\n\\n**Object-Action Associations:**\\n- The character model uses the sword to perform the slashing action.\\n- The shield is held in the left hand to provide protection during the combat.\\n- The character's movements are coordinated, with the right hand actively wielding the sword while the left hand stabilizes the shield.\\n\\n**Location-based Motion Analysis:**\\n- The actions occur primarily in the center of the frame, where the character is positioned against a rocky background.\\n- The sword slashing motion takes place in the central area, while the shield remains in the left side of the frame for defense.\\n\\n**Repetition and Frequency Details:**\\n- The character performs a single slashing motion with the sword.\\n- The shield is raised continuously during the combat sequence for protection.\\n\\n**Traditional Motion Analysis:**\\n- The sword moves in a diagonal downward trajectory during the attack, covering a distance of approximately 30cm.\\n- The character's movements are rapid, indicating a quick and aggressive combat style.\\n- The shield remains stationary in the left hand, providing a consistent defensive posture.\\n\\n**Spatial Relationships:**\\n- The character is centrally located, with the sword positioned to the right and the shield to the left.\\n- The distance between the sword and shield is minimal, indicating a close grip for effective combat.\\n- The rocky background remains stationary, providing a stable context for the character's actions.\\n\\nOverall, the video captures a dynamic combat scenario, highlighting the character's actions, object interactions, and spatial dynamics effectively.\"\n}", "questions": [{"question": "What action is the character performing with their right hand?", "A": "The character is raising their sword in the right hand.", "B": "The character is running towards the enemy.", "C": "The character is holding the shield in the left hand.", "D": "The character is crouching down.", "correct_answer": "A"}, {"question": "Which action happens first in the video?", "A": "The character receives a message saying 'ENEMY FELLED'.", "B": "The character raises their sword in the right hand.", "C": "The character holds the shield up in the left hand.", "D": "The character performs a slashing motion with the sword.", "correct_answer": "B"}, {"question": "What object performs the slashing motion?", "A": "The shield is used for defense.", "B": "The rocky background remains stationary.", "C": "The sword performs the slashing motion.", "D": "The character model is engaged in combat.", "correct_answer": "C"}, {"question": "Where in the scene does the slashing action take place?", "A": "The slashing action occurs on the left side of the scene.", "B": "The slashing action takes place in the center of the frame.", "C": "The slashing action happens in the upper area of the frame.", "D": "The slashing action is performed behind the rocky background.", "correct_answer": "B"}, {"question": "How many times does the character perform a slashing motion with the sword?", "A": "The character performs a single slashing motion with the sword.", "B": "The character performs three slashing motions with the sword.", "C": "The character performs two slashing motions with the sword.", "D": "The character does not perform any slashing motions.", "correct_answer": "A"}, {"question": "In which direction does the sword move during the attack?", "A": "The sword moves in a circular motion.", "B": "The sword moves in a diagonal downward trajectory.", "C": "The sword moves straight upward.", "D": "The sword moves horizontally to the left.", "correct_answer": "B"}, {"question": "How far does the sword move during the attack?", "A": "The sword moves 50cm to the right.", "B": "The sword moves 10cm forward.", "C": "The sword covers a distance of approximately 30cm.", "D": "The sword does not move at all.", "correct_answer": "C"}, {"question": "What type of trajectory does the sword follow during the attack?", "A": "The sword follows a circular path.", "B": "The sword follows a zigzag pattern.", "C": "The sword follows a diagonal downward trajectory.", "D": "The sword follows a straight path.", "correct_answer": "C"}, {"question": "Where is the shield positioned relative to the character?", "A": "The shield is held in the left hand of the character.", "B": "The shield is held in the right hand of the character.", "C": "The shield is positioned behind the character.", "D": "The shield is placed on the ground.", "correct_answer": "A"}, {"question": "How does the spatial relationship between the sword and shield change during the action?", "A": "The sword is positioned to the right and the shield to the left, indicating a close grip.", "B": "The sword and shield are both held in the right hand.", "C": "The sword and shield are positioned far apart from each other.", "D": "The sword moves away from the shield during the action.", "correct_answer": "A"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/MHYvT6raEgE.mp4", "videoName": "MHYvT6raEgE.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/MHYvT6raEgE.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/MHYvT6raEgE.json", "updatedJSONPath": "video_QAs_updated/MHYvT6raEgE.json", "jsonName": "MHYvT6raEgE.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a musical performance where multiple individuals are engaged in playing instruments and conducting, creating a lively and coordinated atmosphere.\\n\\n**Action Sequence and Recognition:**\\n- The conductor raises their baton to signal the start of the performance.\\n- The musicians begin playing their instruments, with some individuals using their right hands to hold and play the instruments.\\n- Several performers wave their hands in rhythm with the music, while others maintain a steady posture at their respective positions.\\n\\n**Object-Action Associations:**\\n- The conductor uses the baton to direct the musicians, indicating tempo and dynamics.\\n- The musicians utilize their right hands to play various musical instruments, such as the guzheng and other string instruments.\\n- The podium serves as a stable base for the conductor, who stands behind it while leading the performance.\\n\\n**Location-based Motion Analysis:**\\n- The conductor is positioned at the front center of the stage, clearly visible to the audience.\\n- The musicians are arranged in rows behind the conductor, with some instruments located on the left and right sides of the stage.\\n- The actions primarily occur in the central area of the frame, where the conductor and musicians are actively engaged.\\n\\n**Repetition and Frequency Details:**\\n- The conductor raises and lowers the baton multiple times, signaling changes in the music.\\n- Musicians repeatedly play their instruments in a synchronized manner, with rhythmic patterns observed throughout the performance.\\n- Hand gestures, such as waving and conducting motions, are performed several times, contributing to the overall dynamic of the performance.\\n\\n**Traditional Motion Analysis:**\\n- The conductor's baton moves in a circular motion, indicating tempo changes, while the conductor shifts their body slightly to engage with the musicians.\\n- The musicians' right hands move in a coordinated manner, playing their instruments with varying speeds, from slow strumming to rapid plucking.\\n- The overall movement of the performers is characterized by a steady rhythm, with occasional bursts of energy as they engage with the music.\\n\\n**Spatial Relationships:**\\n- The conductor is centrally located, with musicians positioned in a semi-circle around them, creating a cohesive visual arrangement.\\n- The distance between the conductor and the nearest musicians is minimal, allowing for effective communication and coordination.\\n- As the performance progresses, the spatial arrangement remains consistent, with slight adjustments in hand positions and body movements as they respond to the music.\\n\\nThis detailed analysis captures the essence of the performance, highlighting the interactions between the conductor, musicians, and their instruments, as well as the spatial dynamics of the scene.\"\n}", "questions": [{"question": "What action is the conductor performing with their baton?", "A": "The conductor is waving their baton in a circular motion.", "B": "The conductor is holding the baton still.", "C": "The conductor is lowering their baton to end the performance.", "D": "The conductor is raising their baton to signal the start of the performance.", "correct_answer": "D"}, {"question": "Which action happens first in the video?", "A": "The conductor raises their baton to signal the start of the performance.", "B": "The musicians wave their hands in rhythm.", "C": "The conductor lowers the baton after the performance.", "D": "The musicians begin playing their instruments.", "correct_answer": "A"}, {"question": "What object performs the conducting motion?", "A": "The conductor uses the baton to direct the musicians.", "B": "The audience claps in response to the music.", "C": "The podium supports the conductor during the performance.", "D": "The musicians use their instruments to play music.", "correct_answer": "A"}, {"question": "Where in the scene does the conducting action take place?", "A": "The conducting action occurs at the back of the stage.", "B": "The conducting action is performed offstage.", "C": "The conducting action takes place at the front center of the stage.", "D": "The conducting action happens on the left side of the stage.", "correct_answer": "C"}, {"question": "How many times does the conductor raise their baton?", "A": "The conductor does not raise their baton at all.", "B": "The conductor raises their baton three times.", "C": "The conductor raises their baton only once.", "D": "The conductor raises their baton multiple times during the performance.", "correct_answer": "D"}, {"question": "In which direction does the conductor's baton move?", "A": "The conductor's baton moves in a circular motion.", "B": "The conductor's baton moves left to right.", "C": "The conductor's baton moves diagonally.", "D": "The conductor's baton moves straight up and down.", "correct_answer": "A"}, {"question": "How far does the conductor stand from the nearest musicians?", "A": "The conductor is positioned 2 meters behind the musicians.", "B": "The distance between the conductor and the nearest musicians is minimal.", "C": "The conductor stands 5 meters away from the musicians.", "D": "The conductor is 10 meters away from the musicians.", "correct_answer": "B"}, {"question": "What type of trajectory does the conductor's baton follow?", "A": "The baton follows a circular motion indicating tempo changes.", "B": "The baton follows a diagonal path.", "C": "The baton follows a straight line up and down.", "D": "The baton follows a zigzag pattern.", "correct_answer": "A"}, {"question": "Where is the podium positioned relative to the conductor?", "A": "The podium is to the left of the conductor.", "B": "The podium is behind the conductor.", "C": "The podium is directly in front of the conductor.", "D": "The podium is to the right of the conductor.", "correct_answer": "C"}, {"question": "How does the spatial relationship between the conductor and musicians change?", "A": "The conductor and musicians switch places.", "B": "The spatial arrangement remains consistent with slight adjustments in hand positions.", "C": "The conductor moves further away from the musicians as the performance progresses.", "D": "The musicians move closer to the conductor during the performance.", "correct_answer": "B"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/VX_xG1QfOqE.mp4", "videoName": "VX_xG1QfOqE.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/VX_xG1QfOqE.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/VX_xG1QfOqE.json", "updatedJSONPath": "video_QAs_updated/VX_xG1QfOqE.json", "jsonName": "VX_xG1QfOqE.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video features a speaker delivering a speech at a formal event, likely a graduation ceremony, with various visual elements including subtitles and a podium.\\n\\n**Action Sequence and Recognition:**\\n- The speaker stands at the podium, delivering a speech.\\n- The speaker gestures with their hands while speaking, emphasizing key points.\\n- The speaker occasionally looks down at notes or a podium surface.\\n\\n**Object-Action Associations:**\\n- The speaker uses their hands to gesture while speaking, enhancing the delivery of their message.\\n- The microphone is positioned in front of the speaker, capturing their voice as they speak.\\n- The podium serves as a support for the speaker's notes and the microphone.\\n\\n**Location-based Motion Analysis:**\\n- The speaker is centrally located at the podium, which is positioned in the lower part of the frame.\\n- The microphone is placed on the podium, ensuring it is within reach of the speaker.\\n- Subtitles appear at the bottom of the frame, providing a translation or transcription of the speech.\\n\\n**Repetition and Frequency Details:**\\n- The speaker gestures multiple times throughout the speech, with hand movements occurring consistently as they emphasize points.\\n- The speaker looks down at the podium several times, indicating a reliance on notes.\\n\\n**Traditional Motion Analysis:**\\n- The speaker's hand gestures move in various directions, including upward and outward, to emphasize points.\\n- The distance of hand movements varies, with some gestures being small and others more expansive.\\n- The speaker maintains a steady pace in their speech delivery, with gestures synchronized to their verbal emphasis.\\n\\n**Spatial Relationships:**\\n- The podium is positioned directly in front of the speaker, with the microphone placed centrally on it.\\n- The subtitles are consistently located at the bottom of the frame, maintaining a clear view of the speaker and podium.\\n- The speaker's movements are confined to the area around the podium, with no significant lateral movement observed. \\n\\nOverall, the video captures a formal speaking engagement where the speaker effectively uses gestures and visual aids to communicate their message.\"\n}", "questions": [{"question": "What action is the speaker performing at the podium?", "A": "The speaker is adjusting the microphone.", "B": "The speaker is delivering a speech at the podium.", "C": "The speaker is reading a book at the podium.", "D": "The speaker is sitting down at the podium.", "correct_answer": "B"}, {"question": "Which action happens first in the video?", "A": "The speaker adjusts the microphone.", "B": "The speaker stands at the podium to deliver the speech.", "C": "The speaker looks down at the notes.", "D": "The speaker gestures with their hands.", "correct_answer": "B"}, {"question": "What does the speaker do with their hands while speaking?", "A": "The speaker keeps their hands in their pockets.", "B": "The speaker claps their hands together.", "C": "The speaker gestures with their hands to emphasize points.", "D": "The speaker points to the audience.", "correct_answer": "C"}, {"question": "Where in the scene does the speaker's hand movement take place?", "A": "The hand movements occur in the upper part of the frame.", "B": "The hand movements occur behind the podium.", "C": "The hand movements occur off to the left side of the frame.", "D": "The hand movements occur in the central area around the podium.", "correct_answer": "D"}, {"question": "How many times does the speaker look down at the podium?", "A": "The speaker looks down once.", "B": "The speaker does not look down at all.", "C": "The speaker looks down continuously throughout the speech.", "D": "The speaker looks down at the podium several times.", "correct_answer": "D"}, {"question": "In which direction does the speaker's hand move when gesturing?", "A": "The speaker's hand moves upward and outward.", "B": "The speaker's hand remains still.", "C": "The speaker's hand moves downward only.", "D": "The speaker's hand moves side to side.", "correct_answer": "A"}, {"question": "How far does the speaker's hand move during gestures?", "A": "The hand movements do not vary at all.", "B": "The hand movements are all very small.", "C": "The distance of hand movements varies from small to expansive.", "D": "The hand movements are all very large.", "correct_answer": "C"}, {"question": "What is the speed of the speaker's speech delivery?", "A": "The speaker's pace varies significantly.", "B": "The speaker speaks very quickly without pauses.", "C": "The speaker speaks very slowly throughout the speech.", "D": "The speaker maintains a steady pace in their speech delivery.", "correct_answer": "D"}, {"question": "Where is the podium positioned relative to the speaker?", "A": "The podium is positioned to the left of the speaker.", "B": "The podium is positioned behind the speaker.", "C": "The podium is positioned directly in front of the speaker.", "D": "The podium is positioned to the right of the speaker.", "correct_answer": "C"}, {"question": "How does the spatial relationship between the speaker and the microphone change?", "A": "The microphone is moved to the left side of the podium.", "B": "The microphone moves away from the podium.", "C": "The microphone remains centrally placed on the podium in front of the speaker.", "D": "The microphone is removed from the podium.", "correct_answer": "C"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/_FRKKhOPzL0.mp4", "videoName": "_FRKKhOPzL0.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/_FRKKhOPzL0.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/_FRKKhOPzL0.json", "updatedJSONPath": "video_QAs_updated/_FRKKhOPzL0.json", "jsonName": "_FRKKhOPzL0.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a scene where a person is walking with two children, engaging in a playful interaction while moving along a path.\\n\\n**Action Sequence and Recognition:**\\n- The person first walks forward, holding hands with the children.\\n- The children are seen reaching out and holding onto the person's hands.\\n- The children occasionally look back and smile, indicating a playful interaction.\\n\\n**Object-Action Associations:**\\n- The person uses their right hand to hold the hand of one child and their left hand to hold the other child's hand.\\n- The children actively engage by holding onto the person's hands and moving alongside them.\\n\\n**Location-based Motion Analysis:**\\n- The walking action occurs along a path in the center of the frame.\\n- The children are positioned on either side of the person, maintaining close proximity as they walk.\\n\\n**Repetition and Frequency Details:**\\n- The children repeatedly look back at the person, displaying a playful behavior that occurs multiple times throughout the video.\\n\\n**Traditional Motion Analysis:**\\n- The person moves forward in a straight line along the path at a moderate pace.\\n- The children follow closely, maintaining a small distance of about 1-2 feet from the person, moving in sync with their steps.\\n\\n**Spatial Relationships:**\\n- Initially, the children are positioned on either side of the person, with one child on the left and the other on the right.\\n- As they walk, the spatial arrangement remains consistent, with the children staying close to the person, reinforcing a sense of togetherness.\"\n}", "questions": [{"question": "What action is the person performing while walking?", "A": "The person is standing still with the children.", "B": "The person is sitting on a bench.", "C": "The person is holding hands with two children while walking forward.", "D": "The person is running ahead of the children.", "correct_answer": "C"}, {"question": "Which action happens first in the video?", "A": "The children reach out to hold the person's hands.", "B": "The children look back at the person.", "C": "The person walks forward while holding hands with the children.", "D": "The person stops walking.", "correct_answer": "C"}, {"question": "Which object performs the action of holding hands?", "A": "The person holds the hands of the children.", "B": "The children hold a toy.", "C": "The person holds a sign.", "D": "The children hold hands with each other.", "correct_answer": "A"}, {"question": "Where in the scene does the walking action take place?", "A": "The walking action takes place along a path in the center of the frame.", "B": "The walking action takes place indoors.", "C": "The walking action takes place in a park.", "D": "The walking action takes place on the left side of the frame.", "correct_answer": "A"}, {"question": "How many times do the children look back at the person?", "A": "The children do not look back at all.", "B": "The children look back at the person multiple times throughout the video.", "C": "The children look back once.", "D": "The children look back only at the beginning.", "correct_answer": "B"}, {"question": "In which direction does the person move?", "A": "The person moves to the right.", "B": "The person moves forward along the path.", "C": "The person moves to the left.", "D": "The person moves backward.", "correct_answer": "B"}, {"question": "How far apart are the children from the person while walking?", "A": "The children are 10 feet away from the person.", "B": "The children are right next to each other, away from the person.", "C": "The children maintain a distance of about 1-2 feet from the person.", "D": "The children are 5 feet away from the person.", "correct_answer": "C"}, {"question": "What type of trajectory does the person follow while walking?", "A": "The person follows a zigzag pattern.", "B": "The person follows a straight line along the path.", "C": "The person walks in a circular motion.", "D": "The person walks in a random direction.", "correct_answer": "B"}, {"question": "Where are the children positioned relative to the person initially?", "A": "One child is on the left and the other is on the right of the person.", "B": "One child is in front and the other is behind the person.", "C": "Both children are in front of the person.", "D": "Both children are behind the person.", "correct_answer": "A"}, {"question": "How does the spatial relationship between the children and the person change?", "A": "The children stay close to the person as they walk.", "B": "The children walk in the opposite direction from the person.", "C": "The children switch places with each other.", "D": "The children move further away from the person.", "correct_answer": "A"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/demo.mp4", "videoName": "demo.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/demo.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/demo.json", "updatedJSONPath": "video_QAs_updated/demo.json", "jsonName": "demo.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video showcases a crafting scene where a person is delicately manipulating flowers, likely for decoration or arrangement.\\n\\n**Action Sequence and Recognition:**\\n- The person first uses their left hand to hold a flower while the right hand performs intricate adjustments.\\n- The right hand is seen pinching and positioning the flower petals.\\n- The left hand assists by stabilizing the flower during the adjustments.\\n\\n**Object-Action Associations:**\\n- The person's left hand holds the flower steady.\\n- The right hand is actively engaged in adjusting the flower's petals.\\n- The flower itself is the primary object being manipulated.\\n\\n**Location-based Motion Analysis:**\\n- The left hand is positioned on the left side of the frame, holding the flower.\\n- The right hand is in the center, performing detailed adjustments to the flower.\\n- The flowers are arranged on a surface that appears to be a crafting table, with a pink backdrop.\\n\\n**Repetition and Frequency Details:**\\n- The right hand performs multiple adjustments, likely repeating the pinching motion several times to achieve the desired look.\\n- The left hand remains steady throughout, providing consistent support.\\n\\n**Traditional Motion Analysis:**\\n- The right hand moves in a precise, controlled manner, making small adjustments to the flower's petals, covering a distance of a few centimeters with each motion.\\n- The left hand remains stationary, maintaining its position while the right hand performs its actions.\\n\\n**Spatial Relationships:**\\n- The left hand is positioned to the left of the flower, while the right hand is directly above it, indicating a close working relationship.\\n- The flowers are closely grouped together, with the person\\u2019s hands actively interacting with them, creating a compact workspace. \\n\\nThis detailed analysis captures the intricate motions and spatial dynamics involved in the crafting process.\"\n}", "questions": [{"question": "What action is the person performing with their right hand?", "A": "The person is cutting the flower with scissors.", "B": "The person is placing the flower on the table.", "C": "The person is holding the flower with their left hand.", "D": "The person is adjusting the flower's petals with their right hand.", "correct_answer": "D"}, {"question": "Which action happens first in the video?", "A": "The person adjusts the flower's petals with their right hand.", "B": "The person stabilizes the flower with their right hand.", "C": "The person picks up another flower.", "D": "The person uses their left hand to hold the flower.", "correct_answer": "D"}, {"question": "What object performs the adjustment motion?", "A": "The table is being adjusted.", "B": "The right hand performs the adjustment motion on the flower.", "C": "The flower itself is moving.", "D": "The left hand holds the flower steady.", "correct_answer": "B"}, {"question": "Where in the scene does the adjustment action take place?", "A": "The adjustment action takes place on the left side of the frame.", "B": "The adjustment action takes place off-screen.", "C": "The adjustment action takes place at the bottom of the frame.", "D": "The adjustment action takes place in the center of the frame.", "correct_answer": "D"}, {"question": "How many times does the right hand adjust the flower's petals?", "A": "The right hand does not adjust the petals at all.", "B": "The right hand adjusts the petals multiple times.", "C": "The right hand adjusts the petals twice.", "D": "The right hand adjusts the petals once.", "correct_answer": "B"}, {"question": "In which direction does the right hand move to adjust the flower?", "A": "The right hand moves upward away from the table.", "B": "The right hand moves in a controlled manner toward the flower.", "C": "The right hand moves diagonally to the left.", "D": "The right hand moves away from the flower.", "correct_answer": "B"}, {"question": "How far does the right hand move during the adjustments?", "A": "The right hand moves a few centimeters with each adjustment.", "B": "The right hand moves several meters.", "C": "The right hand moves across the entire table.", "D": "The right hand does not move at all.", "correct_answer": "A"}, {"question": "What type of trajectory does the right hand follow while adjusting?", "A": "The right hand moves in a circular motion.", "B": "The right hand follows a precise, controlled path.", "C": "The right hand moves randomly.", "D": "The right hand moves in a zigzag pattern.", "correct_answer": "B"}, {"question": "Where is the left hand positioned relative to the flower?", "A": "The left hand is positioned below the flower.", "B": "The left hand is positioned above the flower.", "C": "The left hand is positioned behind the flower.", "D": "The left hand is positioned to the left of the flower.", "correct_answer": "D"}, {"question": "How does the spatial relationship between the hands change during the video?", "A": "The right hand moves away from the flower.", "B": "The left hand moves closer to the flower.", "C": "The right hand moves closer to the flower while the left hand remains steady.", "D": "The left hand moves away from the flower.", "correct_answer": "C"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/food_v_M2xdggeRyPI_frame000156__start_7397_end_7469.mp4", "videoName": "food_v_M2xdggeRyPI_frame000156__start_7397_end_7469.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/food_v_M2xdggeRyPI_frame000156__start_7397_end_7469.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/food_v_M2xdggeRyPI_frame000156__start_7397_end_7469.json", "updatedJSONPath": "video_QAs_updated/food_v_M2xdggeRyPI_frame000156__start_7397_end_7469.json", "jsonName": "food_v_M2xdggeRyPI_frame000156__start_7397_end_7469.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video showcases a cooking scene where a person is actively preparing a baking mixture using various kitchen tools and ingredients.\\n\\n**Action Sequence and Recognition:**\\n- The person first reaches for the rolling pin with their right hand.\\n- Then, they stir the baking ingredients in the bowl using the rolling pin.\\n- After stirring, they scrape the mixture from the sides of the bowl back into the center.\\n\\n**Object-Action Associations:**\\n- The person's right hand performs the reaching and stirring actions.\\n- The rolling pin is used to mix the baking ingredients in the bowl.\\n- The bowl contains the baking mixture that is being stirred.\\n\\n**Location-based Motion Analysis:**\\n- The reaching action occurs in the center of the frame where the mixer is located.\\n- The stirring motion takes place in the bowl positioned on the counter.\\n- The scraping action happens within the same bowl, ensuring the mixture is well combined.\\n\\n**Repetition and Frequency Details:**\\n- The stirring motion is repeated several times, with the rolling pin moving in a circular pattern.\\n- The scraping action is performed multiple times to ensure all ingredients are incorporated.\\n\\n**Traditional Motion Analysis:**\\n- The rolling pin moves in a circular motion within the bowl, covering a small area with consistent speed.\\n- The person's hand approaches the bowl from the right side, moving downward approximately 15cm to stir the mixture.\\n- The scraping motion involves a back-and-forth movement along the bowl's edge.\\n\\n**Spatial Relationships:**\\n- Initially, the bowl is centrally located on the counter, with the rolling pin positioned above it.\\n- As the person stirs, the rolling pin remains within the bowl, maintaining close proximity to the bowl's edges.\\n- The final arrangement shows the bowl filled with a well-mixed batter, with the rolling pin resting beside it on the counter.\"\n}", "questions": [{"question": "What action is the person performing with their right hand?", "A": "The person is cleaning the counter with a cloth.", "B": "The person is pouring flour into the bowl.", "C": "The person is reaching for the bowl with their left hand.", "D": "The person is stirring the baking ingredients in the bowl using the rolling pin.", "correct_answer": "D"}, {"question": "Which action happens first in the video?", "A": "The person reaches for the rolling pin with their right hand.", "B": "The person stirs the baking ingredients in the bowl.", "C": "The person places the bowl on the counter.", "D": "The person scrapes the mixture from the sides of the bowl.", "correct_answer": "A"}, {"question": "What object performs the stirring motion?", "A": "The bowl performs the stirring motion with the ingredients.", "B": "The person's left hand performs the stirring motion.", "C": "The rolling pin performs the stirring motion in the bowl.", "D": "The mixer performs the stirring motion in the bowl.", "correct_answer": "C"}, {"question": "Where in the scene does the stirring action take place?", "A": "The stirring action takes place on the kitchen floor.", "B": "The stirring action takes place in the refrigerator.", "C": "The stirring action takes place in the sink.", "D": "The stirring action takes place in the bowl positioned on the counter.", "correct_answer": "D"}, {"question": "How many times does the stirring motion occur?", "A": "The stirring motion occurs three times.", "B": "The stirring motion is repeated several times.", "C": "The stirring motion occurs twice.", "D": "The stirring motion occurs only once.", "correct_answer": "B"}, {"question": "In which direction does the rolling pin move while stirring?", "A": "The rolling pin moves diagonally toward the sink.", "B": "The rolling pin moves straight up and down.", "C": "The rolling pin moves from left to right across the counter.", "D": "The rolling pin moves in a circular motion within the bowl.", "correct_answer": "D"}, {"question": "How far does the person's hand move to stir the mixture?", "A": "The person's hand moves upward 10cm to reach the bowl.", "B": "The person's hand moves downward approximately 15cm to stir the mixture.", "C": "The person's hand moves sideways 20cm to grab the rolling pin.", "D": "The person's hand moves 5cm to the left to adjust the bowl.", "correct_answer": "B"}, {"question": "What type of trajectory does the rolling pin follow while stirring?", "A": "The rolling pin follows a straight line across the counter.", "B": "The rolling pin follows a zigzag pattern on the table.", "C": "The rolling pin follows a downward path into the sink.", "D": "The rolling pin follows a circular motion within the bowl.", "correct_answer": "D"}, {"question": "Where is the bowl positioned relative to the rolling pin initially?", "A": "The bowl is centrally located on the counter, with the rolling pin positioned above it.", "B": "The bowl is behind the rolling pin on the counter.", "C": "The bowl is to the left of the rolling pin on the counter.", "D": "The bowl is on the floor, far from the rolling pin.", "correct_answer": "A"}, {"question": "How does the spatial relationship between the rolling pin and the bowl change during stirring?", "A": "The rolling pin is placed on the floor, far from the bowl.", "B": "The rolling pin is lifted above the bowl and moved to the sink.", "C": "The rolling pin moves away from the bowl to the left side of the counter.", "D": "The rolling pin remains within the bowl, maintaining close proximity to the bowl's edges.", "correct_answer": "D"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/furniture_v_U642Lq9uz08_frame000076__start_4450_end_4540.mp4", "videoName": "furniture_v_U642Lq9uz08_frame000076__start_4450_end_4540.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/furniture_v_U642Lq9uz08_frame000076__start_4450_end_4540.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/furniture_v_U642Lq9uz08_frame000076__start_4450_end_4540.json", "updatedJSONPath": "video_QAs_updated/furniture_v_U642Lq9uz08_frame000076__start_4450_end_4540.json", "jsonName": "furniture_v_U642Lq9uz08_frame000076__start_4450_end_4540.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video features a person standing in front of a quilt display, engaging in a presentation while using hand gestures to emphasize points.\\n\\n**Action Sequence and Recognition:**\\n- The person begins by standing still, facing the camera.\\n- They raise their right hand to gesture, indicating a point of interest.\\n- The left hand is also used to support or emphasize the right hand's gestures.\\n- The person smiles and appears to be speaking throughout the video.\\n\\n**Object-Action Associations:**\\n- The person uses their right hand to perform gestures while speaking.\\n- The left hand occasionally interacts with the right hand, creating a dynamic of support.\\n- The quilts and wooden furniture in the background serve as a visual context but are not actively manipulated.\\n\\n**Location-based Motion Analysis:**\\n- The person is centrally positioned in front of the quilt display, which occupies the background.\\n- Hand gestures occur primarily in the center of the frame, drawing attention to the person\\u2019s actions.\\n\\n**Repetition and Frequency Details:**\\n- The person raises their right hand multiple times, with gestures occurring in a rhythmic pattern as they speak.\\n- Smiling is a consistent behavior throughout the video, enhancing engagement.\\n\\n**Traditional Motion Analysis:**\\n- The right hand moves upward and outward in a diagonal direction, with gestures covering a distance of approximately 20cm.\\n- The left hand remains relatively stationary, occasionally moving to support the right hand's gestures.\\n- The person's overall movement is minimal, focusing on hand gestures rather than significant body movement.\\n\\n**Spatial Relationships:**\\n- The person is positioned in the center of the frame, with the quilts and wooden furniture on either side, creating a balanced visual composition.\\n- The distance between the person and the quilts is consistent, maintaining a clear focus on the speaker while the background provides context. \\n\\nOverall, the video captures a presentation style that emphasizes hand gestures and facial expressions, enhancing the communication of ideas related to the quilts displayed behind the person.\"\n}", "questions": [{"question": "What action is the person performing with their right hand?", "A": "The person is raising their right hand to gesture.", "B": "The person is holding a quilt with both hands.", "C": "The person is clapping their hands together.", "D": "The person is pointing to the wooden furniture.", "correct_answer": "A"}, {"question": "Which action happens first in the video?", "A": "The person stands still facing the camera.", "B": "The person picks up a quilt.", "C": "The person smiles at the audience.", "D": "The person raises their left hand.", "correct_answer": "A"}, {"question": "Which object performs the gesture in the video?", "A": "The quilts perform the gesture.", "B": "The books perform the gesture.", "C": "The wooden furniture performs the gesture.", "D": "The person's right hand performs the gesture.", "correct_answer": "D"}, {"question": "Where in the scene does the hand gesture take place?", "A": "The hand gesture takes place on the left side of the frame.", "B": "The hand gesture takes place in the center of the frame.", "C": "The hand gesture takes place behind the quilts.", "D": "The hand gesture takes place near the wooden furniture.", "correct_answer": "B"}, {"question": "How many times does the person raise their right hand?", "A": "The person raises their right hand once.", "B": "The person does not raise their right hand at all.", "C": "The person raises their right hand multiple times.", "D": "The person raises their right hand twice.", "correct_answer": "C"}, {"question": "In which direction does the right hand move?", "A": "The right hand moves upward and outward in a diagonal direction.", "B": "The right hand moves to the left side.", "C": "The right hand moves straight down.", "D": "The right hand moves backward.", "correct_answer": "A"}, {"question": "How far does the right hand travel during the gestures?", "A": "The right hand covers a distance of 50cm.", "B": "The right hand covers a distance of 5cm.", "C": "The right hand does not move at all.", "D": "The right hand covers a distance of approximately 20cm.", "correct_answer": "D"}, {"question": "What type of trajectory does the right hand follow?", "A": "The right hand follows a straight downward trajectory.", "B": "The right hand follows a diagonal upward trajectory.", "C": "The right hand follows a circular trajectory.", "D": "The right hand follows a zigzag trajectory.", "correct_answer": "B"}, {"question": "Where is the person positioned relative to the quilts?", "A": "The person is positioned behind the quilts.", "B": "The person is positioned in the center in front of the quilts.", "C": "The person is positioned to the right of the quilts.", "D": "The person is positioned to the left of the quilts.", "correct_answer": "B"}, {"question": "How does the spatial relationship between the person and the quilts change?", "A": "The person moves further away from the quilts.", "B": "The quilts move closer to the person.", "C": "The distance between the person and the quilts remains consistent.", "D": "The person moves closer to the quilts.", "correct_answer": "C"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/gardening_v_T_21bzVSeCk_frame000017__start_915_end_1005.mp4", "videoName": "gardening_v_T_21bzVSeCk_frame000017__start_915_end_1005.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/gardening_v_T_21bzVSeCk_frame000017__start_915_end_1005.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/gardening_v_T_21bzVSeCk_frame000017__start_915_end_1005.json", "updatedJSONPath": "video_QAs_updated/gardening_v_T_21bzVSeCk_frame000017__start_915_end_1005.json", "jsonName": "gardening_v_T_21bzVSeCk_frame000017__start_915_end_1005.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a gardening scene where a person interacts with various plants and flowers, performing actions related to gardening and plant care.\\n\\n**Action Sequence and Recognition:**\\n- The person first raises their left hand to touch the leaves of a plant.\\n- Then, they use their right hand to adjust the position of a potted plant.\\n- Following this, the person reaches down to the soil, possibly to check or tend to it.\\n\\n**Object-Action Associations:**\\n- The person's left hand is used to interact with the leaves of the plants.\\n- The right hand is actively involved in repositioning the potted plants.\\n- The soil is the focus of the person's attention as they bend down to it.\\n\\n**Location-based Motion Analysis:**\\n- The interaction with the leaves occurs in the upper-left area of the frame.\\n- The adjustment of the potted plant takes place in the center of the scene.\\n- The action involving the soil is performed in the lower part of the frame.\\n\\n**Repetition and Frequency Details:**\\n- The person touches the leaves of the plants 2 times during the interaction.\\n- The adjustment of the potted plant is done once but involves multiple small movements.\\n\\n**Traditional Motion Analysis:**\\n- The left hand moves upward and slightly forward to touch the leaves, covering a distance of about 20cm at a moderate speed.\\n- The right hand moves horizontally to reposition the potted plant, traveling approximately 15cm in a straight line.\\n- The person bends down to the soil, moving downward about 30cm at a slow pace.\\n\\n**Spatial Relationships:**\\n- Initially, the person is positioned in the center of the frame, with the potted plants and flowers surrounding them.\\n- The left hand is positioned near the plants on the left side, while the right hand is closer to the center where the potted plants are located.\\n- As the person interacts with the soil, they lean forward, reducing the distance to the soil and plants, creating a more compact spatial arrangement.\"\n}", "questions": [{"question": "What action is the person performing with their left hand?", "A": "The person is holding a flower with their left hand.", "B": "The person is adjusting a potted plant with their left hand.", "C": "The person is raising their left hand to touch the leaves of a plant.", "D": "The person is watering the plants with their left hand.", "correct_answer": "C"}, {"question": "Which action happens first in the video?", "A": "The person bends down to the soil.", "B": "The person raises their left hand to touch the leaves.", "C": "The person picks up a flower.", "D": "The person adjusts the position of a potted plant.", "correct_answer": "B"}, {"question": "Which object performs the action of touching the leaves?", "A": "The potted plants perform the action of touching the leaves.", "B": "The person's right hand performs the action of touching the leaves.", "C": "The flowers perform the action of touching the leaves.", "D": "The person's left hand performs the action of touching the leaves.", "correct_answer": "D"}, {"question": "Where in the scene does the interaction with the leaves take place?", "A": "The interaction with the leaves takes place in the center of the scene.", "B": "The interaction with the leaves takes place on the right side of the frame.", "C": "The interaction with the leaves takes place in the upper-left area of the frame.", "D": "The interaction with the leaves takes place in the lower part of the frame.", "correct_answer": "C"}, {"question": "How many times does the person touch the leaves of the plants?", "A": "The person touches the leaves of the plants 3 times.", "B": "The person does not touch the leaves of the plants.", "C": "The person touches the leaves of the plants once.", "D": "The person touches the leaves of the plants 2 times.", "correct_answer": "D"}, {"question": "In which direction does the person's left hand move to touch the leaves?", "A": "The left hand moves backward to touch the leaves.", "B": "The left hand moves downward to touch the leaves.", "C": "The left hand moves upward and slightly forward to touch the leaves.", "D": "The left hand moves horizontally to touch the leaves.", "correct_answer": "C"}, {"question": "How far does the person's left hand move to touch the leaves?", "A": "The left hand covers a distance of about 30cm to touch the leaves.", "B": "The left hand covers a distance of about 10cm to touch the leaves.", "C": "The left hand does not move to touch the leaves.", "D": "The left hand covers a distance of about 20cm to touch the leaves.", "correct_answer": "D"}, {"question": "What path does the person's right hand follow to adjust the potted plant?", "A": "The right hand moves upward to adjust the potted plant.", "B": "The right hand moves horizontally in a straight line to adjust the potted plant.", "C": "The right hand moves diagonally to adjust the potted plant.", "D": "The right hand moves downward to adjust the potted plant.", "correct_answer": "B"}, {"question": "Where is the soil located in relation to the person?", "A": "The soil is located in the upper part of the frame, away from the person.", "B": "The soil is located in the lower part of the frame, closer to the person.", "C": "The soil is located on the right side of the frame, away from the person.", "D": "The soil is located in the center of the frame, far from the person.", "correct_answer": "B"}, {"question": "How does the spatial relationship between the person and the soil change as they interact with it?", "A": "The person stays in the same position relative to the soil.", "B": "The person moves backward, increasing the distance to the soil.", "C": "The person leans forward, reducing the distance to the soil.", "D": "The person moves to the left, away from the soil.", "correct_answer": "C"}], "hasError": false, "comment": ""}, {"videoFile": "Videos_crop/repair_v_m5FagzLdbbM_frame000198__start_11763_end_11853.mp4", "videoName": "repair_v_m5FagzLdbbM_frame000198__start_11763_end_11853.mp4", "bodyhandsVideoFile": "video_general_obj_det_finished/repair_v_m5FagzLdbbM_frame000198__start_11763_end_11853.mp4", "originalJSONPath": "videos_captions_gpt4o_mini/repair_v_m5FagzLdbbM_frame000198__start_11763_end_11853.json", "updatedJSONPath": "video_QAs_updated/repair_v_m5FagzLdbbM_frame000198__start_11763_end_11853.json", "jsonName": "repair_v_m5FagzLdbbM_frame000198__start_11763_end_11853.json", "jsonContent": "{\n    \"gpt4o_mini_res\": \"The video depicts a person engaged in a technical task involving the disassembly or repair of a device, utilizing various tools and components.\\n\\n**Action Sequence and Recognition:**\\n- The person first uses their left hand to hold a circuit board while manipulating it.\\n- They then use a tool, likely a screwdriver, to interact with the screen of the device.\\n- The left hand is consistently positioned to support the circuit board throughout the task.\\n\\n**Object-Action Associations:**\\n- The person's left hand is actively holding the circuit board and manipulating the screen.\\n- The tools are used to perform actions on the device, specifically the screen and circuit board.\\n\\n**Location-based Motion Analysis:**\\n- The actions occur primarily in the center of the frame, where the device is located.\\n- The left hand is positioned on the left side of the device, while the tools are scattered around the workspace.\\n\\n**Repetition and Frequency Details:**\\n- The left hand appears to make several adjustments to the circuit board, with at least 4 distinct movements observed.\\n- The tool is used multiple times to interact with the screen, indicating a repetitive action.\\n\\n**Traditional Motion Analysis:**\\n- The left hand moves in a downward direction towards the circuit board, covering a distance of approximately 10cm with moderate speed.\\n- The tool is manipulated in a back-and-forth motion, indicating a precise and controlled action.\\n\\n**Spatial Relationships:**\\n- The circuit board is positioned directly above the screen, with the left hand maintaining a close proximity to both.\\n- The tools are located to the left of the device, indicating a workspace arrangement that facilitates easy access during the task. \\n\\nOverall, the video captures a focused technical activity with clear interactions between the person, tools, and components of the device.\"\n}", "questions": [{"question": "What action is the person performing with their left hand?", "A": "The person is using a screwdriver with their right hand.", "B": "The person is holding the circuit board with their left hand.", "C": "The person is placing tools on the table.", "D": "The person is adjusting the screen with their right hand.", "correct_answer": "B"}, {"question": "Which action happens first in the video?", "A": "The person manipulates the screen with a tool.", "B": "The person uses their left hand to hold the circuit board.", "C": "The person gathers tools from the workspace.", "D": "The person adjusts the circuit board multiple times.", "correct_answer": "B"}, {"question": "Which object performs the action of holding the circuit board?", "A": "The right hand is used to manipulate the screen.", "B": "The tools are scattered around the workspace.", "C": "The screwdriver interacts with the screen.", "D": "The person's left hand holds the circuit board.", "correct_answer": "D"}, {"question": "Where in the scene does the manipulation of the screen take place?", "A": "The manipulation of the screen occurs on the left side of the device.", "B": "The manipulation of the screen happens off-screen.", "C": "The manipulation of the screen is done on the table.", "D": "The manipulation of the screen takes place in the center of the frame.", "correct_answer": "D"}, {"question": "How many distinct movements does the left hand make with the circuit board?", "A": "The left hand makes at least 4 distinct movements with the circuit board.", "B": "The left hand does not move at all during the video.", "C": "The left hand makes 10 movements with the circuit board.", "D": "The left hand makes 2 movements with the circuit board.", "correct_answer": "A"}, {"question": "In which direction does the left hand move towards the circuit board?", "A": "The left hand moves diagonally across the screen.", "B": "The left hand moves to the right side of the device.", "C": "The left hand moves downward towards the circuit board.", "D": "The left hand moves upward away from the circuit board.", "correct_answer": "C"}, {"question": "How far does the left hand move towards the circuit board?", "A": "The left hand covers a distance of approximately 10cm.", "B": "The left hand moves 5cm towards the circuit board.", "C": "The left hand moves 20cm towards the circuit board.", "D": "The left hand does not move at all.", "correct_answer": "A"}, {"question": "What type of trajectory does the tool follow when interacting with the screen?", "A": "The tool is thrown across the room.", "B": "The tool is manipulated in a back-and-forth motion.", "C": "The tool is dropped onto the table.", "D": "The tool moves in a circular path around the device.", "correct_answer": "B"}, {"question": "Where is the circuit board positioned relative to the screen?", "A": "The circuit board is positioned directly above the screen.", "B": "The circuit board is below the screen.", "C": "The circuit board is to the left of the screen.", "D": "The circuit board is far away from the screen.", "correct_answer": "A"}], "hasError": false, "comment": ""}];
window.ANNOTATIONS_PATH = "/Users/yulu/Downloads/FoundationMotion_addUI/data_annotate/video_annotations/user_annotations.json";
window.ANNOTATIONS = {};

// Debug: Log all video paths
console.log('Total videos loaded:', FILE_DATA.length);
FILE_DATA.forEach((item, index) => {
    console.log('Video ' + (index + 1) + ': ' + item.videoName + ' -> ' + item.videoFile);
    if (item.bodyhandsVideoFile) {
        console.log('  Processed: ' + item.bodyhandsVideoFile);
    }
});

// Initialize annotations from FILE_DATA
FILE_DATA.forEach(item => {
    ANNOTATIONS[item.videoName] = {
        hasError: item.hasError || false,
        comment: item.comment || ''
    };
});

let currentIndex = 0;
if (localStorage.getItem('video_json_viewer_index') !== null) {
    currentIndex = parseInt(localStorage.getItem('video_json_viewer_index'), 10);
}
if (isNaN(currentIndex) || currentIndex < 0 || currentIndex >= FILE_DATA.length) {
    currentIndex = 0;
}

const videoPlayer = document.getElementById('videoPlayer');
const bodyhandsVideoPlayer = document.getElementById('bodyhandsVideoPlayer');
const videoNameEl = document.getElementById('videoName');
const jsonNameEl = document.getElementById('jsonName');
const jsonEditor = document.getElementById('jsonEditor');
const indexDisplay = document.getElementById('indexDisplay');
const qaContainer = document.getElementById('qaContainer');
const qaCounter = document.getElementById('qaCounter');
const errorCheckbox = document.getElementById('errorCheckbox');
const commentTextarea = document.getElementById('commentTextarea');
const annotationStatus = document.getElementById('annotationStatus');

document.getElementById('prevBtn').addEventListener('click', () => {
    goToIndex(currentIndex - 1);
});
document.getElementById('nextBtn').addEventListener('click', () => {
    goToIndex(currentIndex + 1);
});
document.getElementById('saveBtn').addEventListener('click', onSaveClick);
document.getElementById('saveAnnotationBtn').addEventListener('click', saveAnnotation);

/**
 * Displays the questions in a beautiful format
 */
function displayQuestions(questions) {
    qaCounter.textContent = questions.length;
    qaContainer.innerHTML = '';
    
    if (questions.length === 0) {
        qaContainer.innerHTML = '<div style="text-align: center; color: #666; padding: 20px;">No questions available for this video</div>';
        return;
    }
    
    questions.forEach((qa, index) => {
        const qaItem = document.createElement('div');
        qaItem.className = 'qa-item';
        
        let questionHtml = `<div class="qa-question">${index + 1}. ${qa.question || 'No question available'}</div>`;
        
        if (qa.A || qa.B || qa.C || qa.D) {
            // Multiple choice question
            let optionsHtml = '<div class="qa-options">';
            ['A', 'B', 'C', 'D'].forEach(option => {
                if (qa[option]) {
                    const isCorrect = qa.correct_answer === option;
                    const optionClass = isCorrect ? 'qa-option correct' : 'qa-option';
                    optionsHtml += `<div class="${optionClass}"><span class="qa-option-label">${option}:</span>${qa[option]}</div>`;
                }
            });
            optionsHtml += '</div>';
            qaItem.innerHTML = questionHtml + optionsHtml;
        } else if (qa.answer) {
            // Simple Q&A format
            qaItem.innerHTML = questionHtml + `<div class="qa-options"><div class="qa-option"><strong>Answer:</strong> ${qa.answer}</div></div>`;
        } else {
            // Question only
            qaItem.innerHTML = questionHtml;
        }
        
        qaContainer.appendChild(qaItem);
    });
}

/**
 * Loads the video and JSON at FILE_DATA[idx]
 * Replaces literal '\n' in the JSON with real newlines in the editor.
 */
 
function loadFileByIndex(idx) {
    if (idx < 0 || idx >= FILE_DATA.length) return;

    const data = FILE_DATA[idx];
    console.log('Loading video ' + (idx + 1) + '/' + FILE_DATA.length + ': ' + data.videoName);
    console.log('Video path: ' + data.videoFile);
    
    videoPlayer.src = data.videoFile;
    videoNameEl.textContent = data.videoName;
    jsonNameEl.textContent = data.jsonName;
    
    // Add error handling for video loading
    videoPlayer.onerror = function(e) {
        console.error('Error loading video ' + data.videoName + ':', e);
        console.error('Video path: ' + data.videoFile);
    };
    
    videoPlayer.onloadstart = function() {
        console.log('Started loading video: ' + data.videoName);
    };
    
    videoPlayer.oncanplay = function() {
        console.log('Video ready to play: ' + data.videoName);
    };
    
    // Load bodyhands video if available
    if (data.bodyhandsVideoFile) {
        console.log('Loading processed video: ' + data.bodyhandsVideoFile);
        bodyhandsVideoPlayer.src = data.bodyhandsVideoFile;
        bodyhandsVideoPlayer.style.display = 'block';
        
        // Add error handling for processed video
        bodyhandsVideoPlayer.onerror = function(e) {
            console.error('Error loading processed video ' + data.videoName + ':', e);
            console.error('Processed video path: ' + data.bodyhandsVideoFile);
        };
    } else {
        bodyhandsVideoPlayer.src = '';
        bodyhandsVideoPlayer.style.display = 'none';
    }

    try {
        // Try to parse & re-stringify so we have pretty JSON
        const obj = JSON.parse(data.jsonContent);
        let pretty = JSON.stringify(obj, null, 2);

        jsonEditor.value = pretty;
    } catch (e) {
        // If parse fails, just show raw content
        jsonEditor.value = data.jsonContent;
    }

    // Display QA data if available
    displayQuestions(data.questions || []);
    
    // Load annotation data
    const annotation = ANNOTATIONS[data.videoName] || { hasError: false, comment: '' };
    errorCheckbox.checked = annotation.hasError;
    commentTextarea.value = annotation.comment;
    
    // Apply visual styling based on error status
    if (annotation.hasError) {
        qaContainer.classList.add('error-marker');
    } else {
        qaContainer.classList.remove('error-marker');
    }

    indexDisplay.textContent = (idx + 1) + " / " + FILE_DATA.length;
    
    // Clear annotation status
    annotationStatus.textContent = '';
}

function goToIndex(idx) {
    if (idx < 0) idx = 0;
    if (idx >= FILE_DATA.length) idx = FILE_DATA.length - 1;
    currentIndex = idx;
    localStorage.setItem('video_json_viewer_index', currentIndex);
    loadFileByIndex(currentIndex);
}

/**
 * Saves the user's annotation for the current video
 */
function saveAnnotation() {
    const data = FILE_DATA[currentIndex];
    const videoName = data.videoName;
    
    // Update annotation in memory
    ANNOTATIONS[videoName] = {
        hasError: errorCheckbox.checked,
        comment: commentTextarea.value
    };
    
    // Update FILE_DATA to reflect the change
    data.hasError = errorCheckbox.checked;
    data.comment = commentTextarea.value;
    
    // Apply visual styling based on error status
    if (errorCheckbox.checked) {
        qaContainer.classList.add('error-marker');
    } else {
        qaContainer.classList.remove('error-marker');
    }
    
    // Save all annotations to file
    const blob = new Blob([JSON.stringify(ANNOTATIONS, null, 2)], { type: 'application/json' });
    const url = URL.createObjectURL(blob);
    
    // Use download attribute to save to the specific path
    const a = document.createElement('a');
    a.href = url;
    a.download = "user_annotations.json";
    document.body.appendChild(a);
    a.click();
    document.body.removeChild(a);
    URL.revokeObjectURL(url);
    
    annotationStatus.textContent = 'Annotation saved! Place the file in video_annotations/ folder.';
    setTimeout(() => {
        annotationStatus.textContent = '';
    }, 3000);
}

/**
 * When saving, we do the reverse: 
 * turn real newlines in the editor into '\n', then parse to confirm it's valid JSON,
 * then let the user download the final text.
 */
function onSaveClick() {
    const data = FILE_DATA[currentIndex];
    let textValue = jsonEditor.value;


    try {
        // Parse to ensure it's valid JSON
        const obj = JSON.parse(textValue);
        // Re-stringify so the saved file is nicely formatted
        const formatted = JSON.stringify(obj, null, 2);

        const blob = new Blob([formatted], { type: 'application/json' });
        const url = URL.createObjectURL(blob);

        const a = document.createElement('a');
        a.href = url;
        a.download = data.jsonName; 
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
        URL.revokeObjectURL(url);

        alert("JSON valid. File downloaded. Place it in 'video_QAs_updated' to load next time.");

    } catch (err) {
        // If there's any parse error, show it
        alert("Error: JSON not valid. " + err);
    }
}

// Initial load
loadFileByIndex(currentIndex);
</script>

</body>
</html>
