{
  "video_path": "./ref_datasets/part2/6332244003.mp4",
  "start_time": 0.0,
  "end_time": 0.16,
  "image_path": "./004299.jpg",
  "represents_multiple_types": true,
  "original_detections": [
    {
      "video_path": "./ref_datasets/part2/6332244003.mp4",
      "start_time": 0.0,
      "end_time": 0.16,
      "image_path": "./ref_datasets/extracted_frames/person_871664421922354158_1_6332244003.mp4_0.00_0.16.jpg",
      "type": "person"
    },
    {
      "video_path": "./ref_datasets/part2/6332244003.mp4",
      "start_time": 0.0,
      "end_time": 0.16,
      "image_path": "./ref_datasets/extracted_frames/face_871664421922354158_1_6332244003.mp4_0.00_0.16.jpg",
      "type": "face"
    }
  ],
  "cross_video_duplicate": false,
  "types": [
    "person",
    "face"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "male",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a white long-sleeved shirt with thin horizontal black stripes and a beige knitted beanie hat. He also wears round black-framed glasses.",
        "clothing": [
          {
            "possible_names": [
              "shirt",
              "long-sleeved shirt",
              "striped shirt"
            ],
            "name": "long sleeved shirt",
            "type": "top",
            "color": [
              "white",
              "black"
            ]
          },
          {
            "possible_names": [
              "beanie",
              "knitted hat",
              "winter hat"
            ],
            "name": "beanie",
            "type": "headwear",
            "color": [
              "beige"
            ]
          },
          {
            "possible_names": [
              "glasses",
              "eyeglasses",
              "spectacles"
            ],
            "name": "glasses",
            "type": "accessory",
            "color": [
              "black"
            ]
          }
        ],
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "ring light",
              "lighting equipment"
            ],
            "name": "ring light",
            "position": "other"
          }
        ],
        "description": "An adult male appears in the foreground, wearing a white long-sleeved shirt with thin black stripes, a beige beanie, and black-framed glasses. His expression is neutral. A ring light is positioned near him, suggesting he might be involved in some form of video recording or streaming activity. The setting includes a softly lit room with curtains in the background.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears to have a neutral expression, neither displaying strong emotions nor engaging in an expressive gesture.",
        "meaningful": true,
        "story": "The person seems to be engaged in a video recording session, possibly for a vlog or a tutorial. The setup includes a ring light and a camera, suggesting a focus on creating content with good lighting and clear visuals.",
        "race": "white",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is seated indoors, wearing a striped shirt and a beanie, and appears to be engaged in a video recording session as indicated by the ring light and camera setup in front of them. They are gesturing with their hands, possibly explaining or discussing something, which suggests they might be communicating information or sharing an opinion. The individual's focused expression and direct gaze towards the camera imply that they are actively involved in the content creation process, likely aiming to connect with an audience through their presentation. The setting, with its soft lighting and casual attire, creates an atmosphere conducive to informal yet engaging dialogue.",
        "intention": "The individual is intentionally creating engaging content to communicate ideas and connect with an audience through a focused and informal presentation",
        "intention_ok": true
      },
      "hoi": [
        {
          "relevant": true,
          "relationship": {
            "standalone": false,
            "position": "standalone",
            "action": [
              [
                "standalone",
                "facing"
              ]
            ]
          },
          "object": 0
        }
      ],
      "facex_detailing": {
        "landmarks": [
          [
            0.33983813978764893,
            0.35314070900912004
          ],
          [
            0.3389386591651211,
            0.39916347020832943
          ],
          [
            0.3387085680192935,
            0.4451205772382242
          ],
          [
            0.3421794359622124,
            0.49094329839030276
          ],
          [
            0.35000180325005203,
            0.5312827633802223
          ],
          [
            0.3643280400761536,
            0.5658655764880004
          ],
          [
            0.3817693875836475,
            0.5941962385934497
          ],
          [
            0.40015775337815285,
            0.6146731125614631
          ],
          [
            0.41855638346169144,
            0.6208503002212161
          ],
          [
            0.43400043132714927,
            0.6117319037674596
          ],
          [
            0.4453858622416321,
            0.5845167807170324
          ],
          [
            0.4548757514317653,
            0.5587445182775064
          ],
          [
            0.46225678564847583,
            0.5296190242603342
          ],
          [
            0.4679314532383744,
            0.4958077701311262
          ],
          [
            0.47280645397092613,
            0.46583832606436715
          ],
          [
            0.47733108915521627,
            0.43152279953161876
          ],
          [
            0.48170639391589376,
            0.397407558024245
          ],
          [
            0.37498537542498006,
            0.35960401594638824
          ],
          [
            0.3881732029507735,
            0.34159551213973416
          ],
          [
            0.4015319834995483,
            0.34428349764101085
          ],
          [
            0.41582676547113806,
            0.3491068972481622
          ],
          [
            0.4281276552937925,
            0.35951926112963406
          ],
          [
            0.44935005191447475,
            0.36578068302737343
          ],
          [
            0.459658687136003,
            0.36092515299244526
          ],
          [
            0.46935113053768873,
            0.3614097352696474
          ],
          [
            0.47785500689808813,
            0.364056725192953
          ],
          [
            0.4821638063128505,
            0.3793522174396212
          ],
          [
            0.43913580912963623,
            0.3994584546360389
          ],
          [
            0.43929722256559345,
            0.42681222006126684
          ],
          [
            0.4401623773987272,
            0.4530595259376303
          ],
          [
            0.43930752169606946,
            0.478829522328402
          ],
          [
            0.4185811836006386,
            0.48826338951549836
          ],
          [
            0.4258397571143827,
            0.493304214969514
          ],
          [
            0.43335650571888046,
            0.502232171461065
          ],
          [
            0.4406318449348744,
            0.49937287726730273
          ],
          [
            0.4461156789745603,
            0.4927564129942939
          ],
          [
            0.3895046895902072,
            0.3892967877091554
          ],
          [
            0.3972558096117739,
            0.3854816649641309
          ],
          [
            0.4092914445625086,
            0.38568044949776287
          ],
          [
            0.4154458927556074,
            0.3961617670992695
          ],
          [
            0.40733688143414576,
            0.3984632611432404
          ],
          [
            0.3969080920131611,
            0.39579148248389917
          ],
          [
            0.44768507692164605,
            0.4027350692679642
          ],
          [
            0.4548635569268039,
            0.3955895897888002
          ],
          [
            0.46517732080870444,
            0.39659574605800485
          ],
          [
            0.46961605093175807,
            0.4044377905508829
          ],
          [
            0.46473380317911506,
            0.4111192659253166
          ],
          [
            0.4548437251775924,
            0.40881709819117545
          ],
          [
            0.3984965830708721,
            0.5205273304351423
          ],
          [
            0.41186415759979617,
            0.523954514820109
          ],
          [
            0.4246836901708905,
            0.5246621344770704
          ],
          [
            0.4297483955909099,
            0.5294413231668018
          ],
          [
            0.4352178192098758,
            0.5265464458831404
          ],
          [
            0.4417628121030118,
            0.5302683084730118
          ],
          [
            0.4477727658647511,
            0.5314925872459614
          ],
          [
            0.44148367640030173,
            0.5409011910516749
          ],
          [
            0.4354158858436027,
            0.5457543631710073
          ],
          [
            0.42872843325936366,
            0.5480284043405422
          ],
          [
            0.419643108965829,
            0.5438886088668986
          ],
          [
            0.40852897442943814,
            0.5353998677440421
          ],
          [
            0.4007891221304557,
            0.5219412529910052
          ],
          [
            0.41871600604749154,
            0.5299797547241998
          ],
          [
            0.4294693016979311,
            0.5345298581337802
          ],
          [
            0.4361280031236155,
            0.5327682502055295
          ],
          [
            0.4459387680648693,
            0.5323830831618536
          ],
          [
            0.43691334317970487,
            0.5333164502703954
          ],
          [
            0.42972395083467874,
            0.535240203173703
          ],
          [
            0.4185624807141721,
            0.5291992846620145
          ]
        ],
        "visibility": [
          0.9202705025672913,
          4.418292064656271e-06,
          0.9993013143539429,
          2.3273230453924043e-08,
          0.9999980926513672,
          1.0,
          0.00023416729527525604,
          2.714077481869026e-06,
          0.17239339649677277,
          4.882850200560451e-12,
          0.008998877368867397,
          1.7815702449297532e-05,
          0.9999998807907104,
          0.0032980055548250675,
          0.024946069344878197,
          1.4809230231005444e-12,
          3.6274555895943195e-05,
          1.0910122805185551e-09,
          3.3727191178778426e-11,
          0.9810934662818909,
          3.0443314535943955e-11,
          2.3601224770842618e-18,
          8.69146132005803e-17,
          5.785796104371133e-14,
          1.2698851776442137e-14,
          2.601937512595033e-16,
          1.058830215346085e-12,
          4.574133498184974e-13,
          2.364678730604197e-10
        ],
        "headpose": {
          "pitch": -9.59871072742904,
          "yaw": -18.066198694331398,
          "roll": 7.437100996639867
        },
        "attributes": {
          "5 oClock Shadow": 0.9159678816795349,
          "Arched Eyebrows": 0.002047695219516754,
          "Attractive": 0.22510726749897003,
          "Bags Under Eyes": 0.35478320717811584,
          "Bald": 9.091598622035235e-05,
          "Bangs": 0.010021368972957134,
          "Big Lips": 0.23751269280910492,
          "Big Nose": 0.5191665887832642,
          "Black Hair": 0.007453979458659887,
          "Blond Hair": 0.007164246868342161,
          "Blurry": 0.000525330426171422,
          "Brown Hair": 0.038386352360248566,
          "Bushy Eyebrows": 0.028899317607283592,
          "Chubby": 0.08718208968639374,
          "Double Chin": 0.06222441792488098,
          "Eyeglasses": 0.002659386955201626,
          "Goatee": 0.036004383116960526,
          "Gray Hair": 0.001246760948561132,
          "Heavy Makeup": 0.00016607741417828947,
          "High Cheekbones": 0.05811341479420662,
          "Male": 0.9999576807022095,
          "Mouth Slightly Open": 0.025009483098983765,
          "Mustache": 0.005888982210308313,
          "Narrow Eyes": 0.025974201038479805,
          "No Beard": 0.17345067858695984,
          "Oval Face": 0.227004274725914,
          "Pale Skin": 0.012559020891785622,
          "Pointy Nose": 0.06195242330431938,
          "Receding Hairline": 4.800517490366474e-05,
          "Rosy Cheeks": 0.003858874086290598,
          "Sideburns": 0.3050958812236786,
          "Smiling": 0.04735671728849411,
          "Straight Hair": 0.009263693355023861,
          "Wavy Hair": 0.012543892487883568,
          "Wearing Earrings": 0.0008772918954491615,
          "Wearing Hat": 0.988616406917572,
          "Wearing Lipstick": 0.00044005492236465216,
          "Wearing Necklace": 0.0035862578079104424,
          "Wearing Necktie": 0.04204598069190979,
          "Young": 0.9425758123397827
        },
        "age": [
          0.005435806233435869,
          0.6172308921813965,
          0.9911894798278809,
          0.819367527961731,
          0.16578862071037292,
          0.0027344650588929653,
          4.572878242470324e-05,
          2.233728764622356e-06
        ],
        "race": [
          0.9945313334465027,
          0.011861596256494522,
          0.4445134103298187,
          0.03772011399269104,
          0.1615145206451416
        ],
        "gender": [
          0.9986743927001953,
          0.0029341760091483593
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 3.4578327756662475e-06,
          "disgust": 2.749378155443425e-14,
          "fear": 2.9819031510669447e-05,
          "happy": 36.77372336387634,
          "sad": 0.03375257656443864,
          "surprise": 1.0090253285688355e-10,
          "neutral": 63.192492723464966
        },
        "dominant_emotion": "neutral",
        "region": {
          "x": 0,
          "y": 0,
          "w": 837,
          "h": 970,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 31,
        "gender": {
          "Woman": 1.6537100076675415,
          "Man": 98.34628701210022
        },
        "dominant_gender": "Man",
        "race": {
          "asian": 0.02587593626230955,
          "indian": 0.07205893052741885,
          "black": 0.003376244058017619,
          "white": 89.38086032867432,
          "middle eastern": 8.371935784816742,
          "latino hispanic": 2.1458929404616356
        },
        "dominant_race": "white"
      }
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.15957097709178925,
        0.11275524646043777,
        0.6436247825622559,
        0.987280011177063
      ]
    ],
    "face_boxes": [
      [
        0.3464990258216858,
        0.3092906177043915,
        0.4827791154384613,
        0.6091755628585815
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.3914439012474484,
            0.7215970083301441
          ],
          [
            0.22640935320324365,
            0.7413457447834566
          ],
          [
            0.19766459708743628,
            0.9914964065254175
          ],
          [
            0.33742358371946546,
            0.9896155744822449
          ],
          [
            0.5564784492916531,
            0.7018482718768314
          ],
          [
            0.6010823811954922,
            0.9896155744822449
          ],
          [
            0.6704662752681309,
            0.9783305822232089
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.40185148535834425,
            0.3933918167965184
          ],
          [
            0.46231459305021494,
            0.40467680905555437
          ],
          [
            0.33742358371946546,
            0.40843847314189957
          ],
          [
            0.48213856278525463,
            0.42348512948728073
          ]
        ],
        "dw_hand_1": [
          [
            0.6704662752681309,
            0.9820922463095542
          ],
          [
            0.6704662752681309,
            0.9689264220073457
          ],
          [
            0.6704662752681309,
            0.9576414297483099
          ],
          [
            0.6704662752681309,
            0.9388331093165836
          ],
          [
            0.6704662752681309,
            0.9275481170575478
          ],
          [
            0.6704662752681309,
            0.9576414297483099
          ],
          [
            0.6704662752681309,
            0.9576414297483099
          ],
          [
            0.6704662752681309,
            0.9482372695324467
          ],
          [
            0.6704662752681309,
            0.942594773402929
          ],
          [
            0.6704662752681309,
            0.9670455899641733
          ],
          [
            0.6704662752681309,
            0.9651647579210004
          ],
          [
            0.6704662752681309,
            0.9614030938346553
          ],
          [
            0.6704662752681309,
            0.9538797656619648
          ],
          [
            0.6704662752681309,
            0.9783305822232089
          ],
          [
            0.6704662752681309,
            0.9764497501800363
          ],
          [
            0.6704662752681309,
            0.9670455899641733
          ],
          [
            0.6704662752681309,
            0.9651647579210004
          ],
          [
            0.6704662752681309,
            0.9858539103958993
          ],
          [
            0.6704662752681309,
            0.9820922463095542
          ],
          [
            0.6704662752681309,
            0.9783305822232089
          ],
          [
            0.6704662752681309,
            0.9764497501800363
          ]
        ],
        "dw_hand_2": [
          [
            0.3463443701002333,
            0.9896155744822449
          ],
          [
            0.3552651564810011,
            0.9820922463095542
          ],
          [
            0.365177141348521,
            0.9595222617914827
          ],
          [
            0.3750891262160408,
            0.9294289491007204
          ],
          [
            0.3830187141100566,
            0.8974548043667855
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_face": [
          [
            0.3413883776664734,
            0.3783451604511375
          ],
          [
            0.3413883776664734,
            0.41220013722824494
          ],
          [
            0.3413883776664734,
            0.44605511400535247
          ],
          [
            0.3433707746399773,
            0.4817909228256325
          ],
          [
            0.3483267670737373,
            0.51564589960274
          ],
          [
            0.3582387519412571,
            0.5457392122935021
          ],
          [
            0.37112433226903274,
            0.5701900288547467
          ],
          [
            0.38698350805706444,
            0.5927600133728181
          ],
          [
            0.40383388233184814,
            0.6059258376750267
          ],
          [
            0.4226666535801357,
            0.6096875017613719
          ],
          [
            0.4395170278549194,
            0.5946408454159907
          ],
          [
            0.4504202112091912,
            0.5683091968115738
          ],
          [
            0.461323394563463,
            0.5382158841208118
          ],
          [
            0.4692529824574789,
            0.5062417393868769
          ],
          [
            0.4761913718647427,
            0.47238676260976936
          ],
          [
            0.47916496732499864,
            0.4385317858326619
          ],
          [
            0.4811473642985026,
            0.4027959770123818
          ],
          [
            0.3770715231895447,
            0.3632985041057563
          ],
          [
            0.3889659050305685,
            0.35577517593306573
          ],
          [
            0.40185148535834425,
            0.3576560079762383
          ],
          [
            0.4137458671993679,
            0.3632985041057563
          ],
          [
            0.4256402490403917,
            0.3708218322784469
          ],
          [
            0.44942901272243924,
            0.38022599249431005
          ],
          [
            0.45834979910320706,
            0.3783451604511375
          ],
          [
            0.4662793869972229,
            0.3764643284079647
          ],
          [
            0.47420897489123875,
            0.3764643284079647
          ],
          [
            0.4811473642985026,
            0.3839876565806555
          ],
          [
            0.43753463088141553,
            0.40467680905555437
          ],
          [
            0.43753463088141553,
            0.4272467935736259
          ],
          [
            0.4385258293681675,
            0.4516976101348704
          ],
          [
            0.4395170278549194,
            0.47426759465294194
          ],
          [
            0.41771066114637584,
            0.4893142509983231
          ],
          [
            0.4256402490403917,
            0.4930759150846683
          ],
          [
            0.4335698369344076,
            0.4968375791710137
          ],
          [
            0.4405082263416714,
            0.4968375791710137
          ],
          [
            0.44645541726218335,
            0.4930759150846683
          ],
          [
            0.3859923095703125,
            0.3933918167965184
          ],
          [
            0.39689549292458426,
            0.38586848862382805
          ],
          [
            0.408789874765608,
            0.38774932066700063
          ],
          [
            0.41572826417287195,
            0.40467680905555437
          ],
          [
            0.4058162793053521,
            0.406557641098727
          ],
          [
            0.3949130959510803,
            0.4027959770123818
          ],
          [
            0.4474466157489353,
            0.41031930518507215
          ],
          [
            0.45537620364295117,
            0.39903431292603664
          ],
          [
            0.46528818851047093,
            0.39903431292603664
          ],
          [
            0.47222657791773476,
            0.41031930518507215
          ],
          [
            0.464296990023719,
            0.4159618013145904
          ],
          [
            0.4563674021297031,
            0.4159618013145904
          ],
          [
            0.3998690883848402,
            0.5250500598186032
          ],
          [
            0.41275466871261596,
            0.5231692277754306
          ],
          [
            0.42663144752714366,
            0.5231692277754306
          ],
          [
            0.43158743996090354,
            0.5250500598186032
          ],
          [
            0.4365434323946635,
            0.5250500598186032
          ],
          [
            0.4424906233151754,
            0.5269308918617758
          ],
          [
            0.4474466157489353,
            0.5325733879912937
          ],
          [
            0.4424906233151754,
            0.5419775482071569
          ],
          [
            0.4365434323946635,
            0.5495008763798475
          ],
          [
            0.4286138445006477,
            0.5513817084230203
          ],
          [
            0.41771066114637584,
            0.5476200443366749
          ],
          [
            0.4077986762788561,
            0.5382158841208118
          ],
          [
            0.40185148535834425,
            0.5269308918617758
          ],
          [
            0.4167194626596239,
            0.5306925559481211
          ],
          [
            0.4305962414741516,
            0.5344542200344665
          ],
          [
            0.43753463088141553,
            0.5344542200344665
          ],
          [
            0.4454642187754313,
            0.5344542200344665
          ],
          [
            0.43753463088141553,
            0.5344542200344665
          ],
          [
            0.42960504298739965,
            0.5344542200344665
          ],
          [
            0.41572826417287195,
            0.5306925559481211
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "new_filename": "004299",
  "objects": [
    {
      "box": [
        0.5860887169837952,
        0.44062259793281555,
        0.9279738068580627,
        0.9966287612915039
      ],
      "name": "ring light",
      "possible_names": [
        "ring light"
      ]
    }
  ],
  "mask_file": "./person_labeling/./004299.jpg_masks.pkl",
  "hoi_processed": true,
  "scene": "A cozy indoor setting with soft natural light filtering through sheer curtains features a striped shirt and a ring light setup suggesting a casual yet professional atmosphere for content creation",
  "overall_past": "Before the current scene, the individual likely prepared the recording space by setting up the ring light and camera, adjusting the lighting and framing for optimal video quality. They may have chosen the striped shirt and beanie as part of a deliberate, consistent visual style to build a recognizable personal brand. After testing the audio and video feed, they probably reviewed previous content or notes to refine their message, ensuring clarity and engagement. This preparation reflects a routine process of content creation, where attention to both technical setup and personal presentation is key to delivering a polished, authentic performance.",
  "overall_past_clean": "The space was meticulously configured with precise lighting and camera placement to ensure optimal visual clarity, followed by rigorous testing of audio and video systems, while a deliberate review of prior content and messaging strategies was conducted to refine the delivery and maintain a consistent, professional tone.",
  "past_scene_ok": false,
  "overall_future": "After the current scene, the individual is likely to pause briefly, review the recorded segment, and then proceed to deliver the next part of their message—perhaps transitioning to a new topic or demonstrating a concept with a prop or visual aid, all while maintaining the warm, approachable tone established by the setting and their demeanor.",
  "overall_future_clean": "The speaker takes a deliberate breath, glances briefly at the recording interface, then seamlessly shifts into the next segment with confident clarity, introducing a new idea with a focused gesture that draws attention to the accompanying visual.",
  "future_scene_ok": true
}