{
  "video_path": "./ref_datasets/part2/5527757000.mp4",
  "start_time": 0.0,
  "end_time": 6.08,
  "image_path": "./016565.jpg",
  "represents_multiple_types": false,
  "cross_video_duplicate": false,
  "original_detections": [
    {
      "video_path": "./ref_datasets/part2/5527757000.mp4",
      "start_time": 0.0,
      "end_time": 6.08,
      "image_path": "./ref_datasets/extracted_frames/person_-8944376822581962064_1_5527757000.mp4_0.00_6.08.jpg",
      "type": "person"
    }
  ],
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "male",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a light brown blazer over a yellow shirt. The blazer appears to be well-fitted, and the shirt is partially visible at the collar area. The person also has a dark wristwatch on their left wrist.",
        "clothing": {
          "vague": false,
          "clothing": [
            {
              "possible_names": [
                "jacket",
                "blazer",
                "suit jacket"
              ],
              "name": "jacket",
              "type": "top",
              "color": [
                "beige"
              ],
              "belonging_confident": true,
              "existence_confident": true
            }
          ]
        },
        "objects": [],
        "description": "The person is an adult male with a neutral expression. He is wearing a light brown blazer over a yellow shirt and a dark wristwatch on his left wrist. His hands are clasped together in front of him, and he appears to be seated. There are no other objects relevant to the person in the image.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears calm and composed, with a neutral expression that does not convey strong emotions.",
        "meaningful": false,
        "story": "unknown",
        "race": "black",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is seated and appears to be engaged in a conversation or an interview, as suggested by their direct gaze towards the camera and their hand gesture which seems to emphasize a point they are making. They are dressed in a light brown blazer over a yellow shirt, indicating a semi-formal setting, possibly professional or business-related. Their expression is serious and focused, suggesting that they are discussing something important or sharing significant information. The background is plain and uncluttered, drawing attention to the individual and their actions, which further implies that the focus of this scene is on their communication and the content of their speech.",
        "intention": "They are deliberately communicating a critical message with authority and intent to persuade or inform in a professional context",
        "intention_ok": false
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.44524289776260656,
            0.3736927922913637
          ],
          [
            0.4479948204010725,
            0.44453047487155467
          ],
          [
            0.45225587248181304,
            0.5152411746284948
          ],
          [
            0.45567216038083036,
            0.5764808812469401
          ],
          [
            0.4663693596298496,
            0.6350334468972747
          ],
          [
            0.48689265428110956,
            0.6846595753437628
          ],
          [
            0.5109341946740945,
            0.7157724259076295
          ],
          [
            0.5353647007917365,
            0.7391307118077758
          ],
          [
            0.5584013162801663,
            0.7487489284661712
          ],
          [
            0.5835565410554409,
            0.7547294453653709
          ],
          [
            0.6028643157333136,
            0.7333491800954102
          ],
          [
            0.6231523705025513,
            0.7022080853502587
          ],
          [
            0.6392011173069477,
            0.6605756401700318
          ],
          [
            0.6474140471468369,
            0.6058055909537764
          ],
          [
            0.6508876599371434,
            0.5480702506802069
          ],
          [
            0.6560019115606944,
            0.4893815677948099
          ],
          [
            0.6619715688129266,
            0.4208363325980605
          ],
          [
            0.4770371351391077,
            0.3307516903788955
          ],
          [
            0.4936396706228455,
            0.28237857124792837
          ],
          [
            0.5175417941684525,
            0.2753908912025432
          ],
          [
            0.5407663214951753,
            0.28167948426392975
          ],
          [
            0.562767421392103,
            0.31156278500165885
          ],
          [
            0.5847769010812044,
            0.3290371467827489
          ],
          [
            0.6074648216366768,
            0.3140739653003279
          ],
          [
            0.6277007527649403,
            0.31689742649043046
          ],
          [
            0.6456040342648824,
            0.33565135987662764
          ],
          [
            0.6549288980662823,
            0.38561488159749874
          ],
          [
            0.5716414768248796,
            0.37053881038433656
          ],
          [
            0.5725819695740938,
            0.40634326884355493
          ],
          [
            0.5735747415572405,
            0.44454645284269223
          ],
          [
            0.573187948577106,
            0.48644962459014207
          ],
          [
            0.5468411707008879,
            0.5195998839600376
          ],
          [
            0.5569987346728643,
            0.5298495639568914
          ],
          [
            0.569012867162625,
            0.5431402781337656
          ],
          [
            0.5792763565977415,
            0.537065865280767
          ],
          [
            0.5884851035972436,
            0.5332567865571017
          ],
          [
            0.5031035789599021,
            0.3651863640420651
          ],
          [
            0.5154969359437624,
            0.3534656087715159
          ],
          [
            0.5292729808638493,
            0.35875623109479426
          ],
          [
            0.5403003205855688,
            0.38135546187244396
          ],
          [
            0.5255228793869416,
            0.37326283697728757
          ],
          [
            0.51490335992227,
            0.37349087829312316
          ],
          [
            0.5933759904156128,
            0.4016297673579877
          ],
          [
            0.6083094246685505,
            0.386409778412057
          ],
          [
            0.6178969960659743,
            0.38723407933320947
          ],
          [
            0.629746266702811,
            0.40776762812541273
          ],
          [
            0.6180192476759354,
            0.4044582162269209
          ],
          [
            0.6067395361761252,
            0.4028827766735087
          ],
          [
            0.5219201800102989,
            0.5910520369729038
          ],
          [
            0.5383630771189928,
            0.5722883312159746
          ],
          [
            0.5553888810177644,
            0.5609375341228707
          ],
          [
            0.5672732934355735,
            0.5716285077037003
          ],
          [
            0.5782044211402535,
            0.5667891253239263
          ],
          [
            0.5908217541873455,
            0.5899723204986128
          ],
          [
            0.6034519236534834,
            0.6173286369553319
          ],
          [
            0.5873397171497345,
            0.6274069959839816
          ],
          [
            0.5739402849848072,
            0.6309824077540604
          ],
          [
            0.5629985325038434,
            0.6321258041278395
          ],
          [
            0.5525927866498629,
            0.6263519859187817
          ],
          [
            0.5367242609461148,
            0.6139467007268674
          ],
          [
            0.5284287223592401,
            0.5954721643495812
          ],
          [
            0.5553296445558469,
            0.5903998109714064
          ],
          [
            0.5644762988512715,
            0.5956712075326809
          ],
          [
            0.5758872974663973,
            0.5983158372382007
          ],
          [
            0.5965734479327996,
            0.612326575050909
          ],
          [
            0.5739290044953426,
            0.5973932326470733
          ],
          [
            0.5649088361610969,
            0.5968418621512318
          ],
          [
            0.5546453022708495,
            0.5911325937856442
          ]
        ],
        "visibility": [
          0.002671692054718733,
          9.806376510823611e-06,
          5.232557720447062e-10,
          2.5899683330792358e-21,
          0.6859327554702759,
          0.3842526078224182,
          3.1297306326422136e-12,
          2.2971690738851636e-13,
          0.9960682392120361,
          3.848909732595551e-11,
          3.05813469166083e-12,
          2.170253755384555e-17,
          0.00023473575129173696,
          2.791070528473938e-06,
          1.4293172548307531e-11,
          6.9044800259099315e-12,
          6.727286783192454e-11,
          1.5851449926967964e-12,
          5.726900294185941e-15,
          3.7745176086900756e-05,
          1.0457839304941925e-20,
          4.6745217186290423e-20,
          7.515877950936556e-05,
          3.4868035542201525e-11,
          2.136683638722693e-14,
          3.662339845972815e-17,
          1.0426129959384411e-09,
          5.7921202556432405e-11,
          0.005249891430139542
        ],
        "headpose": {
          "pitch": -0.23956146797964542,
          "yaw": -5.563711588446892,
          "roll": 5.097016715290968
        },
        "attributes": {
          "5 oClock Shadow": 0.7960347533226013,
          "Arched Eyebrows": 0.05669605731964111,
          "Attractive": 0.1445893943309784,
          "Bags Under Eyes": 0.42494651675224304,
          "Bald": 7.444376387866214e-05,
          "Bangs": 3.3886321034515277e-05,
          "Big Lips": 0.7008396983146667,
          "Big Nose": 0.535281777381897,
          "Black Hair": 0.49958449602127075,
          "Blond Hair": 8.287642413051799e-05,
          "Blurry": 5.8909430663334206e-05,
          "Brown Hair": 0.033749379217624664,
          "Bushy Eyebrows": 0.8921015858650208,
          "Chubby": 0.26271751523017883,
          "Double Chin": 0.022061893716454506,
          "Eyeglasses": 0.0002680107136256993,
          "Goatee": 0.40432825684547424,
          "Gray Hair": 0.00027430738555267453,
          "Heavy Makeup": 0.0014869578881189227,
          "High Cheekbones": 0.08962900936603546,
          "Male": 0.9995991587638855,
          "Mouth Slightly Open": 0.022488705813884735,
          "Mustache": 0.09206071496009827,
          "Narrow Eyes": 0.20559567213058472,
          "No Beard": 0.012073652818799019,
          "Oval Face": 0.40054020285606384,
          "Pale Skin": 0.00028145278338342905,
          "Pointy Nose": 0.0037327557802200317,
          "Receding Hairline": 0.24402561783790588,
          "Rosy Cheeks": 0.00033219577744603157,
          "Sideburns": 0.6793185472488403,
          "Smiling": 0.03848468139767647,
          "Straight Hair": 0.27375486493110657,
          "Wavy Hair": 0.019772253930568695,
          "Wearing Earrings": 0.007290619891136885,
          "Wearing Hat": 0.00025171181187033653,
          "Wearing Lipstick": 0.0009635836468078196,
          "Wearing Necklace": 0.0012529846280813217,
          "Wearing Necktie": 0.04444727301597595,
          "Young": 0.9921413660049438
        },
        "age": [
          0.17830471694469452,
          0.8731905221939087,
          0.9846291542053223,
          0.6059507727622986,
          0.06087338179349899,
          0.0004275167011655867,
          3.940754686482251e-05,
          4.289435764803784e-06
        ],
        "race": [
          0.21726833283901215,
          0.10867524147033691,
          0.5701448917388916,
          0.4083152413368225,
          0.9367049932479858
        ],
        "gender": [
          0.9998753070831299,
          0.00032667737104929984
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 7.534599304199219,
          "disgust": 0.000135616767238389,
          "fear": 0.6994081661105156,
          "happy": 2.3201363161206245,
          "sad": 43.83647441864014,
          "surprise": 3.623346174208564e-05,
          "neutral": 45.609208941459656
        },
        "dominant_emotion": "neutral",
        "region": {
          "x": 0,
          "y": 0,
          "w": 1252,
          "h": 1838,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 28,
        "gender": {
          "Woman": 0.06383316358551383,
          "Man": 99.93616938591003
        },
        "dominant_gender": "Man",
        "race": {
          "asian": 8.649199971508978,
          "indian": 26.3043449106422,
          "black": 12.756640479364425,
          "white": 9.916298682469051,
          "middle eastern": 22.67940328594466,
          "latino hispanic": 19.69411639536121
        },
        "dominant_race": "indian"
      }
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.13458433747291565,
        0.06239473447203636,
        0.9019705653190613,
        0.9865702986717224
      ]
    ],
    "face_boxes": [
      [
        0.45165491104125977,
        0.15767842531204224,
        0.6691303253173828,
        0.7253425717353821
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.5266311928033829,
            0.7652184987619678
          ],
          [
            0.2715753426551819,
            0.7276492907907615
          ],
          [
            0.18205242836475372,
            0.9921365149080541
          ],
          [
            0.46075584077835086,
            0.9741032950818751
          ],
          [
            0.7816870429515839,
            0.8027877067331741
          ],
          [
            0.8847228499650955,
            1.010169734734233
          ],
          [
            0.7259463604688644,
            0.9741032950818751
          ],
          [
            0.3746111496686935,
            0.6344776550221698
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.6634492316246032,
            0.712621607602279
          ],
          [
            -1.0,
            -1.0
          ],
          [
            1.006338884472847,
            0.9230091722410344
          ],
          [
            0.5181856348514556,
            0.3940347240064492
          ],
          [
            0.6195323302745819,
            0.41206794383262824
          ],
          [
            0.44386472487449646,
            0.4391177735718968
          ],
          [
            0.6651383432149887,
            0.4631620666734689
          ]
        ],
        "dw_hand_1": [
          [
            0.7073661329746246,
            0.9861254416326611
          ],
          [
            0.6398016693592071,
            0.9620811485310891
          ],
          [
            0.5722372057437897,
            0.9410423920672135
          ],
          [
            0.523252969622612,
            0.9410423920672135
          ],
          [
            0.48271429145336153,
            0.9410423920672135
          ],
          [
            0.5958847680091858,
            0.7577046571677266
          ],
          [
            0.5333876391649246,
            0.7847544869069951
          ],
          [
            0.5080509653091431,
            0.9109870256902485
          ],
          [
            0.5046727421283722,
            0.9741032950818751
          ],
          [
            0.6178432186841964,
            0.7757378769939056
          ],
          [
            0.5553460898399353,
            0.8328430731101393
          ],
          [
            0.5367658623456955,
            0.9590756118933925
          ],
          [
            0.5435223087072373,
            1.010169734734233
          ],
          [
            0.643179892539978,
            0.8148098532839602
          ],
          [
            0.5857500984668732,
            0.8749205860378904
          ],
          [
            0.5688589825630188,
            0.9741032950818751
          ],
          [
            0.5739263173341751,
            1.0131752713719298
          ],
          [
            0.6651383432149887,
            0.8719150494001938
          ],
          [
            0.6245996650457383,
            0.9169980989656414
          ],
          [
            0.604330325961113,
            0.9710977584441786
          ],
          [
            0.6026412143707275,
            0.9981475881834472
          ]
        ],
        "dw_hand_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.5164965232610702,
            0.932025782154124
          ],
          [
            0.5164965232610702,
            0.9350313187918204
          ],
          [
            0.5215638580322266,
            0.9290202455164275
          ],
          [
            0.5418331971168519,
            0.8989648791394624
          ],
          [
            0.5367658623456955,
            0.8959593425017659
          ],
          [
            0.5418331971168519,
            0.9079814890525519
          ],
          [
            0.5570352014303207,
            0.9109870256902485
          ],
          [
            0.565480759382248,
            0.9230091722410344
          ],
          [
            0.5570352014303207,
            0.9139925623279449
          ],
          [
            0.5570352014303207,
            0.932025782154124
          ],
          [
            0.5553460898399353,
            0.956070075255696
          ],
          [
            0.5756154289245605,
            0.9410423920672135
          ],
          [
            0.5739263173341751,
            0.932025782154124
          ],
          [
            0.5857500984668732,
            0.950059001980303
          ],
          [
            0.5536569782495498,
            0.9620811485310891
          ],
          [
            0.5857500984668732,
            0.950059001980303
          ],
          [
            0.5874392100572586,
            0.9470534653426065
          ],
          [
            0.5925065448284149,
            0.956070075255696
          ],
          [
            0.5891283216476441,
            0.9650866851687856
          ]
        ],
        "dw_face": [
          [
            0.44893205964565275,
            0.3940347240064492
          ],
          [
            0.44893205964565275,
            0.44813438348498635
          ],
          [
            0.44893205964565275,
            0.49922850632582694
          ],
          [
            0.45231028282642366,
            0.5533281658043641
          ],
          [
            0.45906672918796537,
            0.6014167520075082
          ],
          [
            0.47595784509181976,
            0.6495053382106523
          ],
          [
            0.4945380725860596,
            0.6855717778630104
          ],
          [
            0.5215638580322266,
            0.712621607602279
          ],
          [
            0.550278755068779,
            0.724643754153065
          ],
          [
            0.5789936521053314,
            0.724643754153065
          ],
          [
            0.6077085491418839,
            0.7066105343268859
          ],
          [
            0.6262887766361237,
            0.6735496313122243
          ],
          [
            0.646558115720749,
            0.6254610451090803
          ],
          [
            0.6550036736726761,
            0.5803779955436327
          ],
          [
            0.6617601200342178,
            0.5262783360650956
          ],
          [
            0.6651383432149887,
            0.4781897498619514
          ],
          [
            0.6668274548053742,
            0.4240900903834143
          ],
          [
            0.4793360682725906,
            0.3519572110786981
          ],
          [
            0.496227184176445,
            0.3369295278902156
          ],
          [
            0.5148074116706848,
            0.33392399125251904
          ],
          [
            0.53507675075531,
            0.33993506452791206
          ],
          [
            0.5519678666591644,
            0.3519572110786981
          ],
          [
            0.5958847680091858,
            0.3609738209917876
          ],
          [
            0.6127758839130402,
            0.3579682843540911
          ],
          [
            0.6296669998168946,
            0.3609738209917876
          ],
          [
            0.6448690041303634,
            0.3669848942671806
          ],
          [
            0.658381896853447,
            0.3880236507310562
          ],
          [
            0.5722372057437897,
            0.4060568705572352
          ],
          [
            0.5722372057437897,
            0.4361122369342003
          ],
          [
            0.5722372057437897,
            0.4691731399488619
          ],
          [
            0.5722372057437897,
            0.49922850632582694
          ],
          [
            0.5435223087072373,
            0.5322894093404885
          ],
          [
            0.5553460898399353,
            0.535294945978185
          ],
          [
            0.5688589825630188,
            0.5383004826158816
          ],
          [
            0.5806827636957168,
            0.5383004826158816
          ],
          [
            0.5925065448284149,
            0.535294945978185
          ],
          [
            0.49791629576683044,
            0.3970402606441457
          ],
          [
            0.5131183000802993,
            0.3850181140933597
          ],
          [
            0.5300094159841537,
            0.3880236507310562
          ],
          [
            0.5435223087072373,
            0.40906240719493175
          ],
          [
            0.5266311928033829,
            0.41206794383262824
          ],
          [
            0.5114291884899139,
            0.40906240719493175
          ],
          [
            0.5941956564188003,
            0.41807901710802126
          ],
          [
            0.6077085491418839,
            0.40305133391953873
          ],
          [
            0.6245996650457383,
            0.40305133391953873
          ],
          [
            0.6381125577688217,
            0.42108455374571774
          ],
          [
            0.6229105534553527,
            0.4301011636588073
          ],
          [
            0.6093976607322693,
            0.42709562702111076
          ],
          [
            0.5215638580322266,
            0.5954056787321151
          ],
          [
            0.5401440855264664,
            0.5833835321813291
          ],
          [
            0.5587243130207061,
            0.5773724589059361
          ],
          [
            0.5671698709726334,
            0.5773724589059361
          ],
          [
            0.577304540514946,
            0.5773724589059361
          ],
          [
            0.5925065448284149,
            0.5893946054567222
          ],
          [
            0.6060194375514985,
            0.6074278252829012
          ],
          [
            0.5941956564188003,
            0.6224555084713838
          ],
          [
            0.5806827636957168,
            0.6314721183844733
          ],
          [
            0.5671698709726334,
            0.6344776550221698
          ],
          [
            0.550278755068779,
            0.6284665817467767
          ],
          [
            0.53507675075531,
            0.6134388985582943
          ],
          [
            0.5266311928033829,
            0.5984112153698117
          ],
          [
            0.5469005318880081,
            0.5954056787321151
          ],
          [
            0.5671698709726334,
            0.5984112153698117
          ],
          [
            0.5840609868764878,
            0.6014167520075082
          ],
          [
            0.6009521027803421,
            0.6074278252829012
          ],
          [
            0.5840609868764878,
            0.6074278252829012
          ],
          [
            0.5671698709726334,
            0.6044222886452048
          ],
          [
            0.5469005318880081,
            0.6014167520075082
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "new_filename": "016565",
  "objects": [],
  "mask_file": "./person_labeling/./016565.jpg_masks.pkl",
  "hoi_processed": true,
  "scene": "A person is seated indoors wearing a beige blazer and a yellow shirt with hands clasped together against a plain light-colored background suggesting a formal or professional setting with soft lighting and minimal distractions.",
  "overall_past": "Before the current scene, the person likely prepared for an important professional presentation or interview—perhaps reviewing key points, practicing their delivery, or receiving last-minute feedback from a colleague or coach. This is supported by their serious and focused expression, the semi-formal attire (beige blazer and yellow shirt), and the deliberate hand gesture, which suggests they are rehearsing or emphasizing a crucial message. The plain, distraction-free background further implies a controlled environment, such as a conference room or studio, where the focus is on clear communication—indicating that the moment captured is likely the culmination of prior preparation.",
  "overall_past_clean": "They had meticulously rehearsed their message, refining every point with precision, receiving critical feedback that sharpened their delivery, and mentally anchored themselves in confidence, transforming anxiety into a focused determination to make an impact.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the person is likely to continue speaking or presenting their key points, possibly concluding with a summary or call to action, as the serious expression, direct eye contact, and deliberate hand gesture suggest a pivotal moment in a professional discussion—such as a job interview, business pitch, or formal presentation. Given the focused demeanor and the emphasis on clear communication, the next moment would likely involve the person wrapping up their statement, followed by a pause for feedback, questions, or a decision from the audience or interviewer.",
  "overall_future_clean": "The speaker locks eyes with the audience, delivers a final, unwavering statement with measured precision, then holds the silence with confidence, waiting for the first response to confirm their impact.",
  "future_scene_ok": true
}