{
  "image_path": "./ref_datasets/hico_det/images/train/train_00001039.jpg",
  "image_id": "train_00001039",
  "width": 428,
  "height": 640,
  "split": "train",
  "source": "zhimeng/hico_det",
  "dataset_index": 1039,
  "annotations": {
    "objects": "[{'id': 90, 'bbox_human': [34, 406, 71, 599], 'bbox_object': [130, 398, 273, 599], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('chair', 'sit_on')]",
    "negative_captions": "[('chair', 'carry'), ('chair', 'stand_on'), ('chair', 'no_interaction')]",
    "ambiguous_captions": "[('chair', 'hold'), ('chair', 'lie_on')]",
    "positive_objects": "[89]",
    "negative_objects": "[86, 90, 91]",
    "ambiguous_objects": "[87, 88]",
    "size": "[428, 640, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "male",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a green t-shirt, black cargo shorts, black sneakers, white socks, and a beige baseball cap.",
        "clothing": {
          "vague": false,
          "clothing": [
            {
              "possible_names": [
                "t-shirt",
                "shirt"
              ],
              "name": "t-shirt",
              "type": "top",
              "color": [
                "green"
              ],
              "belonging_confident": true,
              "existence_confident": true
            },
            {
              "possible_names": [
                "shorts",
                "trousers"
              ],
              "name": "shorts",
              "type": "bottom",
              "color": [
                "dark gray"
              ],
              "belonging_confident": true,
              "existence_confident": true
            },
            {
              "possible_names": [
                "sneakers",
                "shoes"
              ],
              "name": "sneakers",
              "type": "footwear",
              "color": [
                "black"
              ],
              "belonging_confident": true,
              "existence_confident": true
            },
            {
              "possible_names": [
                "cap",
                "hat"
              ],
              "name": "cap",
              "type": "headwear",
              "color": [
                "beige"
              ],
              "belonging_confident": true,
              "existence_confident": true
            },
            {
              "possible_names": [
                "socks"
              ],
              "name": "socks",
              "type": "accessory",
              "color": [
                "white"
              ],
              "belonging_confident": true,
              "existence_confident": true
            }
          ]
        },
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "phone"
            ],
            "name": "phone",
            "position": "hand"
          }
        ],
        "description": "The person is an adult male who is sitting on a chair, appearing neutral in emotion. He is wearing a green t-shirt, black cargo shorts, black sneakers, white socks, and a beige baseball cap. He is holding a phone to his ear with his hand.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears to be engaged in a phone conversation, showing a neutral expression without strong emotions.",
        "meaningful": true,
        "story": "A man is sitting outdoors at a café, casually dressed, and talking on his phone. The setting suggests he might be taking a break or having a personal conversation while enjoying some time outside.",
        "race": "white",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is seated on a metal chair outdoors, wearing a green t-shirt, cargo shorts, and sneakers, with a cap turned backward. They appear to be engaged in a phone conversation, holding the phone to their ear with one hand while the other hand is near their mouth, possibly indicating deep thought or concern. The setting suggests a casual environment, perhaps a café or park area, where they might be taking a break or discussing something important, as indicated by their focused posture and serious expression. The surrounding elements like the table and blurred background contribute to the relaxed yet contemplative atmosphere of the scene.",
        "intention": "The individual is likely processing significant information or making a critical decision during a private phone call while seeking a moment of quiet reflection in a casual outdoor setting",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.6639836918368677,
            0.18957112388951436
          ],
          [
            0.6676342153103552,
            0.20297257048743114
          ],
          [
            0.6652618032272094,
            0.2162120244332722
          ],
          [
            0.6633262205744617,
            0.2293243544442313
          ],
          [
            0.6623654014596315,
            0.24252858843122213
          ],
          [
            0.6677440119999274,
            0.2539834984711239
          ],
          [
            0.6705182250415054,
            0.2668927661010197
          ],
          [
            0.6759331289614154,
            0.27794244544846675
          ],
          [
            0.6879681671891257,
            0.28433675510542733
          ],
          [
            0.7118610212895199,
            0.2827121921948025
          ],
          [
            0.7306679996533769,
            0.2727030311311994
          ],
          [
            0.7515629888058346,
            0.2639503700392587
          ],
          [
            0.7714164516476987,
            0.2503135596002851
          ],
          [
            0.7888640330853226,
            0.23509460347039363
          ],
          [
            0.795030708147464,
            0.21644038259983062
          ],
          [
            0.7973694073978826,
            0.195285250885146
          ],
          [
            0.7974850213097953,
            0.17591727503708432
          ],
          [
            0.6578692872231411,
            0.19000294038227628
          ],
          [
            0.6581048103216015,
            0.17754279587950025
          ],
          [
            0.6635161765586232,
            0.17772563312734876
          ],
          [
            0.6698995735919047,
            0.17433928677013943
          ],
          [
            0.6760981351495267,
            0.17214753244604383
          ],
          [
            0.6962780490099827,
            0.16217571794986724
          ],
          [
            0.7052046105126354,
            0.15881989768573218
          ],
          [
            0.7156836736663797,
            0.15870804956981116
          ],
          [
            0.7272035596526671,
            0.15896694660186766
          ],
          [
            0.7404845623530756,
            0.16573840039116994
          ],
          [
            0.6835279490186948,
            0.18151662264551435
          ],
          [
            0.6794803324783437,
            0.19153420073645458
          ],
          [
            0.673123434245666,
            0.19874008936541424
          ],
          [
            0.6703004579996076,
            0.2067542105913162
          ],
          [
            0.672237051902053,
            0.22049703853470942
          ],
          [
            0.6771892160774073,
            0.22053103872707913
          ],
          [
            0.6821068354219557,
            0.21894750084195821
          ],
          [
            0.6926671367621071,
            0.215409865123885
          ],
          [
            0.7018708415598035,
            0.2145843131201608
          ],
          [
            0.6652970616823841,
            0.1975480709757124
          ],
          [
            0.6644941125119799,
            0.1973330408334732
          ],
          [
            0.6747140150283462,
            0.19352977701595853
          ],
          [
            0.6771620712945553,
            0.19076171815395354
          ],
          [
            0.6746006772737796,
            0.1932610196726663
          ],
          [
            0.6681746724689596,
            0.19647811225482395
          ],
          [
            0.7075313235793477,
            0.1825857256140028
          ],
          [
            0.7086311506333752,
            0.18043686747550963
          ],
          [
            0.7222722664535124,
            0.17807086535862515
          ],
          [
            0.7299848683526583,
            0.17712066088403974
          ],
          [
            0.7243576391994238,
            0.17886725791863034
          ],
          [
            0.7113568169252577,
            0.18115540572575162
          ],
          [
            0.6719592051289588,
            0.24340499384062628
          ],
          [
            0.6746775626897494,
            0.24162460735866
          ],
          [
            0.6784082792788864,
            0.23524406637464254
          ],
          [
            0.6826013466703875,
            0.23264449494225642
          ],
          [
            0.6853982807001539,
            0.23161017128399441
          ],
          [
            0.7006973454726235,
            0.23095148461205622
          ],
          [
            0.7140853241861902,
            0.23422135284968784
          ],
          [
            0.7034956458732188,
            0.23883132934570311
          ],
          [
            0.6958576667611207,
            0.24353289689336508
          ],
          [
            0.6826138266415717,
            0.24665728722299846
          ],
          [
            0.6785039624400705,
            0.2469841718673706
          ],
          [
            0.6752674256529764,
            0.24524770975112914
          ],
          [
            0.6730775120102357,
            0.24381911414010188
          ],
          [
            0.6767597258170551,
            0.24029743756566732
          ],
          [
            0.6841488630972176,
            0.2369208684989384
          ],
          [
            0.6989412883890964,
            0.23259064640317648
          ],
          [
            0.711300322226434,
            0.23503324474607198
          ],
          [
            0.6984219930678088,
            0.23807734761919291
          ],
          [
            0.6832443256085323,
            0.24353881052562168
          ],
          [
            0.6766824751336361,
            0.24259124994277953
          ]
        ],
        "visibility": [
          0.9935387969017029,
          0.9999270439147949,
          4.581488255439581e-08,
          4.76645008185006e-11,
          5.318010153132491e-05,
          0.00024330659653060138,
          5.90638232766949e-11,
          2.699691140151117e-05,
          8.806524931515014e-08,
          0.0006417359109036624,
          1.8801895804165802e-11,
          3.5379411462677e-17,
          3.3270037505544635e-15,
          5.454947885762071e-20,
          1.0959333267279714e-11,
          7.38512202780317e-12,
          7.339566466599625e-20,
          8.614909865123019e-14,
          0.969515323638916,
          2.145879807358142e-05,
          3.1876825008406552e-15,
          4.460805939743295e-05,
          1.0,
          0.9994431138038635,
          0.9997947812080383,
          0.9999998807907104,
          1.0,
          0.9999988079071045,
          1.0
        ],
        "headpose": {
          "pitch": 1.0316183253037974,
          "yaw": 45.17223471744471,
          "roll": -9.237520932608657
        },
        "attributes": {
          "5 oClock Shadow": 0.037066828459501266,
          "Arched Eyebrows": 0.0003326886217109859,
          "Attractive": 0.024749917909502983,
          "Bags Under Eyes": 0.045310791581869125,
          "Bald": 0.00013179970846977085,
          "Bangs": 0.0008725994848646224,
          "Big Lips": 0.014227628707885742,
          "Big Nose": 0.18428583443164825,
          "Black Hair": 0.034802794456481934,
          "Blond Hair": 0.00024998694425448775,
          "Blurry": 0.04478875547647476,
          "Brown Hair": 0.01100129820406437,
          "Bushy Eyebrows": 0.05126483365893364,
          "Chubby": 0.0374298132956028,
          "Double Chin": 0.0017832223093137145,
          "Eyeglasses": 0.0007278713746927679,
          "Goatee": 0.14571015536785126,
          "Gray Hair": 0.0002378286444582045,
          "Heavy Makeup": 0.00013969333667773753,
          "High Cheekbones": 0.019853925332427025,
          "Male": 0.9990615248680115,
          "Mouth Slightly Open": 0.023694004863500595,
          "Mustache": 0.04818987101316452,
          "Narrow Eyes": 0.06062513589859009,
          "No Beard": 0.2023879885673523,
          "Oval Face": 0.029178474098443985,
          "Pale Skin": 0.001087796175852418,
          "Pointy Nose": 0.017575640231370926,
          "Receding Hairline": 0.00020670253434218466,
          "Rosy Cheeks": 2.4654555090819485e-05,
          "Sideburns": 0.5121895670890808,
          "Smiling": 0.010851497761905193,
          "Straight Hair": 0.006624756380915642,
          "Wavy Hair": 0.00633225729689002,
          "Wearing Earrings": 0.0009987687226384878,
          "Wearing Hat": 0.7817823886871338,
          "Wearing Lipstick": 5.7326880778418854e-05,
          "Wearing Necklace": 0.0031443345360457897,
          "Wearing Necktie": 0.0005166542832739651,
          "Young": 0.9274535775184631
        },
        "age": [
          0.00029601636924780905,
          0.017116254195570946,
          0.9820141196250916,
          0.985752284526825,
          0.810764729976654,
          0.010835197754204273,
          0.00011199675645912066,
          1.346955855296983e-06
        ],
        "race": [
          0.9955484867095947,
          0.01915483921766281,
          0.09379010647535324,
          0.18039153516292572,
          0.209830641746521
        ],
        "gender": [
          0.9997602105140686,
          0.0006975879077799618
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 0.061884019719974384,
          "disgust": 3.483557827779301e-06,
          "fear": 99.28084011559726,
          "happy": 0.24873620450414446,
          "sad": 0.408277591811275,
          "surprise": 2.51870796188725e-09,
          "neutral": 0.000258274864723991
        },
        "dominant_emotion": "fear",
        "region": {
          "x": 0,
          "y": 0,
          "w": 84,
          "h": 119,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 30,
        "gender": {
          "Woman": 15.325076878070831,
          "Man": 84.67493057250977
        },
        "dominant_gender": "Man",
        "race": {
          "asian": 4.007140785881328,
          "indian": 2.231355703255874,
          "black": 0.9149208278906839,
          "white": 65.7584646796265,
          "middle eastern": 12.566614900029593,
          "latino hispanic": 14.521504314035434
        },
        "dominant_race": "white"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "thigh",
                "sit on"
              ]
            ],
            "negative_action": [
              "carry",
              "stand on",
              "no interaction"
            ],
            "position": "thigh"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.0813707709312439,
        0.11582527309656143,
        0.9424406290054321,
        0.9374046325683594
      ]
    ],
    "face_boxes": [
      [
        0.6577671766281128,
        0.14376899600028992,
        0.790996789932251,
        0.2687758207321167
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.7478360401878477,
            0.2991898074746132
          ],
          [
            0.6292211444949808,
            0.2844006409247715
          ],
          [
            0.4301893703662718,
            0.3005342771609623
          ],
          [
            0.560866797824515,
            0.2440665503342947
          ],
          [
            0.8664509358807145,
            0.31397897402445485
          ],
          [
            0.7719611037185999,
            0.4269144276777904
          ],
          [
            0.677471271556486,
            0.29784533778826405
          ],
          [
            0.46034569977971246,
            0.5129604876041413
          ],
          [
            0.13063649819276044,
            0.6232070018847784
          ],
          [
            0.4120955727182074,
            0.8033659398555756
          ],
          [
            0.6231898786122927,
            0.5385054116447767
          ],
          [
            0.40003304095283093,
            0.6514408652981123
          ],
          [
            0.279407723299068,
            0.8302553335825602
          ],
          [
            0.6694295837129015,
            0.1916322325666746
          ],
          [
            0.7156692888135108,
            0.1875988235076269
          ],
          [
            0.6633983178302133,
            0.2064213991165161
          ],
          [
            0.8041278550929372,
            0.1983545809984207
          ]
        ],
        "dw_hand_1": [
          [
            0.677471271556486,
            0.2951563984155655
          ],
          [
            0.6935546472436543,
            0.2790227621793747
          ],
          [
            0.7156692888135108,
            0.2615446562568346
          ],
          [
            0.721700554696199,
            0.2467554897069931
          ],
          [
            0.7297422425397833,
            0.2333107928435007
          ],
          [
            0.6955650692045505,
            0.2507888987660408
          ],
          [
            0.6975754911654464,
            0.2373442019025484
          ],
          [
            0.6975754911654464,
            0.2333107928435007
          ],
          [
            0.6995859131263422,
            0.2306218534708023
          ],
          [
            0.6754608495955899,
            0.2494444290796915
          ],
          [
            0.6794816935173819,
            0.2359997322161993
          ],
          [
            0.6855129594000701,
            0.2359997322161993
          ],
          [
            0.6895338033218621,
            0.2373442019025484
          ],
          [
            0.6593774739084216,
            0.2507888987660408
          ],
          [
            0.6654087397911097,
            0.2386886715888977
          ],
          [
            0.6714400056737976,
            0.240033141275247
          ],
          [
            0.677471271556486,
            0.2454110200206438
          ],
          [
            0.6473149421430453,
            0.2548223078250885
          ],
          [
            0.6493253641039411,
            0.2467554897069931
          ],
          [
            0.651335786064837,
            0.2480999593933424
          ],
          [
            0.6573670519475254,
            0.2507888987660408
          ]
        ],
        "dw_hand_2": [
          [
            0.5749397515507874,
            0.2480999593933424
          ],
          [
            0.6091169248860201,
            0.2467554897069931
          ],
          [
            0.6412836762603571,
            0.2454110200206438
          ],
          [
            0.6553566299866294,
            0.2413776109615961
          ],
          [
            0.6654087397911097,
            0.2386886715888977
          ],
          [
            0.651335786064837,
            0.2225550353527069
          ],
          [
            0.6593774739084216,
            0.2171771566073099
          ],
          [
            0.6654087397911097,
            0.2212105656663576
          ],
          [
            0.6694295837129015,
            0.2238995050390562
          ],
          [
            0.639273254299461,
            0.2158326869209608
          ],
          [
            0.6473149421430453,
            0.2104548081755638
          ],
          [
            0.6553566299866294,
            0.2158326869209608
          ],
          [
            0.6573670519475254,
            0.2198660959800085
          ],
          [
            0.6272107225340844,
            0.2117992778619131
          ],
          [
            0.6352524103776688,
            0.2064213991165161
          ],
          [
            0.643294098221253,
            0.2131437475482622
          ],
          [
            0.6453045201821489,
            0.2198660959800085
          ],
          [
            0.6111273468469163,
            0.2104548081755638
          ],
          [
            0.6211794566513965,
            0.2077658688028654
          ],
          [
            0.6292211444949808,
            0.2131437475482622
          ],
          [
            0.6352524103776688,
            0.2198660959800085
          ]
        ],
        "dw_face": [
          [
            0.6593774739084216,
            0.1956656416257223
          ],
          [
            0.6593774739084216,
            0.2064213991165161
          ],
          [
            0.6613878958693175,
            0.2185216262936592
          ],
          [
            0.6674191617520056,
            0.229277383784453
          ],
          [
            0.6754608495955899,
            0.240033141275247
          ],
          [
            0.6814921154782778,
            0.2494444290796915
          ],
          [
            0.6915442252827582,
            0.2588557168841362
          ],
          [
            0.7096380229308227,
            0.2642335956295332
          ],
          [
            0.7277318205788871,
            0.2615446562568346
          ],
          [
            0.7418047743051592,
            0.2561667775114378
          ],
          [
            0.7558777280314319,
            0.2494444290796915
          ],
          [
            0.7699506817577041,
            0.2427220806479454
          ],
          [
            0.7800027915621843,
            0.2333107928435007
          ],
          [
            0.7860340574448726,
            0.2225550353527069
          ],
          [
            0.7880444794057686,
            0.2104548081755638
          ],
          [
            0.7900549013666645,
            0.19969905068477
          ],
          [
            0.7900549013666645,
            0.1875988235076269
          ],
          [
            0.6593774739084216,
            0.18894329319397601
          ],
          [
            0.6613878958693175,
            0.1875988235076269
          ],
          [
            0.6654087397911097,
            0.1862543538212776
          ],
          [
            0.6694295837129015,
            0.1862543538212776
          ],
          [
            0.6754608495955899,
            0.1862543538212776
          ],
          [
            0.6935546472436543,
            0.18222094476222991
          ],
          [
            0.7036067570481345,
            0.1795320053895315
          ],
          [
            0.7156692888135108,
            0.17684306601683292
          ],
          [
            0.7277318205788871,
            0.17684306601683292
          ],
          [
            0.7377839303833675,
            0.1795320053895315
          ],
          [
            0.6835025374391742,
            0.1916322325666746
          ],
          [
            0.6814921154782778,
            0.1983545809984207
          ],
          [
            0.677471271556486,
            0.2037324597438177
          ],
          [
            0.6754608495955899,
            0.2091103384892145
          ],
          [
            0.6754608495955899,
            0.2171771566073099
          ],
          [
            0.6794816935173819,
            0.2185216262936592
          ],
          [
            0.6855129594000701,
            0.2185216262936592
          ],
          [
            0.6935546472436543,
            0.2171771566073099
          ],
          [
            0.6995859131263422,
            0.2158326869209608
          ],
          [
            0.6633983178302133,
            0.1970101113120714
          ],
          [
            0.6654087397911097,
            0.194321171939373
          ],
          [
            0.6714400056737976,
            0.194321171939373
          ],
          [
            0.677471271556486,
            0.194321171939373
          ],
          [
            0.6734504276346938,
            0.1970101113120714
          ],
          [
            0.6674191617520056,
            0.1983545809984207
          ],
          [
            0.7056171790090303,
            0.1902877628803253
          ],
          [
            0.7136588668526149,
            0.1862543538212776
          ],
          [
            0.721700554696199,
            0.1862543538212776
          ],
          [
            0.7297422425397833,
            0.1875988235076269
          ],
          [
            0.721700554696199,
            0.1902877628803253
          ],
          [
            0.7136588668526149,
            0.1916322325666746
          ],
          [
            0.6794816935173819,
            0.2359997322161993
          ],
          [
            0.6794816935173819,
            0.2319663231571516
          ],
          [
            0.6835025374391742,
            0.2279329140981039
          ],
          [
            0.6875233813609659,
            0.2265884444117546
          ],
          [
            0.6915442252827582,
            0.2265884444117546
          ],
          [
            0.7096380229308227,
            0.2265884444117546
          ],
          [
            0.7257213986179909,
            0.229277383784453
          ],
          [
            0.7176797107744066,
            0.2359997322161993
          ],
          [
            0.7076276009699265,
            0.240033141275247
          ],
          [
            0.6955650692045505,
            0.2427220806479454
          ],
          [
            0.6895338033218621,
            0.2427220806479454
          ],
          [
            0.6835025374391742,
            0.240033141275247
          ],
          [
            0.6814921154782778,
            0.2359997322161993
          ],
          [
            0.6835025374391742,
            0.2333107928435007
          ],
          [
            0.6895338033218621,
            0.2306218534708023
          ],
          [
            0.7076276009699265,
            0.229277383784453
          ],
          [
            0.7237109766570948,
            0.2306218534708023
          ],
          [
            0.7096380229308227,
            0.2359997322161993
          ],
          [
            0.6935546472436543,
            0.2386886715888977
          ],
          [
            0.6875233813609659,
            0.2386886715888977
          ]
        ],
        "dw_foot_1": [
          [
            0.18692831309784977,
            0.9055456360181173
          ],
          [
            0.24724097192473124,
            0.9122679844498635
          ],
          [
            0.2934806770253404,
            0.8342887426416079
          ]
        ],
        "dw_foot_2": [
          [
            0.3537933358522219,
            0.9082345753908158
          ],
          [
            0.3356995382041575,
            0.8961343482136727
          ],
          [
            0.4744186535059849,
            0.8006770004828772
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "chair",
      "possible_names": [
        "chair"
      ],
      "box": [
        0.3037383177570093,
        0.4265625,
        0.9299065420560748,
        0.9359375
      ]
    }
  ],
  "scene": "A person is seated on a metal chair outdoors near a table with greenery and urban elements in the background under soft lighting creating a relaxed atmosphere with muted colors and casual attire visible around the area",
  "overall_past": "Before this scene, the individual likely received a message or call that prompted a significant or urgent conversation, possibly related to a personal matter, work emergency, or an unexpected development requiring immediate attention. The focused expression, hand near the mouth, and the serious posture suggest they were already in the middle of processing the information when the call came, indicating that the conversation was not casual but rather emotionally or mentally engaging. The choice to sit outdoors in a quiet, green-tinged urban space—perhaps a café patio or park bench—implies they sought a brief moment of privacy or calm to handle the situation, possibly stepping away from a more crowded or stressful environment.",
  "overall_past_clean": "A sharp, urgent notification shattered the usual rhythm of routine, triggering an immediate shift in focus as the individual froze, the weight of incoming information pressing in with undeniable urgency, demanding a response before the moment could be reclaimed.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the individual is likely to end the phone call, lower the phone slowly, and sit in quiet reflection for a moment—perhaps staring at the table or the greenery nearby—before standing up, adjusting their cap, and walking away with a thoughtful expression, possibly heading toward a nearby path or exit, as if the conversation has left them with a decision to make or a new direction to pursue.",
  "overall_future_clean": "The figure ends the call with a deliberate pause, sets the device down with measured care, rises steadily, and moves with purpose toward the edge of the space, shoulders squared, eyes fixed ahead—already committed to a path not yet chosen but now inevitable.",
  "future_scene_ok": true
}