{
  "image_path": "./ref_datasets/hico_det/images/train/train_00028966.jpg",
  "image_id": "train_00028966",
  "width": 640,
  "height": 427,
  "split": "train",
  "source": "zhimeng/hico_det",
  "dataset_index": 28966,
  "annotations": {
    "objects": "[{'id': 357, 'bbox_human': [264, 638, 1, 426], 'bbox_object': [276, 328, 195, 235], 'connection': 1, 'invis': 0}, {'id': 361, 'bbox_human': [266, 637, 6, 417], 'bbox_object': [276, 326, 196, 235], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('hot_dog', 'carry'), ('hot_dog', 'hold')]",
    "negative_captions": "[('hot_dog', 'cook'), ('hot_dog', 'cut'), ('hot_dog', 'eat'), ('hot_dog', 'make'), ('hot_dog', 'no_interaction')]",
    "ambiguous_captions": "[]",
    "positive_objects": "[356, 360]",
    "negative_objects": "[357, 358, 359, 361, 362]",
    "ambiguous_objects": "[]",
    "size": "[640, 427, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "male",
        "emotion": "neutral",
        "clothing_description": "The individual is wearing a black cap, dark sunglasses, a black jacket with yellow details, and a bracelet.",
        "clothing": [
          {
            "possible_names": [
              "cap",
              "hat"
            ],
            "name": "cap",
            "type": "headwear",
            "color": [
              "black"
            ]
          },
          {
            "possible_names": [
              "sunglasses",
              "glasses"
            ],
            "name": "sunglasses",
            "type": "accessory",
            "color": [
              "black"
            ]
          },
          {
            "possible_names": [
              "jacket",
              "coat"
            ],
            "name": "jacket",
            "type": "top",
            "color": [
              "black"
            ]
          },
          {
            "possible_names": [
              "bracelet",
              "wristband"
            ],
            "name": "bracelet",
            "type": "accessory",
            "color": [
              "yellow"
            ]
          }
        ],
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "hot dog",
              "burrito",
              "food"
            ],
            "name": "hot dog",
            "position": "hand"
          }
        ],
        "description": "The individual is an adult male in the foreground. He is wearing a black cap, dark sunglasses, a black jacket with yellow details, and a yellow bracelet. His expression is neutral. He is holding a hot dog wrapped in paper in his hand. He appears to be at an outdoor event, possibly a sports game, given the setting and background.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears to be focused on eating, showing no strong emotional expression.",
        "meaningful": true,
        "story": "The individual seems to be enjoying a meal at an outdoor event, possibly a sports game, given the stadium seating and railings in the background. He is wearing a cap and earpiece, suggesting he might be part of the event staff or security.",
        "race": "black",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is holding a hot dog wrapped in paper, seemingly enjoying a snack during an event as they stand near a railing. Their posture suggests they are relaxed and focused on eating, possibly taking a break from watching the activity around them. The individual appears to be engaged in a casual moment, likely motivated by hunger or the desire for a quick meal while attending a public gathering such as a sports game or festival. The presence of earphones indicates they might also be listening to music or commentary, adding to their personal experience at the event.",
        "intention": "The individual is taking a momentary break to enjoy a quick meal while staying engaged with the event through music or commentary",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.6805469376966358,
            0.08894664972432285
          ],
          [
            0.6911798059940338,
            0.14805092529215194
          ],
          [
            0.6964983582496643,
            0.20160513986668566
          ],
          [
            0.6931732069700957,
            0.24793293309395192
          ],
          [
            0.6900054644793272,
            0.2925789731926406
          ],
          [
            0.6960387617349625,
            0.3294785423955218
          ],
          [
            0.7056445334106684,
            0.35850331678802094
          ],
          [
            0.7168614745140076,
            0.3934366769238673
          ],
          [
            0.725248983502388,
            0.4416127962665679
          ],
          [
            0.7475123576819896,
            0.44504742512299333
          ],
          [
            0.7692324101924897,
            0.405613975486615
          ],
          [
            0.7888152852654458,
            0.3649897391918153
          ],
          [
            0.8075355485081672,
            0.3158170764523631
          ],
          [
            0.8230903893709183,
            0.26190087403609785
          ],
          [
            0.8273947313427925,
            0.20016430420554893
          ],
          [
            0.8335669502615929,
            0.1354464604009869
          ],
          [
            0.8348953813314438,
            0.07166770885285026
          ],
          [
            0.6783384855836629,
            0.09461877177733967
          ],
          [
            0.6761239079758525,
            0.07900042643950671
          ],
          [
            0.6793066591024399,
            0.08166344056523496
          ],
          [
            0.6835747443139553,
            0.06422641645031417
          ],
          [
            0.6899147789925337,
            0.05395874158719505
          ],
          [
            0.6910832799971104,
            0.03960565230087837
          ],
          [
            0.7012403897941113,
            0.02663364526857776
          ],
          [
            0.7105700246989727,
            0.024283849263757557
          ],
          [
            0.7249133720993995,
            0.026814417362053602
          ],
          [
            0.7362282238900661,
            0.05576826729236857
          ],
          [
            0.6921785600483418,
            0.08916944195570935
          ],
          [
            0.682726475596428,
            0.12922435287011033
          ],
          [
            0.6741755547001957,
            0.15971439408793342
          ],
          [
            0.6701135721057654,
            0.18701210226171908
          ],
          [
            0.6866112530231476,
            0.24213475278086707
          ],
          [
            0.6878517378121615,
            0.23125545888411317
          ],
          [
            0.6914797775447369,
            0.22808273279694818
          ],
          [
            0.6963825605809688,
            0.2200557342457827
          ],
          [
            0.7055522620677948,
            0.22493815940532766
          ],
          [
            0.6857621528208255,
            0.11942130516667396
          ],
          [
            0.6866018384695053,
            0.11997261730154528
          ],
          [
            0.6868175484240056,
            0.11558769713599931
          ],
          [
            0.6819154359400272,
            0.10621319410592747
          ],
          [
            0.6868933208286763,
            0.10523834241278078
          ],
          [
            0.6903238896280527,
            0.11631970167877921
          ],
          [
            0.7104158043861389,
            0.1021681738043358
          ],
          [
            0.7112323224544526,
            0.0961528697034594
          ],
          [
            0.7187749691307544,
            0.09003673966970281
          ],
          [
            0.7274579167366028,
            0.09040191493412451
          ],
          [
            0.7210460424423217,
            0.09464538244947457
          ],
          [
            0.7089714415371418,
            0.08993295890984228
          ],
          [
            0.6926675200462341,
            0.31528737848919347
          ],
          [
            0.687307158857584,
            0.31361382651305114
          ],
          [
            0.6840478859841823,
            0.2834979714897414
          ],
          [
            0.6847915720194578,
            0.2727747062409989
          ],
          [
            0.6888265699148178,
            0.2665435705507188
          ],
          [
            0.7010679271072149,
            0.27926343737257964
          ],
          [
            0.710987926274538,
            0.2969034615469123
          ],
          [
            0.7112083368003368,
            0.3162752383127784
          ],
          [
            0.7060275226831436,
            0.34555056135803613
          ],
          [
            0.6954025082290173,
            0.3441402732667894
          ],
          [
            0.6939825668931008,
            0.3367651974169775
          ],
          [
            0.6955928847193718,
            0.323679087113361
          ],
          [
            0.6917219273746014,
            0.32282244572873975
          ],
          [
            0.6928394697606564,
            0.3178580262741543
          ],
          [
            0.6902589868754149,
            0.3062971924889961
          ],
          [
            0.7032508864998818,
            0.29499248965363234
          ],
          [
            0.712850908190012,
            0.2977040042921866
          ],
          [
            0.7094086617231369,
            0.30865526502449087
          ],
          [
            0.698413210734725,
            0.3225911249879931
          ],
          [
            0.6982299990952014,
            0.31518934370483115
          ]
        ],
        "visibility": [
          0.9999711513519287,
          0.07084935903549194,
          0.0005002667312510312,
          9.645542013458908e-05,
          0.9687085151672363,
          0.026312753558158875,
          8.395723005705236e-10,
          1.1887656370390687e-07,
          0.999984622001648,
          0.03264756128191948,
          1.0,
          0.003406163305044174,
          0.8134355545043945,
          0.9335522055625916,
          0.00024705613031983376,
          0.39495667815208435,
          0.5105946660041809,
          0.00012084859918104485,
          0.8945586085319519,
          1.3542415189249368e-08,
          7.593146733597678e-08,
          1.0727719927661195e-11,
          0.9805867671966553,
          1.9116997762580468e-08,
          5.707999761739302e-08,
          1.8361005231781746e-06,
          0.011433780193328857,
          0.01532662007957697,
          0.12538379430770874
        ],
        "headpose": {
          "pitch": 18.82484658846493,
          "yaw": 78.70902361771925,
          "roll": 8.366121983203845
        },
        "attributes": {
          "5 oClock Shadow": 0.03359116241335869,
          "Arched Eyebrows": 0.00018687393458094448,
          "Attractive": 0.02365502528846264,
          "Bags Under Eyes": 0.0030616887379437685,
          "Bald": 0.00013932019646745175,
          "Bangs": 0.001105324481613934,
          "Big Lips": 0.05253272131085396,
          "Big Nose": 0.015427281148731709,
          "Black Hair": 0.14921250939369202,
          "Blond Hair": 0.001650016289204359,
          "Blurry": 0.13412071764469147,
          "Brown Hair": 0.020653776824474335,
          "Bushy Eyebrows": 0.0036933687515556812,
          "Chubby": 0.005530053749680519,
          "Double Chin": 0.00047949166037142277,
          "Eyeglasses": 0.007596916519105434,
          "Goatee": 0.014713557437062263,
          "Gray Hair": 0.0013883037026971579,
          "Heavy Makeup": 0.0009486217168159783,
          "High Cheekbones": 0.019105039536952972,
          "Male": 0.9748085737228394,
          "Mouth Slightly Open": 0.1284698098897934,
          "Mustache": 0.004896173719316721,
          "Narrow Eyes": 0.14899282157421112,
          "No Beard": 0.8127390146255493,
          "Oval Face": 0.019961822777986526,
          "Pale Skin": 0.000641120015643537,
          "Pointy Nose": 0.12002519518136978,
          "Receding Hairline": 0.004671082366257906,
          "Rosy Cheeks": 8.469958265777677e-05,
          "Sideburns": 0.002846282906830311,
          "Smiling": 0.003690253710374236,
          "Straight Hair": 0.024002954363822937,
          "Wavy Hair": 0.09847427159547806,
          "Wearing Earrings": 0.024438945576548576,
          "Wearing Hat": 0.05417555943131447,
          "Wearing Lipstick": 0.0007444460061378777,
          "Wearing Necklace": 0.012845849618315697,
          "Wearing Necktie": 0.001415336038917303,
          "Young": 0.7900844812393188
        },
        "age": [
          0.005735685583204031,
          0.022544506937265396,
          0.9934440851211548,
          0.9112592339515686,
          0.11234299093484879,
          0.005552944261580706,
          0.002949983347207308,
          1.3461684829962905e-05
        ],
        "race": [
          0.2098555564880371,
          0.6621378064155579,
          0.6684166789054871,
          0.6560156345367432,
          0.0900343805551529
        ],
        "gender": [
          0.9536281228065491,
          0.06502249836921692
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 0.0011211124729015864,
          "disgust": 6.048527456670172e-07,
          "fear": 16.3936585187912,
          "happy": 0.13561341911554337,
          "sad": 78.31021547317505,
          "surprise": 2.3582614971928706e-06,
          "neutral": 5.1593925803899765
        },
        "dominant_emotion": "sad",
        "region": {
          "x": 0,
          "y": 0,
          "w": 125,
          "h": 215,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 30,
        "gender": {
          "Woman": 15.883879363536835,
          "Man": 84.11611914634705
        },
        "dominant_gender": "Man",
        "race": {
          "asian": 16.00450724363327,
          "indian": 1.3158815912902355,
          "black": 0.4860164597630501,
          "white": 67.33971834182739,
          "middle eastern": 6.226227432489395,
          "latino hispanic": 8.627649396657944
        },
        "dominant_race": "white"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "hand",
                "carry"
              ],
              [
                "hand",
                "hold"
              ]
            ],
            "negative_action": [
              "cook",
              "cut",
              "eat",
              "make",
              "no interaction"
            ],
            "position": "hand"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.4158352017402649,
        0.0035035894252359867,
        0.9996010065078735,
        0.9871929287910461
      ]
    ],
    "face_boxes": [
      [
        0.6870707273483276,
        0.07485972344875336,
        0.8182069063186646,
        0.42028936743736267
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.9065368330106139,
            0.5268708794122557
          ],
          [
            0.8466562513262034,
            0.5230919002649097
          ],
          [
            0.763453758880496,
            0.9576745022097014
          ],
          [
            0.5882242672145367,
            0.7630570761213816
          ],
          [
            0.9664174146950245,
            0.5306498585596018
          ],
          [
            0.7571505397558213,
            0.9822378666674505
          ],
          [
            0.5693146098405123,
            0.7347147325162865
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.34365936517715456,
            0.7932889093001497
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.34365936517715456,
            0.8915423671311461
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.7092460744082928,
            0.1716468395617304
          ],
          [
            0.7268950879573822,
            0.16975734998805742
          ],
          [
            0.8403530322015286,
            0.20943663103519056
          ],
          [
            0.8378317445516587,
            0.20376816231417152
          ]
        ],
        "dw_hand_1": [
          [
            0.5604901030659676,
            0.7233777950742485
          ],
          [
            0.549144308641553,
            0.6440192329799822
          ],
          [
            0.5390591580420733,
            0.5703291396067349
          ],
          [
            0.5251920759677887,
            0.5193129211175637
          ],
          [
            0.5226707883179188,
            0.47585466092308454
          ],
          [
            0.47098439149558546,
            0.5873345457697919
          ],
          [
            0.4356863643974066,
            0.5419867960016398
          ],
          [
            0.4382076520472765,
            0.5098654732491987
          ],
          [
            0.44955344647169115,
            0.4928600670861416
          ],
          [
            0.45333537794649603,
            0.61756637894856
          ],
          [
            0.4256012137979269,
            0.5722186291804079
          ],
          [
            0.4319044329226017,
            0.5344288377069478
          ],
          [
            0.44325022734701636,
            0.5117549628228717
          ],
          [
            0.44955344647169115,
            0.6440192329799822
          ],
          [
            0.42812250144779684,
            0.6043399519328491
          ],
          [
            0.43442572057247164,
            0.5722186291804079
          ],
          [
            0.4457715149968863,
            0.5495447542963319
          ],
          [
            0.4508140902966261,
            0.6629141287167123
          ],
          [
            0.4419895835220814,
            0.6307928059642711
          ],
          [
            0.4445108711719513,
            0.606229441506522
          ],
          [
            0.4520747341215611,
            0.591113524917138
          ]
        ],
        "dw_hand_2": [
          [
            0.5680539660155773,
            0.7328252429426135
          ],
          [
            0.547883664816618,
            0.6364612746852901
          ],
          [
            0.5377985142171383,
            0.5608816917383699
          ],
          [
            0.5277133636176586,
            0.5249813898385828
          ],
          [
            0.5251920759677887,
            0.4928600670861416
          ],
          [
            0.4798088982701302,
            0.5552132230173509
          ],
          [
            0.4608992408961058,
            0.5212024106912367
          ],
          [
            0.4508140902966261,
            0.5098654732491987
          ],
          [
            0.4508140902966261,
            0.5023075149545067
          ],
          [
            0.4697237476706505,
            0.5816660770487729
          ],
          [
            0.4482928026467562,
            0.5514342438700048
          ],
          [
            0.4445108711719513,
            0.5287603689859287
          ],
          [
            0.44955344647169115,
            0.5155339419702177
          ],
          [
            0.4659418161958456,
            0.602450462359176
          ],
          [
            0.4508140902966261,
            0.5722186291804079
          ],
          [
            0.4482928026467562,
            0.5533237334436778
          ],
          [
            0.4520747341215611,
            0.5514342438700048
          ],
          [
            0.4608992408961058,
            0.6194558685222331
          ],
          [
            0.454596021771431,
            0.602450462359176
          ],
          [
            0.45585666559636595,
            0.585445056196119
          ],
          [
            0.4608992408961058,
            0.5816660770487729
          ]
        ],
        "dw_face": [
          [
            0.7117673620581627,
            0.1716468395617304
          ],
          [
            0.7079854305833578,
            0.20187867274049853
          ],
          [
            0.702942855283618,
            0.23399999549293965
          ],
          [
            0.6991609238088131,
            0.26423182867170775
          ],
          [
            0.6966396361589432,
            0.2982426409978219
          ],
          [
            0.701682211458683,
            0.332253453323936
          ],
          [
            0.7067247867584229,
            0.3624852865027041
          ],
          [
            0.7155492935329676,
            0.3927171196814722
          ],
          [
            0.734458950906992,
            0.4002750779761643
          ],
          [
            0.7546292521059513,
            0.3851591613867802
          ],
          [
            0.7722782656550408,
            0.3624852865027041
          ],
          [
            0.7874059915542603,
            0.337921922044955
          ],
          [
            0.79749114215374,
            0.3114690680135329
          ],
          [
            0.8075762927532196,
            0.2793477452610918
          ],
          [
            0.8126188680529595,
            0.2472264225086507
          ],
          [
            0.8164007995277643,
            0.2151050997562096
          ],
          [
            0.8189220871776343,
            0.18109428743009545
          ],
          [
            0.7117673620581627,
            0.14897296467765433
          ],
          [
            0.7117673620581627,
            0.14897296467765433
          ],
          [
            0.7117673620581627,
            0.14897296467765433
          ],
          [
            0.7117673620581627,
            0.14897296467765433
          ],
          [
            0.7117673620581627,
            0.14897296467765433
          ],
          [
            0.7130280058830977,
            0.14519398553030832
          ],
          [
            0.7205918688327074,
            0.1414150063829623
          ],
          [
            0.7268950879573822,
            0.1414150063829623
          ],
          [
            0.733198307082057,
            0.1433044959566353
          ],
          [
            0.7407621700316668,
            0.14897296467765433
          ],
          [
            0.7117673620581627,
            0.17542581870907642
          ],
          [
            0.704203499108553,
            0.1924312248721335
          ],
          [
            0.6966396361589432,
            0.20754714146151754
          ],
          [
            0.6878151293843985,
            0.2226630580509016
          ],
          [
            0.6865544855594635,
            0.24155795378763167
          ],
          [
            0.6928577046841383,
            0.2472264225086507
          ],
          [
            0.7004215676337481,
            0.25289489122966974
          ],
          [
            0.7067247867584229,
            0.25289489122966974
          ],
          [
            0.7117673620581627,
            0.2491159120823237
          ],
          [
            0.7105067182332278,
            0.17542581870907642
          ],
          [
            0.7105067182332278,
            0.17542581870907642
          ],
          [
            0.7105067182332278,
            0.17542581870907642
          ],
          [
            0.7105067182332278,
            0.17542581870907642
          ],
          [
            0.7105067182332278,
            0.17542581870907642
          ],
          [
            0.7105067182332278,
            0.17542581870907642
          ],
          [
            0.7193312250077725,
            0.17542581870907642
          ],
          [
            0.7218525126576424,
            0.16975734998805742
          ],
          [
            0.7281557317823172,
            0.16975734998805742
          ],
          [
            0.733198307082057,
            0.17353632913540343
          ],
          [
            0.7281557317823172,
            0.17542581870907642
          ],
          [
            0.7231131564825773,
            0.17920479785642243
          ],
          [
            0.6979002799838782,
            0.30769008886618693
          ],
          [
            0.6941183485090733,
            0.2982426409978219
          ],
          [
            0.6941183485090733,
            0.28501621398211086
          ],
          [
            0.6941183485090733,
            0.28501621398211086
          ],
          [
            0.6966396361589432,
            0.28312672440843784
          ],
          [
            0.7054641429334879,
            0.2925741722768029
          ],
          [
            0.7117673620581627,
            0.30769008886618693
          ],
          [
            0.7092460744082928,
            0.319027026308225
          ],
          [
            0.704203499108553,
            0.32847447417659
          ],
          [
            0.6979002799838782,
            0.332253453323936
          ],
          [
            0.6953789923340082,
            0.324695495029244
          ],
          [
            0.6966396361589432,
            0.31524804716087895
          ],
          [
            0.6979002799838782,
            0.30769008886618693
          ],
          [
            0.6966396361589432,
            0.3039111097188409
          ],
          [
            0.6979002799838782,
            0.3001321305714949
          ],
          [
            0.7054641429334879,
            0.3001321305714949
          ],
          [
            0.7105067182332278,
            0.30769008886618693
          ],
          [
            0.7067247867584229,
            0.31335855758720593
          ],
          [
            0.701682211458683,
            0.31524804716087895
          ],
          [
            0.6991609238088131,
            0.3114690680135329
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "hot_dog",
      "possible_names": [
        "hot_dog"
      ],
      "box": [
        0.43125,
        0.4566744730679157,
        0.5125,
        0.550351288056206
      ]
    }
  ],
  "scene": "A person is enjoying a hot dog at an outdoor event with a blurred crowd in the background and metal railings nearby suggesting a lively atmosphere with natural lighting and casual attire typical for sports or public gatherings",
  "overall_past": "Before the current scene, the individual likely arrived at the outdoor event, possibly after traveling to the venue by car, public transit, or on foot, and made their way through the crowd to a spot near the railing where they could enjoy a view of the main activity—such as a sports game or performance—while also having a moment of personal relaxation. They may have purchased the hot dog from a nearby food stand, perhaps after waiting in a short line, and were drawn to the railing area to eat while taking in the atmosphere. The earphones suggest they had already connected to a music playlist or live commentary, indicating they had prepared for this personal experience in advance, possibly to enhance their enjoyment of the event. Their relaxed posture and focused attention on the hot dog imply they had just taken a break from more intense engagement with the event, such as cheering or watching a game, and were now savoring a simple, satisfying moment amidst the lively surroundings.",
  "overall_past_clean": "The individual moved through a bustling crowd, having arrived at the venue via transportation, and deliberately positioned themselves near the railing to secure a clear view of the central activity, pausing briefly at a food stand to acquire a meal before settling into a moment of quiet focus amidst the surrounding energy.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the individual is likely to finish eating the hot dog, carefully dispose of the wrapper, and then resume watching the event—perhaps glancing toward the field or stage with renewed attention, possibly adjusting their earphones to catch important commentary or music. The moment of pause has passed, and they are re-engaging with the lively atmosphere, ready to enjoy the next part of the event.",
  "overall_future_clean": "The individual rises slightly from the seat, tucks the crumpled wrapper into the nearby bin with deliberate precision, then settles back in, earphones snugly in place, eyes fixed on the unfolding action, fully immersed once more as the energy of the event surges forward.",
  "future_scene_ok": true
}