{
  "image_path": "./ref_datasets/hico_det/images/train/train_00005438.jpg",
  "image_id": "train_00005438",
  "width": 427,
  "height": 640,
  "split": "train",
  "source": "zhimeng/hico_det",
  "dataset_index": 5438,
  "annotations": {
    "objects": "[{'id': 265, 'bbox_human': [2, 343, 1, 496], 'bbox_object': [127, 425, 350, 640], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('cake', 'blow')]",
    "negative_captions": "[('cake', 'carry'), ('cake', 'cut'), ('cake', 'eat'), ('cake', 'hold'), ('cake', 'light'), ('cake', 'make'), ('cake', 'pick_up'), ('cake', 'no_interaction')]",
    "ambiguous_captions": "[]",
    "positive_objects": "[264]",
    "negative_objects": "[265, 266, 267, 268, 269, 270, 271, 272]",
    "ambiguous_objects": "[]",
    "size": "[427, 640, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": true,
        "age": "adult",
        "gender": "male",
        "emotion": "neutral",
        "clothing_description": "The person is wearing glasses and a dark green shirt.",
        "clothing": [
          {
            "possible_names": [
              "glasses",
              "eyewear"
            ],
            "name": "glasses",
            "type": "accessory",
            "color": [
              "black"
            ]
          },
          {
            "possible_names": [
              "shirt",
              "t-shirt"
            ],
            "name": "t shirt",
            "type": "top",
            "color": [
              "dark green"
            ]
          }
        ],
        "objects": [
          {
            "standalone": true,
            "possible_names": [
              "birthday cake"
            ],
            "name": "birthday cake",
            "position": "standalone"
          }
        ],
        "description": "The person is in the background and appears to be an adult male. He is wearing glasses and a dark green t-shirt. His expression suggests he is happy. He is looking at a birthday cake with lit candles.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears to be focused on blowing out candles on a birthday cake, suggesting a neutral expression typical during such an activity.",
        "meaningful": true,
        "story": "The individual seems to be celebrating a birthday, as indicated by the presence of a decorated cake with lit candles. The setting suggests a casual home environment, possibly with friends or family around, given the stuffed animals and books in the background.",
        "race": "white",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is leaning forward towards a birthday cake adorned with lit candles, likely preparing to blow them out as part of a celebratory tradition. Their focused expression suggests concentration on the task, possibly making a wish before extinguishing the flames. The presence of stuffed animals and books in the background indicates a cozy, personal space, perhaps a home setting where this special moment is being shared with loved ones. The act of blowing out candles symbolizes a transition into another year, marking growth and new beginnings for the individual.",
        "intention": "The intention is to mark a significant personal milestone with a symbolic gesture of reflection and hope for the future",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.10334832834216334,
            0.10021814608148165
          ],
          [
            0.10382829928087048,
            0.14401560899402416
          ],
          [
            0.10738255555672005,
            0.18520757965743542
          ],
          [
            0.11963028000844048,
            0.22529515922069548
          ],
          [
            0.14406258886815235,
            0.2581375904381275
          ],
          [
            0.1769206482417291,
            0.2915655593786921
          ],
          [
            0.21791162982199175,
            0.31752257964440755
          ],
          [
            0.26161919361215324,
            0.33490526899695394
          ],
          [
            0.29747483019607046,
            0.3404825698052134
          ],
          [
            0.3272753220491483,
            0.33339561383639066
          ],
          [
            0.35183667307037225,
            0.3092218350086894
          ],
          [
            0.37714777697923396,
            0.2866483952317919
          ],
          [
            0.4035370395350273,
            0.26171143374272754
          ],
          [
            0.42899650074080425,
            0.23619854003190993
          ],
          [
            0.4440672683971069,
            0.20986343813793998
          ],
          [
            0.45316207077878895,
            0.17981210132794723
          ],
          [
            0.4629310130594406,
            0.14927132443657945
          ],
          [
            0.18329704641378536,
            0.11457185566957509
          ],
          [
            0.22198825729615157,
            0.10506952106952668
          ],
          [
            0.26699378267902724,
            0.10858571715652943
          ],
          [
            0.30705630033460496,
            0.11769399174622128
          ],
          [
            0.34298884856657025,
            0.13028173132666518
          ],
          [
            0.3889799982698197,
            0.13073411349739347
          ],
          [
            0.41269774403449244,
            0.12192619554698467
          ],
          [
            0.43568700003600036,
            0.11389193252793381
          ],
          [
            0.4574235343103546,
            0.10952327661216259
          ],
          [
            0.4683410342061109,
            0.11369912624359131
          ],
          [
            0.359865876973453,
            0.15921345885310853
          ],
          [
            0.35860900728947426,
            0.18512694175754274
          ],
          [
            0.3631359400578509,
            0.21177825789366453
          ],
          [
            0.36267761021164513,
            0.23797197703804285
          ],
          [
            0.31841371624953935,
            0.24341818264552523
          ],
          [
            0.33218795665333845,
            0.24692066673721583
          ],
          [
            0.34846682642646043,
            0.2529277142669473
          ],
          [
            0.3645133718514211,
            0.247635509286608
          ],
          [
            0.3764352587881436,
            0.23973978715283534
          ],
          [
            0.23023829144273325,
            0.15118905710322514
          ],
          [
            0.25605453508440246,
            0.15172824401940618
          ],
          [
            0.2762778792025533,
            0.149211943681751
          ],
          [
            0.29594849838226206,
            0.15579887362463132
          ],
          [
            0.2729249345135553,
            0.1571089162358216
          ],
          [
            0.25399218645730753,
            0.15832266227475236
          ],
          [
            0.38125033887503895,
            0.15476081163755487
          ],
          [
            0.4025584706178514,
            0.1501475422510079
          ],
          [
            0.41978039969644965,
            0.14699595735541413
          ],
          [
            0.4339892839500042,
            0.14893197741891656
          ],
          [
            0.4214731925065879,
            0.15606112086347174
          ],
          [
            0.4035910792716094,
            0.15619907916656564
          ],
          [
            0.2759348780624681,
            0.27039236298629216
          ],
          [
            0.30422613388640934,
            0.26948685976011416
          ],
          [
            0.3278724533851907,
            0.26565301428948135
          ],
          [
            0.3405969893505598,
            0.26927965943302423
          ],
          [
            0.3518104285211139,
            0.26502760521003177
          ],
          [
            0.36020015577441994,
            0.26818321134362905
          ],
          [
            0.3651064133795658,
            0.2713105409273079
          ],
          [
            0.35349511924735993,
            0.2861565537750721
          ],
          [
            0.3426621291261726,
            0.29584000749247413
          ],
          [
            0.3296749732214378,
            0.29872438609600066
          ],
          [
            0.32076816632222,
            0.29767590867621563
          ],
          [
            0.2974371884570309,
            0.2899163118430546
          ],
          [
            0.2872803540642982,
            0.2724415745053973
          ],
          [
            0.33251324175352553,
            0.27804942535502575
          ],
          [
            0.33777758045394346,
            0.2804014090980802
          ],
          [
            0.34864305644260274,
            0.2763807963047709
          ],
          [
            0.35737008624110345,
            0.2722583879317556
          ],
          [
            0.3465423295233871,
            0.27303043720977643
          ],
          [
            0.3340882248366386,
            0.2753547980317047
          ],
          [
            0.3262106733363621,
            0.27467039184910913
          ]
        ],
        "visibility": [
          3.97282207131866e-11,
          2.2909667218851126e-11,
          0.0044168392196297646,
          8.029352183136496e-10,
          1.7087237180746229e-09,
          0.31217214465141296,
          9.542430720585796e-16,
          1.1332095937177655e-06,
          0.025832530111074448,
          1.9021649677597452e-06,
          4.354199347744725e-07,
          0.10696252435445786,
          0.9999988079071045,
          3.447670223977184e-06,
          0.9999442100524902,
          8.012744956431561e-08,
          0.9939250349998474,
          0.5911915302276611,
          1.5658969232390518e-07,
          0.9999887943267822,
          3.603115103777447e-16,
          5.5331914427407387e-17,
          1.7816535546444356e-05,
          5.675827196682803e-06,
          0.0027481396682560444,
          1.4109918993199244e-06,
          1.9833645836797587e-09,
          1.4866420769976685e-06,
          0.015362952835857868
        ],
        "headpose": {
          "pitch": -21.144421519283572,
          "yaw": -14.294592140312451,
          "roll": 7.551227481255256
        },
        "attributes": {
          "5 oClock Shadow": 0.563060998916626,
          "Arched Eyebrows": 0.008741442114114761,
          "Attractive": 0.1540038287639618,
          "Bags Under Eyes": 0.048496950417757034,
          "Bald": 2.5424835257581435e-05,
          "Bangs": 0.0007733569364063442,
          "Big Lips": 0.15997456014156342,
          "Big Nose": 0.30545103549957275,
          "Black Hair": 0.7174102663993835,
          "Blond Hair": 3.170510535710491e-05,
          "Blurry": 0.014768371358513832,
          "Brown Hair": 0.012721587903797626,
          "Bushy Eyebrows": 0.04184488579630852,
          "Chubby": 0.02584056369960308,
          "Double Chin": 0.001971207791939378,
          "Eyeglasses": 0.17309218645095825,
          "Goatee": 0.6449846029281616,
          "Gray Hair": 0.00021931735682301223,
          "Heavy Makeup": 0.00035993201890960336,
          "High Cheekbones": 0.0024614082649350166,
          "Male": 0.9998464584350586,
          "Mouth Slightly Open": 0.007881470955908298,
          "Mustache": 0.6678273677825928,
          "Narrow Eyes": 0.08514535427093506,
          "No Beard": 0.007247717585414648,
          "Oval Face": 0.045507434755563736,
          "Pale Skin": 0.0023923353292047977,
          "Pointy Nose": 0.15712517499923706,
          "Receding Hairline": 0.030122796073555946,
          "Rosy Cheeks": 0.00024576703435741365,
          "Sideburns": 0.9065611362457275,
          "Smiling": 0.00026684600743465126,
          "Straight Hair": 0.33070749044418335,
          "Wavy Hair": 0.013497292064130306,
          "Wearing Earrings": 0.0010798079892992973,
          "Wearing Hat": 0.00103391008451581,
          "Wearing Lipstick": 0.0005932282074354589,
          "Wearing Necklace": 0.0029946493450552225,
          "Wearing Necktie": 0.043169017881155014,
          "Young": 0.7974309325218201
        },
        "age": [
          0.0014922715490683913,
          0.631818950176239,
          0.9982137680053711,
          0.9728086590766907,
          0.10647518932819366,
          6.345855217659846e-05,
          1.906574766508129e-06,
          1.758441925403531e-08
        ],
        "race": [
          0.8918442130088806,
          0.011253289878368378,
          0.9223891496658325,
          0.194353848695755,
          0.5142325758934021
        ],
        "gender": [
          0.9996960163116455,
          0.0008647540234960616
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 72.84647822380066,
          "disgust": 0.03790096379816532,
          "fear": 6.019732728600502,
          "happy": 0.10939785279333591,
          "sad": 11.810600757598877,
          "surprise": 0.022083929798100144,
          "neutral": 9.153804183006287
        },
        "dominant_emotion": "angry",
        "region": {
          "x": 0,
          "y": 0,
          "w": 242,
          "h": 266,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 37,
        "gender": {
          "Woman": 2.1309368312358856,
          "Man": 97.86906242370605
        },
        "dominant_gender": "Man",
        "race": {
          "asian": 6.928280740976334,
          "indian": 8.714311569929123,
          "black": 2.8377415612339973,
          "white": 34.47791039943695,
          "middle eastern": 29.123246669769287,
          "latino hispanic": 17.918506264686584
        },
        "dominant_race": "white"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "standalone",
                "blow"
              ]
            ],
            "negative_action": [
              "carry",
              "cut",
              "eat",
              "hold",
              "light",
              "make",
              "pick up",
              "no interaction"
            ],
            "position": "standalone"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.00041595386574044824,
        0.001457262085750699,
        0.8067721724510193,
        0.7303339838981628
      ]
    ],
    "face_boxes": [
      [
        0.10832064598798752,
        0.00999751128256321,
        0.4867205321788788,
        0.33608365058898926
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.2711761449111057,
            0.258809702222546
          ],
          [
            0.012512327078075543,
            0.24806094517310467
          ],
          [
            0.05368373060859241,
            0.5191684840867918
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.5298399627441359,
            0.26955845927198735
          ],
          [
            0.741067163465918,
            0.47617345588902643
          ],
          [
            0.6121827698051696,
            0.6039642341434955
          ],
          [
            0.060843974700856264,
            0.8249109068264563
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.2863916636071663,
            0.7126461109767359
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.2488003821227813,
            0.15371074440578614
          ],
          [
            0.42243630136017873,
            0.15609935708343983
          ],
          [
            0.08769489004684555,
            0.12385308593511582
          ],
          [
            0.48150831512135517,
            0.13221323030690355
          ]
        ],
        "dw_hand_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_hand_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_face": [
          [
            0.10559550027750512,
            0.11549294156332808
          ],
          [
            0.10738556130057099,
            0.14893351905047894
          ],
          [
            0.11454580539283485,
            0.1823740965376297
          ],
          [
            0.12528617153123056,
            0.21462036768595372
          ],
          [
            0.14139672073882414,
            0.24447802615662423
          ],
          [
            0.16645757506174744,
            0.27194707194964085
          ],
          [
            0.19867867347693458,
            0.2982218114038309
          ],
          [
            0.23626995496131972,
            0.3221079381803671
          ],
          [
            0.28102148053796844,
            0.3364396142462889
          ],
          [
            0.3275630671376832,
            0.3340510015686354
          ],
          [
            0.3633642875990022,
            0.3137477938085794
          ],
          [
            0.39021520294499146,
            0.28627874801556263
          ],
          [
            0.4170661182909809,
            0.258809702222546
          ],
          [
            0.4385468505677723,
            0.2313406564295292
          ],
          [
            0.45823752182149774,
            0.20148299795885888
          ],
          [
            0.4689778879598935,
            0.16804242047170792
          ],
          [
            0.4761381320521572,
            0.1334075366457303
          ],
          [
            0.17898800222320915,
            0.10952140986919404
          ],
          [
            0.2112091006383963,
            0.10474418451388674
          ],
          [
            0.2452202600766493,
            0.10832710353036719
          ],
          [
            0.27744135849183643,
            0.11549294156332808
          ],
          [
            0.3096624569070236,
            0.12265877959628897
          ],
          [
            0.39200526396805746,
            0.1262416986127694
          ],
          [
            0.4152760572679149,
            0.12146447325746221
          ],
          [
            0.4385468505677723,
            0.11429863522450132
          ],
          [
            0.46002758284456363,
            0.11071571620802087
          ],
          [
            0.47792819307522333,
            0.11429863522450132
          ],
          [
            0.3472537383914086,
            0.15729366342226658
          ],
          [
            0.3454636773683427,
            0.18117979019880295
          ],
          [
            0.3454636773683427,
            0.20626022331416607
          ],
          [
            0.3472537383914086,
            0.23014635009070245
          ],
          [
            0.295341968722496,
            0.24208941347897053
          ],
          [
            0.31503263997622144,
            0.24567233249545098
          ],
          [
            0.33651337225301303,
            0.24806094517310467
          ],
          [
            0.35620404350673845,
            0.24686663883427773
          ],
          [
            0.37231459271433204,
            0.24208941347897053
          ],
          [
            0.20762897859226442,
            0.1477392127116522
          ],
          [
            0.2380600159843856,
            0.1453506000339985
          ],
          [
            0.26670099235344075,
            0.1477392127116522
          ],
          [
            0.2917618466763642,
            0.15729366342226658
          ],
          [
            0.26312087030730885,
            0.16087658243874703
          ],
          [
            0.23268983291518772,
            0.15848796976109353
          ],
          [
            0.3848450198757937,
            0.15968227609992028
          ],
          [
            0.40453569112951915,
            0.1525164380669594
          ],
          [
            0.4295965454524426,
            0.1501278253893057
          ],
          [
            0.4528673387522999,
            0.15371074440578614
          ],
          [
            0.43138660647550847,
            0.16207088877757397
          ],
          [
            0.40811581317565104,
            0.16326519511640072
          ],
          [
            0.2738612364457047,
            0.27672429730494824
          ],
          [
            0.297132029745562,
            0.2731413782884678
          ],
          [
            0.32219288406848545,
            0.2707527656108141
          ],
          [
            0.33472331122994703,
            0.2707527656108141
          ],
          [
            0.3472537383914086,
            0.2707527656108141
          ],
          [
            0.3615742265759363,
            0.2731413782884678
          ],
          [
            0.37231459271433204,
            0.27672429730494824
          ],
          [
            0.3615742265759363,
            0.28508444167673586
          ],
          [
            0.3490437994144747,
            0.29225027970969675
          ],
          [
            0.331143189183815,
            0.2958331987261772
          ],
          [
            0.3096624569070236,
            0.2934445860485235
          ],
          [
            0.2917618466763642,
            0.28627874801556263
          ],
          [
            0.28102148053796844,
            0.27791860364377496
          ],
          [
            0.30608233486089187,
            0.27791860364377496
          ],
          [
            0.33472331122994703,
            0.27911290998260174
          ],
          [
            0.3508338604375406,
            0.27911290998260174
          ],
          [
            0.3651543486220683,
            0.27791860364377496
          ],
          [
            0.3490437994144747,
            0.2803072163214287
          ],
          [
            0.33293325020688114,
            0.2815015226602554
          ],
          [
            0.30608233486089187,
            0.27911290998260174
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "cake",
      "possible_names": [
        "cake"
      ],
      "box": [
        0.297423887587822,
        0.546875,
        0.9953161592505855,
        1.0
      ]
    }
  ],
  "scene": "A birthday cake adorned with candles and chocolate sprinkles sits on a table in a cozy room filled with colorful stuffed animals and books creating a warm celebratory atmosphere",
  "overall_past": "Before the current scene, the individual likely had just been surrounded by friends or family who gathered around the table, singing \"Happy Birthday\" in a joyful, heartfelt manner. The room’s warm, inviting atmosphere—filled with cherished stuffed animals and books—suggests this celebration was intimate and personal, possibly a quiet, meaningful milestone. The cake, decorated with chocolate sprinkles and now lit with candles, was likely brought out after a small gathering or gift-opening session, with the person being the center of attention. The anticipation in the air, combined with the focused posture and the glowing candles, indicates that the moment of singing had just concluded, and the individual was now preparing to make a wish before blowing out the flames—a cherished ritual marking the transition into another year of life.",
  "overall_past_clean": "Laughter and soft voices filled the space as people leaned in close, their faces lit with affection, the air thick with excitement as the final notes of a familiar tune faded into a hush, leaving only the quiet anticipation of what would come next.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the individual is likely to blow out the candles with a steady breath, causing the flames to flicker and fade one by one. As the last candle goes out, a moment of quiet anticipation may follow, perhaps accompanied by soft applause or cheers from nearby loved ones. Then, the room would likely fill with laughter and excitement as the birthday celebration continues—possibly with the cake being cut, shared, and enjoyed, while the surrounding stuffed animals and books silently witness the joyous milestone of another year lived.",
  "overall_future_clean": "The air hums with shared delight as laughter erupts, hands clap in unison, and voices rise in celebration, the moment swiftly giving way to the joyful chaos of cutting into the cake, passing slices, and savoring the sweetness of the occasion.",
  "future_scene_ok": true
}