{
  "image_path": "./ref_datasets/hico_det/images/test/test_00003240.jpg",
  "image_id": "test_00003240",
  "width": 640,
  "height": 480,
  "split": "test",
  "source": "zhimeng/hico_det",
  "dataset_index": 3240,
  "annotations": {
    "objects": "[{'id': 48, 'bbox_human': [1, 610, 118, 478], 'bbox_object': [203, 446, 204, 311], 'connection': 1, 'invis': 0}, {'id': 49, 'bbox_human': [10, 602, 125, 471], 'bbox_object': [214, 456, 210, 304], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('bottle', 'drink_with'), ('bottle', 'hold')]",
    "negative_captions": "[('bottle', 'carry'), ('bottle', 'inspect'), ('bottle', 'open'), ('bottle', 'no_interaction')]",
    "ambiguous_captions": "[('bottle', 'lick'), ('bottle', 'pour')]",
    "positive_objects": "[47, 48]",
    "negative_objects": "[46, 49, 51, 53]",
    "ambiguous_objects": "[50, 52]",
    "size": "[640, 480, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "baby",
        "gender": "unknown",
        "emotion": "neutral",
        "clothing_description": "The child is wearing a striped shirt and denim shorts. The shirt has rainbow-colored horizontal stripes and the shorts are black with some red decorative elements.",
        "clothing": {
          "vague": false,
          "clothing": [
            {
              "possible_names": [
                "shirt",
                "t-shirt",
                "top"
              ],
              "name": "shirt",
              "type": "top",
              "color": [
                "red",
                "white",
                "yellow",
                "green",
                "blue"
              ],
              "belonging_confident": true,
              "existence_confident": true
            },
            {
              "possible_names": [
                "shorts",
                "pants"
              ],
              "name": "shorts",
              "type": "bottom",
              "color": [
                "black"
              ],
              "belonging_confident": true,
              "existence_confident": true
            }
          ]
        },
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "bottle"
            ],
            "name": "bottle",
            "position": "hand"
          }
        ],
        "description": "The child, likely a baby, is lying on their back, drinking from a bottle. They are wearing a multicolored striped shirt and black shorts. The child's expression is neutral, and they are being held by someone whose partial presence can be seen in yellow clothing with polka dots. The baby is holding the bottle with their hands.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The baby appears to be calmly focused on drinking from the bottle, showing a neutral expression typical for an infant engaged in feeding.",
        "meaningful": true,
        "story": "A baby is being fed by an adult while lying comfortably on their lap. The baby seems content and absorbed in the act of drinking from the bottle, suggesting a peaceful moment of care and nourishment.",
        "race": "unknown",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The baby is lying on its back while being held by an adult, actively drinking from a bottle which suggests hunger or thirst as the primary motivation for this action. The baby's eyes are wide open, indicating alertness and engagement with the environment or possibly the person holding them. The colorful striped shirt and denim shorts suggest a casual setting, likely at home or during a relaxed outing. The baby's grip on the bottle shows coordination and self-feeding skills, which could be part of developing motor skills and independence in feeding. The adult appears to be providing support and comfort, ensuring the baby is safe and secure while eating.",
        "intention": "The individual is seeking nourishment while maintaining engagement with their surroundings and demonstrating developing motor skills through self-feeding",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.6853399891938482,
            0.25915387950482827
          ],
          [
            0.6684828321848597,
            0.28417473250911346
          ],
          [
            0.6451245314308576,
            0.30735076222391355
          ],
          [
            0.597605870451246,
            0.321939973107406
          ],
          [
            0.5476894902331488,
            0.3436936305392357
          ],
          [
            0.5047239809163979,
            0.3707161473376411
          ],
          [
            0.48185181607093136,
            0.3914758345910482
          ],
          [
            0.47459584325551984,
            0.43473140774738217
          ],
          [
            0.49399822162730356,
            0.46757170430251527
          ],
          [
            0.5046724637704236,
            0.5128601075637909
          ],
          [
            0.5363158037619932,
            0.5346584324325835
          ],
          [
            0.5801293455064297,
            0.5534911951848438
          ],
          [
            0.6200672127306461,
            0.5651725507917857
          ],
          [
            0.6908090181648732,
            0.561807957291603
          ],
          [
            0.7388498808656421,
            0.5585176157099859
          ],
          [
            0.7839476932372366,
            0.5495843677293687
          ],
          [
            0.8595082189355578,
            0.545014540070579
          ],
          [
            0.7230234478201185,
            0.277672969798247
          ],
          [
            0.7411885001829692,
            0.27494786282380423
          ],
          [
            0.738355057154383,
            0.29107910923305014
          ],
          [
            0.7508521184325219,
            0.3068539481432665
          ],
          [
            0.7573982634714672,
            0.31836091776688896
          ],
          [
            0.7947383252637727,
            0.38418514097020745
          ],
          [
            0.8161255236182894,
            0.39800364190623877
          ],
          [
            0.8222860902547836,
            0.41968082430816833
          ],
          [
            0.8243858331016132,
            0.4389685724462782
          ],
          [
            0.8151205963322095,
            0.4708535467584928
          ],
          [
            0.7291428617068699,
            0.37489087680975597
          ],
          [
            0.6945730849036148,
            0.3783003087554659
          ],
          [
            0.6772274485656193,
            0.38179356909933543
          ],
          [
            0.6393025223697935,
            0.38449225525061287
          ],
          [
            0.6299667162554605,
            0.3831116860111555
          ],
          [
            0.6299956946500711,
            0.38963674008846283
          ],
          [
            0.6281344472297601,
            0.40678327289365585
          ],
          [
            0.6374391411031995,
            0.42233716873895555
          ],
          [
            0.6530935327921595,
            0.4338728079483623
          ],
          [
            0.6985551087984017,
            0.3087507836875461
          ],
          [
            0.7170245962483542,
            0.3188000677596955
          ],
          [
            0.7251367939370019,
            0.3447684193650881
          ],
          [
            0.7132246732711792,
            0.3589913726562545
          ],
          [
            0.6926849357783794,
            0.34298743357261025
          ],
          [
            0.6946329659649304,
            0.3188849393100966
          ],
          [
            0.7482951609151705,
            0.42122084193286446
          ],
          [
            0.751598316856793,
            0.42549959591456826
          ],
          [
            0.7623800324542181,
            0.455520970196951
          ],
          [
            0.7547341944915907,
            0.47130636033557705
          ],
          [
            0.748169568819659,
            0.4579894275182769
          ],
          [
            0.7472537677202906,
            0.4336951471510388
          ],
          [
            0.570446732214519,
            0.38689133439745227
          ],
          [
            0.5878493346273899,
            0.40282430492696314
          ],
          [
            0.5961653336882591,
            0.4109368315055256
          ],
          [
            0.602622790741069,
            0.4173424874033247
          ],
          [
            0.6021385410002299,
            0.4221082224732354
          ],
          [
            0.6058519098375525,
            0.45168757381893343
          ],
          [
            0.5942231144223895,
            0.4842699723584311
          ],
          [
            0.570675301445382,
            0.4669267626035781
          ],
          [
            0.5488637439374413,
            0.45733646551767987
          ],
          [
            0.5424000091850758,
            0.4319312964166914
          ],
          [
            0.5499200645302024,
            0.4163738606941132
          ],
          [
            0.5557133428752422,
            0.39350934873024623
          ],
          [
            0.5735606998205185,
            0.38836756582771026
          ],
          [
            0.5859039717486926,
            0.40751705354168294
          ],
          [
            0.5867335235433919,
            0.42784094257014144
          ],
          [
            0.5972664745790618,
            0.4584761178919247
          ],
          [
            0.596180213455643,
            0.4755208321980068
          ],
          [
            0.582337171477931,
            0.45612404899937764
          ],
          [
            0.572692719740527,
            0.4324311202480679
          ],
          [
            0.562846390902996,
            0.4066763754401888
          ]
        ],
        "visibility": [
          0.9999978542327881,
          5.8091329265153036e-05,
          0.9999657869338989,
          8.500754353907292e-13,
          0.9999799728393555,
          1.0,
          7.628161898254163e-19,
          2.8053506749331405e-15,
          1.0,
          1.3555651889873843e-07,
          1.0,
          0.00658388389274478,
          0.9991583824157715,
          0.9999958276748657,
          6.445056044412922e-08,
          5.127959411765914e-06,
          0.999362051486969,
          6.242236167963711e-07,
          0.9376487731933594,
          0.24540647864341736,
          0.00011635178816504776,
          5.36259904038161e-07,
          0.9999942779541016,
          3.9407135773217306e-05,
          0.004033500328660011,
          0.012038304470479488,
          0.9658726453781128,
          0.9951615929603577,
          0.8092790842056274
        ],
        "headpose": {
          "pitch": 32.24380335453987,
          "yaw": 27.39389946832538,
          "roll": 68.08877613764477
        },
        "attributes": {
          "5 oClock Shadow": 0.0027291947044432163,
          "Arched Eyebrows": 0.05976361781358719,
          "Attractive": 0.0149083798751235,
          "Bags Under Eyes": 0.0045221769250929356,
          "Bald": 0.0035924604162573814,
          "Bangs": 0.00010632691555656493,
          "Big Lips": 0.08994676917791367,
          "Big Nose": 0.015517781488597393,
          "Black Hair": 0.02111995592713356,
          "Blond Hair": 0.001489761983975768,
          "Blurry": 0.08786271512508392,
          "Brown Hair": 0.00179683743044734,
          "Bushy Eyebrows": 0.00021972112881485373,
          "Chubby": 0.0073715802282094955,
          "Double Chin": 0.0007058958290144801,
          "Eyeglasses": 0.00306293205358088,
          "Goatee": 0.00013209512690082192,
          "Gray Hair": 0.0017756815068423748,
          "Heavy Makeup": 0.002899589017033577,
          "High Cheekbones": 0.005077861249446869,
          "Male": 0.3753540813922882,
          "Mouth Slightly Open": 0.050022903829813004,
          "Mustache": 8.981610153568909e-05,
          "Narrow Eyes": 0.00985488947480917,
          "No Beard": 0.9868264198303223,
          "Oval Face": 0.01821052096784115,
          "Pale Skin": 0.0029119718819856644,
          "Pointy Nose": 0.20781564712524414,
          "Receding Hairline": 0.018021322786808014,
          "Rosy Cheeks": 0.0003383338626008481,
          "Sideburns": 0.0003482735774014145,
          "Smiling": 0.0003751874319277704,
          "Straight Hair": 0.03343982994556427,
          "Wavy Hair": 0.002862310968339443,
          "Wearing Earrings": 0.0609436109662056,
          "Wearing Hat": 0.08618596196174622,
          "Wearing Lipstick": 0.013395729474723339,
          "Wearing Necklace": 0.037254661321640015,
          "Wearing Necktie": 0.0012372041819617152,
          "Young": 0.6960923671722412
        },
        "age": [
          0.9978725910186768,
          0.05096247047185898,
          0.4734545052051544,
          0.003061354160308838,
          0.001279277028515935,
          0.00020185767789371312,
          0.0027571700047701597,
          0.0011693391716107726
        ],
        "race": [
          0.3326323330402374,
          0.10031003504991531,
          0.8326749205589294,
          0.06588570028543472,
          0.9219469428062439
        ],
        "gender": [
          0.6819036602973938,
          0.35855117440223694
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 0.7193763740360737,
          "disgust": 69.06559467315674,
          "fear": 12.385528534650803,
          "happy": 0.0036104855098528787,
          "sad": 17.748717963695526,
          "surprise": 4.034182410350695e-06,
          "neutral": 0.07716576801612973
        },
        "dominant_emotion": "disgust",
        "region": {
          "x": 0,
          "y": 0,
          "w": 357,
          "h": 240,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 31,
        "gender": {
          "Woman": 12.845171988010406,
          "Man": 87.15482950210571
        },
        "dominant_gender": "Man",
        "race": {
          "asian": 40.388888120651245,
          "indian": 8.65173190832138,
          "black": 7.2377316653728485,
          "white": 9.80544239282608,
          "middle eastern": 3.4394022077322006,
          "latino hispanic": 30.476802587509155
        },
        "dominant_race": "asian"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "mouth",
                "drink with"
              ],
              [
                "mouth",
                "hold"
              ]
            ],
            "negative_action": [
              "carry",
              "inspect",
              "open",
              "no interaction"
            ],
            "position": "mouth"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.0,
        0.24799925088882446,
        0.93939208984375,
        0.9981260299682617
      ]
    ],
    "face_boxes": [
      [
        0.4857960641384125,
        0.27235788106918335,
        0.8586152195930481,
        0.6070939302444458
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.6473420055376159,
            0.5644503057003021
          ],
          [
            0.5913649284177357,
            0.4355334008181535
          ],
          [
            0.457019943330023,
            0.3541121977346915
          ],
          [
            0.534370086259312,
            0.3893947190708584
          ],
          [
            0.7033190826574961,
            0.6933672105824508
          ],
          [
            0.5689740975697836,
            0.8426394162354646
          ],
          [
            0.5995070487260818,
            0.6255162080128989
          ],
          [
            0.28603541685475237,
            0.6038038871906422
          ],
          [
            0.10487323999404907,
            0.5766634861628216
          ],
          [
            0.07230475876066418,
            0.6336583283212449
          ],
          [
            0.324710488319397,
            0.785644574077041
          ],
          [
            0.09673111968570289,
            0.5848056064711676
          ],
          [
            0.10080217983987598,
            0.891492138085542
          ],
          [
            0.7094256728887558,
            0.373110478454166
          ],
          [
            0.7623494548930061,
            0.4518176414348461
          ],
          [
            0.680928251809544,
            0.3595402779402556
          ],
          [
            0.8152732368972566,
            0.5766634861628216
          ]
        ],
        "dw_hand_1": [
          [
            0.5995070487260818,
            0.6390864085268091
          ],
          [
            0.621897879574034,
            0.6146600476017706
          ],
          [
            0.6483597705761591,
            0.5929477267795139
          ],
          [
            0.6503953006532457,
            0.5576652054433471
          ],
          [
            0.6483597705761591,
            0.5223826841071799
          ],
          [
            0.6605729510386785,
            0.5712354059572572
          ],
          [
            0.662608481115765,
            0.5332388445183084
          ],
          [
            0.658537420961592,
            0.5115265236960517
          ],
          [
            0.6565018908845055,
            0.4925282429765774
          ],
          [
            0.6463242404990727,
            0.5576652054433471
          ],
          [
            0.6503953006532457,
            0.5223826841071799
          ],
          [
            0.644288710421986,
            0.4979563231821412
          ],
          [
            0.640217650267813,
            0.47895804246266693
          ],
          [
            0.6280044698052937,
            0.5495230851350007
          ],
          [
            0.6300399998823801,
            0.5169546039016161
          ],
          [
            0.625968939728207,
            0.4979563231821412
          ],
          [
            0.625968939728207,
            0.4816720825654489
          ],
          [
            0.603578108880255,
            0.5468090450322186
          ],
          [
            0.6056136389573415,
            0.5250967242099621
          ],
          [
            0.607649169034428,
            0.5088124835932697
          ],
          [
            0.6096846991115147,
            0.4925282429765774
          ]
        ],
        "dw_hand_2": [
          [
            0.5323345561822256,
            0.3921087591736406
          ],
          [
            0.5649030374156105,
            0.40567895968755097
          ],
          [
            0.5974715186489952,
            0.40567895968755097
          ],
          [
            0.6157912893427742,
            0.41110703989311487
          ],
          [
            0.6300399998823801,
            0.4192491602014614
          ],
          [
            0.6117202291886011,
            0.3676823982486019
          ],
          [
            0.6239334096511205,
            0.38125259876251233
          ],
          [
            0.6320755299594667,
            0.4029649195847688
          ],
          [
            0.6320755299594667,
            0.4192491602014614
          ],
          [
            0.5954359885719087,
            0.3676823982486019
          ],
          [
            0.6096846991115147,
            0.38668067896807623
          ],
          [
            0.6178268194198608,
            0.4165351200986792
          ],
          [
            0.6198623494969475,
            0.44096148102371785
          ],
          [
            0.5771162178781297,
            0.373110478454166
          ],
          [
            0.589329398340649,
            0.3921087591736406
          ],
          [
            0.5954359885719087,
            0.42467724040702526
          ],
          [
            0.5995070487260818,
            0.44910360133206395
          ],
          [
            0.5567609171072642,
            0.38396663886529425
          ],
          [
            0.5649030374156105,
            0.4002508794819866
          ],
          [
            0.5689740975697836,
            0.4192491602014614
          ],
          [
            0.5750806878010432,
            0.4382474409209357
          ]
        ],
        "dw_face": [
          [
            0.6972124924262365,
            0.34868411752912737
          ],
          [
            0.6788927217324575,
            0.3676823982486019
          ],
          [
            0.6605729510386785,
            0.3893947190708584
          ],
          [
            0.6463242404990727,
            0.41382107999589707
          ],
          [
            0.6361465901136398,
            0.44367552112650005
          ],
          [
            0.6280044698052937,
            0.4735299622571026
          ],
          [
            0.6280044698052937,
            0.5060984434904875
          ],
          [
            0.6361465901136398,
            0.5359528846210904
          ],
          [
            0.6524308307303321,
            0.5576652054433471
          ],
          [
            0.6727861315011978,
            0.5793775262656035
          ],
          [
            0.6931414322720635,
            0.590233686676732
          ],
          [
            0.7155322631200155,
            0.6010898470878602
          ],
          [
            0.7399586240450541,
            0.6010898470878602
          ],
          [
            0.7623494548930061,
            0.5929477267795139
          ],
          [
            0.7847402857409584,
            0.5766634861628216
          ],
          [
            0.8050955865118239,
            0.5576652054433471
          ],
          [
            0.8234153572056029,
            0.5386669247238725
          ],
          [
            0.7073901428116691,
            0.335113917015217
          ],
          [
            0.7155322631200155,
            0.3378279571179989
          ],
          [
            0.7236743834283617,
            0.34597007742634545
          ],
          [
            0.7297809736596215,
            0.3568262378374737
          ],
          [
            0.7338520338137945,
            0.3676823982486019
          ],
          [
            0.7623494548930061,
            0.3975368393792047
          ],
          [
            0.7765981654326121,
            0.41110703989311487
          ],
          [
            0.7888113458951315,
            0.42739128050980746
          ],
          [
            0.7989889962805641,
            0.446389561229282
          ],
          [
            0.8030600564347374,
            0.4681018820515387
          ],
          [
            0.7277454435825348,
            0.4029649195847688
          ],
          [
            0.717567793197102,
            0.41110703989311487
          ],
          [
            0.7053546127345827,
            0.4165351200986792
          ],
          [
            0.6951769623491499,
            0.42196320030424334
          ],
          [
            0.676857191655371,
            0.42739128050980746
          ],
          [
            0.6788927217324575,
            0.4382474409209357
          ],
          [
            0.6849993119637171,
            0.446389561229282
          ],
          [
            0.6911059021949768,
            0.4572457216404102
          ],
          [
            0.699248022503323,
            0.4681018820515387
          ],
          [
            0.6972124924262365,
            0.36496835814581974
          ],
          [
            0.7094256728887558,
            0.3676823982486019
          ],
          [
            0.717567793197102,
            0.37853855865973013
          ],
          [
            0.7155322631200155,
            0.3948227992764227
          ],
          [
            0.7053546127345827,
            0.3893947190708584
          ],
          [
            0.6972124924262365,
            0.37853855865973013
          ],
          [
            0.7440296841992271,
            0.4355334008181535
          ],
          [
            0.7603139248159196,
            0.4382474409209357
          ],
          [
            0.7725271052784389,
            0.4518176414348461
          ],
          [
            0.7745626353555256,
            0.4735299622571026
          ],
          [
            0.7603139248159196,
            0.4681018820515387
          ],
          [
            0.7501362744304869,
            0.4545316815376283
          ],
          [
            0.6483597705761591,
            0.4545316815376283
          ],
          [
            0.6565018908845055,
            0.4545316815376283
          ],
          [
            0.662608481115765,
            0.4599597617431922
          ],
          [
            0.6666795412699381,
            0.46538784194875654
          ],
          [
            0.6707506014241111,
            0.4735299622571026
          ],
          [
            0.6788927217324575,
            0.4952422830793593
          ],
          [
            0.6849993119637171,
            0.5142405637988339
          ],
          [
            0.6727861315011978,
            0.5060984434904875
          ],
          [
            0.662608481115765,
            0.4979563231821412
          ],
          [
            0.6544663608074188,
            0.48438612266823106
          ],
          [
            0.6503953006532457,
            0.4762440023598848
          ],
          [
            0.6483597705761591,
            0.46538784194875654
          ],
          [
            0.6503953006532457,
            0.4572457216404102
          ],
          [
            0.6565018908845055,
            0.46538784194875654
          ],
          [
            0.662608481115765,
            0.4735299622571026
          ],
          [
            0.6727861315011978,
            0.4925282429765774
          ],
          [
            0.6829637818866306,
            0.5115265236960517
          ],
          [
            0.6727861315011978,
            0.4925282429765774
          ],
          [
            0.6605729510386785,
            0.4762440023598848
          ],
          [
            0.6565018908845055,
            0.46538784194875654
          ]
        ],
        "dw_foot_1": [
          [
            0.02548756698767347,
            0.8100709350020796
          ],
          [
            0.02955862714184656,
            0.8534955766465928
          ],
          [
            0.09673111968570289,
            0.9132044589077987
          ]
        ],
        "dw_foot_2": [
          [
            0.02548756698767347,
            0.7720743735631308
          ],
          [
            0.013274386525154114,
            0.5820915663683857
          ],
          [
            0.07230475876066418,
            0.8453534563382468
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "bottle",
      "possible_names": [
        "bottle"
      ],
      "box": [
        0.3171875,
        0.425,
        0.696875,
        0.6479166666666667
      ]
    }
  ],
  "scene": "A baby is being fed from a bottle while lying on a yellow polka dot fabric in a cozy indoor setting with soft lighting and neutral colors around",
  "overall_past": "Before the current scene, the baby likely began showing signs of hunger—such as rooting, sucking on hands, or becoming fussy—prompting the adult to prepare a bottle. The baby may have been sitting upright or in a semi-reclined position earlier, possibly being held or placed in a safe, comfortable spot, before being gently laid down on the yellow polka dot fabric to feed. The adult would have warmed the bottle, checked the flow of the milk, and ensured the baby was in a relaxed, supported position to feed safely. This setup suggests a routine feeding moment in a calm, familiar environment, where the baby’s alertness and active grip on the bottle indicate readiness and comfort with the process.",
  "overall_past_clean": "The individual began noticing subtle cues of distress, responding with immediate attention by adjusting posture and positioning to create a secure, soothing environment, then initiated the feeding process by preparing the necessary supplies with deliberate care and precision.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the baby is likely to finish drinking from the bottle, show signs of satisfaction such as slowing down or stopping sucking, and possibly release the bottle with a contented expression. The adult may then gently burp the baby to help release any swallowed air, a common step after feeding. Following this, the baby might be placed on their side or tummy for a short period to aid digestion, or the adult may begin to engage in soothing interaction—such as soft talking, gentle rocking, or playing with a toy—to help transition the baby into a calm, relaxed state, possibly leading to sleep.",
  "overall_future_clean": "The infant settles into a peaceful stillness, breathing softly as the caregiver cradles them close, murmuring gently while swaying slightly, their body gradually growing heavier with drowsiness before drifting into quiet slumber.",
  "future_scene_ok": true
}