{
  "image_path": "./ref_datasets/hico_det/images/train/train_00006760.jpg",
  "image_id": "train_00006760",
  "width": 640,
  "height": 480,
  "split": "train",
  "source": "zhimeng/hico_det",
  "dataset_index": 6760,
  "annotations": {
    "objects": "[{'id': 309, 'bbox_human': [10, 633, 26, 455], 'bbox_object': [209, 384, 271, 377], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('donut', 'hold')]",
    "negative_captions": "[('donut', 'carry'), ('donut', 'make'), ('donut', 'smell'), ('donut', 'no_interaction')]",
    "ambiguous_captions": "[('donut', 'buy'), ('donut', 'eat'), ('donut', 'pick_up')]",
    "positive_objects": "[308]",
    "negative_objects": "[306, 309, 311, 312]",
    "ambiguous_objects": "[305, 307, 310]",
    "size": "[640, 480, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "child",
        "gender": "female",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a grey T-shirt with a dark pattern or design on the front and a leopard-print beanie.",
        "clothing": [
          {
            "possible_names": [
              "shirt",
              "t-shirt"
            ],
            "name": "t shirt",
            "type": "top",
            "color": [
              "grey",
              "black"
            ]
          },
          {
            "possible_names": [
              "beanie",
              "hat"
            ],
            "name": "beanie",
            "type": "headwear",
            "color": [
              "white",
              "black"
            ]
          }
        ],
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "donut",
              "pastry",
              "snack"
            ],
            "name": "donut",
            "position": "hand"
          },
          {
            "standalone": true,
            "possible_names": [
              "bowl"
            ],
            "name": "bowl",
            "position": "standalone"
          }
        ],
        "description": "The person is a child in the foreground, appearing neutral. She is wearing a grey T-shirt with a dark pattern and a leopard-print beanie. She is holding a donut in her hands, and there is a bowl on the table in front of her, which she is not touching. The setting appears to be an indoor café with bookshelves and a window showing outside traffic.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The child appears to be focused on eating the food, showing a neutral expression without any strong emotions.",
        "meaningful": true,
        "story": "A young girl is enjoying a meal at a restaurant. She is wearing a knitted hat and seems to be engaged in eating her food, possibly a burger or sandwich. The setting suggests a casual dining experience during the day.",
        "race": "white",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "A young girl wearing a patterned knit hat is seated at a table holding a donut with both hands as if she is about to take a bite or examine it closely her expression appears neutral or slightly curious suggesting she might be enjoying the moment or contemplating the taste of the donut the setting seems casual likely a café or diner with books on shelves in the background indicating a relaxed environment for eating and possibly reading.",
        "intention": "The individual is savoring a moment of simple pleasure by carefully holding a donut as if appreciating its texture or flavor in a relaxed setting conducive to quiet enjoyment",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.39704358790602,
            0.3484436614172799
          ],
          [
            0.3993315760578428,
            0.3947937942686535
          ],
          [
            0.40340517727392056,
            0.4394746954951968
          ],
          [
            0.4088313340076379,
            0.4861045266900744
          ],
          [
            0.41680412739515305,
            0.529340033020292
          ],
          [
            0.43309017856206217,
            0.5631384565716698
          ],
          [
            0.45397714419024326,
            0.5922232525689262
          ],
          [
            0.47429073708398006,
            0.6156778931617737
          ],
          [
            0.500036457819598,
            0.6246500951903207
          ],
          [
            0.5297845655253955,
            0.6261571702503023
          ],
          [
            0.5570844837597437,
            0.609669532094683
          ],
          [
            0.5893707807574954,
            0.5876500861985343
          ],
          [
            0.612041134919439,
            0.5549770551068443
          ],
          [
            0.6292163367782321,
            0.5111149194694701
          ],
          [
            0.6391809689147132,
            0.46675586558523635
          ],
          [
            0.6479056243385587,
            0.4196212575549171
          ],
          [
            0.6559731300388064,
            0.3667074470292954
          ],
          [
            0.40605385548302103,
            0.31798214004153297
          ],
          [
            0.4262339472770691,
            0.2955320860658373
          ],
          [
            0.44839320970433094,
            0.29806036963349297
          ],
          [
            0.47226620359080174,
            0.3028255569792929
          ],
          [
            0.4904284413371768,
            0.3121117122826122
          ],
          [
            0.5340355262160301,
            0.31639706449849264
          ],
          [
            0.5573847953762326,
            0.310144590479987
          ],
          [
            0.5821696243115835,
            0.3126766348168964
          ],
          [
            0.6063387032066073,
            0.3194464274815151
          ],
          [
            0.6235112590449197,
            0.3376857680933816
          ],
          [
            0.5089139150721687,
            0.36147117898577735
          ],
          [
            0.5070119883332934,
            0.3919106679303306
          ],
          [
            0.5045113116502762,
            0.41997129292715163
          ],
          [
            0.501034007540771,
            0.450537512699763
          ],
          [
            0.48204170380319866,
            0.4775973828065963
          ],
          [
            0.4928046262689999,
            0.48177414281027653
          ],
          [
            0.5016536797795977,
            0.48761823631468276
          ],
          [
            0.5150374386991773,
            0.4838864178884597
          ],
          [
            0.524997598358563,
            0.4814119381564004
          ],
          [
            0.4330956696399621,
            0.3528694659471512
          ],
          [
            0.4476722627878189,
            0.34075432519117993
          ],
          [
            0.4699120532189097,
            0.3426845222711563
          ],
          [
            0.4825067009244647,
            0.3658596951337088
          ],
          [
            0.46591171515839436,
            0.36855008204778034
          ],
          [
            0.4450166592640536,
            0.3627892753907612
          ],
          [
            0.5434448570013046,
            0.37101292468252633
          ],
          [
            0.558274467076574,
            0.3520563592513402
          ],
          [
            0.5771549237625939,
            0.35187025439171565
          ],
          [
            0.5926923560244697,
            0.36874910124710625
          ],
          [
            0.5779140272310801,
            0.3768332650264104
          ],
          [
            0.5596337020397186,
            0.37579131906940827
          ],
          [
            0.46346116065979004,
            0.5293908034052167
          ],
          [
            0.47771790730101726,
            0.5248609923181079
          ],
          [
            0.49361890980175566,
            0.5178193421590895
          ],
          [
            0.5023808787975993,
            0.5222250478608268
          ],
          [
            0.5115145082984652,
            0.5191733964851925
          ],
          [
            0.5287823911224093,
            0.5293210872582027
          ],
          [
            0.5486154854297638,
            0.5365288853645325
          ],
          [
            0.5300781215940203,
            0.5460674734342665
          ],
          [
            0.5130782361541476,
            0.5521213185219537
          ],
          [
            0.5013980226857322,
            0.5513961641561418
          ],
          [
            0.4886523644839015,
            0.5487941021010989
          ],
          [
            0.47440980800560545,
            0.5423995171274458
          ],
          [
            0.4694015234708786,
            0.5303127595356533
          ],
          [
            0.48901484587362837,
            0.5303583854720706
          ],
          [
            0.50266454155956,
            0.5346013137272426
          ],
          [
            0.5144137974296298,
            0.5353453074182783
          ],
          [
            0.542426911847932,
            0.5383598123277937
          ],
          [
            0.5145272250686374,
            0.5339763874099368
          ],
          [
            0.5022408972893443,
            0.5344927481242588
          ],
          [
            0.48766454415661953,
            0.5303997894128164
          ]
        ],
        "visibility": [
          1.0,
          0.9999996423721313,
          0.9839333295822144,
          2.2046407366360654e-07,
          1.0,
          1.0,
          0.10399958491325378,
          0.646479070186615,
          4.174951300228713e-07,
          1.7935077778474806e-07,
          1.1863329329314665e-13,
          1.385638972268774e-13,
          9.253548341803253e-06,
          5.554445683144166e-17,
          2.8023900995322037e-06,
          5.3100452449980235e-17,
          1.5714986388541273e-16,
          4.299591553017651e-13,
          4.158494903094834e-06,
          4.384364274301333e-06,
          8.299643855069983e-19,
          4.856815036952122e-12,
          0.22458171844482422,
          0.994882345199585,
          6.73352915327996e-05,
          0.0043722535483539104,
          0.996299684047699,
          0.0037236574571579695,
          0.29606175422668457
        ],
        "headpose": {
          "pitch": -8.897454351570184,
          "yaw": 7.033447915749514,
          "roll": 1.9209168526456855
        },
        "attributes": {
          "5 oClock Shadow": 0.0007517041522078216,
          "Arched Eyebrows": 0.003966555465012789,
          "Attractive": 0.3471952974796295,
          "Bags Under Eyes": 0.158234104514122,
          "Bald": 6.3625102484365925e-06,
          "Bangs": 0.0065088095143437386,
          "Big Lips": 0.07410047948360443,
          "Big Nose": 0.118257075548172,
          "Black Hair": 0.002003144472837448,
          "Blond Hair": 0.08935925364494324,
          "Blurry": 0.0003239867219235748,
          "Brown Hair": 0.09862728416919708,
          "Bushy Eyebrows": 0.001588144339621067,
          "Chubby": 0.010607986710965633,
          "Double Chin": 0.0005607094499282539,
          "Eyeglasses": 0.00028638323419727385,
          "Goatee": 4.19151401729323e-05,
          "Gray Hair": 0.00027670265990309417,
          "Heavy Makeup": 0.00920924823731184,
          "High Cheekbones": 0.01764732412993908,
          "Male": 0.07325441390275955,
          "Mouth Slightly Open": 0.005196898709982634,
          "Mustache": 3.958977231377503e-06,
          "Narrow Eyes": 0.015276520512998104,
          "No Beard": 0.9989134073257446,
          "Oval Face": 0.19070051610469818,
          "Pale Skin": 0.16260312497615814,
          "Pointy Nose": 0.02077772282063961,
          "Receding Hairline": 0.0008250314276665449,
          "Rosy Cheeks": 0.000710931490175426,
          "Sideburns": 6.529586244141683e-05,
          "Smiling": 0.009527037851512432,
          "Straight Hair": 0.08425244688987732,
          "Wavy Hair": 0.04599376767873764,
          "Wearing Earrings": 0.04085828736424446,
          "Wearing Hat": 0.9267675876617432,
          "Wearing Lipstick": 0.02148093841969967,
          "Wearing Necklace": 0.02308095432817936,
          "Wearing Necktie": 0.0005184835754334927,
          "Young": 0.9962843060493469
        },
        "age": [
          0.9634546637535095,
          0.9994713664054871,
          0.6658589243888855,
          0.010272964835166931,
          0.000510807556565851,
          8.883383998181671e-06,
          2.1130153982085176e-06,
          5.650448429150856e-07
        ],
        "race": [
          0.9910104274749756,
          0.009542493149638176,
          0.6921985149383545,
          0.0652097687125206,
          0.15178844332695007
        ],
        "gender": [
          0.005438349209725857,
          0.9955987930297852
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 0.09947015227021602,
          "disgust": 1.790915078789859e-05,
          "fear": 5.766876729354468,
          "happy": 0.05396722760778399,
          "sad": 21.352679592690418,
          "surprise": 6.071876881244904e-06,
          "neutral": 72.72698554122276
        },
        "dominant_emotion": "neutral",
        "region": {
          "x": 0,
          "y": 0,
          "w": 219,
          "h": 249,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 34,
        "gender": {
          "Woman": 52.49883532524109,
          "Man": 47.50116467475891
        },
        "dominant_gender": "Woman",
        "race": {
          "asian": 34.426748752593994,
          "indian": 4.759642481803894,
          "black": 9.319886565208435,
          "white": 25.968503952026367,
          "middle eastern": 9.418263286352158,
          "latino hispanic": 16.10695719718933
        },
        "dominant_race": "asian"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "hand",
                "hold"
              ]
            ],
            "negative_action": [
              "carry",
              "make",
              "smell",
              "no interaction"
            ],
            "position": "hand"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.018166732043027878,
        0.03079102747142315,
        1.0,
        0.9475471377372742
      ]
    ],
    "face_boxes": [
      [
        0.4096541404724121,
        0.2543196380138397,
        0.6385661363601685,
        0.6017544865608215
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.5146977328281436,
            0.5373355653136969
          ],
          [
            0.31819474020351957,
            0.5247257476051649
          ],
          [
            0.05759184089385805,
            0.7825264652018193
          ],
          [
            0.2047063808267314,
            0.799339555479862
          ],
          [
            0.7112007254527676,
            0.5499453830222288
          ],
          [
            0.9676003521929184,
            0.8778006434440613
          ],
          [
            0.7490301785783636,
            0.8049439189058764
          ],
          [
            0.3602274658986264,
            0.9422508228432247
          ],
          [
            0.17318183655540142,
            0.9086246422871395
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.6292369103473094,
            0.9646682765472818
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.4590043712821272,
            0.35379266311173085
          ],
          [
            0.5682894580894047,
            0.36500138996375925
          ],
          [
            0.40015855530897787,
            0.36780357167676647
          ],
          [
            0.652354909479618,
            0.3958253888068377
          ]
        ],
        "dw_hand_1": [
          [
            0.7490301785783636,
            0.8021417371928692
          ],
          [
            0.7196072705917889,
            0.7432959212197197
          ],
          [
            0.6880827263204587,
            0.6872522869595774
          ],
          [
            0.6502532731948627,
            0.6676370149685276
          ],
          [
            0.614525456354022,
            0.650823924690485
          ],
          [
            0.6943876351747248,
            0.7292850126546843
          ],
          [
            0.6502532731948627,
            0.7124719223766415
          ],
          [
            0.60191563864549,
            0.6872522869595774
          ],
          [
            0.5703910943741599,
            0.6648348332555204
          ],
          [
            0.6880827263204587,
            0.7825264652018193
          ],
          [
            0.6355418192015755,
            0.7601090114977626
          ],
          [
            0.5851025483674472,
            0.7236806492286699
          ],
          [
            0.5556796403808726,
            0.6928566503855916
          ],
          [
            0.6796761811814374,
            0.821757009183919
          ],
          [
            0.627135274062554,
            0.8021417371928692
          ],
          [
            0.5872041846522027,
            0.7685155566367837
          ],
          [
            0.5577812766656279,
            0.7376915577937055
          ],
          [
            0.6670663634729055,
            0.84977882631399
          ],
          [
            0.627135274062554,
            0.8357679177489546
          ],
          [
            0.5998140023607347,
            0.8105482823318906
          ],
          [
            0.576696003228426,
            0.7853286469148266
          ]
        ],
        "dw_hand_2": [
          [
            0.20260474454197616,
            0.8077461006188834
          ],
          [
            0.22362110738952942,
            0.7573068297847553
          ],
          [
            0.24463747023708288,
            0.7068675589506273
          ],
          [
            0.28667019593218973,
            0.6872522869595774
          ],
          [
            0.326601285342541,
            0.6788457418205562
          ],
          [
            0.2404341976675722,
            0.7292850126546843
          ],
          [
            0.27616201450841305,
            0.7124719223766415
          ],
          [
            0.31819474020351957,
            0.6956588320985989
          ],
          [
            0.3560241933291156,
            0.6844501052465705
          ],
          [
            0.25514565166085956,
            0.7797242834888124
          ],
          [
            0.30138164992547695,
            0.7657133749237766
          ],
          [
            0.3476176481900943,
            0.7432959212197197
          ],
          [
            0.3833454650309351,
            0.7236806492286699
          ],
          [
            0.2656538330846363,
            0.821757009183919
          ],
          [
            0.31188983134925363,
            0.8077461006188834
          ],
          [
            0.3539225570443604,
            0.7825264652018193
          ],
          [
            0.3854471013156903,
            0.7629111932107695
          ],
          [
            0.2782636507931683,
            0.8525810080269972
          ],
          [
            0.3139914676340091,
            0.8413722811749687
          ],
          [
            0.34551601190533904,
            0.821757009183919
          ],
          [
            0.37073564732240316,
            0.799339555479862
          ]
        ],
        "dw_face": [
          [
            0.40646346416324375,
            0.33697957283368823
          ],
          [
            0.40646346416324375,
            0.37901229852879487
          ],
          [
            0.4085651004479992,
            0.4210450242239017
          ],
          [
            0.4127683730175097,
            0.4602755682060012
          ],
          [
            0.42117491815653113,
            0.502308293901108
          ],
          [
            0.4358863721498185,
            0.5387366561702003
          ],
          [
            0.45480109871261654,
            0.5695606550132787
          ],
          [
            0.48002073412968044,
            0.5975824721433497
          ],
          [
            0.5094436421162551,
            0.6059890172823711
          ],
          [
            0.5409681863875854,
            0.5975824721433497
          ],
          [
            0.5682894580894047,
            0.5779672001523001
          ],
          [
            0.5935090935064686,
            0.5527475647352361
          ],
          [
            0.6124238200692667,
            0.5219235658921576
          ],
          [
            0.627135274062554,
            0.4854952036230654
          ],
          [
            0.6355418192015755,
            0.4462646596409658
          ],
          [
            0.6418467280558413,
            0.4042319339458589
          ],
          [
            0.646050000625352,
            0.36219920825075225
          ],
          [
            0.42117491815653113,
            0.3173643008426384
          ],
          [
            0.4358863721498185,
            0.308957755703617
          ],
          [
            0.45480109871261654,
            0.31175993741662417
          ],
          [
            0.47161418899065916,
            0.31456211912963117
          ],
          [
            0.4884272792687018,
            0.3229686642686526
          ],
          [
            0.5388665501028299,
            0.3285730276946668
          ],
          [
            0.5577812766656279,
            0.3229686642686526
          ],
          [
            0.5787976395131814,
            0.3229686642686526
          ],
          [
            0.5977123660759794,
            0.32577084598165956
          ],
          [
            0.614525456354022,
            0.33697957283368823
          ],
          [
            0.5115452784010106,
            0.36500138996375925
          ],
          [
            0.5094436421162551,
            0.3930232070938305
          ],
          [
            0.5073420058314999,
            0.4238472059369087
          ],
          [
            0.5052403695467446,
            0.45186902306697974
          ],
          [
            0.48002073412968044,
            0.4714842950580298
          ],
          [
            0.4926305518382126,
            0.477088658484044
          ],
          [
            0.5052403695467446,
            0.47989084019705097
          ],
          [
            0.5199518235400319,
            0.477088658484044
          ],
          [
            0.5325616412485639,
            0.477088658484044
          ],
          [
            0.4358863721498185,
            0.35099048139872363
          ],
          [
            0.45269946242786113,
            0.3397817545466952
          ],
          [
            0.47161418899065916,
            0.3453861179727096
          ],
          [
            0.4842240066991913,
            0.36780357167676647
          ],
          [
            0.4674109164211485,
            0.3706057533897737
          ],
          [
            0.45059782614310573,
            0.36780357167676647
          ],
          [
            0.5430698226723406,
            0.37340793510278064
          ],
          [
            0.5577812766656279,
            0.3565948448247381
          ],
          [
            0.5787976395131814,
            0.35379266311173085
          ],
          [
            0.595610729791224,
            0.36780357167676647
          ],
          [
            0.5787976395131814,
            0.3818144802418021
          ],
          [
            0.5598829129503834,
            0.3818144802418021
          ],
          [
            0.4632076438516378,
            0.5163192024661434
          ],
          [
            0.48002073412968044,
            0.5135170207531362
          ],
          [
            0.494732188122968,
            0.5107148390401295
          ],
          [
            0.5031387332619893,
            0.5135170207531362
          ],
          [
            0.5136469146857658,
            0.5135170207531362
          ],
          [
            0.5325616412485639,
            0.5191213841791507
          ],
          [
            0.5514763678113619,
            0.527527929318172
          ],
          [
            0.5367649138180746,
            0.5387366561702003
          ],
          [
            0.5199518235400319,
            0.5471432013092217
          ],
          [
            0.5031387332619893,
            0.5471432013092217
          ],
          [
            0.4884272792687018,
            0.5415388378832074
          ],
          [
            0.47581746156016996,
            0.530330111031179
          ],
          [
            0.46951255270590375,
            0.5191213841791507
          ],
          [
            0.48632564298394654,
            0.5219235658921576
          ],
          [
            0.5031387332619893,
            0.5247257476051649
          ],
          [
            0.5241550961095426,
            0.527527929318172
          ],
          [
            0.545171458957096,
            0.527527929318172
          ],
          [
            0.5241550961095426,
            0.530330111031179
          ],
          [
            0.5031387332619893,
            0.527527929318172
          ],
          [
            0.48632564298394654,
            0.5247257476051649
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "donut",
      "possible_names": [
        "donut"
      ],
      "box": [
        0.3265625,
        0.5645833333333333,
        0.6,
        0.7854166666666667
      ]
    }
  ],
  "scene": "A sepia-toned image captures a cozy indoor setting with books on shelves and a view of cars outside through a window while a plate and glass sit on the table in front of the subject.",
  "overall_past": "Before the current scene, the young girl likely entered the cozy café or diner, perhaps drawn by the inviting atmosphere and the scent of freshly baked donuts. She may have browsed the bookshelves briefly, selecting a seat at the table with a view of the street, and then ordered the donut, possibly while chatting with a staff member or reading a book nearby. The act of holding the donut with both hands suggests she had just received it or was carefully considering it before taking the first bite, indicating a moment of anticipation and quiet enjoyment in a familiar, comforting space.",
  "overall_past_clean": "The figure stepped through the door, the bell above it chiming softly as the warmth of the interior enveloped them, drawn by the rich aroma of sweet pastries and the gentle hum of quiet conversation, paused briefly to scan the room before settling into a seat near the window, their gaze lingering on the passing world outside as they reached for the small paper bag placed before them.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the young girl is likely to take a bite of the donut, her neutral expression shifting slightly as she tastes it—perhaps a small smile forming if the flavor meets her expectations, or a momentary pause as she savor the sweetness. The quiet ambiance, enhanced by the books and distant view of cars, suggests a leisurely moment, so she may then set the donut down, reach for her drink, and continue sitting in the cozy space, perhaps picking up a book or gazing out the window, lost in thought or enjoying the stillness.",
  "overall_future_clean": "A soft breath escapes, followed by a deliberate pause, then the lips part slightly as the flavor lingers—eyes close for a heartbeat before opening again, gaze drifting toward the window, fingers curling around the edge of the cup, lifting it slowly to the mouth, the motion steady, the silence deepening as the moment stretches into stillness.",
  "future_scene_ok": true
}