{
  "image_path": "./ref_datasets/hico_det/images/train/train_00023445.jpg",
  "image_id": "train_00023445",
  "width": 640,
  "height": 427,
  "split": "train",
  "source": "zhimeng/hico_det",
  "dataset_index": 23445,
  "annotations": {
    "objects": "[{'id': 590, 'bbox_human': [25, 574, 6, 423], 'bbox_object': [289, 553, 138, 291], 'connection': 1, 'invis': 0}, {'id': 591, 'bbox_human': [10, 585, 4, 421], 'bbox_object': [303, 557, 117, 310], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('wine_glass', 'hold'), ('wine_glass', 'sip')]",
    "negative_captions": "[('wine_glass', 'fill'), ('wine_glass', 'toast'), ('wine_glass', 'lick'), ('wine_glass', 'wash'), ('wine_glass', 'no_interaction')]",
    "ambiguous_captions": "[]",
    "positive_objects": "[589, 590]",
    "negative_objects": "[588, 591, 592, 593, 594]",
    "ambiguous_objects": "[]",
    "size": "[640, 427, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "female",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a black sleeveless top and a pair of dangling earrings.",
        "clothing": [
          {
            "possible_names": [
              "top",
              "tank top",
              "sleeveless"
            ],
            "name": "top",
            "type": "top",
            "color": [
              "black"
            ]
          },
          {
            "possible_names": [
              "earrings",
              "jewelry"
            ],
            "name": "earrings",
            "type": "accessory",
            "color": [
              "silver"
            ]
          }
        ],
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "wine glass",
              "glass"
            ],
            "name": "wine glass",
            "position": "hand"
          },
          {
            "standalone": false,
            "possible_names": [
              "ring",
              "jewelry"
            ],
            "name": "ring",
            "position": "hand"
          }
        ],
        "description": "The person is an adult female in the foreground, characterized by a neutral emotion. She is wearing a black sleeveless top and silver earrings. In her hand, she holds a wine glass filled with red wine, and she is also wearing a ring on her finger.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears focused on tasting the wine, showing a neutral expression typical for concentration.",
        "meaningful": true,
        "story": "A woman is engaged in wine tasting, holding a glass of red wine up to her nose, likely evaluating its aroma and flavor profile. The setting suggests a casual yet sophisticated environment, possibly a wine cellar or tasting room.",
        "race": "asian",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is holding a wine glass close to their mouth, appearing to be tasting the wine, which suggests they might be at a wine tasting event or enjoying a moment of relaxation with a drink. Their focused expression indicates an appreciation for the nuances of the wine's flavor, possibly evaluating its quality or simply savoring the experience. The presence of another hand holding a similar glass in the background implies that they may be sharing this activity with someone else, enhancing the social aspect of the occasion. The setting, with its stone wall and framed picture, adds to the ambiance, suggesting a sophisticated or intimate environment conducive to such an activity.",
        "intention": "The individual is engaging in a deliberate act of sensory appreciation to evaluate or enjoy the wine while sharing a moment of refined leisure with another person",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.23021134191325734,
            0.3306100053426692
          ],
          [
            0.241272380735193,
            0.4154786208514268
          ],
          [
            0.26319019539015637,
            0.5014161195751815
          ],
          [
            0.2914994810308729,
            0.5651488541838875
          ],
          [
            0.3311105992112841,
            0.6098661116430615
          ],
          [
            0.3768614598682949,
            0.6413184437475782
          ],
          [
            0.4187313812119621,
            0.6504681983812473
          ],
          [
            0.4619974562100001,
            0.6510255835294803
          ],
          [
            0.5019257136753627,
            0.6346555963811685
          ],
          [
            0.4985005072184972,
            0.5751106106505102
          ],
          [
            0.4977115290505546,
            0.5409572495389997
          ],
          [
            0.5048635499817985,
            0.49724756670301284
          ],
          [
            0.5053585835865565,
            0.45195378201109143
          ],
          [
            0.5024511711938041,
            0.3911296467032868
          ],
          [
            0.48680834940501627,
            0.32098180643887614
          ],
          [
            0.47822857584272105,
            0.2708728708284441
          ],
          [
            0.48388826676777424,
            0.21871617702625792
          ],
          [
            0.3908557321344103,
            0.23755475165812856
          ],
          [
            0.4059650242328644,
            0.190907109338091
          ],
          [
            0.42465535913194935,
            0.17141359232867112
          ],
          [
            0.4495252864701408,
            0.1554800716679644
          ],
          [
            0.46694143159048895,
            0.14292348432237786
          ],
          [
            0.4747886555535453,
            0.15123624457049506
          ],
          [
            0.4790306585175651,
            0.14809145621130324
          ],
          [
            0.47869763033730645,
            0.14902429582283783
          ],
          [
            0.47952377625874104,
            0.15604061121604004
          ],
          [
            0.4853315251214164,
            0.1633397576481094
          ],
          [
            0.4779955557414464,
            0.21897113071950872
          ],
          [
            0.4991211192948478,
            0.2535057698017221
          ],
          [
            0.5198698452540806,
            0.29717021261261795
          ],
          [
            0.5336564251354762,
            0.34397775874005465
          ],
          [
            0.4863686459405082,
            0.40331239722651674
          ],
          [
            0.49743743283408026,
            0.39380740864135705
          ],
          [
            0.507743603842599,
            0.39266489580268643
          ],
          [
            0.512268260547093,
            0.3856267018157051
          ],
          [
            0.5110642296927315,
            0.3942276178374662
          ],
          [
            0.41333199228559214,
            0.25984212330409456
          ],
          [
            0.4303371974400111,
            0.24849070542310941
          ],
          [
            0.44642066785267415,
            0.23421149223534388
          ],
          [
            0.44362322602953236,
            0.25597992466619957
          ],
          [
            0.44349672794342043,
            0.26090957760292377
          ],
          [
            0.428387691293444,
            0.26626936126688566
          ],
          [
            0.4813509481293815,
            0.22855309205296132
          ],
          [
            0.4776555282729012,
            0.2156160625178266
          ],
          [
            0.4802311386380877,
            0.19517191184647867
          ],
          [
            0.4765636648450579,
            0.2200340286630099
          ],
          [
            0.4822651726858956,
            0.2213338570835679
          ],
          [
            0.48130604028701784,
            0.23157972522147247
          ],
          [
            0.4593477913311549,
            0.48986815831939606
          ],
          [
            0.48724442550114216,
            0.47276791412721075
          ],
          [
            0.5133600217955452,
            0.4415484144760239
          ],
          [
            0.5169779641287666,
            0.4604107549494428
          ],
          [
            0.5173019749777656,
            0.45034808464854326
          ],
          [
            0.5131373473576136,
            0.47295103108216857
          ],
          [
            0.5044500044413975,
            0.46401205506361454
          ],
          [
            0.5105377503803797,
            0.48312631615351254
          ],
          [
            0.5137715237481254,
            0.5028201240725882
          ],
          [
            0.5102405105318342,
            0.5242748420028712
          ],
          [
            0.49908857515880045,
            0.5202190926259239
          ],
          [
            0.47797529867717203,
            0.5149575536886798
          ],
          [
            0.46101426056453165,
            0.4908646251414284
          ],
          [
            0.49170902626855034,
            0.47197839750339055
          ],
          [
            0.5089307750974383,
            0.47834936852198295
          ],
          [
            0.5095211931637355,
            0.4785828480316907
          ],
          [
            0.5132574336869377,
            0.46919379038091247
          ],
          [
            0.5094070843287877,
            0.4812666222019393
          ],
          [
            0.5101921541350227,
            0.5047528275521393
          ],
          [
            0.490626563344683,
            0.49239103113699617
          ]
        ],
        "visibility": [
          0.9998100399971008,
          0.988052487373352,
          0.9998773336410522,
          0.2186419516801834,
          0.0768224373459816,
          0.8210323452949524,
          7.580750889246701e-07,
          7.111559258143885e-11,
          0.0001828255335567519,
          3.1088546847968246e-07,
          1.800375493132833e-08,
          1.793622175227938e-07,
          9.182054782286286e-06,
          8.233452114536632e-12,
          3.926651226393574e-10,
          4.5797265583796473e-14,
          1.1661716925459586e-08,
          1.6287473592058177e-09,
          1.032976086889903e-07,
          0.9700166583061218,
          1.1653328302330124e-09,
          0.005459895823150873,
          2.7091355150332674e-06,
          0.9998093247413635,
          1.2976357766092406e-07,
          0.00047298226854763925,
          0.27085065841674805,
          0.9958685636520386,
          0.9988777041435242
        ],
        "headpose": {
          "pitch": 8.547647067001627,
          "yaw": -70.0638277889915,
          "roll": -8.583393715796536
        },
        "attributes": {
          "5 oClock Shadow": 0.004893676843494177,
          "Arched Eyebrows": 0.0036773032043129206,
          "Attractive": 0.17362681031227112,
          "Bags Under Eyes": 0.16278482973575592,
          "Bald": 6.582988021364145e-08,
          "Bangs": 0.08115394413471222,
          "Big Lips": 0.09592712670564651,
          "Big Nose": 0.009426671080291271,
          "Black Hair": 0.24889259040355682,
          "Blond Hair": 0.001132151111960411,
          "Blurry": 0.003663052339106798,
          "Brown Hair": 0.3393319845199585,
          "Bushy Eyebrows": 0.007879919372498989,
          "Chubby": 0.0004164587880950421,
          "Double Chin": 5.348689228412695e-05,
          "Eyeglasses": 0.00017826400289777666,
          "Goatee": 0.00034769365447573364,
          "Gray Hair": 0.0001101570887840353,
          "Heavy Makeup": 0.01283095870167017,
          "High Cheekbones": 0.08340367674827576,
          "Male": 0.12928485870361328,
          "Mouth Slightly Open": 0.12480040639638901,
          "Mustache": 3.792637653532438e-05,
          "Narrow Eyes": 0.06530284136533737,
          "No Beard": 0.9928049445152283,
          "Oval Face": 0.03622807189822197,
          "Pale Skin": 0.0008036251529119909,
          "Pointy Nose": 0.3720121681690216,
          "Receding Hairline": 0.0006277339998632669,
          "Rosy Cheeks": 0.0003531975962687284,
          "Sideburns": 0.00026335378061048687,
          "Smiling": 0.009569499641656876,
          "Straight Hair": 0.1905928999185562,
          "Wavy Hair": 0.2942716181278229,
          "Wearing Earrings": 0.43667587637901306,
          "Wearing Hat": 0.0015148159582167864,
          "Wearing Lipstick": 0.03438205644488335,
          "Wearing Necklace": 0.020246464759111404,
          "Wearing Necktie": 0.0008015648345462978,
          "Young": 0.9586445093154907
        },
        "age": [
          0.007680521346628666,
          0.86297208070755,
          0.968353271484375,
          0.7697365283966064,
          0.4502778649330139,
          0.0015164035139605403,
          6.460304575739428e-05,
          1.0984695819615808e-07
        ],
        "race": [
          0.3535914719104767,
          0.008250560611486435,
          0.5080928206443787,
          0.9547299742698669,
          0.826178789138794
        ],
        "gender": [
          0.045705847442150116,
          0.9597354531288147
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 12.804219162967104,
          "disgust": 7.507780182149658e-07,
          "fear": 45.26785555694766,
          "happy": 0.03192607926813713,
          "sad": 4.335154364723487,
          "surprise": 0.06220226219099905,
          "neutral": 37.498642504053414
        },
        "dominant_emotion": "fear",
        "region": {
          "x": 0,
          "y": 0,
          "w": 239,
          "h": 337,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 35,
        "gender": {
          "Woman": 99.99898672103882,
          "Man": 0.0010132184797839727
        },
        "dominant_gender": "Woman",
        "race": {
          "asian": 0.2630023052915931,
          "indian": 3.6763474345207214,
          "black": 84.0968370437622,
          "white": 1.9274983555078506,
          "middle eastern": 1.243890356272459,
          "latino hispanic": 8.792426437139511
        },
        "dominant_race": "black"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "mouth",
                "hold"
              ],
              [
                "mouth",
                "sip"
              ]
            ],
            "negative_action": [
              "fill",
              "toast",
              "lick",
              "wash",
              "no interaction"
            ],
            "position": "mouth"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.0012943267356604338,
        0.005098508205264807,
        0.8471003770828247,
        0.9892495274543762
      ]
    ],
    "face_boxes": [
      [
        0.2687351703643799,
        0.07133352011442184,
        0.5187238454818726,
        0.6477320790290833
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.22737211354283823,
            0.8596818083870575
          ],
          [
            0.3122001104884678,
            0.9280380800060078
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.7573190622031689,
            0.9499120869240724
          ],
          [
            0.1425441165972087,
            0.7913255367681071
          ],
          [
            0.25747237052354544,
            0.9253038291412502
          ],
          [
            0.40706152642766646,
            0.9389750834650403
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.42712836441480456,
            0.29642613024690534
          ],
          [
            0.4763833303832346,
            0.28548912678787297
          ],
          [
            0.23375701653874578,
            0.40579616483722597
          ],
          [
            0.22828424254225368,
            0.40579616483722597
          ]
        ],
        "dw_hand_1": [
          [
            0.4034130104300047,
            0.9581148395183463
          ],
          [
            0.4143585584229893,
            0.9061640730879439
          ],
          [
            0.43442539641012756,
            0.8733530627108476
          ],
          [
            0.4709105563867425,
            0.8542133066575414
          ],
          [
            0.5238140383528339,
            0.8022625402271393
          ],
          [
            0.45266797639843476,
            0.8596818083870575
          ],
          [
            0.4928016523727113,
            0.8487448049280254
          ],
          [
            0.5292868123493261,
            0.8432763031985094
          ],
          [
            0.558474940330618,
            0.8405420523337516
          ],
          [
            0.4599650083937579,
            0.8924928187641539
          ],
          [
            0.5073957163633572,
            0.8924928187641539
          ],
          [
            0.5420566183411413,
            0.8979613204936698
          ],
          [
            0.5675962303247715,
            0.900695571358428
          ],
          [
            0.4709105563867425,
            0.9280380800060078
          ],
          [
            0.5110442323610187,
            0.9335065817355241
          ],
          [
            0.5365838443446492,
            0.9417093343297982
          ],
          [
            0.5602991983294487,
            0.9444435851945562
          ],
          [
            0.4763833303832346,
            0.9772545955716523
          ],
          [
            0.5037472003656959,
            0.9772545955716523
          ],
          [
            0.5201655223551725,
            0.9772545955716523
          ],
          [
            0.5402323603423106,
            0.9772545955716523
          ]
        ],
        "dw_hand_2": [
          [
            0.7518462882066765,
            0.9389750834650403
          ],
          [
            0.7190096442277232,
            0.8487448049280254
          ],
          [
            0.6843487422499394,
            0.7448432720672207
          ],
          [
            0.6934700322440929,
            0.660081495259722
          ],
          [
            0.7153611282300618,
            0.6026622270998034
          ],
          [
            0.6898215162464314,
            0.6518787426654479
          ],
          [
            0.7153611282300618,
            0.5643827149931915
          ],
          [
            0.7299551922207078,
            0.5780539693169813
          ],
          [
            0.7263066762230463,
            0.6190677322883518
          ],
          [
            0.7336037082183692,
            0.6546129935302061
          ],
          [
            0.7573190622031689,
            0.5725854675874654
          ],
          [
            0.7609675782008303,
            0.5999279762350456
          ],
          [
            0.7536705462055074,
            0.6382074883416581
          ],
          [
            0.7719131261938147,
            0.6682842478539962
          ],
          [
            0.7938042221797836,
            0.5944594745055296
          ],
          [
            0.7956284801786145,
            0.6190677322883518
          ],
          [
            0.7883314481832914,
            0.6491444918006899
          ],
          [
            0.8102225441692603,
            0.6874240039073022
          ],
          [
            0.8284651241575679,
            0.6300047357473837
          ],
          [
            0.8321136401552293,
            0.6409417392064157
          ],
          [
            0.8321136401552293,
            0.66281574612448
          ]
        ],
        "dw_face": [
          [
            0.24835108052939175,
            0.3429083949477915
          ],
          [
            0.25747237052354544,
            0.3975934122429521
          ],
          [
            0.26841791851652985,
            0.44954417867335444
          ],
          [
            0.27936346650951444,
            0.4987606942389988
          ],
          [
            0.2976060464978218,
            0.5452429589398852
          ],
          [
            0.32496991648028306,
            0.5807882201817393
          ],
          [
            0.3541580444615747,
            0.6108649796940776
          ],
          [
            0.3888189464393591,
            0.6300047357473837
          ],
          [
            0.42347984841714315,
            0.6436759900711739
          ],
          [
            0.4581407503949272,
            0.6354732374768999
          ],
          [
            0.480031846380896,
            0.5971937253702876
          ],
          [
            0.49097739437388055,
            0.5452429589398852
          ],
          [
            0.5000986843680343,
            0.49055794164472466
          ],
          [
            0.4982744263692034,
            0.4358729243495641
          ],
          [
            0.4928016523727113,
            0.38392215791916173
          ],
          [
            0.4855046203773881,
            0.32923714062400145
          ],
          [
            0.480031846380896,
            0.2772863741935991
          ],
          [
            0.38699468844052803,
            0.26088086900505075
          ],
          [
            0.4034130104300047,
            0.24447536381650273
          ],
          [
            0.42347984841714315,
            0.2335383603574706
          ],
          [
            0.4417224284054505,
            0.23080410949271266
          ],
          [
            0.4599650083937579,
            0.2335383603574706
          ],
          [
            0.4782075883820653,
            0.23900686208698654
          ],
          [
            0.480031846380896,
            0.23900686208698654
          ],
          [
            0.480031846380896,
            0.23900686208698654
          ],
          [
            0.4818561043797267,
            0.23900686208698654
          ],
          [
            0.4836803623785576,
            0.23900686208698654
          ],
          [
            0.47455907238440387,
            0.282754875923115
          ],
          [
            0.4873288783762192,
            0.3128316354354531
          ],
          [
            0.5037472003656959,
            0.34017414408303354
          ],
          [
            0.5183412643563414,
            0.3675166527306137
          ],
          [
            0.472734814385573,
            0.42220167002577424
          ],
          [
            0.4836803623785576,
            0.42220167002577424
          ],
          [
            0.49462591037154197,
            0.41673316829625806
          ],
          [
            0.5092199743621879,
            0.4085304157019839
          ],
          [
            0.5183412643563414,
            0.3948591613781941
          ],
          [
            0.401588752431174,
            0.3046288828411792
          ],
          [
            0.41618281642182,
            0.29095762851738916
          ],
          [
            0.430776880412466,
            0.28548912678787297
          ],
          [
            0.4417224284054505,
            0.2991603811116633
          ],
          [
            0.430776880412466,
            0.3128316354354531
          ],
          [
            0.41618281642182,
            0.3128316354354531
          ],
          [
            0.4763833303832346,
            0.28548912678787297
          ],
          [
            0.4763833303832346,
            0.28002062505835706
          ],
          [
            0.4782075883820653,
            0.28002062505835706
          ],
          [
            0.480031846380896,
            0.28548912678787297
          ],
          [
            0.480031846380896,
            0.28822337765263095
          ],
          [
            0.4782075883820653,
            0.28822337765263095
          ],
          [
            0.46361352439141934,
            0.5151661994275472
          ],
          [
            0.4818561043797267,
            0.49055794164472466
          ],
          [
            0.4982744263692034,
            0.4686839347266607
          ],
          [
            0.5037472003656959,
            0.4686839347266607
          ],
          [
            0.5055714583645263,
            0.4686839347266607
          ],
          [
            0.5037472003656959,
            0.4850894399152087
          ],
          [
            0.4964501683703727,
            0.5014949451037568
          ],
          [
            0.5000986843680343,
            0.5151661994275472
          ],
          [
            0.5019229423668647,
            0.526103202886579
          ],
          [
            0.4964501683703727,
            0.5370402063456111
          ],
          [
            0.4836803623785576,
            0.5343059554808528
          ],
          [
            0.472734814385573,
            0.526103202886579
          ],
          [
            0.4672620403890809,
            0.5124319485627888
          ],
          [
            0.4818561043797267,
            0.5014949451037568
          ],
          [
            0.4982744263692034,
            0.49329219250948286
          ],
          [
            0.4982744263692034,
            0.49602644337424084
          ],
          [
            0.4964501683703727,
            0.5014949451037568
          ],
          [
            0.49462591037154197,
            0.5042291959685147
          ],
          [
            0.4928016523727113,
            0.5069634468332727
          ],
          [
            0.480031846380896,
            0.5096976976980306
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "wine_glass",
      "possible_names": [
        "wine_glass"
      ],
      "box": [
        0.4515625,
        0.3231850117096019,
        0.8640625,
        0.6814988290398126
      ]
    },
    {
      "name": "wine_glass",
      "possible_names": [
        "wine_glass"
      ],
      "box": [
        0.4734375,
        0.27400468384074944,
        0.8703125,
        0.7259953161592506
      ]
    }
  ],
  "scene": "A person is sipping red wine from an elegant glass in a warmly lit setting with stone walls and framed artwork visible in the background suggesting a cozy and sophisticated atmosphere for wine tasting or social gathering",
  "overall_past": "Before the current scene, the individual likely arrived at a carefully curated wine tasting event or intimate gathering, possibly after a day of social or professional engagement. They may have been greeted by a host or companion, exchanged pleasantries, and been led into the warmly lit room with stone walls and framed artwork—setting the tone for a refined, relaxed atmosphere. As they settled in, they were probably handed a glass of red wine, perhaps after a brief introduction to the wine’s origin or characteristics, setting the stage for the moment of focused tasting now captured. The presence of a second glass in the background suggests the gathering was already underway, with conversation and shared appreciation beginning, indicating that the social and sensory experience had already started to unfold.",
  "overall_past_clean": "The individual entered through a softly ajar door, greeted by a warm exchange of words and a subtle shift in ambiance as the weight of the day dissolved into the hushed elegance of the space, where the air carried the faint, inviting scent of aged wood and fermented grapes, and the first notes of conversation and clinking glass had already begun to weave through the room.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the individual is likely to set down the wine glass gently, lean back slightly, and engage in conversation with the other person present—perhaps sharing their thoughts on the wine’s aroma, body, or finish, while the soft glow of the lighting and the quiet elegance of the room deepen the sense of connection and shared appreciation.",
  "overall_future_clean": "A quiet exchange unfolds, voices low and deliberate, each word carrying the weight of mutual understanding, as the atmosphere thickens with unspoken rapport and the moment stretches into something intimate and inevitable.",
  "future_scene_ok": true
}