{
  "image_path": "./ref_datasets/hico_det/images/train/train_00032497.jpg",
  "image_id": "train_00032497",
  "width": 426,
  "height": 640,
  "split": "train",
  "source": "zhimeng/hico_det",
  "dataset_index": 32497,
  "annotations": {
    "objects": "[{'id': 297, 'bbox_human': [1, 421, 91, 632], 'bbox_object': [32, 209, 291, 472], 'connection': 1, 'invis': 0}, {'id': 298, 'bbox_human': [1, 425, 82, 639], 'bbox_object': [28, 210, 292, 475], 'connection': 1, 'invis': 0}, {'id': 301, 'bbox_human': [3, 423, 76, 635], 'bbox_object': [32, 215, 279, 509], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('cup', 'drink_with'), ('cup', 'hold'), ('cup', 'sip')]",
    "negative_captions": "[('cup', 'carry'), ('cup', 'inspect'), ('cup', 'smell'), ('cup', 'fill'), ('cup', 'wash'), ('cup', 'no_interaction')]",
    "ambiguous_captions": "[('cup', 'pour')]",
    "positive_objects": "[296, 297, 300]",
    "negative_objects": "[295, 298, 301, 302, 303, 304]",
    "ambiguous_objects": "[299]",
    "size": "[426, 640, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "male",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a black shirt.",
        "clothing": [
          {
            "possible_names": [
              "shirt",
              "polo shirt"
            ],
            "name": "shirt",
            "type": "top",
            "color": [
              "black"
            ]
          }
        ],
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "mug",
              "cup"
            ],
            "name": "mug",
            "position": "hand"
          }
        ],
        "description": "The person is in the foreground and appears to be an adult male with a neutral emotion. He is wearing a black shirt and is holding a white mug with his hand. The background is blurred, indicating he is the main subject of the photo.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears to be engaged in a simple activity (drinking from a cup) without showing strong emotions. The facial expression is calm and focused on the action.",
        "meaningful": true,
        "story": "The individual seems to be enjoying a quiet moment, possibly during a break or a casual setting. The act of drinking suggests a relaxed atmosphere, perhaps at a café or a similar environment.",
        "race": "white",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is holding a white mug close to their mouth, seemingly in the act of drinking from it. Their eyes are wide open, suggesting they might be engaged in conversation or reacting to something happening around them. The individual appears relaxed yet attentive, possibly enjoying a casual moment while sipping their beverage. The background's blurred lights indicate an indoor setting, perhaps a café or a social gathering where this person is taking a break and hydrating.",
        "intention": "The individual is taking a moment to hydrate while remaining engaged and present in a social or relaxed indoor environment",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.2212146130446057,
            0.46962515238140307
          ],
          [
            0.23629260738770644,
            0.5081790645739861
          ],
          [
            0.2556302336380035,
            0.5521170523549829
          ],
          [
            0.2655619297708784,
            0.5907684612487044
          ],
          [
            0.2748390804354094,
            0.6302855685353279
          ],
          [
            0.29346696823575524,
            0.6659738597060952
          ],
          [
            0.32558275196553876,
            0.7005523954119001
          ],
          [
            0.3532966858264506,
            0.7402823221470628
          ],
          [
            0.3992372416234672,
            0.7406930359346526
          ],
          [
            0.4587539695877904,
            0.7422383102987494
          ],
          [
            0.5283423218448877,
            0.7081213598804814
          ],
          [
            0.60436774483469,
            0.6776844789939267
          ],
          [
            0.6726187527539506,
            0.6310812660626003
          ],
          [
            0.7195806101933971,
            0.5770915954240732
          ],
          [
            0.7373155879142859,
            0.5208888884101596
          ],
          [
            0.726838656194573,
            0.45865078243826113
          ],
          [
            0.7268803481045063,
            0.38672651164233685
          ],
          [
            0.21073666780607403,
            0.434523144736886
          ],
          [
            0.21876953017543738,
            0.4116473663598299
          ],
          [
            0.24886625929217782,
            0.4162910724324839
          ],
          [
            0.2810548912511425,
            0.41518791764974594
          ],
          [
            0.3162027729349117,
            0.4225646425038576
          ],
          [
            0.37914471902917646,
            0.40958906763366293
          ],
          [
            0.4269158399640754,
            0.39980160125664305
          ],
          [
            0.4809254570186418,
            0.39322414754756857
          ],
          [
            0.5362983194475283,
            0.40082357185227535
          ],
          [
            0.5834171847158595,
            0.409170786184924
          ],
          [
            0.3475123962885897,
            0.4564579475138869
          ],
          [
            0.3442912082556987,
            0.4860616709504809
          ],
          [
            0.3411103857994719,
            0.5110583704497133
          ],
          [
            0.3309840815407889,
            0.5427465010434389
          ],
          [
            0.3225749534380508,
            0.5701138755040509
          ],
          [
            0.3369736524335016,
            0.5724672919937543
          ],
          [
            0.356770878186728,
            0.5772642221833978
          ],
          [
            0.3858078169710759,
            0.569760465728385
          ],
          [
            0.4220051786229564,
            0.5640936328896455
          ],
          [
            0.2540640092231858,
            0.4710214489272662
          ],
          [
            0.26377882484139253,
            0.46041562408208847
          ],
          [
            0.299227634026331,
            0.45896797254681587
          ],
          [
            0.322666820785809,
            0.46052955942494533
          ],
          [
            0.29855916205866073,
            0.4682594628206321
          ],
          [
            0.2731673875005352,
            0.47388969760920324
          ],
          [
            0.4390453829371953,
            0.4537000395357609
          ],
          [
            0.4670905998135317,
            0.43844569474458694
          ],
          [
            0.4901325372463101,
            0.43378372117877007
          ],
          [
            0.5293401248658127,
            0.4413938242942095
          ],
          [
            0.49976266773653705,
            0.44929709019405506
          ],
          [
            0.46995527947372756,
            0.4568113152469907
          ],
          [
            0.32079824243592864,
            0.6315373437745231
          ],
          [
            0.33422326266085994,
            0.6237193612115723
          ],
          [
            0.3543592789763176,
            0.6176907521273408
          ],
          [
            0.3747528792227138,
            0.6184295660683087
          ],
          [
            0.38994760340608103,
            0.616220986204488
          ],
          [
            0.43635139986622823,
            0.6168076901563576
          ],
          [
            0.48167085631592493,
            0.6220765060612133
          ],
          [
            0.4351776199481537,
            0.6338011255221707
          ],
          [
            0.3967099511167653,
            0.6460039791251931
          ],
          [
            0.3726545449153918,
            0.6482782294707639
          ],
          [
            0.3550467950877209,
            0.6498137499604907
          ],
          [
            0.3274572612773964,
            0.6414619109460286
          ],
          [
            0.3300941119171644,
            0.634026473654168
          ],
          [
            0.35162242781308417,
            0.6312148352818829
          ],
          [
            0.3759413739325295,
            0.6316311827727726
          ],
          [
            0.40047043126193893,
            0.6217957037900176
          ],
          [
            0.4669397731981847,
            0.6189256262566362
          ],
          [
            0.3917142793206382,
            0.6180039692137923
          ],
          [
            0.37166667688140764,
            0.6298021211155823
          ],
          [
            0.3472698024261086,
            0.6260094195604324
          ]
        ],
        "visibility": [
          0.9999791383743286,
          0.9999250173568726,
          0.0011602964950725436,
          0.1839105784893036,
          1.0,
          0.9971606731414795,
          0.3529113829135895,
          4.5893619244452566e-05,
          8.71927259140648e-05,
          1.465604600525694e-05,
          1.270605298486771e-05,
          1.582014108201992e-14,
          4.466729478735942e-06,
          0.00012313926708884537,
          2.2270562567427987e-06,
          3.143287230500391e-08,
          2.0103881226418707e-08,
          8.923236004206408e-17,
          1.0,
          0.9990653395652771,
          0.9997493624687195,
          1.0,
          1.0,
          0.7054795622825623,
          0.9999991655349731,
          0.9998784065246582,
          0.9855636358261108,
          0.9994543194770813,
          0.8061878681182861
        ],
        "headpose": {
          "pitch": -6.626290094041299,
          "yaw": 30.613915312899728,
          "roll": -7.985952800227289
        },
        "attributes": {
          "5 oClock Shadow": 0.021092219278216362,
          "Arched Eyebrows": 0.002190827624872327,
          "Attractive": 0.18946698307991028,
          "Bags Under Eyes": 0.10174452513456345,
          "Bald": 1.8267634516178077e-07,
          "Bangs": 0.3915143311023712,
          "Big Lips": 0.04849228262901306,
          "Big Nose": 0.11582864820957184,
          "Black Hair": 0.0002715654845815152,
          "Blond Hair": 0.3071559965610504,
          "Blurry": 0.0015247819246724248,
          "Brown Hair": 0.03835098817944527,
          "Bushy Eyebrows": 0.0012693378375843167,
          "Chubby": 0.00340153556317091,
          "Double Chin": 0.0006787145393900573,
          "Eyeglasses": 0.001127855503000319,
          "Goatee": 0.0005792524898424745,
          "Gray Hair": 0.048228584229946136,
          "Heavy Makeup": 0.003024713136255741,
          "High Cheekbones": 0.010789461433887482,
          "Male": 0.931547224521637,
          "Mouth Slightly Open": 0.0024783138651400805,
          "Mustache": 0.00012762524420395494,
          "Narrow Eyes": 0.015568177215754986,
          "No Beard": 0.976889431476593,
          "Oval Face": 0.08105236291885376,
          "Pale Skin": 0.4750610888004303,
          "Pointy Nose": 0.08622067421674728,
          "Receding Hairline": 0.00013617351942230016,
          "Rosy Cheeks": 0.00015581170737277716,
          "Sideburns": 0.0008927405579015613,
          "Smiling": 0.001883589313365519,
          "Straight Hair": 0.011563047766685486,
          "Wavy Hair": 0.5933427810668945,
          "Wearing Earrings": 0.005722484551370144,
          "Wearing Hat": 0.0011312011629343033,
          "Wearing Lipstick": 0.005622416734695435,
          "Wearing Necklace": 0.009144416078925133,
          "Wearing Necktie": 0.001204066677019,
          "Young": 0.757443368434906
        },
        "age": [
          0.24606919288635254,
          0.862873911857605,
          0.4467570185661316,
          0.2840823233127594,
          0.6267080903053284,
          0.014396395534276962,
          0.0007387487567029893,
          2.883178422052879e-05
        ],
        "race": [
          0.9994537234306335,
          4.662768333218992e-05,
          0.9477177262306213,
          0.06856022030115128,
          0.04502037167549133
        ],
        "gender": [
          0.812998354434967,
          0.2140396535396576
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 99.81921315193176,
          "disgust": 6.688893142821372e-17,
          "fear": 0.003638559064711444,
          "happy": 1.5608984450357134e-13,
          "sad": 8.173709487024894e-08,
          "surprise": 0.1771501381881535,
          "neutral": 1.4398816069582238e-11
        },
        "dominant_emotion": "angry",
        "region": {
          "x": 0,
          "y": 0,
          "w": 312,
          "h": 394,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 36,
        "gender": {
          "Woman": 8.619976788759232,
          "Man": 91.38001799583435
        },
        "dominant_gender": "Man",
        "race": {
          "asian": 0.0677926448068854,
          "indian": 0.0008236546400248976,
          "black": 7.624541239858386e-05,
          "white": 99.44126602657235,
          "middle eastern": 0.15647014138669618,
          "latino hispanic": 0.33356933514442055
        },
        "dominant_race": "white"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "mouth",
                "drink with"
              ],
              [
                "mouth",
                "hold"
              ],
              [
                "mouth",
                "sip"
              ]
            ],
            "negative_action": [
              "carry",
              "inspect",
              "smell",
              "fill",
              "wash",
              "no interaction"
            ],
            "position": "mouth"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.0014820367796346545,
        0.13573665916919708,
        0.997494101524353,
        0.9978706240653992
      ]
    ],
    "face_boxes": [
      [
        0.22900640964508057,
        0.3144705295562744,
        0.718484103679657,
        0.7256279587745667
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.6472574502587257,
            0.6893717993826916
          ],
          [
            0.3270843406020818,
            0.737286967670338
          ],
          [
            0.1358399999346905,
            1.0133355491484206
          ],
          [
            0.06922680262357664,
            0.7472990923871596
          ],
          [
            0.9674305599153696,
            0.6414566310950451
          ],
          [
            0.9717281855483445,
            0.9732870502811337
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.3012985868042312,
            0.44550504735153595
          ],
          [
            0.4946917402881101,
            0.44121413675861226
          ],
          [
            0.24542945357555515,
            0.4655292967851792
          ],
          [
            0.767590967982028,
            0.4798323320949243
          ]
        ],
        "dw_hand_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_hand_2": [
          [
            0.09071493078845208,
            0.7329960570774143
          ],
          [
            0.1315423743017154,
            0.6800748264313572
          ],
          [
            0.17022100499849116,
            0.6300142028472488
          ],
          [
            0.17881625626444125,
            0.6228626851923763
          ],
          [
            0.15303050246659083,
            0.6157111675375038
          ],
          [
            0.11220305895332752,
            0.5341838662719561
          ],
          [
            0.12294712303576517,
            0.5184505274312363
          ],
          [
            0.11650068458630264,
            0.5427656874578033
          ],
          [
            0.11220305895332752,
            0.6042687392897075
          ],
          [
            0.0477386744587012,
            0.555638419236574
          ],
          [
            0.04344104882572608,
            0.5685111510153447
          ],
          [
            0.07352442825655175,
            0.6242929887233509
          ],
          [
            0.09931018205440231,
            0.6600505769977139
          ],
          [
            0.009060043761925426,
            0.5899657039799624
          ],
          [
            0.015506482211388098,
            0.6343051134401725
          ],
          [
            0.05633392572465142,
            0.674353612307459
          ],
          [
            0.09286374360493964,
            0.6972384688030515
          ],
          [
            -0.012428084402950014,
            0.6414566310950451
          ],
          [
            0.0026136053124628873,
            0.6772142193694081
          ],
          [
            0.039143423192751096,
            0.701529379395975
          ],
          [
            0.07567324107303931,
            0.7172627182366947
          ]
        ],
        "dw_face": [
          [
            0.2475782663920427,
            0.44550504735153595
          ],
          [
            0.24542945357555515,
            0.48698384974979686
          ],
          [
            0.2475782663920427,
            0.5298929556790326
          ],
          [
            0.264768768923943,
            0.5770929722011917
          ],
          [
            0.29055452272179344,
            0.6228626851923763
          ],
          [
            0.3206379021526191,
            0.6672020946525865
          ],
          [
            0.3528700943999322,
            0.7029596829269494
          ],
          [
            0.40444160199563334,
            0.7229839323605929
          ],
          [
            0.47320361212323464,
            0.7229839323605929
          ],
          [
            0.5398168094343485,
            0.707250593519873
          ],
          [
            0.5978347554795121,
            0.6829354334933062
          ],
          [
            0.6472574502587256,
            0.6528990593428413
          ],
          [
            0.6859360809555014,
            0.6171414710684782
          ],
          [
            0.7117218347533518,
            0.5770929722011917
          ],
          [
            0.7224658988357895,
            0.5327535627409815
          ],
          [
            0.7310611501017397,
            0.48984445681174593
          ],
          [
            0.7332099629182274,
            0.44407474382056134
          ],
          [
            0.24328064075906758,
            0.42262019085594343
          ],
          [
            0.2626199561074555,
            0.4154686732010709
          ],
          [
            0.28625689708881846,
            0.4154686732010709
          ],
          [
            0.30989383807018134,
            0.4197595837939946
          ],
          [
            0.3335307790515443,
            0.42405049438691805
          ],
          [
            0.4194832917110461,
            0.41832928026301996
          ],
          [
            0.4581619224078219,
            0.4126080661391218
          ],
          [
            0.49898936592108517,
            0.4083171555461983
          ],
          [
            0.5419656222508361,
            0.40974745907717275
          ],
          [
            0.5806442529476118,
            0.41832928026301996
          ],
          [
            0.3743582225648078,
            0.44550504735153595
          ],
          [
            0.36576297129885754,
            0.46695960031615363
          ],
          [
            0.35501890721641993,
            0.4884141532807715
          ],
          [
            0.34212603031749456,
            0.5070080991834401
          ],
          [
            0.33567959186803203,
            0.5370444733339051
          ],
          [
            0.34857246876695713,
            0.5441959909887777
          ],
          [
            0.36576297129885754,
            0.5427656874578033
          ],
          [
            0.39369753791319567,
            0.5441959909887777
          ],
          [
            0.42163210452753364,
            0.5427656874578033
          ],
          [
            0.26906639455691816,
            0.4512262614754339
          ],
          [
            0.29055452272179344,
            0.4383535296966632
          ],
          [
            0.32278671496910666,
            0.4397838332276378
          ],
          [
            0.3464236559504697,
            0.4526565650064085
          ],
          [
            0.3206379021526191,
            0.45837777913030664
          ],
          [
            0.29270333553828115,
            0.45980808266128104
          ],
          [
            0.44956667114187165,
            0.4512262614754339
          ],
          [
            0.47320361212323464,
            0.43406261910373967
          ],
          [
            0.5118822428200104,
            0.43406261910373967
          ],
          [
            0.5441144350673235,
            0.44550504735153595
          ],
          [
            0.514031055636498,
            0.4555171720683574
          ],
          [
            0.4817988633891849,
            0.456947475599332
          ],
          [
            0.32923315341856924,
            0.6157111675375038
          ],
          [
            0.33567959186803203,
            0.6042687392897075
          ],
          [
            0.3507212815834448,
            0.5971172216348349
          ],
          [
            0.36361415848237,
            0.5956869181038604
          ],
          [
            0.38080466101427035,
            0.5971172216348349
          ],
          [
            0.4280785429769963,
            0.6042687392897075
          ],
          [
            0.47750123775620973,
            0.6142808640065293
          ],
          [
            0.4474178583253842,
            0.6314445063782235
          ],
          [
            0.415185666078071,
            0.6443172381569942
          ],
          [
            0.3765070353812952,
            0.6471778452189432
          ],
          [
            0.35501890721641993,
            0.6414566310950451
          ],
          [
            0.339977217501007,
            0.6300142028472488
          ],
          [
            0.33782840468451947,
            0.6157111675375038
          ],
          [
            0.3507212815834448,
            0.6114202569445802
          ],
          [
            0.367911784115345,
            0.6099899534136056
          ],
          [
            0.41733447889455855,
            0.6114202569445802
          ],
          [
            0.4646083608572844,
            0.6157111675375038
          ],
          [
            0.42163210452753364,
            0.6242929887233509
          ],
          [
            0.3743582225648078,
            0.6271535957852999
          ],
          [
            0.3528700943999322,
            0.6242929887233509
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "cup",
      "possible_names": [
        "cup"
      ],
      "box": [
        0.07511737089201878,
        0.4546875,
        0.49061032863849763,
        0.7375
      ]
    },
    {
      "name": "cup",
      "possible_names": [
        "cup"
      ],
      "box": [
        0.07511737089201878,
        0.4359375,
        0.5046948356807511,
        0.7953125
      ]
    }
  ],
  "scene": "A person is drinking from a white mug against a blurred background with warm bokeh lights suggesting an indoor setting possibly a cafe or restaurant with a cozy ambiance and neutral tones dominating the scene",
  "overall_past": "Before this moment, the individual likely had been engaged in a conversation or social interaction—perhaps discussing ideas, sharing stories, or simply enjoying the company of others in a quiet, intimate setting. The relaxed yet attentive posture, combined with the wide-eyed expression, suggests they were listening intently or reacting to something said, possibly a thoughtful comment or a lighthearted remark. The act of sipping from the white mug indicates a brief pause in the exchange, a moment of reflection or comfort, as they took a break from talking to enjoy the warmth of their drink. This moment of stillness, framed by the soft bokeh lights and neutral tones, implies a peaceful interlude within a larger, ongoing social experience—perhaps a coffee break during a meaningful conversation or a quiet respite after a busy day.",
  "overall_past_clean": "A dynamic exchange unfolded in close proximity, voices rising and falling with animated gestures, laughter punctuating moments of shared insight, as the energy built through rapid-fire remarks and responsive silences, culminating in a sudden shift toward quiet attentiveness.",
  "past_scene_ok": false,
  "overall_future": "After the current scene, the individual is likely to set the mug down gently on the table, perhaps after finishing the last sip, and then turn slightly toward the person or people nearby, engaging in conversation with a soft smile or a thoughtful expression. The warm, blurred lights and cozy ambiance suggest a moment of pause in a social setting, so the next action would naturally involve interaction—perhaps sharing a story, responding to a comment, or simply enjoying the quiet comfort of the moment.",
  "overall_future_clean": "A quiet pause dissolves into a soft exchange of words, eyes meeting with ease, as the moment unfolds with effortless connection and shared presence.",
  "future_scene_ok": true
}