{
  "image_path": "./ref_datasets/hico_det/images/test/test_00000557.jpg",
  "image_id": "test_00000557",
  "width": 550,
  "height": 577,
  "split": "test",
  "source": "zhimeng/hico_det",
  "dataset_index": 557,
  "annotations": {
    "objects": "[{'id': 278, 'bbox_human': [7, 493, 6, 573], 'bbox_object': [1, 549, 386, 449], 'connection': 1, 'invis': 0}, {'id': 280, 'bbox_human': [1, 495, 1, 575], 'bbox_object': [2, 547, 331, 528], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('carrot', 'hold'), ('carrot', 'smell')]",
    "negative_captions": "[('carrot', 'cook'), ('carrot', 'cut'), ('carrot', 'eat'), ('carrot', 'peel'), ('carrot', 'stir'), ('carrot', 'wash'), ('carrot', 'no_interaction')]",
    "ambiguous_captions": "[('carrot', 'carry')]",
    "positive_objects": "[277, 279]",
    "negative_objects": "[274, 275, 276, 278, 280, 281, 282]",
    "ambiguous_objects": "[273]",
    "size": "[550, 577, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "female",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a gray tank top.",
        "clothing": [
          {
            "possible_names": [
              "tank top"
            ],
            "name": "tank top",
            "type": "top",
            "color": [
              "gray"
            ]
          }
        ],
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "carrot"
            ],
            "name": "carrot",
            "position": "hand"
          }
        ],
        "description": "The person is an adult female wearing a gray tank top. She appears to be happy while holding a carrot with her right hand. The setting includes some greenery and a side of a house in the background, and her fingernails are painted blue.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears to be in a neutral state, possibly engaged in an activity involving a carrot, which suggests a playful or casual mood rather than strong emotions.",
        "meaningful": true,
        "story": "The person seems to be outdoors, possibly in a garden or backyard setting, holding a carrot close to their mouth as if they are about to eat it or are playfully pretending to do so. The presence of plants and flowers in the background adds to the relaxed, domestic atmosphere.",
        "race": "white",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is holding a freshly picked carrot close to their face, appearing to smell it with a curious and slightly amused expression. Their blue-painted nails suggest attention to personal style, and the dirt under their fingernails indicates they have been actively involved in gardening. The act of smelling the carrot could be driven by a sense of pride in their gardening efforts or simply enjoying the natural scent of the fresh produce. The background plants and the casual setting imply a relaxed environment where they feel comfortable engaging in such an intimate interaction with their harvest.",
        "intention": "The individual is savoring the sensory experience of freshly harvested produce as a moment of connection and satisfaction with their gardening effort",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.0263298579624721,
            0.21019253739038002
          ],
          [
            0.01007673337862089,
            0.3138750480171122
          ],
          [
            0.012543455346838222,
            0.43109814274809866
          ],
          [
            0.021882766252988347,
            0.5557773280303115
          ],
          [
            0.042990678316586976,
            0.6628765062283514
          ],
          [
            0.08557771707510019,
            0.7712238745749601
          ],
          [
            0.13941037809693968,
            0.8631600547350406
          ],
          [
            0.20133449504901835,
            0.930885040337982
          ],
          [
            0.2832354756144734,
            0.9375775056949021
          ],
          [
            0.3501581464494977,
            0.902018933108491
          ],
          [
            0.39378792898995535,
            0.8406215194547255
          ],
          [
            0.4381222415280032,
            0.7649314806702764
          ],
          [
            0.47949119666954143,
            0.688922380804868
          ],
          [
            0.5177490432541091,
            0.6073560955560452
          ],
          [
            0.5290726500672179,
            0.5256525024561399
          ],
          [
            0.5474276356882863,
            0.42534495639635744
          ],
          [
            0.5613897249296115,
            0.33731442177818094
          ],
          [
            0.1346497969193892,
            0.3622735595608679
          ],
          [
            0.18332277025495258,
            0.36185898819781964
          ],
          [
            0.23552057340547636,
            0.3901010318745714
          ],
          [
            0.2757971379664037,
            0.4090038090833611
          ],
          [
            0.3214229236949574,
            0.41728373098031046
          ],
          [
            0.4365243787889357,
            0.42596025695951895
          ],
          [
            0.4675493611917867,
            0.41240652837089814
          ],
          [
            0.5044213827554282,
            0.397789515018109
          ],
          [
            0.5318865590281301,
            0.3693013195945473
          ],
          [
            0.5553430334314123,
            0.3803000378118762
          ],
          [
            0.37875898782309,
            0.47688470024907903
          ],
          [
            0.37217023775175023,
            0.5472790563894575
          ],
          [
            0.3718664739039037,
            0.605656895138126
          ],
          [
            0.36576954730145345,
            0.6811989087461098
          ],
          [
            0.2650355301894151,
            0.6613179549453105
          ],
          [
            0.30303613241616784,
            0.68163831908681
          ],
          [
            0.3364217931574041,
            0.7009595132871678
          ],
          [
            0.37423071923193996,
            0.693938593428806
          ],
          [
            0.40720496239600246,
            0.6727900952506578
          ],
          [
            0.17366451412052303,
            0.442951220927719
          ],
          [
            0.20637497344574374,
            0.45584306010930775
          ],
          [
            0.25967414657790944,
            0.4522947112827438
          ],
          [
            0.2794219178038758,
            0.4561320496361041
          ],
          [
            0.258203580782011,
            0.461459663605507
          ],
          [
            0.20356115118249668,
            0.45771247687154437
          ],
          [
            0.4174514374175629,
            0.4675675303843329
          ],
          [
            0.4550991306057224,
            0.4681133574617768
          ],
          [
            0.49177209433023034,
            0.45784940646408395
          ],
          [
            0.5066617495053775,
            0.45015007990898254
          ],
          [
            0.48682789988331987,
            0.46166086562879904
          ],
          [
            0.44583065478832695,
            0.4644976470930265
          ],
          [
            0.1896505727396383,
            0.6945658091360697
          ],
          [
            0.24316042119806464,
            0.7093325183072223
          ],
          [
            0.30852917262486046,
            0.7157129834093177
          ],
          [
            0.3274517307033787,
            0.7191036929881168
          ],
          [
            0.35030270861340806,
            0.7157508092636105
          ],
          [
            0.37654178173511055,
            0.71048746772496
          ],
          [
            0.4040909556599407,
            0.7024676300978182
          ],
          [
            0.3806580630215732,
            0.7387769198057585
          ],
          [
            0.35138018719561687,
            0.769357357794886
          ],
          [
            0.31135422842843197,
            0.7795872340021703
          ],
          [
            0.26259845882267147,
            0.7707397666831038
          ],
          [
            0.2221667549826882,
            0.7449930315709522
          ],
          [
            0.19638599048961292,
            0.7013172828256862
          ],
          [
            0.26866600730202417,
            0.7299544175505254
          ],
          [
            0.3253899363728313,
            0.7345555544666914
          ],
          [
            0.3612782366864093,
            0.7261084107722231
          ],
          [
            0.3990756443568639,
            0.707970850595775
          ],
          [
            0.36430700723226966,
            0.7292553327201057
          ],
          [
            0.317051751273019,
            0.7483037394679221
          ],
          [
            0.26262793602881496,
            0.7368114253736187
          ]
        ],
        "visibility": [
          1.0,
          1.894449894734862e-07,
          0.4612201750278473,
          1.9598310875643815e-13,
          0.99985671043396,
          1.0,
          2.818872392451599e-11,
          6.304029511738918e-07,
          1.5334468628225295e-07,
          4.120601886370423e-07,
          4.3114289915990867e-11,
          7.546157689830579e-07,
          0.9338128566741943,
          6.348035703866717e-13,
          0.0036235719453543425,
          2.430394017836928e-16,
          0.0024696141481399536,
          0.026875222101807594,
          6.6190692926682004e-09,
          0.08816676586866379,
          7.699042621241888e-22,
          3.1985459630869877e-16,
          3.2786185784061672e-06,
          1.0,
          0.9029701352119446,
          0.9999375343322754,
          0.9999991655349731,
          0.999976396560669,
          0.22687380015850067
        ],
        "headpose": {
          "pitch": -30.499233021640627,
          "yaw": -12.04973435637187,
          "roll": 10.42345350838714
        },
        "attributes": {
          "5 oClock Shadow": 0.00964451301842928,
          "Arched Eyebrows": 0.000667234999127686,
          "Attractive": 0.15818987786769867,
          "Bags Under Eyes": 0.3785814642906189,
          "Bald": 9.33215269469656e-05,
          "Bangs": 0.010450509376823902,
          "Big Lips": 0.0780109316110611,
          "Big Nose": 0.4599616825580597,
          "Black Hair": 0.001232940354384482,
          "Blond Hair": 0.11838401854038239,
          "Blurry": 0.006978858262300491,
          "Brown Hair": 0.07331643253564835,
          "Bushy Eyebrows": 0.00419663218781352,
          "Chubby": 0.014084058813750744,
          "Double Chin": 0.0014091032790020108,
          "Eyeglasses": 0.04065589979290962,
          "Goatee": 0.027037862688302994,
          "Gray Hair": 0.008523153141140938,
          "Heavy Makeup": 0.004953440278768539,
          "High Cheekbones": 0.019527176395058632,
          "Male": 0.87465500831604,
          "Mouth Slightly Open": 0.007353493943810463,
          "Mustache": 0.0227954238653183,
          "Narrow Eyes": 0.16639305651187897,
          "No Beard": 0.7386928796768188,
          "Oval Face": 0.011716472916305065,
          "Pale Skin": 0.012145550921559334,
          "Pointy Nose": 0.06529761850833893,
          "Receding Hairline": 0.04877408593893051,
          "Rosy Cheeks": 0.00023758136376272887,
          "Sideburns": 0.005432677920907736,
          "Smiling": 0.10167473554611206,
          "Straight Hair": 0.21443793177604675,
          "Wavy Hair": 0.033155977725982666,
          "Wearing Earrings": 0.010471082292497158,
          "Wearing Hat": 0.000489305064547807,
          "Wearing Lipstick": 0.004138163290917873,
          "Wearing Necklace": 0.020990040153265,
          "Wearing Necktie": 0.0027203175704926252,
          "Young": 0.893685519695282
        },
        "age": [
          0.048490602523088455,
          0.6608388423919678,
          0.9051599502563477,
          0.5450078845024109,
          0.10539499670267105,
          0.004298695828765631,
          0.0020112358033657074,
          0.0006045393529348075
        ],
        "race": [
          0.9998406171798706,
          0.0006500317831523716,
          0.06308671087026596,
          0.0034551697317510843,
          0.14071422815322876
        ],
        "gender": [
          0.40277546644210815,
          0.6226661801338196
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 8.648521135357079e-10,
          "disgust": 1.2044799850409113e-18,
          "fear": 2.1287372906075852e-10,
          "happy": 100.0,
          "sad": 7.504100354885183e-13,
          "surprise": 1.927643867963345e-23,
          "neutral": 7.61201574049717e-13
        },
        "dominant_emotion": "happy",
        "region": {
          "x": 0,
          "y": 0,
          "w": 399,
          "h": 533,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 29,
        "gender": {
          "Woman": 96.92006707191467,
          "Man": 3.079940751194954
        },
        "dominant_gender": "Woman",
        "race": {
          "asian": 0.0024380310428045006,
          "indian": 0.00015897320887512574,
          "black": 5.105737801187147e-06,
          "white": 99.83457328861078,
          "middle eastern": 0.10137089949522621,
          "latino hispanic": 0.06144728244724483
        },
        "dominant_race": "white"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "mouth",
                "hold"
              ],
              [
                "mouth",
                "smell"
              ]
            ],
            "negative_action": [
              "cook",
              "cut",
              "eat",
              "peel",
              "stir",
              "wash",
              "no interaction"
            ],
            "position": "mouth"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.0007460662163794041,
        0.0,
        0.8892102837562561,
        0.9982679486274719
      ]
    ],
    "face_boxes": [
      [
        0.0023796679452061653,
        0.24405528604984283,
        0.5835947394371033,
        0.9159232974052429
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.278218008899271,
            0.5894160269891053
          ],
          [
            0.0076661892735747144,
            0.5955807684031013
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.5487698285249673,
            0.5832512855751094
          ],
          [
            0.770816341843268,
            0.983959477484849
          ],
          [
            0.7686605504518281,
            0.8955981838842398
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.7406352623631105,
            0.8894334424702436
          ],
          [
            0.23618007676619476,
            0.4805055953418429
          ],
          [
            0.47547292121601376,
            0.4722859401231815
          ],
          [
            -0.007424350466503918,
            0.3900893879365681
          ],
          [
            0.5444582457420877,
            0.4106385259832215
          ]
        ],
        "dw_hand_1": [
          [
            0.7449468451459901,
            0.8873785286655784
          ],
          [
            0.6500920239226384,
            0.8401155111582758
          ],
          [
            0.5832624907880042,
            0.8051819764789652
          ],
          [
            0.5466140371335274,
            0.7784680970183158
          ],
          [
            0.5121213748704905,
            0.7620287865809932
          ],
          [
            0.63284569279112,
            0.6202397340590853
          ],
          [
            0.6026646133109625,
            0.6222946478637504
          ],
          [
            0.5897298649623237,
            0.6839420620037104
          ],
          [
            0.5832624907880042,
            0.7435345623390052
          ],
          [
            0.6953636431428746,
            0.624349561668416
          ],
          [
            0.6716499378370366,
            0.6305143030824119
          ],
          [
            0.6479362325311986,
            0.716820682878356
          ],
          [
            0.6457804411397587,
            0.7681935279949892
          ],
          [
            0.7621931762775086,
            0.6448986997150694
          ],
          [
            0.7471026365374299,
            0.6407888721057384
          ],
          [
            0.712609974274393,
            0.7270952519016826
          ],
          [
            0.7018310173171939,
            0.7764131832136504
          ],
          [
            0.8333342921950223,
            0.6901068034177066
          ],
          [
            0.8290227094121428,
            0.6716125791757186
          ],
          [
            0.8031532127148651,
            0.7147657690736904
          ],
          [
            0.7751279246261475,
            0.7681935279949892
          ]
        ],
        "dw_hand_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_face": [
          [
            0.011977772056454382,
            0.38186973271790675
          ],
          [
            0.009821980665014548,
            0.4435171468578669
          ],
          [
            0.014133563447894217,
            0.501054733388496
          ],
          [
            0.02922410318797285,
            0.55653740611446
          ],
          [
            0.05509359988525065,
            0.6140749926450892
          ],
          [
            0.10252101049692638,
            0.655173268738396
          ],
          [
            0.14563683832572266,
            0.6921617172223719
          ],
          [
            0.1865968747630791,
            0.7209305104876864
          ],
          [
            0.24264745094051415,
            0.7455894761436704
          ],
          [
            0.300853818509389,
            0.7496993037530012
          ],
          [
            0.348281229121065,
            0.7229854242923518
          ],
          [
            0.38277389138410195,
            0.6777773205897144
          ],
          [
            0.4302013019957779,
            0.6448986997150694
          ],
          [
            0.47116133843313407,
            0.6017455098170973
          ],
          [
            0.5078097920876109,
            0.5524275785051292
          ],
          [
            0.529367706002009,
            0.49694490577916534
          ],
          [
            0.5423024543506479,
            0.4435171468578669
          ],
          [
            0.13701367275996343,
            0.4435171468578669
          ],
          [
            0.1887526661545189,
            0.44146223305320137
          ],
          [
            0.2404916595490743,
            0.4496818882718628
          ],
          [
            0.28791907016075013,
            0.45995645729518936
          ],
          [
            0.33534648077242596,
            0.4722859401231815
          ],
          [
            0.4409802589529768,
            0.4681761125138508
          ],
          [
            0.47116133843313407,
            0.4579015434905242
          ],
          [
            0.5013424179132916,
            0.44762697446719746
          ],
          [
            0.529367706002009,
            0.43735240544387066
          ],
          [
            0.5509256199164071,
            0.4394073192485362
          ],
          [
            0.38277389138410195,
            0.4928350781698348
          ],
          [
            0.37846230860122226,
            0.544207923286468
          ],
          [
            0.3741507258183426,
            0.5914709407937706
          ],
          [
            0.37199493442690296,
            0.6407888721057384
          ],
          [
            0.2857632787693104,
            0.6387339583010733
          ],
          [
            0.3159443582494677,
            0.6510634411290652
          ],
          [
            0.34612543772962495,
            0.6633929239570574
          ],
          [
            0.3741507258183426,
            0.661338010152392
          ],
          [
            0.4000202225156204,
            0.6490085273243998
          ],
          [
            0.1801295005887596,
            0.4763957677325122
          ],
          [
            0.22108953702611603,
            0.4702310263185161
          ],
          [
            0.26204957346347246,
            0.4722859401231815
          ],
          [
            0.29438644433506966,
            0.4928350781698348
          ],
          [
            0.2534264078977132,
            0.49899981958383066
          ],
          [
            0.2146221628517966,
            0.4928350781698348
          ],
          [
            0.4280455106043378,
            0.48667033675583876
          ],
          [
            0.4539150073016156,
            0.4681761125138508
          ],
          [
            0.48625187817321275,
            0.4620113710998549
          ],
          [
            0.5142771662619303,
            0.4702310263185161
          ],
          [
            0.48840766956465276,
            0.48667033675583876
          ],
          [
            0.4582265900844953,
            0.49078016436516947
          ],
          [
            0.19737583172027814,
            0.6510634411290652
          ],
          [
            0.2577379906805928,
            0.6675027515663878
          ],
          [
            0.3159443582494677,
            0.6777773205897144
          ],
          [
            0.33103489798954633,
            0.6818871481990453
          ],
          [
            0.34612543772962495,
            0.67983223439438
          ],
          [
            0.36552756025258326,
            0.6716125791757186
          ],
          [
            0.3870854741669816,
            0.6633929239570574
          ],
          [
            0.36552756025258326,
            0.6839420620037104
          ],
          [
            0.34612543772962495,
            0.7024362862456984
          ],
          [
            0.3181001496409075,
            0.7127108552690252
          ],
          [
            0.27498432181211135,
            0.7044912000503638
          ],
          [
            0.23402428537475492,
            0.6839420620037104
          ],
          [
            0.21031058006891695,
            0.6572281825430613
          ],
          [
            0.2685169476377918,
            0.6777773205897144
          ],
          [
            0.32672331520666664,
            0.6859969758083758
          ],
          [
            0.35043702051250464,
            0.6839420620037104
          ],
          [
            0.3763065172097824,
            0.6695576653710532
          ],
          [
            0.35043702051250464,
            0.6839420620037104
          ],
          [
            0.3224117324237872,
            0.6921617172223719
          ],
          [
            0.26636115624635215,
            0.6818871481990453
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "carrot",
      "possible_names": [
        "carrot"
      ],
      "box": [
        0.0018181818181818182,
        0.6689774696707106,
        0.9981818181818182,
        0.7781629116117851
      ]
    },
    {
      "name": "carrot",
      "possible_names": [
        "carrot"
      ],
      "box": [
        0.0036363636363636364,
        0.5736568457538995,
        0.9945454545454545,
        0.9150779896013865
      ]
    }
  ],
  "scene": "A person is holding a freshly picked carrot with green leaves and blue nail polish against a backdrop of a white wall and a potted plant with orange flowers.",
  "overall_past": "Before this scene, the individual had been gardening in a small outdoor or indoor space, likely tending to a vegetable patch or container garden. They carefully dug up the carrot from the soil, brushing off some of the dirt with their hands, and then paused to examine and appreciate their harvest. The presence of dirt under their nails and the fresh, earthy scent of the carrot suggest they had just pulled it from the ground moments earlier. The choice to wear blue nail polish while gardening indicates a blend of personal expression and practicality—perhaps they were gardening in their free time, enjoying the process without concern for appearances. The act of bringing the carrot close to their face to smell it reflects a moment of quiet satisfaction, a personal celebration of the fruit of their labor.",
  "overall_past_clean": "The individual had been tending to a cultivated plot, their hands moving deliberately through the soil, loosening the earth around the roots, their focus steady and patient as they prepared for the harvest.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the individual is likely to take a bite out of the carrot, savoring its crisp freshness and earthy flavor, perhaps with a small smile of satisfaction, before placing the remaining piece back into a nearby basket or bag, signaling the end of their gardening session and the beginning of enjoying their homegrown harvest.",
  "overall_future_clean": "The individual steps back, brushing off hands now dusted with soil, then turns toward the kitchen, the crisp snap of the carrot still echoing in memory, already envisioning the meal to come.",
  "future_scene_ok": true
}