{
  "image_path": "./ref_datasets/hico_det/images/test/test_00001176.jpg",
  "image_id": "test_00001176",
  "width": 426,
  "height": 640,
  "split": "test",
  "source": "zhimeng/hico_det",
  "dataset_index": 1176,
  "annotations": {
    "objects": "[{'id': 175, 'bbox_human': [14, 423, 35, 639], 'bbox_object': [122, 344, 222, 640], 'connection': 1, 'invis': 0}, {'id': 178, 'bbox_human': [19, 419, 35, 635], 'bbox_object': [125, 337, 231, 638], 'connection': 1, 'invis': 0}, {'id': 179, 'bbox_human': [16, 421, 26, 637], 'bbox_object': [126, 359, 210, 639], 'connection': 1, 'invis': 0}]",
    "positive_captions": "[('sheep', 'carry'), ('sheep', 'hold'), ('sheep', 'hug')]",
    "negative_captions": "[('sheep', 'feed'), ('sheep', 'herd'), ('sheep', 'kiss'), ('sheep', 'ride'), ('sheep', 'shear'), ('sheep', 'walk'), ('sheep', 'wash'), ('sheep', 'no_interaction')]",
    "ambiguous_captions": "[('sheep', 'pet')]",
    "positive_objects": "[174, 177, 178]",
    "negative_objects": "[175, 176, 179, 181, 182, 183, 184, 185]",
    "ambiguous_objects": "[180]",
    "size": "[426, 640, 3]"
  },
  "types": [
    "person"
  ],
  "persons": [
    {
      "body_box": 0,
      "skeleton": 0,
      "face_box": 0,
      "qwen_detailing": {
        "background": false,
        "age": "adult",
        "gender": "female",
        "emotion": "neutral",
        "clothing_description": "The person is wearing a dark coat.",
        "clothing": [
          {
            "possible_names": [
              "coat",
              "jacket"
            ],
            "name": "coat",
            "type": "top",
            "color": [
              "black",
              "dark gray"
            ]
          }
        ],
        "objects": [
          {
            "standalone": false,
            "possible_names": [
              "lamb",
              "sheep"
            ],
            "name": "lamb",
            "position": "body"
          }
        ],
        "description": "The person is an adult female in the foreground, holding a lamb close to her body. She appears happy, wearing a dark coat. The setting suggests she might be in a barn or similar indoor area, given the wooden background.",
        "blurry": false,
        "face_seen": true,
        "emotion_description": "The person appears to have a neutral expression, possibly slightly pensive, while holding a lamb. There is no strong indication of happiness, sadness, anger, or surprise.",
        "meaningful": true,
        "story": "The person seems to be in a setting like a farm or petting zoo, holding a young lamb. This could suggest a moment of care or interaction with animals, possibly as part of daily activities or an event involving animals.",
        "race": "white",
        "text": "no_text",
        "text_relationship": "no_text",
        "behaviour": "The person is holding a small lamb close to their body, suggesting a gentle and caring interaction with the animal. Their facial expression appears neutral to slightly concerned, possibly indicating attentiveness to the lamb's well-being or a moment of contemplation. The setting seems to be indoors, likely a barn or similar environment, which supports the idea that they might be involved in animal care or farming activities. The way they cradle the lamb shows a protective stance, hinting at a nurturing role or a bond formed through regular contact with the animal.",
        "intention": "The individual is motivated by a desire to protect and care for the lamb ensuring its safety and well being through close physical contact and attentive presence",
        "intention_ok": true
      },
      "facex_detailing": {
        "landmarks": [
          [
            0.2888655929578222,
            0.2002630299755505
          ],
          [
            0.2849317913404013,
            0.23476518483034203
          ],
          [
            0.2871269044821731,
            0.26792152055672236
          ],
          [
            0.2899243213601116,
            0.29898098079221586
          ],
          [
            0.3017889321210159,
            0.32747767376048226
          ],
          [
            0.3263978182589904,
            0.3525882089776652
          ],
          [
            0.35643898697206433,
            0.37074891946145466
          ],
          [
            0.38631790072865163,
            0.3877439656427929
          ],
          [
            0.4166355454465993,
            0.3961735681763717
          ],
          [
            0.4514373575257903,
            0.3958975614181587
          ],
          [
            0.4842276243776542,
            0.3884506192590509
          ],
          [
            0.5193724338037707,
            0.37885466558592656
          ],
          [
            0.5472418834985622,
            0.3588549869401114
          ],
          [
            0.5669208075039184,
            0.3338813103735447
          ],
          [
            0.5815454159384202,
            0.3073895966368062
          ],
          [
            0.5938526997224026,
            0.28090515828558377
          ],
          [
            0.6074987350095445,
            0.2494070635310241
          ],
          [
            0.3411021186232487,
            0.17837760193007335
          ],
          [
            0.3704644324074329,
            0.16844285950064658
          ],
          [
            0.3994452093048914,
            0.17285568607704982
          ],
          [
            0.4303618651120796,
            0.1822672748672111
          ],
          [
            0.45580836720789625,
            0.19472325635807858
          ],
          [
            0.49772083831265185,
            0.20157142920153484
          ],
          [
            0.5227687136588043,
            0.19666616586702212
          ],
          [
            0.5496394165724736,
            0.19740628371281282
          ],
          [
            0.5721491814299289,
            0.20422422300492013
          ],
          [
            0.5871274360948405,
            0.21758260668388435
          ],
          [
            0.4676071174347824,
            0.2243868473917246
          ],
          [
            0.46202842410501427,
            0.24187569128615513
          ],
          [
            0.46286084474452305,
            0.2599503817835024
          ],
          [
            0.4579386148254477,
            0.27894698573010307
          ],
          [
            0.43219678470706235,
            0.2962681777775288
          ],
          [
            0.4411685829751049,
            0.2996602626783507
          ],
          [
            0.44884050348155213,
            0.3036197004573686
          ],
          [
            0.4598256395136566,
            0.3021291113325527
          ],
          [
            0.4678505834199534,
            0.30066164572324067
          ],
          [
            0.3735216493817642,
            0.20804631880351473
          ],
          [
            0.3938005415086855,
            0.20318140078868185
          ],
          [
            0.41707143569296434,
            0.20741834619215557
          ],
          [
            0.431177317416564,
            0.22528351321816445
          ],
          [
            0.4077742008616027,
            0.2220247001520225
          ],
          [
            0.38919502852508325,
            0.21836146722946848
          ],
          [
            0.4938487462914286,
            0.23565704279712268
          ],
          [
            0.513498685889241,
            0.22421423505459512
          ],
          [
            0.5322776349257655,
            0.22551384530961513
          ],
          [
            0.547598476780892,
            0.2372323291110141
          ],
          [
            0.5319279383525682,
            0.24321466748203552
          ],
          [
            0.5136688249691632,
            0.24005764621709075
          ],
          [
            0.39633236292782126,
            0.32645558384912354
          ],
          [
            0.41671977948215005,
            0.3176807566412857
          ],
          [
            0.43754165832985176,
            0.31318919818316193
          ],
          [
            0.44702628920495624,
            0.31640219667128155
          ],
          [
            0.45850010417116954,
            0.3144316864865167
          ],
          [
            0.46909110998643644,
            0.32554343874965397
          ],
          [
            0.47822127431771005,
            0.33786196378724914
          ],
          [
            0.46168226703231885,
            0.35084076821804044
          ],
          [
            0.44663212860614004,
            0.35694311229246
          ],
          [
            0.43408390054120705,
            0.3563110695353576
          ],
          [
            0.4255354143804068,
            0.35284709568534584
          ],
          [
            0.4083950262114476,
            0.34270146882959773
          ],
          [
            0.40725146372953097,
            0.32952849300844334
          ],
          [
            0.43686578083486066,
            0.3262055751468454
          ],
          [
            0.4445786866303821,
            0.32959895740662304
          ],
          [
            0.4531199418802857,
            0.33174290667687145
          ],
          [
            0.46766751680815477,
            0.3377253595207419
          ],
          [
            0.45076129773892787,
            0.3381013901105949
          ],
          [
            0.4405112840600017,
            0.3377672565834863
          ],
          [
            0.43112938984538945,
            0.33420704922505784
          ]
        ],
        "visibility": [
          1.0,
          0.04978823661804199,
          0.9999933242797852,
          3.620183122166054e-07,
          1.0,
          1.0,
          8.752453339866406e-08,
          5.892198621376687e-13,
          0.0662103071808815,
          2.785526504077364e-15,
          9.684316574305653e-15,
          4.231797780306166e-15,
          2.073838459182298e-06,
          2.8276001332039336e-12,
          3.476006635305673e-11,
          2.8186905223731933e-18,
          9.114753396488595e-11,
          7.390001111051703e-11,
          5.262819518593901e-16,
          8.5329192600625e-09,
          1.2128377959377724e-22,
          7.298734533423001e-12,
          0.07410920411348343,
          0.42273271083831787,
          7.824234316000656e-12,
          5.141679183112702e-13,
          9.943710210791323e-06,
          0.01778465509414673,
          0.0197280440479517
        ],
        "headpose": {
          "pitch": -7.97640675708855,
          "yaw": -4.635990045372509,
          "roll": 11.088174348601951
        },
        "attributes": {
          "5 oClock Shadow": 4.7417019231943414e-05,
          "Arched Eyebrows": 0.3802426755428314,
          "Attractive": 0.24716715514659882,
          "Bags Under Eyes": 0.08231361210346222,
          "Bald": 1.4269978532155392e-09,
          "Bangs": 0.9270450472831726,
          "Big Lips": 0.28568968176841736,
          "Big Nose": 0.08669285476207733,
          "Black Hair": 0.0020831432193517685,
          "Blond Hair": 0.013317788951098919,
          "Blurry": 0.00023894026526249945,
          "Brown Hair": 0.506397008895874,
          "Bushy Eyebrows": 0.006176650058478117,
          "Chubby": 0.009197203442454338,
          "Double Chin": 0.0010354561964049935,
          "Eyeglasses": 0.000203605683054775,
          "Goatee": 6.697115622955607e-06,
          "Gray Hair": 0.00040337463724426925,
          "Heavy Makeup": 0.6301079392433167,
          "High Cheekbones": 0.05111214518547058,
          "Male": 0.002212046179920435,
          "Mouth Slightly Open": 0.7706233263015747,
          "Mustache": 5.3648777793569025e-06,
          "Narrow Eyes": 0.05893588811159134,
          "No Beard": 0.9998693466186523,
          "Oval Face": 0.351061075925827,
          "Pale Skin": 0.002959662349894643,
          "Pointy Nose": 0.07599285244941711,
          "Receding Hairline": 2.788195342873223e-05,
          "Rosy Cheeks": 0.006877133157104254,
          "Sideburns": 2.954092678919551e-06,
          "Smiling": 0.006387328263372183,
          "Straight Hair": 0.12642110884189606,
          "Wavy Hair": 0.19149337708950043,
          "Wearing Earrings": 0.10126861184835434,
          "Wearing Hat": 0.00034292315831407905,
          "Wearing Lipstick": 0.9190203547477722,
          "Wearing Necklace": 0.02956976741552353,
          "Wearing Necktie": 0.00045021253754384816,
          "Young": 0.971327543258667
        },
        "age": [
          0.5938386917114258,
          0.9757381081581116,
          0.8908506631851196,
          0.2973629832267761,
          0.05173411965370178,
          0.00034882198087871075,
          4.116157288081013e-05,
          2.103587803503615e-06
        ],
        "race": [
          0.9993919134140015,
          0.00044593363418243825,
          0.6874036192893982,
          0.01009326335042715,
          0.16140508651733398
        ],
        "gender": [
          0.0034879508893936872,
          0.9974633455276489
        ]
      },
      "deepface_detailing": {
        "emotion": {
          "angry": 0.348796253092587,
          "disgust": 2.7718954243027838e-05,
          "fear": 8.99127945303917,
          "happy": 0.00015796773595866398,
          "sad": 90.65868258476257,
          "surprise": 2.234053795291402e-05,
          "neutral": 0.0010321598892915063
        },
        "dominant_emotion": "sad",
        "region": {
          "x": 0,
          "y": 0,
          "w": 189,
          "h": 238,
          "left_eye": null,
          "right_eye": null
        },
        "face_confidence": 0.0,
        "age": 31,
        "gender": {
          "Woman": 99.84555244445801,
          "Man": 0.1544521888718009
        },
        "dominant_gender": "Woman",
        "race": {
          "asian": 0.0037873992401528146,
          "indian": 0.0007217528995155626,
          "black": 1.2578917029492175e-05,
          "white": 99.08807288550462,
          "middle eastern": 0.3745251354396182,
          "latino hispanic": 0.5328774748292595
        },
        "dominant_race": "white"
      },
      "hoi": [
        {
          "relationship": {
            "action": [
              [
                "body",
                "carry"
              ],
              [
                "body",
                "hold"
              ],
              [
                "body",
                "hug"
              ]
            ],
            "negative_action": [
              "feed",
              "herd",
              "kiss",
              "ride",
              "shear",
              "walk",
              "wash",
              "no interaction"
            ],
            "position": "body"
          },
          "object": 0
        }
      ]
    }
  ],
  "detect_results": {
    "body_boxes": [
      [
        0.040221020579338074,
        0.05566005781292915,
        0.9558361768722534,
        0.9983730316162109
      ]
    ],
    "face_boxes": [
      [
        0.30624961853027344,
        0.12865248322486877,
        0.6027206182479858,
        0.37717604637145996
      ]
    ],
    "skeletons": [
      {
        "dw_body": [
          [
            -1.0,
            -1.0
          ],
          [
            0.37012162788279923,
            0.41490872952466207
          ],
          [
            0.19085806955660647,
            0.40420031744676327
          ],
          [
            0.5080166727491015,
            0.7545183697094519
          ],
          [
            0.7838067624817057,
            0.5036355724558235
          ],
          [
            0.5493851862089919,
            0.42561714160256087
          ],
          [
            0.884929795383661,
            0.5571776328453172
          ],
          [
            0.7401399982740436,
            0.7132144945518423
          ],
          [
            0.29427935320633314,
            0.9595079723435145
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.5930519504166545,
            0.9457400139576446
          ],
          [
            0.8320700281849117,
            0.8570131710264832
          ],
          [
            0.760824255003989,
            0.9533888797275722
          ],
          [
            0.3977006368560598,
            0.22368708527646958
          ],
          [
            0.5287009294790468,
            0.23898481681632494
          ],
          [
            0.28508635021524614,
            0.24204436312429606
          ],
          [
            0.5930519504166545,
            0.27722914566596357
          ]
        ],
        "dw_hand_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_hand_2": [
          [
            0.7723155087428474,
            0.49445693353191017
          ],
          [
            0.7332452460307286,
            0.4837485214540115
          ],
          [
            0.7079644878052398,
            0.47151033622212707
          ],
          [
            0.6941749833186093,
            0.4760996556840837
          ],
          [
            0.689578481823066,
            0.4822187483000258
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.689578481823066,
            0.5066951187637946
          ],
          [
            0.6826837295797509,
            0.5097546650717655
          ],
          [
            0.7654207564995322,
            0.4929271603779247
          ],
          [
            0.7263504937874132,
            0.5005760261478522
          ],
          [
            0.6987714848141531,
            0.5143439845337221
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.7700172579950757,
            0.5066951187637946
          ],
          [
            0.7585260042562171,
            0.5097546650717655
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            0.7861050132294776,
            0.5174035308416933
          ],
          [
            0.7677190072473039,
            0.5265821697656066
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_face": [
          [
            0.3080688576929632,
            0.21450844635255634
          ],
          [
            0.30347235619741986,
            0.23898481681632494
          ],
          [
            0.30117410544964823,
            0.26499096043407916
          ],
          [
            0.30347235619741986,
            0.29099710405183343
          ],
          [
            0.31036710844073495,
            0.315473474515602
          ],
          [
            0.3264548636751368,
            0.3384200718253851
          ],
          [
            0.3494373711528539,
            0.35830712282719707
          ],
          [
            0.37931463087388606,
            0.3736048543670526
          ],
          [
            0.41378839209046164,
            0.38278349329096584
          ],
          [
            0.45285865480258053,
            0.38278349329096584
          ],
          [
            0.4873324160191561,
            0.3736048543670526
          ],
          [
            0.5172096757401883,
            0.35830712282719707
          ],
          [
            0.5424904339656771,
            0.3399498449793706
          ],
          [
            0.5608764399478505,
            0.3200627939775586
          ],
          [
            0.574665944434481,
            0.2971161966677755
          ],
          [
            0.5861571981733393,
            0.27263982620400695
          ],
          [
            0.5953502011644264,
            0.24816345574023818
          ],
          [
            0.35173562190062563,
            0.19921071481270092
          ],
          [
            0.3747181293783426,
            0.19615116850472986
          ],
          [
            0.3977006368560598,
            0.19768094165871544
          ],
          [
            0.42068314433377674,
            0.20227026112067206
          ],
          [
            0.4436656518114938,
            0.20685958058262868
          ],
          [
            0.5011219205057864,
            0.216038219506542
          ],
          [
            0.5218061772357317,
            0.216038219506542
          ],
          [
            0.5447886847134488,
            0.216038219506542
          ],
          [
            0.565472941443394,
            0.22062753896849863
          ],
          [
            0.5815606966777959,
            0.2282764047384262
          ],
          [
            0.4666481592892108,
            0.23286572420038282
          ],
          [
            0.4620516577936674,
            0.24969322889422382
          ],
          [
            0.4597534070458958,
            0.26499096043407916
          ],
          [
            0.4574551562981239,
            0.2818184651279202
          ],
          [
            0.4229813950815485,
            0.2925268772058189
          ],
          [
            0.43677089956817855,
            0.29558642351379005
          ],
          [
            0.4505604040548089,
            0.298645969821761
          ],
          [
            0.46434990854143915,
            0.3001757429757467
          ],
          [
            0.4781394130280692,
            0.298645969821761
          ],
          [
            0.36782337713502744,
            0.22215731212248394
          ],
          [
            0.3885076338649728,
            0.216038219506542
          ],
          [
            0.41149014134268985,
            0.22062753896849863
          ],
          [
            0.42757789657709183,
            0.23286572420038282
          ],
          [
            0.4045953890993749,
            0.23439549735436832
          ],
          [
            0.3839111323694296,
            0.23133595104639734
          ],
          [
            0.49652541901024294,
            0.24204436312429606
          ],
          [
            0.5172096757401883,
            0.23439549735436832
          ],
          [
            0.5378939324701336,
            0.23592527050835396
          ],
          [
            0.556279938452307,
            0.2451039094322672
          ],
          [
            0.5355956817223618,
            0.24969322889422382
          ],
          [
            0.5149114249924165,
            0.24816345574023818
          ],
          [
            0.3977006368560598,
            0.3261818865935007
          ],
          [
            0.4160866428382332,
            0.3200627939775586
          ],
          [
            0.43677089956817855,
            0.3170032476695875
          ],
          [
            0.44596390255926555,
            0.31853302082357315
          ],
          [
            0.4574551562981239,
            0.31853302082357315
          ],
          [
            0.47354291153252587,
            0.3246521134395152
          ],
          [
            0.48273591452361275,
            0.33536052551741397
          ],
          [
            0.47124466078475424,
            0.3430093912873417
          ],
          [
            0.45515690555035226,
            0.3491284839032838
          ],
          [
            0.43906915031595045,
            0.3491284839032838
          ],
          [
            0.42068314433377674,
            0.3460689375953129
          ],
          [
            0.4068936398471465,
            0.33689029867139964
          ],
          [
            0.4045953890993749,
            0.3277116597474864
          ],
          [
            0.4229813950815485,
            0.3261818865935007
          ],
          [
            0.4436656518114938,
            0.3277116597474864
          ],
          [
            0.4620516577936674,
            0.32924143290147184
          ],
          [
            0.4781394130280692,
            0.33383075236342846
          ],
          [
            0.4597534070458958,
            0.33536052551741397
          ],
          [
            0.4413674010637221,
            0.33536052551741397
          ],
          [
            0.4229813950815485,
            0.332300979209443
          ]
        ],
        "dw_foot_1": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ],
        "dw_foot_2": [
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ],
          [
            -1.0,
            -1.0
          ]
        ]
      }
    ]
  },
  "objects": [
    {
      "name": "sheep",
      "possible_names": [
        "sheep"
      ],
      "box": [
        0.2863849765258216,
        0.346875,
        0.8075117370892019,
        1.0
      ]
    }
  ],
  "scene": "A lamb is being held in an indoor setting with wooden walls and a red rope visible in the background suggesting a farm environment with warm earthy tones and a casual style of interaction between humans and animals",
  "overall_past": "Before this scene, the lamb likely became separated from its mother or was found in a vulnerable state—perhaps weak, injured, or lost—prompting the individual to gently retrieve it from a corner of the barn or a nearby stall. The warm, earthy tones and the presence of the red rope suggest a familiar farm setting, where such care is routine. The person’s protective hold and attentive expression indicate they had just noticed the lamb’s distress or need, possibly after hearing it bleat or spotting it alone, and quickly moved to comfort it, establishing a moment of immediate care and connection.",
  "overall_past_clean": "A small, trembling creature had been left alone in the dim corner of the barn, its faint cries barely audible over the quiet hum of the farm, until a sudden movement drew attention to its distress, prompting an immediate response to lift it from the ground and cradle it close.",
  "past_scene_ok": true,
  "overall_future": "After the current scene, the individual is likely to gently place the lamb down on a soft bedding area, perhaps near a feeding trough or in a quiet corner of the barn, before checking on its health—such as ensuring it is warm, hydrated, and free from injury—before continuing with routine farm care tasks like feeding or cleaning the enclosure.",
  "overall_future_clean": "The individual carefully lowers the lamb onto a cushioned surface, then inspects its body for signs of distress, confirms its breathing is steady, and ensures it is in a safe, sheltered spot before proceeding to prepare food and maintain the surrounding area.",
  "future_scene_ok": true
}