{
    "type": "multihoi",
    "question": [
        {
            "type": "multi_image",
            "image_A": "./final_labeling/test_00001176.jpg",
            "image_B": "./final_labeling/train_00011242.jpg",
            "image_C": "./final_labeling/017994.jpg",
            "image_D": "./final_labeling/train_00030935.jpg"
        },
        {
            "type": "text",
            "text": "This is a description of a human-object interaction: \"body part: thigh, action: hold, object: sheep\"\n\nWhich one of four images listed above best represents this interaction? Provide your answer with the corresponding image letter."
        }
    ],
    "answer": {
        "type": "choice",
        "answer": "D"
    },
    "data": {
        "type": "multihoi",
        "object": "sheep",
        "hoi": [
            [
                "thigh",
                "feed"
            ],
            [
                "thigh",
                "hug"
            ],
            [
                "thigh",
                "hold"
            ]
        ],
        "full": "./final_labeling/train_00030935.jpg",
        "diff_object": "./final_labeling/017994.jpg",
        "object_diff": [
            "sand"
        ],
        "diff_position": "./final_labeling/test_00001176.jpg",
        "position_diff": [
            "body"
        ],
        "distinct": [
            "multihoi-sheep-thigh"
        ],
        "extra_type": "position",
        "extra_diff": [
            "body"
        ],
        "diff_extra": "./final_labeling/train_00011242.jpg"
    }
}