{
    "uuid": "fbfeab50-b132-5933-95c4-cb5034790ab3",
    "question": "In the respective main experiments of SLED and DoLa, do they use the same evaluation datasets? Do they use the same model family?",
    "answer_format": "Your answer should be a list of two integers, where the first integer is 1 if the evaluation datasets are the same and 0 otherwise, and the second integer is 1 if the model family is the same and 0 otherwise.",
    "tags": [
        "multiple",
        "text",
        "table",
        "objective"
    ],
    "anchor_pdf": [
        "af4ffb53-3311-5964-8b17-a1e9c2a13467",
        "0467b86d-a44f-5df3-a263-173d18b922ef"
    ],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": [],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": [
                1,
                0
            ],
            "ignore_order": false
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}