{
    "uuid": "d49c4e91-ace9-5ba1-a728-6083ffc72194",
    "question": "According to Table 3, on which single-task and on which metric, no multi-task model can outperform the corresponding single-task model?",
    "answer_format": "Your answer should be a Python list of two strings. The first string is the name of the task, and the second string is the name of the metric.",
    "tags": [
        "single",
        "text",
        "table",
        "objective"
    ],
    "anchor_pdf": [
        "c3936fc4-4cf3-5550-b694-4cdc10986752"
    ],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": [
        "Locate Table 3 in the paper and read the text in its caption.",
        "Determine the row corresponding to the single-task models.",
        "Find the column where a single-task model performs best."
    ],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": [
                "TextLM",
                "sBLIMP"
            ],
            "lowercase": true
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}