{
    "uuid": "2a6abc65-d61b-5a48-b3a8-978d92c55720",
    "question": "What is the Overall Fmacro score corresponding to the three baselines in the StanceEval 2024 task? How many baselines did the PICT team surpass?",
    "answer_format": "Your answer should be a python list of four number, The first three are fractions (from largest to smallest), and the last one is a int.",
    "tags": [
        "multiple",
        "text",
        "table",
        "objective"
    ],
    "anchor_pdf": [
        "c2a5a650-b328-5a98-afb0-fd841f66214d",
        "5e3b136c-9d16-5b89-80d1-5316ff78eaa9"
    ],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": [],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": [
                78.89,
                72.81,
                71.34,
                2
            ]
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}