{
    "uuid": "e26e0a8e-7e6b-55b0-b658-af94309cd496",
    "question": "According to the experimental results, if we remove the document fact attention module and use mean pooling to fuse all document semantic representation vectors, by how much does the F1 score of FINEGRAINFACT decline in summaries generated by pre-trained language models published in or after 2020?",
    "answer_format": "Your answer should be a single Python float",
    "tags": [
        "objective",
        "single",
        "table",
        "text"
    ],
    "conference": [],
    "reasoning_steps": [
        "Locate the table about the performance results in summaries generated by different systems.",
        "If the column names of the table don't match the question, refer to the section on the experiment design to clarify how the different modules and models are named.",
        "Compare and calculate the F1 score drop when switching from the attention module to mean pooling."
    ],
    "evaluator": {
        "eval_func": "eval_float_exact_match",
        "eval_kwargs": {
            "gold": 0.33
        }
    },
    "state": {
        "gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "0e6978a1-3a5d-5fdd-808a-033cc79fb049"
    ],
    "reference_pdf": []
}