{
    "uuid": "5c98eeb0-ae95-530c-85b4-6e7dc3c12ecf",
    "question": "For Llama2 on DialogSum, which newly proposed module contributes more to the improvement of performance? How do the authors further explain why that module works?",
    "answer_format": "Your answer should be a Python list of 2 elements, where the first element is the FULL NAME of the module that contributes more to the improvement of performance, and the second element is a string, explaining why that module works. e.g. [\"module\", \"explanation\"].",
    "tags": [
        "single",
        "subjective",
        "table",
        "text"
    ],
    "anchor_pdf": [
        "0a2c3d8b-dc16-570c-b354-11797aebe290"
    ],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": [
        "Locate the table that lists the performance of Llama2 on DialogSum with different modules utilized.",
        "Find the module that contributes more to the improvement of performance.",
        "Locate the section that explains why that module works.",
        "Understand the explanation and form the final answer."
    ],
    "evaluator": {
        "eval_func": "eval_conjunction",
        "eval_kwargs": {
            "eval_func_list": [
                "eval_string_exact_match",
                "eval_reference_answer_with_llm"
            ],
            "eval_kwargs_list": [
                {
                    "gold": "fusion generation",
                    "lowercase": true
                },
                {
                    "reference_answer": "In our main experiments, we use dialogue as the condition for generating the final summary in the FG. To explore the effect of using such condition, we run experiments without using it, where the results are reported in Table 10. It clearly shows that, compared with the models without using the entire dialogue, our approach is able to generate better summaries, which emphasizes the contribution of the entire dialogue, for the reason that it provides global or environmental information to guide FG identifying useful content produced by the experts.",
                    "question": "How do the authors further explain why Fusion Generation (FG) works?"
                }
            ]
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}