{
    "uuid": "0100f339-d8b0-5277-a73f-e0b3f6b10d0c",
    "question": "For the dataset where \"Before we dive into the answer.\" performs the best in specific setting in the paper that proposes IAP, what deficiencies of previous datasets were raised by the authors of the dataset? Additionally, what variations did the authors propose to address these deficiencies?",
    "answer_format": "Your answer should be a Python list of 2 elements, the first is a string, the deficiencies, the second is a python list of strings, the categories of variations in general as proposed in the paper.",
    "tags": [
        "multiple",
        "text",
        "table",
        "subjective"
    ],
    "anchor_pdf": [
        "0f6aee28-1439-5d69-9173-7d21f9bb0daa"
    ],
    "reference_pdf": [
        "e05cbd04-192e-5761-97ce-7250058cf895",
        "ad5ecb28-5270-5e7b-b161-6d994db6c2f7",
        "a87a7490-623a-54af-bad6-ef68b0757499",
        "c2c5bf1a-3d4a-508e-a217-b3e4b78ce7f7"
    ],
    "conference": [],
    "reasoning_steps": [
        "Read the sections to find the correspondence of prompts and indexs.",
        "Find the table that compares different prompts on different datasets.",
        "Identify the dataset where the prompt performs the best.",
        "Read the corresponding paper to find the deficiencies and variations."
    ],
    "evaluator": {
        "eval_func": "eval_conjunction",
        "eval_kwargs": {
            "eval_func_list": [
                "eval_reference_answer_with_llm",
                "eval_structured_object_exact_match"
            ],
            "eval_kwargs_list": [
                {
                    "reference_answer": "A large number of the problems in ASDiv-A and MAWPS can be correctly answered without even looking at the question. This suggests the presence of patterns in the bodies of MWPs in these datasets that have a direct correlation with the output equation.",
                    "question": "What deficiencies of previous datasets were raised?"
                },
                {
                    "gold": ["Question Sensitivity", "Reasoning Ability", "Structural Invariance"],
                    "ignore_order": true,
                    "ignore_blank": true,
                    "lowercase": true
                }
            ]
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}