{
    "uuid": "34fed469-2cc2-531c-8d93-4e318d5de7c0",
    "question": "Which datasets are used for Multi-Document QA in this paper?",
    "answer_format": "Your answer should be a Python list, where each element is a string containing the name of a dataset.",
    "tags": [
        "objective",
        "single",
        "text"
    ],
    "conference": [],
    "reasoning_steps": [
        "Firstly, find the section(s) about datasets, especially those covering Multi-Document QA.",
        "Then identify the Multi-Document QA datasets used in this paper."
    ],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": [
                "HotpotQA",
                "2WikiMultihopQA",
                "MuSiQue",
                "DuReader"
            ],
            "ignore_order": true
        }
    },
    "state": {
        "gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "d6b892b8-cf43-5b62-bde9-48c070c2e5dc"
    ],
    "reference_pdf": []
}