{
    "uuid": "27d44cad-3277-5e38-9d8a-87f953efe90f",
    "question": "Which datasets in the reading comprehension domain are used for instruction tuning dataset curation in both FLAN and INTERS?",
    "answer_format": "Your answer should be a Python list of strings, the abbreviations of the datasets.",
    "tags": [
        "multiple",
        "text",
        "image",
        "objective"
    ],
    "anchor_pdf": [
        "6cf825a3-6133-57ec-9a68-5789597b122e",
        "7908763f-3a9d-5ce5-af59-f68888750583"
    ],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": [
        "Locate the figures in each paper that list all the datasets.",
        "Find the datasets that are both used."
    ],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": ["SQuAD", "BoolQ"],
            "ignore_order": true,
            "lowercase": true
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}