{
    "uuid": "eb327f3c-93ea-5851-a544-9c05b109ac16",
    "question": "In the experiments, which datasets did the authors use, and how many samples are there in the training set of each dataset?",
    "answer_format": "Your answer should be a Python dictionary, where the keys are the names of datasets and the values are the number of samples in the respective training set. e.g. {\"dataset1\": 10, \"dataset2\": 20, ...}.",
    "tags": ["single", "table", "objective"],
    "anchor_pdf": ["4f6a36fe-eac7-58cf-81e9-584f786b2f38"],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": ["First, locate the datasets used in the paper, which are typically found in the experimental section.",
                        "Then, enumerate the datasets mentioned in the paper and record the number of samples in the training set for each dataset."],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {"gold": {"BIOSSES": 60, "CASUAL JUDGEMENT": 90, "EPISTEMIC REASONING": 500,
                                "TEMPORAL SEQUENCE": 300, "IAC Vulnerability Detection": 166, "HOTPOTQA": 50},
                        "ignore_order": true}
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}