{
    "uuid": "b4dcc93d-635a-54c4-be8f-c5ec443d08db",
    "question": "The training dataset used in the paper \"Semiparametric Token-Sequence Co-Supervision\" was filtered down to 42932 instances. What was the original size of this dataset?",
    "answer_format": "Your answer should be a single integer.",
    "tags": [
        "objective",
        "multiple",
        "text"
    ],
    "anchor_pdf": ["e13b0b17-08cb-50fa-b144-a14b676118bf"],
    "reference_pdf": ["23e4f6c4-0d28-52be-8ab4-7aef1c19b5ce"],
    "conference": [],
    "reasoning_steps": [
        "First, identify the training dataset used in the anchor paper.",
        "Then, turn to the original paper that introduces the dataset.",
        "Finally, locate the section describing the training data and extract the dataset's size."
    ],
    "evaluator": {
        "eval_func": "eval_int_exact_match",
        "eval_kwargs": {
            "gold": 150000 
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}