{
    "uuid": "3c770698-2830-5eea-9b03-3984091527a3",
    "question": "How many more LLMs are evaluated in ConvBench paper than in MINT paper?",
    "answer_format": "Your answer should be an integer.",
    "tags": [
        "multiple",
        "text",
        "objective"
    ],
    "anchor_pdf": [
        "1cf7ea57-d128-5619-9361-6b35db040c25",
        "09d48a2a-4ad0-5a7f-84ec-557ac57f5830"
    ],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": [
        "Read the experiment section of each paper to calculate the difference."
    ],
    "evaluator": {
        "eval_func": "eval_int_exact_match",
        "eval_kwargs": {
            "gold": 0
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}