{
    "uuid": "1bc803b2-807d-5218-891c-da60a470cd93",
    "question": "Which model achieves the highest classification accuracy when the training data consists of 512 pairs of FPQ and TPQs in this paper?",
    "answer_format": "Your answer should be a Python string giving the name of the best model. You'd better use the names as they are referred to in the paper.",
    "tags": [
        "image",
        "objective",
        "single"
    ],
    "conference": [],
    "reasoning_steps": [
        "Usually, the experiment results between models are mentioned in the experiment or result section, especially in the form of tables or figures. Search the corresponding parts.",
        "Find all the experiment results of models when the training data consists of 512 pairs of FPQ and TPQs.",
        "Finally, compare and rank to get the name of the model demonstrating the highest accuracy."
    ],
    "evaluator": {
        "eval_func": "eval_string_exact_match",
        "eval_kwargs": {
            "gold": "MACAW-11B",
            "lowercase": true
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "529875d6-5189-5c4d-9076-1635a01a862d"
    ],
    "reference_pdf": []
}