{
    "uuid": "f640029c-539b-58b1-a742-05b8bb0edacb",
    "question": "What's the biggest reason of incorrect action for each model?",
    "answer_format": "Your answer should be a Python dictionary. e.g. {\"model1\": \"answer1\", \"model2\": \"answer2\", ...}. YOU MUST USE THE EXACT AND FULL TEXT FROM PDF WITHOUT CHANGING CAPITALIZATION.",
    "tags": [
        "image",
        "objective",
        "single"
    ],
    "conference": [],
    "reasoning_steps": [
        "Firstly, locate the section that analyses the incorrect actions of the models.",
        "Find the figure that shows the proportions of correct and incorrect actions for each model.",
        "Finally, identify the biggest reason of incorrect action for each model."
    ],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": {
                "Vicuna-13B": "Invalid Action/Object",
                "OpenChat-3.5": "Object-Mismatched Action",
                "Mixtral-7Bx8": "Invalid Action/Object",
                "Gemini Pro": "Object-Mismatched Action",
                "GPT-3.5-turbo": "Object-Mismatched Action",
                "GPT-4": "Dependency Violation"
            }
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "4ee26cdd-4e52-5090-b1c8-46f5dcdba09c"
    ],
    "reference_pdf": []
}