{
    "uuid": "c4db8fe1-fe59-5b60-af98-b3e8edd5ef16",
    "question": "Which language performs better on old sense IDs compared to new sense IDs during experiments?",
    "answer_format": "Your answer should be a string of a language name.",
    "tags": ["single","table","objective"],
    "anchor_pdf": ["55bc6198-c2b1-518f-9612-8d58ec050f2f"],
    "reference_pdf": [],
    "conference": [],
    "reasoning_steps": [
        "First, locate the content that compares the performance of languages on old sense IDs and new sense IDs.",
        "Second, identify the language that performs better on old sense IDs in the related tabular."
    ],
    "evaluator": {
        "eval_func": "eval_string_exact_match",
        "eval_kwargs": {
            "gold": "Russian",
            "lowercase": true
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}