{
    "uuid": "8beb15f1-2c64-5aa7-a1a3-1579452b2ecc",
    "question": "For the specific dataset where CLiCoTEA does not outperform all models, in which languages does this occur?",
    "answer_format": "Your answer should be a Python list of string elements, every element is the abbreviation of a langugage mentioned in the paper, e.g. [\"AR\", \"ES\", \"FR\", ...].",
    "tags": [
        "objective",
        "single",
        "table"
    ],
    "conference": [],
    "reasoning_steps": [
        "Firstly, identify relevant performances (usually in the main experiment table) from the specified PDF for CLiCoTEA and other models.",
        "Analyse and recognize the specific dataset in which CLiCoTEA does not outperform all models.",
        "Extract relevant performances (possibly in the ablation table or appendices) for that dataset.",
        "Finally, analyze the table content to identify the languages where CLiCoTEA underperforms previous methods."
    ],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": [
                "DE",
                "ES",
                "ID",
                "RU",
                "TR"
            ],
            "ignore_order": true,
            "ignore_blank": true
        }
    },
    "state": {
        "gpt-4o-2024-11-20": false
    },
    "annotator": "human",
    "anchor_pdf": [
        "0d6ea045-b831-520d-9b99-ba22a081a403"
    ],
    "reference_pdf": []
}