{
    "uuid": "561f7371-37d2-5940-9171-73472e33cded",
    "question": "Which core NLP problem is mentioned in the paper \"Are Machines Better at Complex Reasoning? Unveiling Human-Machine Inference Gaps in Entailment Verification\", and what is it usually structured as?",
    "answer_format": "Your answer should be a python list of two elements. The first one is a core NLP problem name and you should use the abbreviation as given in the papers. The second one is a python string, describing how the problem is usually structured.",
    "tags": [
        "multiple",
        "text",
        "subjective"
    ],
    "anchor_pdf": [
        "283d3b36-27d2-5459-b625-f2496fa4c35f"
    ],
    "reference_pdf": [
        "02048feb-ade8-5efc-9465-547e1969410d"
    ],
    "conference": [],
    "reasoning_steps": [
        "Find the core NLP problem mentioned in the paper \"Are Machines Better at Complex Reasoning? Unveiling Human-Machine Inference Gaps in Entailment Verification\".",
        "Then search for the paper about the core NLP problem.",
        "Finally, find its structure in the papers."
    ],
    "evaluator": {
        "eval_func": "eval_conjunction",
        "eval_kwargs": {
            "eval_func_list": [
                "eval_string_exact_match",
                "eval_reference_answer_with_llm"
            ],
            "eval_kwargs_list":[
                {
                    "gold": "NLI",
                    "lowercase": true
                },
                {
                    "reference_answer": "It is usually structured as a two or three class classification problem.",
                    "question": "what is NLI usually structured as?"
                }
            ]
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}