{
    "uuid": "3a86e8ba-3a3c-5cd4-a799-b76cfc9b643f",
    "question": "In the dataset used in the experiment of the paper \"Soft-Label Integration for Robust Toxicity Classification\" containing 3 classes, which two explainability based metrics are applied?",
    "answer_format": "Your answer should be a Python list of 2 strings, the names of the datasets.",
    "tags": [
        "multiple",
        "text",
        "objective"
    ],
    "anchor_pdf": [
        "3c858455-bd01-5499-9960-eaffa5af22e8"
    ],
    "reference_pdf": [
        "ce71bd6d-c5e8-5730-95ab-8e5d96efa77c"
    ],
    "conference": [],
    "reasoning_steps": [],
    "evaluator": {
        "eval_func": "eval_structured_object_exact_match",
        "eval_kwargs": {
            "gold": [
                "Plausibility",
                "Faithfulness"
            ],
            "ignore_order": true,
            "lowercase": true
        }
    },
    "state": {
        "gui-gpt-4o-2024-11-20": false
    },
    "annotator": "human"
}