[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "ethene (C2H4)",
            "propene (C3H6)",
            "butene (C4H8)",
            "pentene (C5H10)",
            "hexene (C6H12)",
            "heptene (C7H14)",
            "octene (C8H16)",
            "nonene (C9H18)",
            "decene (C10H20)",
            "undecene (C11H22)",
            "dodecene (C12H24)",
            "tridecene (C13H26)",
            "tetradecene (C14H28)",
            "pentadecene (C15H30)",
            "hexadecene (C16H32)",
            "heptadecene (C17H34)",
            "octadecene (C18H36)",
            "nonadecene (C19H38)",
            "eicosene (C20H40)",
            "heneicosene (C21H42)",
            "docosene (C22H44)",
            "tricosene (C23H46)",
            "tetracosene (C24H48)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "propene",
                "butene",
                "octene",
                "tricosene"
            ],
            "mismatches": [
                "pentene",
                "hexene",
                "heptene",
                "nonene",
                "decene",
                "undecene",
                "dodecene",
                "tridecene",
                "tetradecene",
                "pentadecene",
                "hexadecene",
                "heptadecene",
                "octadecene",
                "nonadecene",
                "eicosene",
                "heneicosene",
                "docosene",
                "tetracosene"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 5,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "cis-2-Butene (C4H8)",
            "trans-2-Butene (C4H8)",
            "1-Pentene (C5H10)",
            "cis-2-Pentene (C5H10)",
            "trans-2-Pentene (C5H10)",
            "1-Hexene (C6H12)",
            "cis-3-Hexene (C6H12)",
            "trans-3-Hexene (C6H12)",
            "1-Heptene (C7H14)",
            "cis-2-Heptene (C7H14)",
            "trans-2-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "cis-3-Octene (C8H16)",
            "trans-3-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "cis-4-Nonene (C9H18)",
            "trans-4-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "cis-5-Decene (C10H20)",
            "trans-5-Decene (C10H20)",
            "2-Methyl-1-butene (C5H10)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "1-Butene (C4H8)",
                "cis-2-Butene (C4H8)",
                "trans-2-Butene (C4H8)",
                "1-Hexene (C6H12)",
                "cis-3-Hexene (C6H12)",
                "trans-3-Hexene (C6H12)",
                "1-Octene (C8H16)",
                "cis-3-Octene (C8H16)",
                "trans-3-Octene (C8H16)",
                "1-Nonene (C9H18)",
                "1-Decene (C10H20)",
                "2-Methyl-1-butene (C5H10)"
            ],
            "mismatches": [
                "1-Pentene (C5H10)",
                "cis-2-Pentene (C5H10)",
                "trans-2-Pentene (C5H10)",
                "1-Heptene (C7H14)",
                "cis-2-Heptene (C7H14)",
                "trans-2-Heptene (C7H14)",
                "cis-4-Nonene (C9H18)",
                "trans-4-Nonene (C9H18)",
                "cis-5-Decene (C10H20)",
                "trans-5-Decene (C10H20)"
            ],
            "true_referents": [
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(E)-3-octene",
                "(Z)-3-octene",
                "1-decene",
                "1-heptadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "3-hexene",
                "3-octene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "butene",
                "cis-3-hexene",
                "cis-alkene",
                "cis-but-2-ene",
                "cis-octadec-9-ene",
                "ethene",
                "octene",
                "propene",
                "trans-3-hexene",
                "trans-but-2-ene",
                "trans-octadec-9-ene"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "2-Butene (C4H8)",
            "Isobutene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "2-Methyl-1-butene (C5H10)",
            "3-Methyl-1-butene (C5H10)",
            "2-Methyl-2-butene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "3-Hexene (C6H12)",
            "2-Methyl-1-pentene (C6H12)",
            "3-Methyl-1-pentene (C6H12)",
            "2-Methyl-2-pentene (C6H12)",
            "4-Methyl-1-pentene (C6H12)",
            "2,3-Dimethyl-1-butene (C6H12)",
            "1-Heptene (C7H14)",
            "2-Heptene (C7H14)",
            "3-Heptene (C7H14)",
            "4-Heptene (C7H14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "1-Butene (C4H8)",
                "2-Butene (C4H8)",
                "3-Hexene (C6H12)",
                "2-Methyl-1-pentene (C6H12)"
            ],
            "mismatches": [
                "Isobutene (C4H8)",
                "1-Pentene (C5H10)",
                "2-Pentene (C5H10)",
                "2-Methyl-1-butene (C5H10)",
                "3-Methyl-1-butene (C5H10)",
                "2-Methyl-2-butene (C5H10)",
                "1-Hexene (C6H12)",
                "2-Hexene (C6H12)",
                "3-Methyl-1-pentene (C6H12)",
                "2-Methyl-2-pentene (C6H12)",
                "4-Methyl-1-pentene (C6H12)",
                "2,3-Dimethyl-1-butene (C6H12)",
                "1-Heptene (C7H14)",
                "2-Heptene (C7H14)",
                "3-Heptene (C7H14)",
                "4-Heptene (C7H14)"
            ],
            "true_referents": [
                "(3E)-2,6-dimethyloct-3-ene",
                "(E)-2-octene",
                "1-heptadecene",
                "1-hexene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "3-hexene",
                "3-methylbut-1-ene",
                "3-methyleneheptane",
                "4,5-dimethyl-1-hexene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "ethene",
                "propene"
            ],
            "TP": 6,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "2-Butene (C4H8)",
            "Isobutylene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "2-Methyl-1-butene (C5H10)",
            "3-Methyl-1-butene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "3-Hexene (C6H12)",
            "1-Heptene (C7H14)",
            "2-Heptene (C7H14)",
            "3-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "2-Octene (C8H16)",
            "3-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "2-Nonene (C9H18)",
            "3-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "2-Decene (C10H20)",
            "3-Decene (C10H20)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "propene",
                "1-hexene",
                "3-hexene",
                "1-octene",
                "2-octene",
                "3-octene",
                "1-nonene",
                "1-decene"
            ],
            "mismatches": [
                "1-butene",
                "2-butene",
                "isobutylene",
                "1-pentene",
                "2-pentene",
                "2-methyl-1-butene",
                "3-methyl-1-butene",
                "2-hexene",
                "1-heptene",
                "2-heptene",
                "3-heptene",
                "2-nonene",
                "3-nonene",
                "2-decene",
                "3-decene"
            ],
            "true_referents": [
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(E)-3-octene",
                "1-decene",
                "1-heptadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "2-octene",
                "3-hexene",
                "3-methylbut-1-ene",
                "3-methyleneheptane",
                "3-octene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "ethene",
                "octene",
                "propene"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "1-Butene (C4H8)",
            "2-Butene (C4H8)",
            "Isobutylene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "3-Methyl-1-butene (C5H10)",
            "2-Methyl-2-butene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "3-Hexene (C6H12)",
            "2-Methyl-1-pentene (C6H12)",
            "3-Methyl-1-pentene (C6H12)",
            "2-Methyl-2-pentene (C6H12)",
            "1-Heptene (C7H14)",
            "2-Heptene (C7H14)",
            "3-Heptene (C7H14)",
            "4-Methyl-1-hexene (C7H14)",
            "1-Octene (C8H16)",
            "2-Octene (C8H16)",
            "3-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "1-Decene (C10H20)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "1-Hexene (C6H12)",
                "3-Hexene (C6H12)",
                "2-Methyl-1-pentene (C6H12)",
                "1-Octene (C8H16)",
                "2-Octene (C8H16)",
                "3-Octene (C8H16)",
                "1-Nonene (C9H18)",
                "1-Decene (C10H20)"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "Propylene (C3H6)",
                "1-Butene (C4H8)",
                "2-Butene (C4H8)",
                "Isobutylene (C4H8)",
                "1-Pentene (C5H10)",
                "2-Pentene (C5H10)",
                "3-Methyl-1-butene (C5H10)",
                "2-Methyl-2-butene (C5H10)",
                "3-Methyl-1-pentene (C6H12)",
                "2-Methyl-2-pentene (C6H12)",
                "1-Heptene (C7H14)",
                "2-Heptene (C7H14)",
                "3-Heptene (C7H14)",
                "4-Methyl-1-hexene (C7H14)"
            ],
            "true_referents": [
                "(3E)-2,6-dimethyloct-3-ene",
                "(E)-2-octene",
                "(E)-3-octene",
                "1-decene",
                "1-heptadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "2-octene",
                "3-hexene",
                "3-methylbut-1-ene",
                "3-methyleneheptane",
                "3-octene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "octene",
                "propene"
            ],
            "TP": 8,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Butylene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Heneicosene (C21H42)",
            "Docosene (C22H44)",
            "Tritriacontene (C23H46)",
            "Tetracontene (C24H48)",
            "Pentacontene (C25H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Octene",
                "Docosene"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "Propylene (C3H6)",
                "Butylene (C4H8)",
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Heptadecene (C17H34)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Heneicosene (C21H42)",
                "Tritriacontene (C23H46)",
                "Tetracontene (C24H48)",
                "Pentacontene (C25H50)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "nonadec-1-ene",
                "octene",
                "propene",
                "trans-3-hexene",
                "tricosene"
            ],
            "TP": 2,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Heneicosene (C21H42)",
            "Docosene (C22H44)",
            "Tricosene (C23H46)",
            "Tetracosene (C24H48)",
            "Pentacosene (C25H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "Butene (C4H8)",
                "Octene (C8H16)",
                "Tricosene (C23H46)"
            ],
            "mismatches": [
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Heptadecene (C17H34)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Heneicosene (C21H42)",
                "Docosene (C22H44)",
                "Tetracosene (C24H48)",
                "Pentacosene (C25H50)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Docosene (C22H44)",
            "Tetracosene (C24H48)",
            "Hexacosene (C26H52)",
            "Octacosene (C28H56)",
            "Triacontene (C30H60)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "Butene (C4H8)",
                "Octene (C8H16)",
                "Docosene (C22H44)"
            ],
            "mismatches": [
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Heptadecene (C17H34)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Tetracosene (C24H48)",
                "Hexacosene (C26H52)",
                "Octacosene (C28H56)",
                "Triacontene (C30H60)"
            ],
            "true_referents": [
                "(10Z)-tricosene",
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octadec-1-ene",
                "octene",
                "propene",
                "trans-octadec-7-ene",
                "tricosene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (ethene)",
            "Propylene (propene)",
            "1-Butene",
            "cis-2-Butene",
            "trans-2-Butene",
            "1-Pentene",
            "cis-2-Pentene",
            "trans-2-Pentene",
            "2-Methyl-1-Butene (isobutylene)",
            "2-Methyl-2-Butene",
            "1-Hexene",
            "cis-2-Hexene",
            "trans-2-Hexene",
            "3-Methyl-1-Pentene",
            "2-Methyl-2-Pentene",
            "1-Heptene",
            "cis-2-Heptene",
            "trans-2-Heptene",
            "1-Octene",
            "cis-2-Octene",
            "trans-2-Octene",
            "1-Nonene",
            "1-Decene",
            "1-Dodecene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "propene",
                "1-hexene",
                "1-octene",
                "1-nonene",
                "1-decene",
                "1-dodecene"
            ],
            "mismatches": [
                "1-butene",
                "cis-2-butene",
                "trans-2-butene",
                "1-pentene",
                "cis-2-pentene",
                "trans-2-pentene",
                "2-methyl-1-butene",
                "2-methyl-2-butene",
                "cis-2-hexene",
                "trans-2-hexene",
                "3-methyl-1-pentene",
                "2-methyl-2-pentene",
                "1-heptene",
                "cis-2-heptene",
                "trans-2-heptene",
                "cis-2-octene",
                "trans-2-octene"
            ],
            "true_referents": [
                "(E)-2-octene",
                "(Z)-2-octene",
                "1-decene",
                "1-dodecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "2-octene",
                "3-hexene",
                "3-methylbut-1-ene",
                "3-methyleneheptane",
                "3-octene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "butene",
                "cis-3-hexene",
                "cis-alkene",
                "cis-but-2-ene",
                "ethene",
                "octene",
                "propene",
                "trans-3-hexene",
                "trans-but-2-ene"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "2-Butene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "3-Hexene (C6H12)",
            "1-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "1-Undecene (C11H22)",
            "1-Dodecene (C12H24)",
            "1-Tridecene (C13H26)",
            "1-Tetradecene (C14H28)",
            "1-Pentadecene (C15H30)",
            "1-Hexadecene (C16H32)",
            "1-Heptadecene (C17H34)",
            "1-Octadecene (C18H36)",
            "1-Nonadecene (C19H38)",
            "1-Eicosene (C20H40)",
            "1-Docosene (C22H44)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "propene",
                "1-hexene",
                "3-hexene",
                "1-octene",
                "1-decene",
                "1-undecene",
                "1-dodecene",
                "1-tridecene",
                "1-tetradecene",
                "1-pentadecene",
                "1-hexadecene",
                "1-heptadecene",
                "1-docosene"
            ],
            "mismatches": [
                "1-butene",
                "2-butene",
                "1-pentene",
                "2-pentene",
                "2-hexene",
                "1-heptene",
                "1-nonene",
                "1-octadecene",
                "1-nonadecene",
                "1-eicosene"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "ethene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "2-Butene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "3-Hexene (C6H12)",
            "1-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "1-Undecene (C11H22)",
            "1-Dodecene (C12H24)",
            "1-Tridecene (C13H26)",
            "1-Tetradecene (C14H28)",
            "1-Pentadecene (C15H30)",
            "1-Hexadecene (C16H32)",
            "1-Heptadecene (C17H34)",
            "1-Octadecene (C18H36)",
            "1-Nonadecene (C19H38)",
            "1-Eicosene (C20H40)",
            "1-Heneicosene (C21H42)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Propene (C3H6)",
                "1-Butene (C4H8)",
                "2-Butene (C4H8)",
                "3-Hexene (C6H12)",
                "1-Octene (C8H16)",
                "1-Nonene (C9H18)",
                "1-Decene (C10H20)",
                "1-Undecene (C11H22)",
                "1-Dodecene (C12H24)",
                "1-Pentadecene (C15H30)",
                "1-Hexadecene (C16H32)",
                "1-Heptadecene (C17H34)"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "1-Pentene (C5H10)",
                "2-Pentene (C5H10)",
                "2-Hexene (C6H12)",
                "1-Heptene (C7H14)",
                "1-Tridecene (C13H26)",
                "1-Tetradecene (C14H28)",
                "1-Octadecene (C18H36)",
                "1-Nonadecene (C19H38)",
                "1-Eicosene (C20H40)",
                "1-Heneicosene (C21H42)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 12,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Heneicosene (C21H42)",
            "Docosene (C22H44)",
            "Tricosene (C23H46)",
            "Tetracosene (C24H48)",
            "Pentacosene (C25H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "propene",
                "butene",
                "octene",
                "tricosene"
            ],
            "mismatches": [
                "pentene",
                "hexene",
                "heptene",
                "nonene",
                "decene",
                "undecene",
                "dodecene",
                "tridecene",
                "tetradecene",
                "pentadecene",
                "hexadecene",
                "heptadecene",
                "octadecene",
                "nonadecene",
                "eicosene",
                "heneicosene",
                "docosene",
                "tetracosene",
                "pentacosene"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "1-Butene (1-C4H8)",
            "2-Butene (2-C4H8)",
            "Isobutylene (i-C4H8)",
            "1-Pentene (1-C5H10)",
            "2-Pentene (2-C5H10)",
            "1-Hexene (1-C6H12)",
            "2-Hexene (2-C6H12)",
            "1-Heptene (1-C7H14)",
            "2-Heptene (2-C7H14)",
            "1-Octene (1-C8H16)",
            "2-Octene (2-C8H16)",
            "1-Nonene (1-C9H18)",
            "2-Nonene (2-C9H18)",
            "1-Decene (1-C10H20)",
            "2-Decene (2-C10H20)",
            "1-Undecene (1-C11H22)",
            "2-Undecene (2-C11H22)",
            "1-Dodecene (1-C12H24)",
            "2-Dodecene (2-C12H24)",
            "1-Tridecene (1-C13H26)",
            "2-Tridecene (2-C13H26)",
            "1-Tetradecene (1-C14H28)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "1-Hexene (1-C6H12)",
                "1-Octene (1-C8H16)",
                "1-Nonene (1-C9H18)",
                "1-Decene (1-C10H20)",
                "1-Undecene (1-C11H22)",
                "1-Dodecene (1-C12H24)",
                "1-Tetradecene (1-C14H28)"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "Propylene (C3H6)",
                "1-Butene (1-C4H8)",
                "2-Butene (2-C4H8)",
                "Isobutylene (i-C4H8)",
                "1-Pentene (1-C5H10)",
                "2-Pentene (2-C5H10)",
                "2-Hexene (2-C6H12)",
                "1-Heptene (1-C7H14)",
                "2-Heptene (2-C7H14)",
                "2-Octene (2-C8H16)",
                "2-Nonene (2-C9H18)",
                "2-Decene (2-C10H20)",
                "2-Undecene (2-C11H22)",
                "2-Dodecene (2-C12H24)",
                "1-Tridecene (1-C13H26)",
                "2-Tridecene (2-C13H26)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(Z)-2-octene",
                "1-decene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "2-octene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "octene",
                "propene",
                "trans-but-2-ene"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "But-1-ene (C4H8)",
            "Isobutene (C4H8)",
            "2-Methyl-1-butene (C5H10)",
            "Hex-1-ene (C6H12)",
            "Hept-1-ene (C7H14)",
            "Oct-1-ene (C8H16)",
            "Non-1-ene (C9H18)",
            "Dec-1-ene (C10H20)",
            "Undec-1-ene (C11H22)",
            "Dodec-1-ene (C12H24)",
            "Tridec-1-ene (C13H26)",
            "Tetradec-1-ene (C14H28)",
            "Pentadec-1-ene (C15H30)",
            "Hexadec-1-ene (C16H32)",
            "Heptadec-1-ene (C17H34)",
            "Octadec-1-ene (C18H36)",
            "Nonadec-1-ene (C19H38)",
            "Icos-1-ene (C20H40)",
            "Heneicos-1-ene (C21H42)",
            "Docos-1-ene (C22H44)",
            "Tricos-1-ene (C23H46)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "But-1-ene (C4H8)",
                "Hexadec-1-ene (C16H32)",
                "Heptadec-1-ene (C17H34)",
                "Nonadec-1-ene (C19H38)",
                "Docos-1-ene (C22H44)",
                "Tricos-1-ene (C23H46)"
            ],
            "mismatches": [
                "Isobutene (C4H8)",
                "2-Methyl-1-butene (C5H10)",
                "Hex-1-ene (C6H12)",
                "Hept-1-ene (C7H14)",
                "Oct-1-ene (C8H16)",
                "Non-1-ene (C9H18)",
                "Dec-1-ene (C10H20)",
                "Undec-1-ene (C11H22)",
                "Dodec-1-ene (C12H24)",
                "Tridec-1-ene (C13H26)",
                "Tetradec-1-ene (C14H28)",
                "Pentadec-1-ene (C15H30)",
                "Octadec-1-ene (C18H36)",
                "Icos-1-ene (C20H40)",
                "Heneicos-1-ene (C21H42)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(3E)-2,6-dimethyloct-3-ene",
                "(4E)-7-methyldec-4-ene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-3-dodecene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "4,5-dimethyl-1-hexene",
                "4-ethyl-1-hexene",
                "5-methyldec-1-ene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-but-2-ene",
                "cis-octadec-7-ene",
                "dec-9-ene-1,2-diol",
                "ethene",
                "nonacos-1-ene",
                "nonadec-1-ene",
                "octadec-1-ene",
                "octadec-2-ene",
                "octadec-7-ene",
                "octadec-9-ene",
                "pentacos-1-ene",
                "propene",
                "trans-octadec-7-ene",
                "tricosene"
            ],
            "TP": 8,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "1-Pentene (C5H10)",
            "1-Hexene (C6H12)",
            "1-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "1-Undecene (C11H22)",
            "1-Dodecene (C12H24)",
            "1-Tridecene (C13H26)",
            "1-Tetradecene (C14H28)",
            "1-Pentadecene (C15H30)",
            "1-Hexadecene (C16H32)",
            "1-Heptadecene (C17H34)",
            "1-Octadecene (C18H36)",
            "1-Nonadecene (C19H38)",
            "1-Icosene (C20H40)",
            "1-Heneicosene (C21H42)",
            "1-Docosene (C22H44)",
            "1-Tricosene (C23H46)",
            "1-Tetracosene (C24H48)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "1-Butene (C4H8)",
                "1-Hexene (C6H12)",
                "1-Octene (C8H16)",
                "1-Nonene (C9H18)",
                "1-Decene (C10H20)",
                "1-Undecene (C11H22)",
                "1-Dodecene (C12H24)",
                "1-Tetradecene (C14H28)",
                "1-Pentadecene (C15H30)",
                "1-Hexadecene (C16H32)",
                "1-Heptadecene (C17H34)",
                "1-Docosene (C22H44)"
            ],
            "mismatches": [
                "1-Pentene (C5H10)",
                "1-Heptene (C7H14)",
                "1-Tridecene (C13H26)",
                "1-Octadecene (C18H36)",
                "1-Nonadecene (C19H38)",
                "1-Icosene (C20H40)",
                "1-Heneicosene (C21H42)",
                "1-Tricosene (C23H46)",
                "1-Tetracosene (C24H48)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 14,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "ethene",
                "Canonical name": "ethene",
                "Abbreviated identifier": "C2H4"
            },
            {
                "Referent": "propene",
                "Canonical name": "propene",
                "Abbreviated identifier": "C3H6"
            },
            {
                "Referent": "butene",
                "Canonical name": "butene",
                "Abbreviated identifier": "C4H8"
            },
            {
                "Referent": "pentene",
                "Canonical name": "pentene",
                "Abbreviated identifier": "C5H10"
            },
            {
                "Referent": "hexene",
                "Canonical name": "hexene",
                "Abbreviated identifier": "C6H12"
            },
            {
                "Referent": "heptene",
                "Canonical name": "heptene",
                "Abbreviated identifier": "C7H14"
            },
            {
                "Referent": "octene",
                "Canonical name": "octene",
                "Abbreviated identifier": "C8H16"
            },
            {
                "Referent": "nonene",
                "Canonical name": "nonene",
                "Abbreviated identifier": "C9H18"
            },
            {
                "Referent": "decene",
                "Canonical name": "decene",
                "Abbreviated identifier": "C10H20"
            },
            {
                "Referent": "undecene",
                "Canonical name": "undecene",
                "Abbreviated identifier": "C11H22"
            },
            {
                "Referent": "dodecene",
                "Canonical name": "dodecene",
                "Abbreviated identifier": "C12H24"
            },
            {
                "Referent": "tridecene",
                "Canonical name": "tridecene",
                "Abbreviated identifier": "C13H26"
            },
            {
                "Referent": "tetradecene",
                "Canonical name": "tetradecene",
                "Abbreviated identifier": "C14H28"
            },
            {
                "Referent": "pentadecene",
                "Canonical name": "pentadecene",
                "Abbreviated identifier": "C15H30"
            },
            {
                "Referent": "hexadecene",
                "Canonical name": "hexadecene",
                "Abbreviated identifier": "C16H32"
            },
            {
                "Referent": "heptadecene",
                "Canonical name": "heptadecene",
                "Abbreviated identifier": "C17H34"
            },
            {
                "Referent": "octadecene",
                "Canonical name": "octadecene",
                "Abbreviated identifier": "C18H36"
            },
            {
                "Referent": "nonadecene",
                "Canonical name": "nonadecene",
                "Abbreviated identifier": "C19H38"
            },
            {
                "Referent": "eicosene",
                "Canonical name": "eicosene",
                "Abbreviated identifier": "C20H40"
            },
            {
                "Referent": "docosene",
                "Canonical name": "docosene",
                "Abbreviated identifier": "C22H44"
            },
            {
                "Referent": "tetracosene",
                "Canonical name": "tetracosene",
                "Abbreviated identifier": "C24H48"
            },
            {
                "Referent": "pentacosene",
                "Canonical name": "pentacosene",
                "Abbreviated identifier": "C26H50"
            },
            {
                "Referent": "hexacosene",
                "Canonical name": "hexacosene",
                "Abbreviated identifier": "C28H52"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "propene",
                "butene",
                "octene",
                "octadecene"
            ],
            "mismatches": [
                "pentene",
                "hexene",
                "heptene",
                "nonene",
                "decene",
                "undecene",
                "dodecene",
                "tridecene",
                "tetradecene",
                "pentadecene",
                "hexadecene",
                "heptadecene",
                "nonadecene",
                "eicosene",
                "docosene",
                "tetracosene",
                "pentacosene",
                "hexacosene"
            ],
            "true_referents": [
                "(10Z)-tricosene",
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-3-dodecene",
                "(E)-3-octene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "3-hexene",
                "butene",
                "ethene",
                "octadecene",
                "octene",
                "propene"
            ],
            "TP": 5,
            "FP": 18,
            "FN": 0
        }
    }
]