[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Isobutene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Octadecene (C18H36)",
            "Ethylenediamine (C2H8N2)",
            "Vinyl acetate (C4H6O2)",
            "Vinyl chloride (C2H3Cl)",
            "Vinyl fluoride (C2H3F)",
            "Vinylidene chloride (C2H2Cl2)",
            "2-Butene (C4H8)",
            "3-Hexene (C6H12)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "Butene (C4H8)",
                "Octene (C8H16)",
                "3-Hexene (C6H12)"
            ],
            "mismatches": [
                "Isobutene (C4H8)",
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Octadecene (C18H36)",
                "Ethylenediamine (C2H8N2)",
                "Vinyl acetate (C4H6O2)",
                "Vinyl chloride (C2H3Cl)",
                "Vinyl fluoride (C2H3F)",
                "Vinylidene chloride (C2H2Cl2)",
                "2-Butene (C4H8)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(Z)-1,2-ethenediol",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "ethene",
                "octene",
                "propene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "cis-2-Butene (C4H8)",
            "trans-2-Butene (C4H8)",
            "1-Pentene (C5H10)",
            "cis-2-Pentene (C5H10)",
            "trans-2-Pentene (C5H10)",
            "1-Hexene (C6H12)",
            "cis-3-Hexene (C6H12)",
            "trans-3-Hexene (C6H12)",
            "1-Heptene (C7H14)",
            "cis-2-Heptene (C7H14)",
            "trans-2-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "cis-2-Octene (C8H16)",
            "trans-2-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "cis-3-Nonene (C9H18)",
            "trans-3-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "cis-2-Decene (C10H20)",
            "trans-2-Decene (C10H20)",
            "Chloroethene (Vinyl chloride) (C2H3Cl)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "1-Butene (C4H8)",
                "cis-2-Butene (C4H8)",
                "trans-2-Butene (C4H8)",
                "1-Hexene (C6H12)",
                "cis-3-Hexene (C6H12)",
                "trans-3-Hexene (C6H12)",
                "1-Octene (C8H16)",
                "cis-2-Octene (C8H16)",
                "trans-2-Octene (C8H16)",
                "1-Nonene (C9H18)",
                "1-Decene (C10H20)"
            ],
            "mismatches": [
                "1-Pentene (C5H10)",
                "cis-2-Pentene (C5H10)",
                "trans-2-Pentene (C5H10)",
                "1-Heptene (C7H14)",
                "cis-2-Heptene (C7H14)",
                "trans-2-Heptene (C7H14)",
                "cis-3-Nonene (C9H18)",
                "trans-3-Nonene (C9H18)",
                "cis-2-Decene (C10H20)",
                "trans-2-Decene (C10H20)",
                "Chloroethene (Vinyl chloride) (C2H3Cl)"
            ],
            "true_referents": [
                "(3E)-2,6-dimethyloct-3-ene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(Z)-2-octene",
                "1-decene",
                "1-heptadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-octene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "butene",
                "cis-3-hexene",
                "cis-alkene",
                "cis-but-2-ene",
                "cis-octadec-9-ene",
                "ethene",
                "octene",
                "propene",
                "trans-3-hexene",
                "trans-but-2-ene",
                "trans-octadec-9-ene"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "2-Butene (C4H8)",
            "Isobutene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "2-Methyl-1-butene (C5H10)",
            "2-Methyl-2-butene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "3-Hexene (C6H12)",
            "Cyclopentene (C5H8)",
            "Cyclohexene (C6H10)",
            "1-Heptene (C7H14)",
            "2-Heptene (C7H14)",
            "3-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "2-Octene (C8H16)",
            "3-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "2-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "2-Decene (C10H20)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "1-Butene (C4H8)",
                "2-Butene (C4H8)",
                "1-Hexene (C6H12)",
                "3-Hexene (C6H12)",
                "1-Octene (C8H16)",
                "2-Octene (C8H16)",
                "3-Octene (C8H16)",
                "1-Nonene (C9H18)",
                "1-Decene (C10H20)"
            ],
            "mismatches": [
                "Isobutene (C4H8)",
                "1-Pentene (C5H10)",
                "2-Pentene (C5H10)",
                "2-Methyl-1-butene (C5H10)",
                "2-Methyl-2-butene (C5H10)",
                "2-Hexene (C6H12)",
                "Cyclopentene (C5H8)",
                "Cyclohexene (C6H10)",
                "1-Heptene (C7H14)",
                "2-Heptene (C7H14)",
                "3-Heptene (C7H14)",
                "2-Nonene (C9H18)",
                "2-Decene (C10H20)"
            ],
            "true_referents": [
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-octene",
                "1-decene",
                "1-heptadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "2-octene",
                "3-hexene",
                "3-methyleneheptane",
                "3-octene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "ethene",
                "octene",
                "propene"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (Ethylene)",
            "Propene (Propylene)",
            "But-1-ene",
            "But-2-ene",
            "Isoprene (2-Methyl-1,3-butadiene)",
            "Pent-1-ene",
            "Pent-2-ene",
            "2-Methyl-1-butene",
            "3-Methyl-1-butene",
            "Hex-1-ene",
            "Cyclopentene",
            "Cyclohexene",
            "Oct-1-ene",
            "Dec-1-ene",
            "Dodecene",
            "Tetradecene",
            "Linoleic acid (Octadeca-9,12-dienoic acid)",
            "Oleic acid (cis-9-Octadecenoic acid)",
            "Palmitoleic acid (9-Octadecenoic acid)",
            "Vitamin A (Retinol)",
            "Beta-carotene",
            "Styrene (Vinylbenzene)",
            "Vinyl chloride",
            "Methyl methacrylate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (Ethylene)",
                "Propene (Propylene)",
                "But-1-ene",
                "But-2-ene",
                "Dodecene"
            ],
            "mismatches": [
                "Isoprene (2-Methyl-1,3-butadiene)",
                "Pent-1-ene",
                "Pent-2-ene",
                "2-Methyl-1-butene",
                "3-Methyl-1-butene",
                "Hex-1-ene",
                "Cyclopentene",
                "Cyclohexene",
                "Oct-1-ene",
                "Dec-1-ene",
                "Tetradecene",
                "Linoleic acid (Octadeca-9,12-dienoic acid)",
                "Oleic acid (cis-9-Octadecenoic acid)",
                "Palmitoleic acid (9-Octadecenoic acid)",
                "Vitamin A (Retinol)",
                "Beta-carotene",
                "Styrene (Vinylbenzene)",
                "Vinyl chloride",
                "Methyl methacrylate"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-pentadecene",
                "1-tetradecene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "3-hexene",
                "3-methylbut-1-ene",
                "3-methyleneheptane",
                "4,5-dimethyl-1-hexene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-alkene",
                "cis-octadec-9-ene",
                "dec-9-ene-1,2-diol",
                "dodecene",
                "ethene",
                "octadec-1-ene",
                "octadec-9-ene",
                "octadecene",
                "octene",
                "pentacos-1-ene",
                "propene",
                "trans-but-2-ene",
                "trans-octadec-9-ene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Cyclohexene (C6H10)",
            "Cyclopentene (C5H8)",
            "Cyclooctene (C8H14)",
            "1,3-Butadiene (C4H6)",
            "Isoprene (C5H8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Butene (C4H8)",
                "Octene (C8H16)",
                "Propylene (C3H6)"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Heptadecene (C17H34)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Cyclohexene (C6H10)",
                "Cyclopentene (C5H8)",
                "Cyclooctene (C8H14)",
                "1,3-Butadiene (C4H6)",
                "Isoprene (C5H8)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(E)-3-octene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "nonadec-1-ene",
                "octene",
                "propene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Butylene (C4H8)",
            "Isobutylene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "1-Heptene (C7H14)",
            "1-Octene (C8H16)",
            "1-Nonene (C9H18)",
            "1-Decene (C10H20)",
            "Cyclohexene (C6H10)",
            "Bicyclo[2.2.0]hexene (C6H10)",
            "Vinyl chloride (C2H3Cl)",
            "Styrene (C8H8)",
            "1,3-Butadiene (C4H6)",
            "1,5-Hexadiene (C6H10)",
            "Cyclopentene (C5H8)",
            "2-Methyl-1-propene (C4H8)",
            "3-Hexene (C6H12)",
            "Allyl chloride (C3H5Cl)",
            "2-Pentene (C5H10)",
            "Trans-2-Butene (C4H8)",
            "Cis-2-Butene (C4H8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Propylene (C3H6)",
                "1-Hexene (C6H12)",
                "1-Octene (C8H16)",
                "1-Nonene (C9H18)",
                "1-Decene (C10H20)",
                "3-Hexene (C6H12)",
                "2-Methyl-1-propene (C4H8)",
                "Trans-2-Butene (C4H8)",
                "Cis-2-Butene (C4H8)"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "Butylene (C4H8)",
                "Isobutylene (C4H8)",
                "1-Pentene (C5H10)",
                "2-Pentene (C5H10)",
                "1-Heptene (C7H14)",
                "Cyclohexene (C6H10)",
                "Bicyclo[2.2.0]hexene (C6H10)",
                "Vinyl chloride (C2H3Cl)",
                "Styrene (C8H8)",
                "1,3-Butadiene (C4H6)",
                "1,5-Hexadiene (C6H10)",
                "Cyclopentene (C5H8)",
                "Allyl chloride (C3H5Cl)"
            ],
            "true_referents": [
                "(3E)-2,6-dimethyloct-3-ene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(E)-3-octene",
                "1-decene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylprop-1-ene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "cis-but-2-ene",
                "octene",
                "propene",
                "trans-but-2-ene"
            ],
            "TP": 9,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Heneicosene (C21H42)",
            "Docosene (C22H44)",
            "Tricosene (C23H46)",
            "Tetracosene (C24H48)",
            "Pentacosene (C25H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "propene",
                "butene",
                "octene",
                "tricosene"
            ],
            "mismatches": [
                "pentene",
                "hexene",
                "heptene",
                "nonene",
                "decene",
                "undecene",
                "dodecene",
                "tridecene",
                "tetradecene",
                "pentadecene",
                "hexadecene",
                "heptadecene",
                "octadecene",
                "nonadecene",
                "eicosene",
                "heneicosene",
                "docosene",
                "tetracosene",
                "pentacosene"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Docosene (C22H44)",
            "Tetracosene (C24H48)",
            "Hexacosene (C26H52)",
            "Octacosene (C28H56)",
            "Triacontene (C30H60)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "Butene (C4H8)",
                "Octene (C8H16)",
                "Docosene (C22H44)"
            ],
            "mismatches": [
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Heptadecene (C17H34)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Tetracosene (C24H48)",
                "Hexacosene (C26H52)",
                "Octacosene (C28H56)",
                "Triacontene (C30H60)"
            ],
            "true_referents": [
                "(10Z)-tricosene",
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octadec-1-ene",
                "octene",
                "propene",
                "trans-octadec-7-ene",
                "tricosene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (ethene)",
            "Propylene (propene)",
            "1-Butene",
            "cis-2-Butene",
            "trans-2-Butene",
            "1-Pentene",
            "cis-2-Pentene",
            "trans-2-Pentene",
            "1-Hexene",
            "cis-3-Hexene",
            "trans-3-Hexene",
            "1-Heptene",
            "1-Octene",
            "1-Nonene",
            "Styrene (vinylbenzene)",
            "Cyclohexene",
            "Isobutylene (2-methylpropene)",
            "2-Methyl-1-propene",
            "2-Ethyl-1-butene",
            "2,3-Dimethyl-2-butene",
            "2,4-Dimethyl-2-pentene",
            "3-Methyl-1-butene",
            "4-Methyl-1-pentene",
            "2-Butyl-2-butene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethylene (ethene)",
                "Propylene (propene)",
                "1-Butene",
                "cis-2-Butene",
                "trans-2-Butene",
                "1-Hexene",
                "cis-3-Hexene",
                "trans-3-Hexene",
                "1-Octene",
                "1-Nonene",
                "3-Methyl-1-butene"
            ],
            "mismatches": [
                "1-Pentene",
                "cis-2-Pentene",
                "trans-2-Pentene",
                "1-Heptene",
                "Styrene (vinylbenzene)",
                "Cyclohexene",
                "Isobutylene (2-methylpropene)",
                "2-Methyl-1-propene",
                "2-Ethyl-1-butene",
                "2,3-Dimethyl-2-butene",
                "2,4-Dimethyl-2-pentene",
                "4-Methyl-1-pentene",
                "2-Butyl-2-butene"
            ],
            "true_referents": [
                "1-decene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "2-methylprop-1-ene",
                "3-hexene",
                "3-methylbut-1-ene",
                "3-octene",
                "4,5-dimethyl-1-hexene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "cis-alkene",
                "cis-but-2-ene",
                "ethene",
                "octene",
                "propene",
                "trans-3-hexene",
                "trans-but-2-ene"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "1-Butene",
            "2-Butene",
            "Isobutylene",
            "1-Pentene",
            "2-Pentene",
            "1-Hexene",
            "Cyclohexene",
            "Styrene",
            "Isoprene",
            "Myrcene",
            "Limonene",
            "\u03b1-Pinene",
            "\u03b2-Pinene",
            "Camphene",
            "Anethole",
            "Eugenol",
            "Ocimene",
            "Geraniol",
            "Linalool",
            "Citral",
            "Squalene",
            "Lycopene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "1-Butene",
                "2-Butene",
                "1-Hexene"
            ],
            "mismatches": [
                "Isobutylene",
                "1-Pentene",
                "2-Pentene",
                "Cyclohexene",
                "Styrene",
                "Isoprene",
                "Myrcene",
                "Limonene",
                "\u03b1-Pinene",
                "\u03b2-Pinene",
                "Camphene",
                "Anethole",
                "Eugenol",
                "Ocimene",
                "Geraniol",
                "Linalool",
                "Citral",
                "Squalene",
                "Lycopene"
            ],
            "true_referents": [
                "(7E)-tricosene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-hexene",
                "1-nonene",
                "1-pentadecene",
                "1-undecene",
                "2-methyl-1-pentene",
                "2-octene",
                "3-hexene",
                "3-methylbut-1-ene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-alkene",
                "dodecene",
                "ethene",
                "propene",
                "trans-but-2-ene",
                "tricosene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propene (C3H6)",
            "1-Butene (C4H8)",
            "2-Butene (C4H8)",
            "Isobutylene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Pentene (C5H10)",
            "2-Methyl-1-butene (C5H10)",
            "2-Methyl-2-butene (C5H10)",
            "3-Methyl-1-butene (C5H10)",
            "1-Hexene (C6H12)",
            "2-Hexene (C6H12)",
            "3-Hexene (C6H12)",
            "2-Methyl-1-pentene (C6H12)",
            "3-Methyl-1-pentene (C6H12)",
            "4-Methyl-1-pentene (C6H12)",
            "2-Methyl-2-pentene (C6H12)",
            "3-Methyl-2-pentene (C6H12)",
            "4-Methyl-2-pentene (C6H12)",
            "2,3-Dimethyl-1-butene (C6H12)",
            "2,3-Dimethyl-2-butene (C6H12)",
            "3,3-Dimethyl-1-butene (C6H12)",
            "1-Heptene (C7H14)",
            "1-Octene (C8H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Propene (C3H6)",
                "1-Butene (C4H8)",
                "2-Butene (C4H8)",
                "3-Hexene (C6H12)",
                "2-Methyl-1-pentene (C6H12)",
                "1-Hexene (C6H12)",
                "1-Octene (C8H16)"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "Isobutylene (C4H8)",
                "1-Pentene (C5H10)",
                "2-Pentene (C5H10)",
                "2-Methyl-1-butene (C5H10)",
                "2-Methyl-2-butene (C5H10)",
                "3-Methyl-1-butene (C5H10)",
                "3-Methyl-1-pentene (C6H12)",
                "4-Methyl-1-pentene (C6H12)",
                "2-Methyl-2-pentene (C6H12)",
                "3-Methyl-2-pentene (C6H12)",
                "4-Methyl-2-pentene (C6H12)",
                "2,3-Dimethyl-1-butene (C6H12)",
                "2,3-Dimethyl-2-butene (C6H12)",
                "3,3-Dimethyl-1-butene (C6H12)",
                "1-Heptene (C7H14)"
            ],
            "true_referents": [
                "(3E)-2,6-dimethyloct-3-ene",
                "(E)-2-octene",
                "1-heptadecene",
                "1-hexene",
                "1-octene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "3-hexene",
                "3-methylbut-1-ene",
                "4,5-dimethyl-1-hexene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "cis-3-hexene",
                "octene",
                "propene"
            ],
            "TP": 7,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Isobutene (C4H8)",
            "1-Pentene (C5H10)",
            "2-Methyl-1-butene (C5H10)",
            "2-Methyl-2-butene (C5H10)",
            "Hexene (C6H12)",
            "Cyclohexene (C6H10)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Myrcene (C10H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "Butene (C4H8)",
                "Octene (C8H16)"
            ],
            "mismatches": [
                "Isobutene (C4H8)",
                "1-Pentene (C5H10)",
                "2-Methyl-1-butene (C5H10)",
                "2-Methyl-2-butene (C5H10)",
                "Hexene (C6H12)",
                "Cyclohexene (C6H10)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Heptadecene (C17H34)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Myrcene (C10H16)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "2-methylbut-1-ene",
                "2-methylbut-2-ene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octene",
                "propene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Butylene (C4H8)",
            "Isobutylene (C4H8)",
            "Amylene (C5H10)",
            "Isoamylene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H32)",
            "Heptadecene (C17H34)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Heneicosene (C21H42)",
            "Docosene (C22H44)",
            "Tricosene (C23H46)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Octene",
                "Docosene",
                "Tricosene"
            ],
            "mismatches": [
                "Ethylene (C2H4)",
                "Propylene (C3H6)",
                "Butylene (C4H8)",
                "Isobutylene (C4H8)",
                "Amylene (C5H10)",
                "Isoamylene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H32)",
                "Heptadecene (C17H34)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Heneicosene (C21H42)"
            ],
            "true_referents": [
                "(10Z)-tricosene",
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "7-tricosene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "nonadec-1-ene",
                "octene",
                "propene",
                "tricosene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Undecene (C11H22)",
            "Dodecene (C12H24)",
            "Tridecene (C13H26)",
            "Tetradecene (C14H28)",
            "Pentadecene (C15H30)",
            "Hexadecene (C16H34)",
            "Heptadecene (C17H36)",
            "Octadecene (C18H36)",
            "Nonadecene (C19H38)",
            "Eicosene (C20H40)",
            "Docosene (C22H44)",
            "Tetracosene (C24H48)",
            "Hexacosene (C26H52)",
            "Octacosene (C28H56)",
            "Triacontene (C30H62)",
            "Tetracontene (C34H70)",
            "Hexacontene (C36H74)",
            "Octacontene (C40H82)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "Butene (C4H8)",
                "Octene (C8H16)"
            ],
            "mismatches": [
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Undecene (C11H22)",
                "Dodecene (C12H24)",
                "Tridecene (C13H26)",
                "Tetradecene (C14H28)",
                "Pentadecene (C15H30)",
                "Hexadecene (C16H34)",
                "Heptadecene (C17H36)",
                "Octadecene (C18H36)",
                "Nonadecene (C19H38)",
                "Eicosene (C20H40)",
                "Docosene (C22H44)",
                "Tetracosene (C24H48)",
                "Hexacosene (C26H52)",
                "Octacosene (C28H56)",
                "Triacontene (C30H62)",
                "Tetracontene (C34H70)",
                "Hexacontene (C36H74)",
                "Octacontene (C40H82)"
            ],
            "true_referents": [
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(9Z)-tricosene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "1,2-epoxy-9-decene",
                "1-decene",
                "1-docosene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "3-hexene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "8-methyl-1-undecene",
                "but-1-ene",
                "butene",
                "ethene",
                "nonadec-1-ene",
                "octadec-1-ene",
                "octene",
                "propene",
                "trans-octadec-7-ene",
                "tricosene"
            ],
            "TP": 4,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "But-1-ene (C4H8)",
            "But-2-ene (C4H8)",
            "Pent-1-ene (C5H10)",
            "Pent-2-ene (C5H10)",
            "Hex-1-ene (C6H12)",
            "Hept-1-ene (C7H14)",
            "Oct-1-ene (C8H16)",
            "Non-1-ene (C9H18)",
            "Dec-1-ene (C10H20)",
            "Dodec-1-ene (C12H24)",
            "Camphene (C10H16)",
            "Pinene (C10H16)",
            "Limone (C10H16)",
            "Myrcene (C10H16)",
            "Farnesene (C15H24)",
            "Bisabolene (C15H24)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Norbornene (C7H10)",
            "Dicyclopentadiene (C10H12)",
            "Cyclohexene (C6H10)",
            "Cycloheptene (C7H12)",
            "Cyclooctene (C8H14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethene (C2H4)",
                "Propene (C3H6)",
                "But-1-ene (C4H8)",
                "But-2-ene (C4H8)"
            ],
            "mismatches": [
                "Pent-1-ene (C5H10)",
                "Pent-2-ene (C5H10)",
                "Hex-1-ene (C6H12)",
                "Hept-1-ene (C7H14)",
                "Oct-1-ene (C8H16)",
                "Non-1-ene (C9H18)",
                "Dec-1-ene (C10H20)",
                "Dodec-1-ene (C12H24)",
                "Camphene (C10H16)",
                "Pinene (C10H16)",
                "Limone (C10H16)",
                "Myrcene (C10H16)",
                "Farnesene (C15H24)",
                "Bisabolene (C15H24)",
                "Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Norbornene (C7H10)",
                "Dicyclopentadiene (C10H12)",
                "Cyclohexene (C6H10)",
                "Cycloheptene (C7H12)",
                "Cyclooctene (C8H14)"
            ],
            "true_referents": [
                "(3E)-2,6-dimethyloct-3-ene",
                "(E)-2-dodecene",
                "(E)-2-octene",
                "(E)-3-dodecene",
                "(E)-3-octene",
                "1-decene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-pentadecene",
                "2,4-dimethyl-1-heptene",
                "2-dodecene",
                "2-methyl-1-pentene",
                "4,5-dimethyl-1-hexene",
                "4,6,8-trimethyl-1-nonene",
                "4-ethyl-1-hexene",
                "but-1-ene",
                "but-2-ene",
                "butene",
                "dec-9-ene-1,2-diol",
                "ethene",
                "nonacos-1-ene",
                "nonadec-1-ene",
                "octadec-1-ene",
                "octadec-9-ene",
                "propene",
                "trans-but-2-ene"
            ],
            "TP": 4,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "alkene",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Ethylene",
                "Canonical Name": "Ethene"
            },
            {
                "Referent": "Propylene",
                "Canonical Name": "Propene"
            },
            {
                "Referent": "Butylene",
                "Canonical Name": "Butene"
            },
            {
                "Referent": "Pentylene",
                "Canonical Name": "Pentene"
            },
            {
                "Referent": "Hexylene",
                "Canonical Name": "Hexene"
            },
            {
                "Referent": "Heptylene",
                "Canonical Name": "Heptene"
            },
            {
                "Referent": "Octylene",
                "Canonical Name": "Octene"
            },
            {
                "Referent": "Nonylene",
                "Canonical Name": "Nonene"
            },
            {
                "Referent": "Decylene",
                "Canonical Name": "Decene"
            },
            {
                "Referent": "Undecylene",
                "Canonical Name": "Undecene"
            },
            {
                "Referent": "Dodecylene",
                "Canonical Name": "Dodecene"
            },
            {
                "Referent": "Tridecylene",
                "Canonical Name": "Tridecene"
            },
            {
                "Referent": "Tetradecylene",
                "Canonical Name": "Tetradecene"
            },
            {
                "Referent": "Pentadecylene",
                "Canonical Name": "Pentadecene"
            },
            {
                "Referent": "Hexadecylene",
                "Canonical Name": "Hexadecene"
            },
            {
                "Referent": "Heptadecylene",
                "Canonical Name": "Heptadecene"
            },
            {
                "Referent": "Octadecylene",
                "Canonical Name": "Octadecene"
            },
            {
                "Referent": "Nonadecylene",
                "Canonical Name": "Nonadecene"
            },
            {
                "Referent": "Eicosylene",
                "Canonical Name": "Eicosene"
            },
            {
                "Referent": "Heneicosylene",
                "Canonical Name": "Heneicosene"
            },
            {
                "Referent": "Docosylene",
                "Canonical Name": "Docosene"
            },
            {
                "Referent": "Tricosylene",
                "Canonical Name": "Tricosene"
            },
            {
                "Referent": "Tetracosylene",
                "Canonical Name": "Tetracosene"
            },
            {
                "Referent": "Pentacosylene",
                "Canonical Name": "Pentacosene"
            },
            {
                "Referent": "Hexacosylene",
                "Canonical Name": "Hexacosene"
            },
            {
                "Referent": "Heptacosylene",
                "Canonical Name": "Heptacosene"
            },
            {
                "Referent": "Octacosylene",
                "Canonical Name": "Octacosene"
            },
            {
                "Referent": "Nonacosylene",
                "Canonical Name": "Nonacosene"
            },
            {
                "Referent": "Icosylene",
                "Canonical Name": "Icosene"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Propene",
                "Butene",
                "Octene"
            ],
            "mismatches": [
                "Ethene",
                "Pentene",
                "Hexene",
                "Heptene",
                "Nonene",
                "Decene",
                "Undecene",
                "Dodecene",
                "Tridecene",
                "Tetradecene",
                "Pentadecene",
                "Hexadecene",
                "Heptadecene",
                "Octadecene",
                "Nonadecene",
                "Eicosene",
                "Heneicosene",
                "Docosene",
                "Tricosene",
                "Tetracosene",
                "Pentacosene",
                "Hexacosene",
                "Heptacosene",
                "Octacosene",
                "Nonacosene",
                "Icosene"
            ],
            "true_referents": [
                "(10Z)-tricosene",
                "(2E)-2-tridecene",
                "(7E)-tricosene",
                "(7Z)-tricosene",
                "(9Z)-tricosene",
                "(E)-3-dodecene",
                "(E)-3-octene",
                "(Z)-3-dodecene",
                "1-decene",
                "1-docosene",
                "1-dodecene",
                "1-heptadecene",
                "1-hexadecene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "1-pentadecene",
                "1-tetradecene",
                "1-undecene",
                "2,4-dimethyl-1-heptene",
                "3-hexene",
                "butene",
                "cis-alkene",
                "octene",
                "propene"
            ],
            "TP": 3,
            "FP": 26,
            "FN": 0
        }
    }
]