[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "methane (CH4)",
            "ethane (C2H6)",
            "propane (C3H8)",
            "butane (C4H10)",
            "pentane (C5H12)",
            "hexane (C6H14)",
            "heptane (C7H16)",
            "octane (C8H18)",
            "nonane (C9H20)",
            "decane (C10H22)",
            "benzene (C6H6)",
            "toluene (C7H8)",
            "xylene (C8H10)",
            "ethyne (acetylene, C2H2)",
            "propyne (C3H4)",
            "butyne (C4H6)",
            "pentadiene (C5H8)",
            "hexatriene (C6H10)",
            "cyclohexane (C6H12)",
            "cyclooctane (C8H16)",
            "cyclopentane (C5H10)",
            "cyclopropane (C3H6)",
            "stearic acid (octadecanoic acid, C18H36O2)",
            "palmitic acid (hexadecanoic acid, C16H32O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methane (CH4)",
                "ethane (C2H6)",
                "pentane (C5H12)",
                "hexane (C6H14)",
                "heptane (C7H16)",
                "octane (C8H18)",
                "nonane (C9H20)",
                "decane (C10H22)",
                "benzene (C6H6)",
                "toluene (C7H8)",
                "xylene (C8H10)",
                "propyne (C3H4)",
                "cyclohexane (C6H12)",
                "cyclooctane (C8H16)",
                "cyclopentane (C5H10)",
                "cyclopropane (C3H6)"
            ],
            "mismatches": [
                "propane (C3H8)",
                "butane (C4H10)",
                "ethyne (acetylene, C2H2)",
                "butyne (C4H6)",
                "pentadiene (C5H8)",
                "hexatriene (C6H10)",
                "stearic acid (octadecanoic acid, C18H36O2)",
                "palmitic acid (hexadecanoic acid, C16H32O2)"
            ],
            "true_referents": [
                "(E)-1,3-pentadiene",
                "(E)-7-Pentadecene",
                "(Z)-1,3,5-Hexatriene",
                "1,2,4-trimethyl-Cyclopentane",
                "1,3,5-Heptatriene",
                "1,6-diphenylhexatriene",
                "1-hexene",
                "1-pentadecene",
                "2,3-dimethylpentane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "3-methyl-1-butyne",
                "5,6-dimethyl-Decane",
                "6-phenyldodecane",
                "Cyclooctane",
                "Ethylcyclopentane",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "acetylene",
                "acetylenes",
                "alkane",
                "benzene",
                "benzene-d6",
                "buta-1,3-diyne",
                "butane",
                "butene",
                "cyclohexane",
                "cyclohexene",
                "cyclooctatriene",
                "cyclooctene",
                "cyclopentane",
                "cyclopropane",
                "cyclopropene",
                "decane",
                "ethane",
                "ethene",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "methane",
                "nonane",
                "o-xylene",
                "octadecanedioic acid",
                "octane",
                "p-xylene",
                "pentane",
                "propene",
                "propyne",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylene (C2H4)",
            "Acetylene (C2H2)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Cyclohexane (C6H12)",
            "Cyclopentane (C5H10)",
            "Styrene (C8H8)",
            "Isoprene (C5H8)",
            "Chloroform (CHCl3)",
            "Benzaldehyde (C7H6O)",
            "Toluene diisocyanate (C9H6N2O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Acetylene (C2H2)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Cyclohexane (C6H12)",
                "Cyclopentane (C5H10)",
                "Styrene (C8H8)",
                "Isoprene (C5H8)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Ethylene (C2H4)",
                "Chloroform (CHCl3)",
                "Benzaldehyde (C7H6O)",
                "Toluene diisocyanate (C9H6N2O2)"
            ],
            "true_referents": [
                "(3R,6R)-1,3,4,6-tetrachlorocyclohexa-1,4-diene",
                "1,2,4-trimethyl-Cyclopentane",
                "1-Ethyl-3,5-diisopropyl-benzene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-Dihydroxy-3,6-dimethylbenzaldehyde",
                "3,5-dimethylbenzaldehyde",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "5,6-Dichloro-1,3-cyclohexadiene",
                "5,6-dimethyl-Decane",
                "Ethylcyclopentane",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "acetylene",
                "acyclic acetylene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "beta-hexaprene",
                "butane",
                "butene",
                "cyclohexane",
                "cyclohexene",
                "cyclopentane",
                "decane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "isoprene",
                "methane",
                "naphthalene",
                "nonane",
                "octane",
                "pentane",
                "phenanthrene",
                "propene",
                "styrene",
                "toluene",
                "toluene-d8"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH\u2084)",
            "Ethane (C\u2082H\u2086)",
            "Propane (C\u2083H\u2088)",
            "Butane (C\u2084H\u2081\u2080)",
            "Pentane (C\u2085H\u2081\u2082)",
            "Hexane (C\u2086H\u2081\u2084)",
            "Heptane (C\u2087H\u2081\u2086)",
            "Octane (C\u2088H\u2081\u2088)",
            "Nonane (C\u2089H\u2082\u2080)",
            "Decane (C\u2081\u2080H\u2082\u2082)",
            "Ethylene (C\u2082H\u2084)",
            "Propylene (C\u2083H\u2086)",
            "Butylene (C\u2084H\u2088)",
            "Acetylene (C\u2082H\u2082)",
            "Benzene (C\u2086H\u2086)",
            "Toluene (C\u2087H\u2088)",
            "Xylene (C\u2088H\u2081\u2080)",
            "Naphthalene (C\u2081\u2080H\u2088)",
            "Anthracene (C\u2081\u2084H\u2081\u2080)",
            "Phenanthrene (C\u2081\u2084H\u2081\u2080)",
            "Pyrene (C\u2081\u2086H\u2081\u2080)",
            "Dodecane (C\u2081\u2082H\u2082\u2086)",
            "Hexadecane (C\u2081\u2086H\u2083\u2084)",
            "Eicosane (C\u2082\u2080H\u2084\u2082)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH\u2084)",
                "Ethane (C\u2082H\u2086)",
                "Propane (C\u2083H\u2088)",
                "Butane (C\u2084H\u2081\u2080)",
                "Pentane (C\u2085H\u2081\u2082)",
                "Hexane (C\u2086H\u2081\u2084)",
                "Heptane (C\u2087H\u2081\u2086)",
                "Octane (C\u2088H\u2081\u2088)",
                "Nonane (C\u2089H\u2082\u2080)",
                "Decane (C\u2081\u2080H\u2082\u2082)",
                "Acetylene (C\u2082H\u2082)",
                "Benzene (C\u2086H\u2086)",
                "Toluene (C\u2087H\u2088)",
                "Xylene (C\u2088H\u2081\u2080)",
                "Naphthalene (C\u2081\u2080H\u2088)",
                "Anthracene (C\u2081\u2084H\u2081\u2080)",
                "Phenanthrene (C\u2081\u2084H\u2081\u2080)",
                "Pyrene (C\u2081\u2086H\u2081\u2080)",
                "Dodecane (C\u2081\u2082H\u2082\u2086)",
                "Hexadecane (C\u2081\u2086H\u2083\u2084)"
            ],
            "mismatches": [
                "Ethylene (C\u2082H\u2084)",
                "Propylene (C\u2083H\u2086)",
                "Butylene (C\u2084H\u2088)",
                "Eicosane (C\u2082\u2080H\u2084\u2082)"
            ],
            "true_referents": [
                "(E)-2-dodecene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "10-Eicosene",
                "2,4-dimethylhexane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "5,6-dimethyl-Decane",
                "6-ethyl-2-methyl-Octane",
                "Dibenzo[a,h]pyrene",
                "Eicosene",
                "N-(eicosanoyl)-4-hydroxyeicosasphinganine",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "acetylene",
                "acetylenes",
                "acyclic acetylene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "butadiene",
                "butane",
                "butene",
                "butylbenzene",
                "decane",
                "dodecane",
                "ethane",
                "ethylbenzene",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isopentane",
                "methane",
                "naphthalene",
                "nonane",
                "o-xylene",
                "octane",
                "p-xylene",
                "pentane",
                "phenanthrene",
                "propane",
                "propene",
                "pyrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH\u2084)",
            "Ethane (C\u2082H\u2086)",
            "Propane (C\u2083H\u2088)",
            "Butane (C\u2084H\u2081\u2080)",
            "Pentane (C\u2085H\u2081\u2082)",
            "Hexane (C\u2086H\u2081\u2084)",
            "Cyclohexane (C\u2086H\u2081\u2082)",
            "Benzene (C\u2086H\u2086)",
            "Toluene (C\u2087H\u2088)",
            "Naphthalene (C\u2081\u2080H\u2088)",
            "Octane (C\u2088H\u2081\u2088)",
            "Ethylene (C\u2082H\u2084)",
            "Propylene (C\u2083H\u2086)",
            "Acetylene (C\u2082H\u2082)",
            "Isooctane (C\u2088H\u2081\u2088)",
            "Camphor (C\u2081\u2080H\u2081\u2086O)",
            "Cholesterol (C\u2082\u2087H\u2084\u2086O)",
            "Squalene (C\u2083\u2080H\u2085\u2080)",
            "Retinol (C\u2082\u2080H\u2083\u2080O)",
            "Stearic Acid (C\u2081\u2088H\u2083\u2086O\u2082)",
            "Docosahexaenoic acid (C\u2082\u2082H\u2083\u2082O\u2082)",
            "Polyethylene (-(C\u2082H\u2084)n-)",
            "Polypropylene (-(C\u2083H\u2086)n-)",
            "Bitumen (C\u2085H\u2088 - C\u2083\u2085H\u2087\u2082)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH\u2084)",
                "Ethane (C\u2082H\u2086)",
                "Propane (C\u2083H\u2088)",
                "Butane (C\u2084H\u2081\u2080)",
                "Pentane (C\u2085H\u2081\u2082)",
                "Hexane (C\u2086H\u2081\u2084)",
                "Benzene (C\u2086H\u2086)",
                "Toluene (C\u2087H\u2088)",
                "Naphthalene (C\u2081\u2080H\u2088)",
                "Octane (C\u2088H\u2081\u2088)",
                "Acetylene (C\u2082H\u2082)",
                "Isooctane (C\u2088H\u2081\u2088)",
                "Squalene (C\u2083\u2080H\u2085\u2080)"
            ],
            "mismatches": [
                "Cyclohexane (C\u2086H\u2081\u2082)",
                "Ethylene (C\u2082H\u2084)",
                "Propylene (C\u2083H\u2086)",
                "Camphor (C\u2081\u2080H\u2081\u2086O)",
                "Cholesterol (C\u2082\u2087H\u2084\u2086O)",
                "Retinol (C\u2082\u2080H\u2083\u2080O)",
                "Stearic Acid (C\u2081\u2088H\u2083\u2086O\u2082)",
                "Docosahexaenoic acid (C\u2082\u2082H\u2083\u2082O\u2082)",
                "Polyethylene (-(C\u2082H\u2084)n-)",
                "Polypropylene (-(C\u2083H\u2086)n-)",
                "Bitumen (C\u2085H\u2088 - C\u2083\u2085H\u2087\u2082)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "(5S,6R)-beta-carotene 5,6-epoxide",
                "(6'R)-beta,epsilon-carotene",
                "(7E)-tricosene",
                "1,2-octadecanediol",
                "1,6-dimethyl-Cyclohexene",
                "1-docosene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "2,3,5,8-tetramethyldecane",
                "2,4-dimethylhexane",
                "2,6,10,14-tetramethylheptadecane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "6-Hydroxybenzo[a]pyrene",
                "6-ethyl-2-methyl-Octane",
                "6-phenyldodecane",
                "N-(2-hydroxytetradecanoyl)icosasphinganine",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "acetylene",
                "acetylenes",
                "acyclic acetylene",
                "alkane",
                "benzene",
                "benzene-d6",
                "beta-carotene",
                "butane",
                "butene",
                "cyclohexene",
                "ethane",
                "ethylbenzene",
                "hexadecane",
                "hexane",
                "isobutane",
                "isooctane",
                "isopentane",
                "methane",
                "naphthalene",
                "octadecanedioic acid",
                "octane",
                "olefin",
                "pentane",
                "polycyclic olefin",
                "polyene",
                "propane",
                "propene",
                "quaterrylene",
                "squalene",
                "toluene",
                "toluene-d8"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Butadiene (C4H6)",
            "Acetylene (C2H2)",
            "Cyclohexane (C6H12)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Styrene (C8H8)",
            "Isobutane (C4H10)",
            "Isopentane (C5H12)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methane",
                "ethane",
                "butane",
                "pentane",
                "hexane",
                "heptane",
                "octane",
                "nonane",
                "decane",
                "butadiene",
                "acetylene",
                "cyclohexane",
                "benzene",
                "toluene",
                "xylene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "styrene",
                "isobutane",
                "isopentane"
            ],
            "mismatches": [
                "propane",
                "ethylene",
                "propylene"
            ],
            "true_referents": [
                "1,4-diphenylbutadiene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "5,6-dimethyl-Decane",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "acetylene",
                "acyclic acetylene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "butadiene",
                "butane",
                "butene",
                "cyclohexane",
                "cyclohexene",
                "decane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isooctane",
                "isopentane",
                "methane",
                "naphthalene",
                "nonane",
                "o-xylene",
                "octane",
                "p-xylene",
                "pentane",
                "phenanthrene",
                "propene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Cyclohexane (C6H12)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Phenanthrene (C14H10)",
            "Anthracene (C14H10)",
            "Cyclopentane (C5H10)",
            "Cyclobutane (C4H8)",
            "Cyclooctane (C8H16)",
            "Dodecane (C12H26)",
            "Hexadecane (C16H34)",
            "Octadecane (C18H38)",
            "Eicosane (C20H42)",
            "Squalene (C30H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Cyclohexane (C6H12)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Phenanthrene (C14H10)",
                "Anthracene (C14H10)",
                "Cyclopentane (C5H10)",
                "Cyclobutane (C4H8)",
                "Cyclooctane (C8H16)",
                "Dodecane (C12H26)",
                "Hexadecane (C16H34)",
                "Octadecane (C18H38)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Eicosane (C20H42)",
                "Squalene (C30H50)"
            ],
            "true_referents": [
                "(E)-2-dodecene",
                "(E,Z)-2,4-Dodecadiene",
                "1,2,4-trimethyl-Cyclopentane",
                "1-hexene",
                "10-Eicosene",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,6,8-trimethyldecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "5,6-dimethyl-Decane",
                "Cyclooctane",
                "Eicosene",
                "Ethylcyclopentane",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "butane",
                "butene",
                "cyclobutane",
                "cyclobutene",
                "cyclohexane",
                "cyclohexene",
                "cyclooctatriene",
                "cyclooctene",
                "cyclopentane",
                "decane",
                "dodecane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "methane",
                "naphthalene",
                "nonane",
                "o-xylene",
                "octadecane",
                "octane",
                "p-xylene",
                "pentane",
                "phenanthrene",
                "propene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 22,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Ethylene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Propene (C3H6)",
                "Butene (C4H8)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Naphthalene (C10H8)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Ethylene (C2H4)",
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Octene (C8H16)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)"
            ],
            "true_referents": [
                "(E)-2-octene",
                "(E)-3,4-dimethyl-2-Pentene",
                "1-Heptene",
                "1-Pentene",
                "1-decene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "2,3-dimethylpentane",
                "2,4-Dimethyl-2-pentene",
                "2,4-dimethyl-1-heptene",
                "2-Heptene",
                "2-Hexene",
                "2-methyl-1-Decene",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-hexene",
                "4-ethyl-1-hexene",
                "5,6-dimethyl-Decane",
                "6-methyl- 4E-decene",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "but-1-ene",
                "butadiene",
                "butane",
                "butene",
                "decane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "methane",
                "naphthalene",
                "nonacene",
                "nonane",
                "octane",
                "octene",
                "pentane",
                "phenanthrene",
                "propene",
                "toluene",
                "toluene-d8"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Isopropanol (C3H8O)",
            "Butanol (C4H9OH)",
            "Pentanol (C5H11OH)",
            "Hexanol (C6H13OH)",
            "Heptanol (C7H15OH)",
            "Octanol (C8H17OH)",
            "Nonanol (C9H19OH)",
            "Decanol (C10H21OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Isopropanol (C3H8O)",
                "Butanol (C4H9OH)",
                "Pentanol (C5H11OH)",
                "Hexanol (C6H13OH)",
                "Heptanol (C7H15OH)",
                "Octanol (C8H17OH)",
                "Nonanol (C9H19OH)",
                "Decanol (C10H21OH)"
            ],
            "true_referents": [
                "1-Hexacosene",
                "1-Pentene",
                "1-hexene",
                "2,2,4,4,6,8,8-heptamethylnonane",
                "2,3-dimethylpentane",
                "2,6-dimethyl-Nonane",
                "2-Isopropyl-1,4-hexadiene",
                "2-Methyl-1-phenyl-2-propanol",
                "3,3-dimethylhexane",
                "3,5-dimethyl-Heptane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3,7-dimethyl-Decane",
                "3-Phenyl-1-propanol",
                "3-ethylpentane",
                "3-methylbenzyl alcohol",
                "4-isopropylbenzyl alcohol",
                "5,6-dimethyl-Decane",
                "Benzeneheptanol, 3-hydroxy-4-[(1r,3s)-3-hydroxycyclohexyl]-.eta.,.eta.-dimethyl-, rel-",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Nonane, 5-methyl-5-propyl-",
                "acene",
                "alkane",
                "benzene",
                "benzene-d6",
                "butane",
                "butene",
                "decane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexacosane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isooctane",
                "isopentane",
                "methane",
                "naphthalene",
                "nonane",
                "o-xylene",
                "octacosane",
                "octane",
                "p-xylene",
                "pentane",
                "propene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH\u2084)",
            "Ethane (C\u2082H\u2086)",
            "Propane (C\u2083H\u2088)",
            "Butane (C\u2084H\u2081\u2080)",
            "Pentane (C\u2085H\u2081\u2082)",
            "Hexane (C\u2086H\u2081\u2084)",
            "Heptane (C\u2087H\u2081\u2086)",
            "Octane (C\u2088H\u2081\u2088)",
            "Nonane (C\u2089H\u2082\u2080)",
            "Decane (C\u2081\u2080H\u2082\u2082)",
            "Ethylene (Ethene, C\u2082H\u2084)",
            "Acetylene (Ethyne, C\u2082H\u2082)",
            "Benzene (C\u2086H\u2086)",
            "Toluene (C\u2087H\u2088)",
            "Xylene (C\u2088H\u2081\u2080)",
            "Cyclohexane (C\u2086H\u2081\u2082)",
            "Naphthalene (C\u2081\u2080H\u2088)",
            "Isobutane (2-Methylpropane, C\u2084H\u2081\u2080)",
            "Cyclopropane (C\u2083H\u2086)",
            "Cis-2-Butene (C\u2084H\u2088)",
            "Trans-2-Butene (C\u2084H\u2088)",
            "Anthracene (C\u2081\u2084H\u2081\u2080)",
            "Styrene (C\u2088H\u2088)",
            "Methylcyclohexane (C\u2087H\u2081\u2084)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methane",
                "ethane",
                "propane",
                "butane",
                "pentane",
                "hexane",
                "heptane",
                "octane",
                "nonane",
                "decane",
                "ethene",
                "acetylene",
                "benzene",
                "toluene",
                "xylene",
                "isobutane",
                "cyclopropane",
                "cis-but-2-ene",
                "trans-but-2-ene",
                "anthracene",
                "styrene",
                "methylcyclohexane"
            ],
            "mismatches": [
                "cyclohexane",
                "ethylene",
                "naphthalene"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "(1-methylethylidene)-Cyclohexane",
                "1,6-dimethyl-Cyclohexene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "1-methyl-Cyclohexene",
                "2,3,5,8-tetramethyldecane",
                "2,4-dimethylhexane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "5,6-dimethyl-Decane",
                "6-ethyl-2-methyl-Octane",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "acetylene",
                "acetylenes",
                "acyclic acetylene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "butane",
                "butene",
                "cis-alkene",
                "cis-but-2-ene",
                "cyclohexene",
                "cyclopropane",
                "cyclopropene",
                "decane",
                "ethane",
                "ethene",
                "heptadecane",
                "heptane",
                "hexane",
                "isobutane",
                "isooctane",
                "isopentane",
                "methane",
                "methylcyclohexane",
                "naphthalene",
                "nonane",
                "o-xylene",
                "octane",
                "p-xylene",
                "pentane",
                "propane",
                "propene",
                "styrene",
                "toluene",
                "toluene-d8",
                "trans-but-2-ene",
                "vinylarene",
                "xylene"
            ],
            "TP": 22,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylene (C2H4)",
            "Acetylene (C2H2)",
            "Cyclohexane (C6H12)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Isoprene (C5H8)",
            "Styrene (C8H8)",
            "Limonene (C10H16)",
            "Cyclopentane (C5H10)",
            "Isobutane (C4H10)",
            "Propylene (C3H6)",
            "Xylene (C8H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Acetylene (C2H2)",
                "Cyclohexane (C6H12)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Isoprene (C5H8)",
                "Styrene (C8H8)",
                "Cyclopentane (C5H10)",
                "Isobutane (C4H10)",
                "Xylene (C8H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Ethylene (C2H4)",
                "Limonene (C10H16)",
                "Propylene (C3H6)"
            ],
            "true_referents": [
                "(4S)-limonene",
                "1,2,4-trimethyl-Cyclopentane",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "Ethylcyclopentane",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "acetylene",
                "acyclic acetylene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "beta-hexaprene",
                "butane",
                "butene",
                "cyclohexane",
                "cyclohexene",
                "cyclopentane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isooctane",
                "isopentane",
                "isoprene",
                "methane",
                "naphthalene",
                "o-xylene",
                "octane",
                "p-xylene",
                "pentane",
                "phenanthrene",
                "propene",
                "pyrene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Cyclohexane (C6H12)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylbenzene (C8H10)",
            "Styrene (C8H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Coronene (C24H12)",
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Squalene (C30H50)",
            "\u03b2-Carotene (C40H56)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Cyclohexane (C6H12)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Ethylbenzene (C8H10)",
                "Styrene (C8H8)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Coronene (C24H12)",
                "Pinene (C10H16)",
                "Squalene (C30H50)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Limonene (C10H16)",
                "\u03b2-Carotene (C40H56)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "(4S)-limonene",
                "(6'R)-beta,epsilon-carotene",
                "(6'S)-beta,epsilon-carotene",
                "1-hexene",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "3,3-dimethylhexane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "5,6-dimethyl-Decane",
                "A-Pinene",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "beta-carotene",
                "beta-pinene",
                "butane",
                "butene",
                "coronene",
                "cyclohexane",
                "cyclohexene",
                "decane",
                "ethane",
                "ethylbenzene",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "methane",
                "methylbenzene",
                "naphthalene",
                "nonane",
                "octane",
                "pentane",
                "phenanthrene",
                "pinene",
                "propene",
                "pyrene",
                "squalene",
                "styrene",
                "toluene",
                "toluene-d8"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Butene (C4H8)",
            "Isoprene (C5H8)",
            "Acetylene (C2H2)",
            "Cyclohexane (C6H12)",
            "Isooctane (C8H18)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Butene (C4H8)",
                "Isoprene (C5H8)",
                "Acetylene (C2H2)",
                "Cyclohexane (C6H12)",
                "Isooctane (C8H18)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Ethylene (C2H4)",
                "Propylene (C3H6)"
            ],
            "true_referents": [
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "2,3-dimethylpentane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "5,6-dimethyl-Decane",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "acetylene",
                "acyclic acetylene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "beta-hexaprene",
                "but-1-ene",
                "butadiene",
                "butane",
                "butene",
                "cyclohexane",
                "cyclohexene",
                "decane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isooctane",
                "isopentane",
                "isoprene",
                "methane",
                "naphthalene",
                "nonane",
                "o-xylene",
                "octane",
                "p-xylene",
                "pentane",
                "phenanthrene",
                "propene",
                "pyrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 20,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Butylene (C4H8)",
            "Isobutane (C4H10)",
            "Isopentane (C5H12)",
            "Cyclohexane (C6H12)",
            "Cyclopentane (C5H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Isobutane (C4H10)",
                "Isopentane (C5H12)",
                "Cyclohexane (C6H12)",
                "Cyclopentane (C5H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Ethylene (C2H4)",
                "Propylene (C3H6)",
                "Butylene (C4H8)"
            ],
            "true_referents": [
                "1,2,4-trimethyl-Cyclopentane",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1-hexene",
                "2,3-dimethylpentane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "4-ethyl-1-hexene",
                "5,6-dimethyl-Decane",
                "Ethylcyclopentane",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "alkane",
                "anthracene",
                "benzene",
                "benzene-d6",
                "butadiene",
                "butane",
                "butene",
                "butylbenzene",
                "cyclohexane",
                "cyclohexene",
                "cyclopentane",
                "decane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isooctane",
                "isopentane",
                "methane",
                "naphthalene",
                "nonane",
                "o-xylene",
                "octane",
                "p-xylene",
                "pentane",
                "phenanthrene",
                "propene",
                "pyrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Methane (CH4)",
            "Ethene (C2H4)",
            "Propene (C3H6)",
            "Butene (C4H8)",
            "Pentene (C5H10)",
            "Hexene (C6H12)",
            "Heptene (C7H14)",
            "Octene (C8H16)",
            "Nonene (C9H18)",
            "Decene (C10H20)",
            "Isobutane (C4H10)",
            "Isopentane (C5H12)",
            "Isohexane (C6H14)",
            "Isopentene (C5H10)",
            "Isobutene (C4H8)",
            "Toluene (C6H5CH3)",
            "Xylene (C6H4(CH3)2)",
            "Ethylbenzene (C6H5CH2CH3)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethane (C2H6)",
                "Propene (C3H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Methane (CH4)",
                "Ethene (C2H4)",
                "Butene (C4H8)",
                "Isobutane (C4H10)",
                "Isopentane (C5H12)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Pentene (C5H10)",
                "Hexene (C6H12)",
                "Heptene (C7H14)",
                "Octene (C8H16)",
                "Nonene (C9H18)",
                "Decene (C10H20)",
                "Isohexane (C6H14)",
                "Isopentene (C5H10)",
                "Isobutene (C4H8)",
                "Toluene (C6H5CH3)",
                "Xylene (C6H4(CH3)2)",
                "Ethylbenzene (C6H5CH2CH3)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)"
            ],
            "true_referents": [
                "(E)-2-octene",
                "(E)-3,4-dimethyl-2-Pentene",
                "(E)-4,4-Dimethyl-2-pentene",
                "1-Heptene",
                "1-Pentene",
                "1-decene",
                "1-hexene",
                "1-nonene",
                "1-octene",
                "2,3-dimethylpentane",
                "2,4-Dimethyl-2-pentene",
                "2,4-dimethyl-1-heptene",
                "2,4-dimethylhexane",
                "2-Heptene",
                "2-Hexene",
                "2-methyl-1-Decene",
                "3,3-dimethylhexane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-hexene",
                "5,6-dimethyl-Decane",
                "6-methyl- 4E-decene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "alkane",
                "but-1-ene",
                "butadiene",
                "butane",
                "butene",
                "decane",
                "ethane",
                "ethene",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isobutylbenzene",
                "isooctane",
                "isopentane",
                "methane",
                "nonacene",
                "nonane",
                "octane",
                "octene",
                "pentane",
                "propene"
            ],
            "TP": 14,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Dodecane (C12H26)",
            "Cholesterol",
            "Squalene",
            "Lycopene",
            "Beta-Carotene",
            "Geraniol",
            "Farnesol",
            "Squalane",
            "Mineral oil",
            "Petrolatum",
            "Paraffin wax",
            "Naphthalene",
            "Anthracene",
            "Toluene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Dodecane (C12H26)",
                "Squalene",
                "Lycopene",
                "Beta-Carotene",
                "Naphthalene",
                "Anthracene",
                "Toluene"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Cholesterol",
                "Geraniol",
                "Farnesol",
                "Squalane",
                "Mineral oil",
                "Petrolatum",
                "Paraffin wax"
            ],
            "true_referents": [
                "(-)-alpha-acorenol",
                "(E)-2-dodecene",
                "(E,Z)-2,4-Dodecadiene",
                "1-docosene",
                "1-hexene",
                "2,3-dimethylpentane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "5,6-dimethyl-Decane",
                "7,7',9,9'-tetra-cis-lycopene",
                "9-cis-lycopene",
                "Naphtho[2,3-e]pyrene",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "acene",
                "alkane",
                "alpha-carotene",
                "anthracene",
                "benzene",
                "beta-carotene",
                "butane",
                "butene",
                "carotene",
                "chrysene",
                "decane",
                "dodecane",
                "ethane",
                "farnesane",
                "gamma-carotene",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "limonene",
                "lycopene",
                "methane",
                "naphthalene",
                "nonane",
                "octadecanedioic acid",
                "octane",
                "pentane",
                "propene",
                "serrulatane",
                "sobralene",
                "squalene",
                "toluene",
                "tricosene",
                "xylene"
            ],
            "TP": 15,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "hydrocarbon",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Alkane",
                "Canonical name": "Alkane"
            },
            {
                "Referent": "Alkenes",
                "Canonical name": "Alkenes"
            },
            {
                "Referent": "Alkynes",
                "Canonical name": "Alkynes"
            },
            {
                "Referent": "Alkanols",
                "Canonical name": "Alkanols"
            },
            {
                "Referent": "Alkanoic acids",
                "Canonical name": "Alkanoic acids"
            },
            {
                "Referent": "Alkanoic acid",
                "Canonical name": "Alkanoic acid"
            },
            {
                "Referent": "Alkanoic acid, 2-methylpropyl ester",
                "Canonical name": "Alkanoic acid, 2-methylpropyl ester"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            },
            {
                "Referent": "Alkane, 2-methylpropyl",
                "Canonical name": "Alkane, 2-methylpropyl"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alkane",
                "Alkenes",
                "Alkynes"
            ],
            "mismatches": [
                "Alkanols",
                "Alkanoic acids",
                "Alkanoic acid",
                "Alkanoic acid, 2-methylpropyl ester",
                "Alkane, 2-methylpropyl"
            ],
            "true_referents": [
                "(+)-cubenene",
                "4,6-dimethyldodecane",
                "Benzenepropanoic acid, 3,5-bis(1,1-dimethylethyl)-4-hydroxy-, methyl ester",
                "Dodecane, 4-methyl-",
                "Hexane, 2,2,5-trimethyl-",
                "acetylenes",
                "alkane",
                "alkene",
                "alkyne",
                "cis-alkene",
                "hexadecane",
                "long-chain alkane",
                "monocyclic hydrocarbon"
            ],
            "TP": 3,
            "FP": 5,
            "FN": 0
        }
    }
]