[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Acetone (CH3COCH3)",
            "Acetophenone (C6H5COCH3)",
            "Acetic acid (CH3COOH)",
            "Ethyl acetate (CH3COOC2H5)",
            "Benzaldehyde (C6H5CHO)",
            "Benzophenone (C6H5COC6H5)",
            "Butyraldehyde (CH3(CH2)2CHO)",
            "Valeraldehyde (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Methacrolein (CH2=C(CH3)-CHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Pyruvic acid (CH3COCOOH)",
            "Methyl ethyl ketone (CH3COCH2CH3)",
            "Methyl isobutyl ketone (CH3COCH(CH3)CH2CH3)",
            "Methyl propyl ketone (CH3COCH2CH2CH3)",
            "Acetone cyanohydrin (CH3COCH(OH)CN)",
            "Methylglyoxal (OHCCHO)",
            "Glyoxal (OHCCHO)",
            "Diacetyl (CH3COCHCOCH3)",
            "Methyl vinyl ketone (CH3C(O)CH=CH2)",
            "Methyl ethyl amide (CH3COCH2NH2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Acetone (CH3COCH3)",
                "Acetophenone (C6H5COCH3)",
                "Acetic acid (CH3COOH)",
                "Ethyl acetate (CH3COOC2H5)",
                "Benzaldehyde (C6H5CHO)",
                "Benzophenone (C6H5COC6H5)",
                "Valeraldehyde (CH3(CH2)3CHO)",
                "Methacrolein (CH2=C(CH3)-CHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Pyruvic acid (CH3COCOOH)",
                "Methyl isobutyl ketone (CH3COCH(CH3)CH2CH3)",
                "Methylglyoxal (OHCCHO)",
                "Glyoxal (OHCCHO)"
            ],
            "mismatches": [
                "Butyraldehyde (CH3(CH2)2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Methyl ethyl ketone (CH3COCH2CH3)",
                "Methyl propyl ketone (CH3COCH2CH2CH3)",
                "Acetone cyanohydrin (CH3COCH(OH)CN)",
                "Diacetyl (CH3COCHCOCH3)",
                "Methyl vinyl ketone (CH3C(O)CH=CH2)",
                "Methyl ethyl amide (CH3COCH2NH2)"
            ],
            "true_referents": [
                "(E)-2-Butenyl butyrate",
                "(E)-cinnamaldehyde",
                "1,3-Diacetylpropane",
                "2',6'-dimethoxy-4'-hydroxyacetophenone",
                "2,3-butanediol diacetate",
                "2-Butoxyethyl acetate",
                "2-[(9E,12E,15E)-octadecatrienoyl]glycerol",
                "2-cyano-3-(3,4-dihydroxyphenyl)-2-propenamide",
                "3,4-dimethoxy cinnamaldehyde",
                "3,4-dimethyl valeric acid",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "4,4-dimethyl valeric acid",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "Ethyl isobutyl ketone",
                "Ethyl isopropyl ketone",
                "Glycerol 1-propanoate diacetate",
                "Isopropyl 3-methylbutanoate",
                "Methacrolein",
                "Methyl isobutyl ketone",
                "N,N-bis(4-acetamidobutyl)-15-methylhexadec-6E-enamide",
                "N-(2-benzoyl-1-methylvinyl)acetamide",
                "acetaldehyde",
                "acetic acid",
                "acetoacetic acid",
                "acetone",
                "acetophenone",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzophenone",
                "benzophenones",
                "crotonaldehyde",
                "crotonic acid",
                "cyanoacetic acid",
                "dialdehyde",
                "ethyl 3-amino-4-(methylamino)benzoate",
                "ethyl acetate",
                "formaldehyde",
                "glyoxal",
                "methyl ketone",
                "methyl propenyl ketone",
                "methylglyoxal",
                "p-Ethylacetophenone",
                "pyruvic acid",
                "valeraldehyde"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Acetone (CH3COCH3)",
            "Acetophenone (C6H5COCH3)",
            "Benzaldehyde (C6H5CHO)",
            "Acetic acid (CH3COOH)",
            "Propionic acid (CH3CH2COOH)",
            "Butyric acid (CH3CH2CH2COOH)",
            "Valeraldehyde (CH3CH2CH2CH2CHO)",
            "Cyclohexanone (C6H10O)",
            "Methyl ethyl ketone (2-Butanone, MEK, CH3COC2H5)",
            "Phenylacetaldehyde (C6H5CH2CHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Glutaraldehyde (OHC(CH2)3CHO)",
            "Methyl methacrylate (C5H8O2)",
            "Urea (CH4N2O)",
            "Acrolein (CH2=CHCHO)",
            "Acetamide (CH3CONH2)",
            "N,N-Dimethylformamide (DMF, C3H7NO)",
            "Phenyl acetate (C8H8O2)",
            "Ethyl acetate (CH3COOC2H5)",
            "Methyl benzoate (C8H8O2)",
            "Glyoxal (OCHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Acetone (CH3COCH3)",
                "Acetophenone (C6H5COCH3)",
                "Benzaldehyde (C6H5CHO)",
                "Acetic acid (CH3COOH)",
                "Propionic acid (CH3CH2COOH)",
                "Butyric acid (CH3CH2CH2COOH)",
                "Valeraldehyde (CH3CH2CH2CH2CHO)",
                "Cyclohexanone (C6H10O)",
                "Phenylacetaldehyde (C6H5CH2CHO)",
                "Glutaraldehyde (OHC(CH2)3CHO)",
                "Methyl methacrylate (C5H8O2)",
                "Acrolein (CH2=CHCHO)",
                "N,N-Dimethylformamide (DMF, C3H7NO)",
                "Phenyl acetate (C8H8O2)",
                "Ethyl acetate (CH3COOC2H5)",
                "Methyl benzoate (C8H8O2)",
                "Glyoxal (OCHCHO)",
                "Crotonaldehyde (CH3CH=CHCHO)"
            ],
            "mismatches": [
                "Methyl ethyl ketone (2-Butanone, MEK, CH3COC2H5)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Urea (CH4N2O)",
                "Acetamide (CH3CONH2)"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "1-(3-acetylphenyl)-3-(2-pyridinylmethyl)urea",
                "1-Phenylpropyl acetate",
                "1-[1-[oxo(1-pyrrolidinyl)methyl]cyclohexyl]-3-(phenylmethyl)urea",
                "2-ethyl-3-ketopentanoic acid",
                "2-hydroxyphenylacetaldehyde",
                "3,4,5,6-tetrahydroxyoxane-2-carboxylic acid",
                "3,4-dimethoxy cinnamaldehyde",
                "3,4-dimethyl valeric acid",
                "4,4-dimethyl valeric acid",
                "4-methyl-2-pyrroline-5-carboxylic acid",
                "6-[(2-carboxyacetyl)oxy]-3,4,5-trihydroxyoxane-2-carboxylic acid",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "L-trans-4-Methyl-2-pyrrolidinecarboxylic acid",
                "N'-(3-hydroxyphenyl)-N,N-dimethylformamidine",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-isopropyl-N'-(2-oxoazepan-3-yl)urea",
                "acetaldehyde",
                "acetic acid",
                "acetoacetamide",
                "acetoacetic acid",
                "acetone",
                "acetophenone",
                "acrolein",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "butyric acid",
                "crotonaldehyde",
                "crotonic acid",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "ethyl acetate",
                "ethyl propionate",
                "formaldehyde",
                "glutaraldehyde",
                "glyoxal",
                "hydroxycyclohexanone",
                "methyl benzoate",
                "methyl ketone",
                "methyl methacrylate",
                "methyl propenyl ketone",
                "methylglyoxal",
                "p-Ethylacetophenone",
                "phenyl acetate",
                "phenylacetaldehyde",
                "propionic acid",
                "valeraldehyde"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (Methanolal)",
            "Acetaldehyde",
            "Propionaldehyde",
            "Butyraldehyde",
            "Benzaldehyde",
            "Acetone",
            "Methyl Ethyl Ketone (MEK)",
            "Methyl Isobutyl Ketone (MIBK)",
            "Cyclohexanone",
            "Acetic Anhydride",
            "Propionic Anhydride",
            "Benzoyl Chloride",
            "Ethyl Acetate",
            "Methyl Acetate",
            "Acetyl Chloride",
            "Propionyl Chloride",
            "Butyryl Chloride",
            "Benzoyl Peroxide",
            "Glyoxal",
            "Methyl Glyoxal",
            "Diacetyl",
            "Pyruvate",
            "Oxaloacetate",
            "Acetyl CoA"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetaldehyde",
                "benzaldehyde",
                "acetone",
                "methyl isobutyl ketone",
                "cyclohexanone",
                "acetic anhydride",
                "benzoyl chloride",
                "ethyl acetate",
                "methyl acetate",
                "acetyl chloride",
                "benzoyl peroxide",
                "glyoxal",
                "methylglyoxal",
                "oxaloacetic acid",
                "acetyl-CoA"
            ],
            "mismatches": [
                "formaldehyde (methanolal)",
                "propionaldehyde",
                "butyraldehyde",
                "methyl ethyl ketone (mek)",
                "propionic anhydride",
                "propionyl chloride",
                "butyryl chloride",
                "diacetyl",
                "pyruvate",
                "oxaloacetate"
            ],
            "true_referents": [
                "1,3-Diacetylpropane",
                "3,3-dichloro-propionic acid",
                "Acetyl citrate",
                "BenzAldehyde, 3-ethyl-",
                "Benzoyl peroxide",
                "Ethyl isobutyl ketone",
                "Glycerol 1-propanoate diacetate",
                "Methyl isobutyl ketone",
                "acetaldehyde",
                "acetic anhydride",
                "acetic benzoic anhydride",
                "acetoacetyl-CoA",
                "acetone",
                "acetyl",
                "acetyl chloride",
                "acetyl-CoA",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzoyl chloride",
                "benzoyl cyanide",
                "butyl acetate",
                "carboxylic anhydride",
                "cyclohexanone",
                "cyclohexanones",
                "decyl butyrate",
                "dialdehyde",
                "ethyl acetate",
                "ethyl butyrate",
                "ethyl propionate",
                "formaldehyde",
                "glyoxal",
                "hydroxycyclohexanone",
                "isopropyl salicylate",
                "lactaldehyde",
                "methyl acetate",
                "methyl isobutyrate",
                "methyl ketone",
                "methyl propenyl ketone",
                "methyl pyruvate",
                "methylglyoxal",
                "oxaloacetic acid",
                "propionic acid",
                "undecyl butyrate"
            ],
            "TP": 15,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propanal (CH3CH2CHO)",
            "Butanal (CH3CH2CH2CHO)",
            "Pentanone (CH3CH2CH2COCH3)",
            "Acetone (CH3COCH3)",
            "Benzaldehyde (C6H5CHO)",
            "Acetophenone (C6H5COCH3)",
            "Glycolaldehyde (HOCH2CHO)",
            "Pyruvaldehyde (CH3COCHO)",
            "Methyl glyoxal (CH3COCHO)",
            "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
            "Camphorquinone (C10H14O3)",
            "Ribulose-5-phosphate (C5H11O5P)",
            "Fructose (C6H12O6)",
            "Glucose (C6H12O6)",
            "Glyceraldehyde-3-phosphate (C3H7O6P)",
            "Dihydroxyacetone phosphate (C3H7O6P)",
            "Methyl vinyl ketone (CH3COCH=CH2)",
            "Acrylaldehyde (CH2=CHCHO)",
            "Benzoyl Formate (C8H6O3)",
            "Phacelia tanacetifolia ketone (C11H14O)",
            "Cortisone (C21H28O5)",
            "Progesterone (C21H30O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Propanal (CH3CH2CHO)",
                "Butanal (CH3CH2CH2CHO)",
                "Pentanone (CH3CH2CH2COCH3)",
                "Acetone (CH3COCH3)",
                "Benzaldehyde (C6H5CHO)",
                "Acetophenone (C6H5COCH3)",
                "Glycolaldehyde (HOCH2CHO)",
                "Dihydroxyacetone phosphate (C3H7O6P)",
                "Glyceraldehyde-3-phosphate (C3H7O6P)",
                "Cortisone (C21H28O5)",
                "Progesterone (C21H30O2)"
            ],
            "mismatches": [
                "Pyruvaldehyde (CH3COCHO)",
                "Methyl glyoxal (CH3COCHO)",
                "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
                "Camphorquinone (C10H14O3)",
                "Ribulose-5-phosphate (C5H11O5P)",
                "Fructose (C6H12O6)",
                "Glucose (C6H12O6)",
                "Methyl vinyl ketone (CH3COCH=CH2)",
                "Acrylaldehyde (CH2=CHCHO)",
                "Benzoyl Formate (C8H6O3)",
                "Phacelia tanacetifolia ketone (C11H14O)"
            ],
            "true_referents": [
                "(+)-6-endo-hydroxycamphor",
                "(E)-8-Tetradecenyl formate",
                "(benzyloxy)acetic acid",
                "1,5-anhydro-D-fructose",
                "1-(2-carboxyphenylamino)-1-deoxy-D-ribulose 5-phosphate",
                "1-isopropoxyacetone",
                "2,2-dimethylpropanal",
                "2,4-dimethyl-3-pentanone",
                "3,6-anhydro-D-glucose",
                "3-[(4-aminobutyl)amino]propanal",
                "3-deoxy-keto-D-fructose",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "6-phospho-2-dehydro-D-gluconic acid",
                "BenzAldehyde, 3-ethyl-",
                "Benzylformate",
                "CDP-D-ribulose",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "Ethyl acrylate",
                "L-glyceraldehyde 3-phosphate",
                "Methyl acrylate",
                "N-(2-benzoyl-1-methylvinyl)acetamide",
                "Tanacetin",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "butanal",
                "cortisone",
                "dialdehyde",
                "dihydroxyacetone phosphate",
                "ethyl vanillin",
                "formaldehyde",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "hydroxypyruvaldehyde",
                "methyl ketone",
                "methylglyoxal",
                "p-Ethylacetophenone",
                "pentanone",
                "progesterone",
                "propanal",
                "pyridine-3-carbaldehyde",
                "triketone",
                "vanillin acetate"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Acetone (CH3COCH3)",
            "Benzaldehyde (C6H5CHO)",
            "Acetophenone (C6H5COCH3)",
            "Butanal (C4H8O)",
            "Propionaldehyde (C3H6O)",
            "Cyclohexanone (C6H10O)",
            "Ethyl acetate (C4H8O2)",
            "Methyl ethyl ketone (MEK, C4H8O)",
            "Methyl isobutyl ketone (MIBK, C6H12O)",
            "Valeraldehyde (C5H10O)",
            "Hexanal (C6H12O)",
            "Heptanal (C7H14O)",
            "Octanal (C8H16O)",
            "Nonanal (C9H18O)",
            "Decanal (C10H20O)",
            "Acrolein (C3H4O)",
            "Crotonaldehyde (C4H6O)",
            "2-Pentanone (C5H10O)",
            "3-Pentanone (C5H10O)",
            "4-Heptanone (C7H14O)",
            "Butanone (C4H8O)",
            "Phenylacetaldehyde (C8H8O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Acetone (CH3COCH3)",
                "Benzaldehyde (C6H5CHO)",
                "Acetophenone (C6H5COCH3)",
                "Butanal (C4H8O)",
                "Cyclohexanone (C6H10O)",
                "Ethyl acetate (C4H8O2)",
                "Methyl isobutyl ketone (MIBK, C6H12O)",
                "Valeraldehyde (C5H10O)",
                "Hexanal (C6H12O)",
                "Acrolein (C3H4O)",
                "Crotonaldehyde (C4H6O)",
                "4-Heptanone (C7H14O)",
                "Butanone (C4H8O)",
                "Phenylacetaldehyde (C8H8O)"
            ],
            "mismatches": [
                "Propionaldehyde (C3H6O)",
                "Methyl ethyl ketone (MEK, C4H8O)",
                "Heptanal (C7H14O)",
                "Octanal (C8H16O)",
                "Nonanal (C9H18O)",
                "Decanal (C10H20O)",
                "2-Pentanone (C5H10O)",
                "3-Pentanone (C5H10O)"
            ],
            "true_referents": [
                "(3R,5S)-1-pyrroline-3-hydroxy-5-carboxylic acid",
                "(4-Methylphenyl)acetaldehyde",
                "1-Hydroxy-2-pentanone",
                "1-hydroxy-3-propoxyacetone",
                "2,4-dimethyl-3-pentanone",
                "2-Methyl-4-heptanone",
                "2-Nonylheneicosanal",
                "3-Hydroxy-2-pentanone",
                "4,4-Dimethoxy-2-butanone",
                "4,6-heptadienal",
                "4-(O-methyl-aci-nitro)crotonic acid",
                "4-heptanone",
                "4-hydroxy-crotonic acid",
                "4-oxo-4-(pyridin-3-yl)butanal",
                "4-propoxy-2-butanone",
                "5-aceto valeric acid",
                "7-phenyl heptanoic acid",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "Ethyl isobutyl ketone",
                "Ethyl isopropyl ketone",
                "Methyl isobutyl ketone",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "acrolein",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "beta,beta-dimethyl valeric acid",
                "butan-1-ol",
                "butanal",
                "butanone",
                "crotonaldehyde",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "decanal",
                "dialdehyde",
                "ethyl acetate",
                "formaldehyde",
                "heptan-4-ol",
                "hexanal",
                "hydroxycyclohexanone",
                "methyl isobutyrate",
                "methyl ketone",
                "methyl propenyl ketone",
                "non-2-enal",
                "octanal",
                "p-Ethylacetophenone",
                "pentanone",
                "phenylacetaldehyde",
                "valeraldehyde"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (C2H4O)",
            "Formaldehyde (CH2O)",
            "Acetone (C3H6O)",
            "Benzaldehyde (C7H6O)",
            "Propionaldehyde (C3H6O)",
            "Butyraldehyde (C4H8O)",
            "Valeraldehyde (C5H10O)",
            "Crotonaldehyde (C4H6O)",
            "Cinnamaldehyde (C9H8O)",
            "Glutaraldehyde (C5H8O2)",
            "Isovaleraldehyde (C5H10O)",
            "2-Butanone (C4H8O)",
            "Cyclohexanone (C6H10O)",
            "Acetophenone (C8H8O)",
            "3-Pentanone (C5H10O)",
            "4-Pentanone (C5H10O)",
            "2-Pentanone (C5H10O)",
            "2-Hexanone (C6H12O)",
            "2-Octanone (C8H16O)",
            "2-Nonanone (C9H18O)",
            "2-Undecanone (C11H22O)",
            "Octanal (C8H16O)",
            "Nonanal (C9H18O)",
            "Decanal (C10H20O)",
            "Dodecanal (C12H24O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetaldehyde",
                "formaldehyde",
                "acetone",
                "benzaldehyde",
                "valeraldehyde",
                "crotonaldehyde",
                "glutaraldehyde",
                "2-butanone",
                "cyclohexanone",
                "acetophenone",
                "2-octanone",
                "octanal",
                "decanal"
            ],
            "mismatches": [
                "propionaldehyde",
                "butyraldehyde",
                "cinnamaldehyde",
                "isovaleraldehyde",
                "3-pentanone",
                "4-pentanone",
                "2-pentanone",
                "2-hexanone",
                "2-nonanone",
                "2-undecanone",
                "nonanal",
                "dodecanal"
            ],
            "true_referents": [
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-2-Butenyl butyrate",
                "(E)-cinnamaldehyde",
                "1-Hydroxy-2-pentanone",
                "1-hydroxy-3-propoxyacetone",
                "1-phenyl-2-hexanone",
                "2,4-dimethyl-3-pentanone",
                "2-Dodecanone",
                "2-Nonadecanone",
                "2-Nonylheneicosanal",
                "2-butyl-1-octanol",
                "2-octanone",
                "3,3,4,4-tetramethyl-2-pentanone",
                "3,3-dimethyl-2-Hexanone",
                "3,4-dimethoxy cinnamaldehyde",
                "3-Hydroxy-2-pentanone",
                "3-Methylene-2-octanone",
                "4-(O-methyl-aci-nitro)crotonic acid",
                "4-hydroxy-crotonic acid",
                "4-propoxy-2-butanone",
                "5-aceto valeric acid",
                "6-Undecanone",
                "BenzAldehyde, 3-ethyl-",
                "Nonan-2,5-dione",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "acetophenones",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "beta,beta-dimethyl valeric acid",
                "butanone",
                "crotonaldehyde",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "decanal",
                "dialdehyde",
                "formaldehyde",
                "glutaraldehyde",
                "glutaric anhydride",
                "hexanone",
                "hydroxycyclohexanone",
                "isovaleric acid",
                "non-2-enal",
                "nonanone",
                "octanal",
                "pentanone",
                "undecan-2-ol",
                "valeraldehyde"
            ],
            "TP": 13,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (ACE)",
            "Formaldehyde (FA)",
            "Acetaldehyde (AA)",
            "Benzaldehyde (BA)",
            "Propionaldehyde (PA)",
            "Butyraldehyde (BUA)",
            "Pentanal (PEN)",
            "Hexanal (HEX)",
            "Heptanal (HEP)",
            "Octanal (OCT)",
            "Nonanal (NON)",
            "Decanal (DEC)",
            "Undecanal (UND)",
            "Dodecanal (DOD)",
            "Tridecanal (TRI)",
            "Tetradecanal (TET)",
            "Pentadecanal (PEN)",
            "Hexadecanal (HEX)",
            "Heptadecanal (HEP)",
            "Octadecanal (OCT)",
            "Nonadecanal (NON)",
            "Icosanal (ICO)",
            "Heneicosanal (HEN)",
            "Docosanal (DOC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (ACE)",
                "Formaldehyde (FA)",
                "Acetaldehyde (AA)",
                "Benzaldehyde (BA)",
                "Pentanal (PEN)",
                "Hexanal (HEX)",
                "Heptanal (HEP)",
                "Octanal (OCT)",
                "Nonanal (NON)",
                "Decanal (DEC)",
                "Undecanal (UND)",
                "Dodecanal (DOD)",
                "Tridecanal (TRI)",
                "Tetradecanal (TET)",
                "Pentadecanal (PEN)",
                "Hexadecanal (HEX)",
                "Heptadecanal (HEP)",
                "Octadecanal (OCT)"
            ],
            "mismatches": [
                "Propionaldehyde (PA)",
                "Butyraldehyde (BUA)",
                "Nonadecanal (NON)",
                "Icosanal (ICO)",
                "Heneicosanal (HEN)",
                "Docosanal (DOC)"
            ],
            "true_referents": [
                "2-pentenal",
                "9-Octadecenal",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "Heneicosan-2-one",
                "Nonadecanal",
                "Nonadecenoicacid",
                "acetaldehyde",
                "acetone",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "butyl butanoate",
                "decanal",
                "docosan-1-ol",
                "dodecanal",
                "formaldehyde",
                "henicosan-1-ol",
                "heptadecanal",
                "heptanal",
                "hexadecanal",
                "hexanal",
                "icosan-1-ol",
                "nonanal",
                "octadecanal",
                "octanal",
                "p-tolualdehyde",
                "pentadecanal",
                "pentadecanol",
                "pentanal",
                "phenylacetaldehyde",
                "tetradecanal",
                "tridecanal",
                "undecanal",
                "undecyl butyrate"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetone (CH3COCH3)",
            "Acetaldehyde (CH3CHO)",
            "Acetophenone (C6H5C(O)CH3)",
            "Benzaldehyde (C6H5CHO)",
            "Butanone (CH3COCH2CH3)",
            "Cyclohexanone (C6H10O)",
            "Acrolein (CH2=CHCHO)",
            "Propanal (CH3CH2CHO)",
            "Propiophenone (C6H5C(O)CH2CH3)",
            "Butanal (CH3(CH2)2CHO)",
            "Benzophenone (C6H5C(O)C6H5)",
            "Methyl ethyl ketone (CH3COCH2CH3)",
            "Propionaldehyde (CH3CH2CHO)",
            "Valeraldehyde (CH3(CH2)3CHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Malondialdehyde (CH2(CHO)2)",
            "Glyoxal (OHCCHO)",
            "Isobutyraldehyde (CH3CH(CH3)CHO)",
            "Butyraldehyde (CH3(CH2)2CHO)",
            "Methyl isobutyl ketone (CH3COCH2CH(CH3)2)",
            "Pentanal (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetone (CH3COCH3)",
                "Acetaldehyde (CH3CHO)",
                "Acetophenone (C6H5C(O)CH3)",
                "Benzaldehyde (C6H5CHO)",
                "Butanone (CH3COCH2CH3)",
                "Cyclohexanone (C6H10O)",
                "Acrolein (CH2=CHCHO)",
                "Propanal (CH3CH2CHO)",
                "Propiophenone (C6H5C(O)CH2CH3)",
                "Benzophenone (C6H5C(O)C6H5)",
                "Glyoxal (OHCCHO)",
                "Isobutyraldehyde (CH3CH(CH3)CHO)",
                "Methyl isobutyl ketone (CH3COCH2CH(CH3)2)",
                "Pentanal (CH3(CH2)3CHO)",
                "Valeraldehyde (CH3(CH2)3CHO)"
            ],
            "mismatches": [
                "Butanal (CH3(CH2)2CHO)",
                "Methyl ethyl ketone (CH3COCH2CH3)",
                "Propionaldehyde (CH3CH2CHO)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Malondialdehyde (CH2(CHO)2)",
                "Butyraldehyde (CH3(CH2)2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)"
            ],
            "true_referents": [
                "(E)-2-Butenyl butyrate",
                "(E)-cinnamaldehyde",
                "(S)-methylmalonaldehydic acid",
                "2',6'-dimethoxy-4'-hydroxyacetophenone",
                "2'-methyl-Propiophenone",
                "2,2-dimethylpropanal",
                "2,3',4,6-tetrahydroxybenzophenone",
                "2,3,4,5-tetrahydroxypentanal",
                "2-Butoxyethyl acetate",
                "3,4-dimethoxy cinnamaldehyde",
                "3,4-dimethyl valeric acid",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-(3-hydroxyphenyl)propanoic acid",
                "3-Methyl-1-phenyl-1-butanone",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-[(4-aminobutyl)amino]propanal",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "4,4-dimethyl valeric acid",
                "4,6-heptadienal",
                "4-methyl-2-pyrroline-5-carboxylic acid",
                "4-propoxy-2-butanone",
                "BenzAldehyde, 3-ethyl-",
                "Benzyl 2,3-dimethyl-2-butenoate",
                "Decanoyl acetaldehyde",
                "Ethyl isobutyl ketone",
                "Ethyl isopropyl ketone",
                "Isopentyl 3-methyl-2-butenoate",
                "L-trans-4-Methyl-2-pyrrolidinecarboxylic acid",
                "Methyl isobutyl ketone",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "acrolein",
                "aldehyde",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzophenone",
                "butanone",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "formaldehyde",
                "glyoxal",
                "hydroxycyclohexanone",
                "isobutyraldehyde",
                "malonaldehyde",
                "methyl ketone",
                "methyl propenyl ketone",
                "methylglyoxal",
                "p-Ethylacetophenone",
                "pentanal",
                "propanal",
                "propiophenone",
                "valeraldehyde"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (Propanone)",
            "Formaldehyde (Methanal)",
            "Acetaldehyde (Ethanal)",
            "Benzaldehyde",
            "Cyclohexanone",
            "Benzophenone",
            "Acetic Acid (Ethanoic Acid)",
            "Propionaldehyde (Propanal)",
            "Butanone (Methyl Ethyl Ketone)",
            "Glutaraldehyde",
            "Succinyl-CoA",
            "Malonaldehyde",
            "Furfural",
            "Pyranone",
            "Diacetyl",
            "Isocaproic Acid",
            "Stearoyl-CoA",
            "Lactone",
            "Nicotinamide Adenine Dinucleotide (NAD+)",
            "Pyruvate",
            "Citral",
            "3-Buten-2-one",
            "Camphor",
            "Methyl Acrylate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (Propanone)",
                "Formaldehyde (Methanal)",
                "Acetaldehyde (Ethanal)",
                "Benzaldehyde",
                "Cyclohexanone",
                "Benzophenone",
                "Acetic Acid (Ethanoic Acid)",
                "Propionaldehyde (Propanal)",
                "Butanone (Methyl Ethyl Ketone)",
                "Glutaraldehyde",
                "Succinyl-CoA",
                "Malonaldehyde",
                "Furfural",
                "Isocaproic Acid",
                "Stearoyl-CoA",
                "Lactone",
                "Methyl Acrylate"
            ],
            "mismatches": [
                "Pyranone",
                "Diacetyl",
                "Nicotinamide Adenine Dinucleotide (NAD+)",
                "Pyruvate",
                "Citral",
                "3-Buten-2-one",
                "Camphor"
            ],
            "true_referents": [
                "1,3-Diacetylpropane",
                "2-pyranones",
                "3-Methyl-1-phenyl-1-butanone",
                "4-propoxy-2-butanone",
                "4-pyranones",
                "BenzAldehyde, 3-ethyl-",
                "Ethyl acrylate",
                "Furfuryl pentanoate",
                "Glycerol 1-propanoate diacetate",
                "Lactone II",
                "Methyl acrylate",
                "Pyranone C",
                "acetaldehyde",
                "acetic acid",
                "acetic acid-d4",
                "acetoacetic acid",
                "acetone",
                "acetyl",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzophenone",
                "benzophenones",
                "butanone",
                "buten-2-one",
                "butyl acrylate",
                "camphor",
                "citronellal",
                "cyclohexanone",
                "cyclohexanones",
                "dialdehyde",
                "ethenone",
                "formaldehyde",
                "furfural",
                "glutaraldehyde",
                "glutaramic acid",
                "hydroxycyclohexanone",
                "isocaproic acid",
                "lactaldehyde",
                "lactone",
                "malonaldehyde",
                "methyl pyruvate",
                "nicotinamide",
                "nicotinic acid",
                "odoratol",
                "propanal",
                "propanals",
                "stearoyl-CoA",
                "succinyl-CoA"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (CH3CHO)",
            "Acetone (CH3COCH3)",
            "Formaldehyde (CH2O)",
            "Benzaldehyde (C6H5CHO)",
            "Butyraldehyde (C3H7CHO)",
            "Cyclohexanone (C6H10O)",
            "Acrolein (CH2CHCHO)",
            "Propanal (C2H5CHO)",
            "Acetophenone (C6H5COCH3)",
            "Glyoxal (OCHCHO)",
            "Methyl ethyl ketone (MEK)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Furfural (C5H4O2)",
            "Glucose (C6H12O6)",
            "Pyruvic acid (CH3COCOOH)",
            "Urea (NH2CONH2)",
            "Acetylacetone (CH3COCH2COCH3)",
            "Vanillin (C8H8O3)",
            "Citral (C10H16O)",
            "Camphor (C10H16O)",
            "Acetic anhydride ((CH3CO)2O)",
            "Isovaleraldehyde ((CH3)2CHCH2CHO)",
            "Methylglyoxal (CH3COCHO)",
            "Acetylsalicylic acid (Aspirin)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaldehyde (CH3CHO)",
                "Acetone (CH3COCH3)",
                "Formaldehyde (CH2O)",
                "Benzaldehyde (C6H5CHO)",
                "Cyclohexanone (C6H10O)",
                "Acrolein (CH2CHCHO)",
                "Propanal (C2H5CHO)",
                "Acetophenone (C6H5COCH3)",
                "Glyoxal (OCHCHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Furfural (C5H4O2)",
                "Pyruvic acid (CH3COCOOH)",
                "Acetylacetone (CH3COCH2COCH3)",
                "Vanillin (C8H8O3)",
                "Camphor (C10H16O)",
                "Acetic anhydride ((CH3CO)2O)",
                "Methylglyoxal (CH3COCHO)",
                "Acetylsalicylic acid (Aspirin)"
            ],
            "mismatches": [
                "Butyraldehyde (C3H7CHO)",
                "Methyl ethyl ketone (MEK)",
                "Glucose (C6H12O6)",
                "Urea (NH2CONH2)",
                "Citral (C10H16O)",
                "Isovaleraldehyde ((CH3)2CHCH2CHO)"
            ],
            "true_referents": [
                "(2E)-2-hexenyl isovalerate",
                "(E)-cinnamaldehyde",
                "(Z)-7-Dodecenyl butyrate",
                "(Z)-7-Octadecenyl butyrate",
                "3,6-anhydro-D-glucose",
                "3-propylidene-delta1-pyrroline-5-carboxylic acid",
                "4-acetyl-4-(ethoxycarbonyl)heptanedioic acid",
                "4-methyl-2-pyrroline-5-carboxylic acid",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "5-ureidoimidazole-4-carboxylic acid",
                "6-phospho-2-dehydro-D-gluconic acid",
                "7-Hexadecenyl acetate",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "Isopropyl cinnamate",
                "L-Leu-L-Met-NH2",
                "Methyl isobutyl ketone",
                "acetaldehyde",
                "acetic anhydride",
                "acetic benzoic anhydride",
                "acetone",
                "acetophenone",
                "acetylacetonate",
                "acetylacetone",
                "acetylsalicylic acid",
                "acrolein",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "camphor",
                "carboxylic anhydride",
                "crotonaldehyde",
                "crotonic acid",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glyoxal",
                "hydroxycyclohexanone",
                "methyl ketone",
                "methyl propenyl ketone",
                "methylglyoxal",
                "p-Ethylacetophenone",
                "propanal",
                "propanals",
                "pyruvic acid",
                "vanillin"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Acetone (CH3COCH3)",
            "Benzaldehyde (C6H5CHO)",
            "Butyraldehyde (C4H8O)",
            "Cinnamaldehyde (C9H8O)",
            "Cyclohexanone (C6H10O)",
            "Dimethylformamide (DMF)",
            "Ethanal (CH3CHO)",
            "Glutaraldehyde (C5H8O2)",
            "Glyoxal (C2H2O2)",
            "Hexanal (C6H12O)",
            "Isovaleraldehyde (C5H10O)",
            "Malondialdehyde (C3H4O2)",
            "Methylglyoxal (C3H4O2)",
            "Nonanal (C9H18O)",
            "Octanal (C8H16O)",
            "Pentanal (C5H10O)",
            "Propanal (C3H6O)",
            "Propionaldehyde (C3H6O)",
            "Pyruvaldehyde (C3H4O2)",
            "Retinal (C20H28O)",
            "Valeraldehyde (C5H10O)",
            "Vanillin (C8H8O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (C2H4O)",
                "Acetone (CH3COCH3)",
                "Benzaldehyde (C6H5CHO)",
                "Cyclohexanone (C6H10O)",
                "Dimethylformamide (DMF)",
                "Glutaraldehyde (C5H8O2)",
                "Glyoxal (C2H2O2)",
                "Hexanal (C6H12O)",
                "Methylglyoxal (C3H4O2)",
                "Pentanal (C5H10O)",
                "Propanal (C3H6O)",
                "Valeraldehyde (C5H10O)",
                "Vanillin (C8H8O3)"
            ],
            "mismatches": [
                "Butyraldehyde (C4H8O)",
                "Cinnamaldehyde (C9H8O)",
                "Ethanal (CH3CHO)",
                "Isovaleraldehyde (C5H10O)",
                "Malondialdehyde (C3H4O2)",
                "Nonanal (C9H18O)",
                "Octanal (C8H16O)",
                "Propionaldehyde (C3H6O)",
                "Pyruvaldehyde (C3H4O2)",
                "Retinal (C20H28O)"
            ],
            "true_referents": [
                "(3R)-all-trans-3-hydroxyretinal",
                "(3S)-all-trans-3-hydroxyretinal",
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-2-Butenyl butyrate",
                "(E)-cinnamaldehyde",
                "1-hydroxy-3-propoxyacetone",
                "2,3,4,5-tetrahydroxypentanal",
                "2-Nonylheneicosanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3-Methylenehexyl acetate",
                "3-[(4-aminobutyl)amino]propanal",
                "3-oxo aldehyde",
                "5-aceto valeric acid",
                "BenzAldehyde, 3-ethyl-",
                "Ethyl (E)-3-hexenoate",
                "N,N-dimethylformamide",
                "acetaldehyde",
                "acetone",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "beta,beta-dimethyl valeric acid",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "dimethyl fumarate",
                "ethyl vanillin",
                "formaldehyde",
                "glutaraldehyde",
                "glutaric anhydride",
                "glyoxal",
                "hexanal",
                "hydroxycyclohexanone",
                "hydroxypyruvaldehyde",
                "isovaleric acid",
                "malonaldehyde",
                "methylglyoxal",
                "non-2-enal",
                "octanal",
                "pentanal",
                "propanal",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (C3H6O)",
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Benzaldehyde (C7H6O)",
            "Acetophenone (C8H8O)",
            "Acrolein (C3H4O)",
            "Glyoxal (C2H2O2)",
            "Methylglyoxal (C3H4O2)",
            "Pyruvaldehyde (C3H4O2)",
            "Furfural (C5H4O2)",
            "Cyclohexanone (C6H10O)",
            "Isophorone (C9H14O)",
            "Camphor (C10H16O)",
            "Menthone (C10H18O)",
            "Testosterone (C19H28O2)",
            "Progesterone (C21H30O2)",
            "Cortisone (C21H28O5)",
            "Estrone (C18H22O2)",
            "Androstenedione (C19H26O2)",
            "Cholesterol (C27H46O)",
            "Vitamin K1 (C31H46O2)",
            "Retinal (C20H28O)",
            "Jasmone (C11H16O)",
            "Muscone (C16H30O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone",
                "Formaldehyde",
                "Acetaldehyde",
                "Benzaldehyde",
                "Acetophenone",
                "Acrolein",
                "Glyoxal",
                "Methylglyoxal",
                "Furfural",
                "Cyclohexanone",
                "Isophorone",
                "Camphor",
                "Progesterone",
                "Cortisone",
                "Estrone",
                "Jasmone",
                "Muscone"
            ],
            "mismatches": [
                "Pyruvaldehyde",
                "Menthone",
                "Testosterone",
                "Androstenedione",
                "Cholesterol",
                "Vitamin K1",
                "Retinal"
            ],
            "true_referents": [
                "(1R,4S)-1-hydroperoxy-p-menth-2-en-8-ol acetate",
                "(2Z,4E)-4-hydroxymuconic semialdehyde",
                "(3R)-all-trans-3-hydroxyretinal",
                "(3R,5S)-1-pyrroline-3-hydroxy-5-carboxylic acid",
                "(3S)-all-trans-3-hydroxyretinal",
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-jasmolactone",
                "(E)-testosterone-3-CMO-Hist",
                "2-Hydroxy-vitamin K",
                "4,5alpha-Dihydro-2-(hydroxymethylene)testosterone",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "7alpha,25-dihydroxy-4-cholesten-3-one",
                "BenzAldehyde, 3-ethyl-",
                "Bis(2-methylpropanoyloxy)-9,10-epoxy-p-mentha-1,3,5-triene",
                "Jasmone",
                "Muscone",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "acetophenones",
                "acrolein",
                "aldehyde",
                "androsta-1,4-diene-3,17-dione",
                "benzaldehyde",
                "benzaldehydes",
                "camphor",
                "cortisone",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "estrone",
                "estrone 3-O-(beta-D-glucuronide)",
                "formaldehyde",
                "furfural",
                "glyoxal",
                "hydroxycyclohexanone",
                "hydroxypyruvaldehyde",
                "isophorone",
                "isophorone diisocyanate",
                "methylglyoxal",
                "progesterone",
                "trans-Jasmone",
                "vitamin K"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (CH3CHO)",
            "Acetone (CH3COCH3)",
            "Acetic acid (CH3COOH)",
            "Acetyl coenzyme A (Acetyl-CoA)",
            "Benzaldehyde (C6H5CHO)",
            "Butanone (CH3COC2H5)",
            "Citric acid (C6H8O7)",
            "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
            "Ethyl acetate (CH3COOC2H5)",
            "Formaldehyde (HCHO)",
            "Formic acid (HCOOH)",
            "Fructose (C6H12O6)",
            "Glucose (C6H12O6)",
            "Glutaraldehyde (OHC(CH2)3CHO)",
            "Glyceraldehyde (C3H6O3)",
            "Glyoxal (CHOCHO)",
            "Isopropyl alcohol (CH3CHCH3)",
            "Lactic acid (CH3CHOHCOOH)",
            "Malondialdehyde (CH2(CHO)2)",
            "Oxaloacetic acid (C4H4O5)",
            "Pyruvic acid (CH3COCOOH)",
            "Succinic acid (C4H6O4)",
            "Vanillin (C8H8O3)",
            "Xylose (C5H10O5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetyl coenzyme A (Acetyl-CoA)",
                "acetaldehyde",
                "acetic acid",
                "acetone",
                "benzaldehyde",
                "butanone",
                "citric acid",
                "ethyl acetate",
                "formaldehyde",
                "formic acid",
                "glutaraldehyde",
                "glyoxal",
                "oxaloacetic acid",
                "pyruvic acid",
                "succinic acid",
                "vanillin"
            ],
            "mismatches": [
                "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
                "Fructose (C6H12O6)",
                "Glucose (C6H12O6)",
                "Glyceraldehyde (C3H6O3)",
                "Isopropyl alcohol (CH3CHCH3)",
                "Lactic acid (CH3CHOHCOOH)",
                "Malondialdehyde (CH2(CHO)2)",
                "Xylose (C5H10O5)"
            ],
            "true_referents": [
                "(R)-lactic acid",
                "(S)-methylmalonaldehydic acid",
                "(homo)(2-methyl)citric acid",
                "1,5-anhydro-D-fructose",
                "2,2-dimethylsuccinic acid",
                "2-O-(4-O-Methyl-a-D-glucopyranuronosyl)-D-xylose",
                "2-[(sulfooxy)methyl]butanoic acid",
                "3,4,5,6-tetrahydroxyoxane-2-carboxylic acid",
                "3,6-anhydro-D-glucose",
                "3-Methyl-1-phenyl-1-butanone",
                "3-O-alpha-D-Glucopyranuronosyl-D-xylose",
                "3-deoxy-keto-D-fructose",
                "3-hydroxy-2-(hydroxymethyl)-2-[(sulfooxy)methyl]propanoic acid",
                "4-propoxy-2-butanone",
                "6-[(2-carboxyacetyl)oxy]-3,4,5-trihydroxyoxane-2-carboxylic acid",
                "6-phospho-2-dehydro-D-gluconic acid",
                "Acetyl coenzyme A (Acetyl-CoA)",
                "BenzAldehyde, 3-ethyl-",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "Ethyl isopropyl ketone",
                "Isopropyl hexanoate",
                "L-glyceraldehyde 3-phosphate",
                "acetaldehyde",
                "acetic acid",
                "acetoacetic acid",
                "acetone",
                "acetyl-CoA",
                "aldehyde",
                "alpha-beta-Xylose Acetate",
                "benzaldehyde",
                "benzaldehydes",
                "butanone",
                "citric acid",
                "citric acid-d4",
                "dialdehyde",
                "ethyl acetate",
                "ethyl vanillin",
                "formaldehyde",
                "formic acid",
                "formic acid-d2",
                "glutaraldehyde",
                "glyceraldehyde 3-phosphate",
                "glyoxal",
                "isopropyl acetate",
                "malonaldehyde",
                "methylglyoxal",
                "oxaloacetic acid",
                "pyruvic acid",
                "succinic acid",
                "vanillin"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (Me2CO)",
            "Acrolein",
            "Aldehydes",
            "Alizarin",
            "Anthranilic acid",
            "Arsanilic acid",
            "Aspirin",
            "Benzaldehyde",
            "Benzoin",
            "Benzophenone",
            "Benzyl cyanide",
            "Camphor",
            "Cinnamic acid",
            "Citral",
            "Citronellal",
            "Crotonaldehyde",
            "Ethanal (Acetaldehyde)",
            "Ethyl formate",
            "Ferulic acid",
            "Furfural",
            "Indole-3-carboxaldehyde",
            "Isobutyraldehyde",
            "Isophthalic acid",
            "Malonaldehyde",
            "Methyl salicylate",
            "Phenylacetaldehyde",
            "Phenylglyoxal",
            "Quercetin",
            "Salicylaldehyde",
            "Styrene",
            "Vanillin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acrolein",
                "Alizarin",
                "Anthranilic acid",
                "Benzaldehyde",
                "Benzoin",
                "Benzophenone",
                "Camphor",
                "Cinnamic acid",
                "Citronellal",
                "Crotonaldehyde",
                "Ethyl formate",
                "Furfural",
                "Isobutyraldehyde",
                "Isophthalic acid",
                "Malonaldehyde"
            ],
            "mismatches": [
                "Acetone (Me2CO)",
                "Aldehydes",
                "Arsanilic acid",
                "Aspirin",
                "Benzyl cyanide",
                "Citral",
                "Ethanal (Acetaldehyde)",
                "Ferulic acid",
                "Indole-3-carboxaldehyde",
                "Methyl salicylate",
                "Phenylacetaldehyde",
                "Phenylglyoxal",
                "Quercetin",
                "Salicylaldehyde",
                "Styrene",
                "Vanillin"
            ],
            "true_referents": [
                "(R)-(+)-citronellal",
                "(S)-(-)-citronellal",
                "1-pyrrolinecarboxylic acid",
                "4-methyl-2-pyrroline-5-carboxylic acid",
                "9-dimethylarsinoyl-nonanoic acid",
                "BenzAldehyde, 3-ethyl-",
                "Furfuryl pentanoate",
                "acetaldehyde",
                "acetaldehyde (E)-hydrazone",
                "acetone",
                "acetylsalicylic acid",
                "acrolein",
                "aldehyde",
                "alizarin",
                "alizarin blue",
                "alizarin complexone",
                "anthranilic acid",
                "arsenoacetic acid",
                "benzaldehyde",
                "benzaldehydes",
                "benzoin",
                "benzoins",
                "benzophenone",
                "benzophenones",
                "benzoyl cyanide",
                "camphor",
                "cinnamic acid",
                "cinnamic acids",
                "cis-cinnamic acid",
                "citronellal",
                "crotonaldehyde",
                "crotonic acid",
                "dialdehyde",
                "ethenone",
                "ethyl formate",
                "ferulic acid",
                "ferulic acids",
                "feruloylacetic acid",
                "furfural",
                "indol-3-yl carboxylic acid",
                "indole-3-carbaldehyde",
                "indole-3-carboxylic acid",
                "isobutyl acetate",
                "isobutyraldehyde",
                "isophthalic acid",
                "malonaldehyde",
                "methyl anthranilate",
                "odoratol"
            ],
            "TP": 15,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (ACN)",
            "Acetaldehyde (ACD)",
            "Formaldehyde (FA)",
            "Glucose (GLC)",
            "Fructose (FRU)",
            "Galactose (GAL)",
            "Sucrose (SUC)",
            "Starch (STR)",
            "Cellulose (CEL)",
            "Chitin (CHT)",
            "Retinal (RET)",
            "Retinol (ROL)",
            "Vitamin C (ASC)",
            "Warfarin (WAR)",
            "Diclofenac (DCF)",
            "Ibuprofen (IBP)",
            "Acetaminophen (ACE)",
            "Paracetamol (PAR)",
            "Camphor (CMP)",
            "Menthol (MEN)",
            "Thalidomide (THL)",
            "Fluoxetine (FLT)",
            "Sertraline (SRT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (ACN)",
                "Acetaldehyde (ACD)",
                "Formaldehyde (FA)",
                "Retinal (RET)",
                "Vitamin C (ASC)",
                "Diclofenac (DCF)",
                "Ibuprofen (IBP)",
                "Camphor (CMP)"
            ],
            "mismatches": [
                "Glucose (GLC)",
                "Fructose (FRU)",
                "Galactose (GAL)",
                "Sucrose (SUC)",
                "Starch (STR)",
                "Cellulose (CEL)",
                "Chitin (CHT)",
                "Retinol (ROL)",
                "Warfarin (WAR)",
                "Acetaminophen (ACE)",
                "Paracetamol (PAR)",
                "Menthol (MEN)",
                "Thalidomide (THL)",
                "Fluoxetine (FLT)",
                "Sertraline (SRT)"
            ],
            "true_referents": [
                "(R)-warfarin",
                "(S)-warfarin",
                "1,5-anhydro-D-fructose",
                "1-O-all-trans-retinoyl-beta-glucuronic acid",
                "3',6-Disinapoyl sucrose",
                "3,6'-Disinapoyl sucrose",
                "3,6-anhydro-D-galactose",
                "3-deoxy-keto-D-fructose",
                "4-O-alpha-D-Glucopyranuronosyl-D-galactose",
                "5-(2,2-diferuloylethen-1-yl)thalidomide",
                "Chitinovorin D",
                "D-ascorbic acid",
                "D-glucopyranuronic acid",
                "acetaldehyde",
                "acetaminosalol",
                "acetone",
                "aldehyde",
                "ascorbic acid",
                "camphor",
                "carboxymethylcellulose",
                "cellulose acetate",
                "dTDP-3-dehydro-6-deoxy-alpha-D-galactose",
                "dabigatran",
                "diclofenac",
                "fluvoxamine",
                "formaldehyde",
                "galactaric acid",
                "glucaric acid",
                "ibuprofen",
                "lactaldehyde",
                "lofepramine",
                "menthyl salicylate",
                "mucidin",
                "pralatrexate",
                "propacetamol",
                "retinal",
                "retinals",
                "retinyl acetate",
                "succinic acid-d4",
                "vitamin C"
            ],
            "TP": 8,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "acetaldehyde (EtCHO)",
                "Canonical name": "ethanal"
            },
            {
                "Referent": "acetone (Me2CO)",
                "Canonical name": "propanone"
            },
            {
                "Referent": "benzaldehyde (C6H5CHO)",
                "Canonical name": "benzaldehyde"
            },
            {
                "Referent": "butanal (Me2CH2CH2CHO)",
                "Canonical name": "butanal"
            },
            {
                "Referent": "butyraldehyde (Me2CH(CH3)CHO)",
                "Canonical name": "butyraldehyde"
            },
            {
                "Referent": "carbon monoxide (CO)",
                "Canonical name": "carbon monoxide"
            },
            {
                "Referent": "carbonyl chloride (COCl2)",
                "Canonical name": "carbonyl chloride"
            },
            {
                "Referent": "carbonyl sulfide (COS)",
                "Canonical name": "carbonyl sulfide"
            },
            {
                "Referent": "cyanide (CN-)",
                "Canonical name": "cyanide"
            },
            {
                "Referent": "formaldehyde (CH2O)",
                "Canonical name": "methanal"
            },
            {
                "Referent": "heptanal (C7H14CHO)",
                "Canonical name": "heptanal"
            },
            {
                "Referent": "hexanal (C6H12CHO)",
                "Canonical name": "hexanal"
            },
            {
                "Referent": "isobutyraldehyde (Me3CH(CH3)CHO)",
                "Canonical name": "isobutyraldehyde"
            },
            {
                "Referent": "methylglyoxal (CH3C(O)CHO)",
                "Canonical name": "methylglyoxal"
            },
            {
                "Referent": "oxalaldehyde (CH3C(O)CHO)",
                "Canonical name": "oxalaldehyde"
            },
            {
                "Referent": "propanal (CH3CH2CHO)",
                "Canonical name": "propanal"
            },
            {
                "Referent": "propanone (Me2CO)",
                "Canonical name": "propanone"
            },
            {
                "Referent": "propyne (C3H2)",
                "Canonical name": "propyne"
            },
            {
                "Referent": "pyruvic aldehyde (CH3C(O)COC2H5)",
                "Canonical name": "pyruvic aldehyde"
            },
            {
                "Referent": "thioacetaldehyde (CH3CS)",
                "Canonical name": "thioacetaldehyde"
            },
            {
                "Referent": "valeraldehyde (C5H10CHO)",
                "Canonical name": "valeraldehyde"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetaldehyde",
                "benzaldehyde",
                "formaldehyde",
                "methylglyoxal",
                "valeraldehyde"
            ],
            "mismatches": [
                "acetone",
                "butanal",
                "butyraldehyde",
                "carbon monoxide",
                "carbonyl chloride",
                "carbonyl sulfide",
                "cyanide",
                "heptanal",
                "hexanal",
                "isobutyraldehyde",
                "oxalaldehyde",
                "propanal",
                "propanone",
                "propyne",
                "pyruvic aldehyde",
                "thioacetaldehyde"
            ],
            "true_referents": [
                "(+/-)-Ethyl 2-hydroxy-2-methylbutyrate",
                "(+/-)-Ethyl 2-hydroxy-3-methylvalerate",
                "(+/-)-ethylitaconic acid",
                "(-)-camphanic acid",
                "(2'S,3'S)-[(2E,4E)-2-methyl-hexa-2,4-dienoic acid isoleucinaldehyde]",
                "(2Z,4Z)-2,3-dichloromuconic acid",
                "(8E)-heptadecenal",
                "(E,E)-2,4-Hexadienedial",
                "(S)-2-mercaptopropanoic acid",
                "(S)-3-Methylthiohexyl acetate",
                "(S)-4,5-dihydroxypentane-2,3-dione",
                "(S)-dichlorprop",
                "(S)-lactaldehyde",
                "(S)-mandelic acid",
                "(Z)-non-6-enal",
                "(all-E)-1,8,10-Heptadecatriene-4,6-diyne-3,12-diol",
                "(all-Z)-8,11,14-Heptadecatrienal",
                "1-Heptadecene-4,6-diyne-3,9-diol",
                "2,6-dimethyl-hexadecanoic acid",
                "2-Methylbutyl 3-methylbutanoate",
                "3-[(4-aminobutyl)amino]propanal",
                "3-butene-1,2,3-tricarboxylic acid",
                "3-hydroxybutane-1,2,3-tricarboxylic acid",
                "5beta-Cholane-3alpha,6alpha,24-triol",
                "BenzAldehyde, 3-ethyl-",
                "Butanoic acid, 2,2-dimethyl-3-oxo-, methyl ester",
                "Methyl oxalate",
                "acetaldehyde",
                "aldehyde",
                "benzaldehyde",
                "benzene-1,3,5-tricarboxylic acid",
                "beta,beta-dimethyl valeric acid",
                "formaldehyde",
                "methyl formate",
                "methyl ketone",
                "methylglyoxal",
                "valeraldehyde"
            ],
            "TP": 5,
            "FP": 16,
            "FN": 0
        }
    }
]