[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Glyoxal (OHCCHO)",
            "Pyruvaldehyde (CH3C(OH)COH)",
            "Benzaldehyde (C6H5CHO)",
            "Butyraldehyde (CH3(CH2)2CHO)",
            "Valeraldehyde (CH3(CH2)3CHO)",
            "Hexaldehyde (CH3(CH2)4CHO)",
            "Heptaldehyde (CH3(CH2)5CHO)",
            "Octaldehyde (CH3(CH2)6CHO)",
            "Nonaldehyde (CH3(CH2)7CHO)",
            "Decaldehyde (CH3(CH2)8CHO)",
            "Acrolein (CH2=CHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Acetol (CH3CH(OH)CHO)",
            "Methacrolein (CH2=C(CH3)CHO)",
            "Glyoxaldehyde (CH2OCHO)",
            "Sorbitaldehyde (CH2OHCH2CH2CH2CHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Vanillin (OHC6H2(OH)CH=O)",
            "Salicylaldehyde (OHC6H3(OH)CHO)",
            "Cyclohexanecarboxaldehyde (C6H11CHO)",
            "2-Furanmethanol (OHCCH2OH)",
            "2-Furanmethanal (OHCCH2CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "formaldehyde",
                "acetaldehyde",
                "glyoxal",
                "benzaldehyde",
                "valeraldehyde",
                "acrolein",
                "crotonaldehyde",
                "methacrolein",
                "salicylaldehyde"
            ],
            "mismatches": [
                "pyruvaldehyde",
                "butyraldehyde",
                "hexaldehyde",
                "heptaldehyde",
                "octaldehyde",
                "nonaldehyde",
                "decaldehyde",
                "acetol",
                "glyoxaldehyde",
                "sorbitaldehyde",
                "cinnamaldehyde",
                "vanillin",
                "cyclohexanecarboxaldehyde",
                "2-furanmethanol",
                "2-furanmethanal"
            ],
            "true_referents": [
                "(2E,4E)-2,4-Dodecadienal",
                "(4-hydroxy-3-methoxyphenyl)acetaldehyde",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-2-pentadecylheptadec-2-enal",
                "(E)-cinnamaldehyde",
                "(E)-non-2-enal",
                "(E,E)-2,4-Hexadienedial",
                "(S)-2-methylbutanal",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2,4-Dimethyl-2E,4E-hexadienal",
                "2,4-dihydroxy-5-methyl-6-oxo-2,4-hexadienoic acid",
                "2,5-diformylfuran",
                "2,6,6-Trimethyl-1-cyclohexen-1-acetaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5-Dimethylhexanal",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "4,6-heptadienal",
                "4-(Diethylamino)salicylaldehyde",
                "4-acetoxy benzaldehyde",
                "5-Heptenal, 2,6-dimethyl-",
                "5-Methoxymethyl furfural",
                "5-[(5-hydroxymethyl-furan-2-yl)methoxy-methoxymethyl]-furan-2-carbaldehyde",
                "5-methyl-2-furaldehyde",
                "6-oxohexanoic acid",
                "Decanoyl acetaldehyde",
                "Methacrolein",
                "acetaldehyde",
                "acrolein",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "cis-8-decenal",
                "crotonaldehyde",
                "cyclohexanecarbaldehyde",
                "dialdehyde",
                "ethyl glyoxylate",
                "ethyl vanillin",
                "formaldehyde",
                "glyoxal",
                "hydroxypyruvaldehyde",
                "isobutyraldehyde",
                "methylglyoxal",
                "o-tolualdehyde",
                "phenylacetaldehyde",
                "pyridine-3-carbaldehyde",
                "salicylaldehyde",
                "trans-3-Chloroallyl aldehyde",
                "valeraldehyde",
                "vanillin acetate"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Glyoxal (OCHCHO)",
            "Glutaraldehyde (C5H8O2)",
            "Vanillin (4-hydroxy-3-methoxybenzaldehyde)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Propionaldehyde (C2H5CHO)",
            "Butyraldehyde (C3H7CHO)",
            "Valeraldehyde (C4H9CHO)",
            "Caproaldehyde (C5H11CHO)",
            "Hexanal (C5H11CHO)",
            "Heptanal (C6H13CHO)",
            "Octanal (C7H15CHO)",
            "Nonanal (C8H17CHO)",
            "Decanal (C9H19CHO)",
            "Cyclohexanecarboxaldehyde (C7H12O)",
            "Furfural (C5H4O2)",
            "Glycolaldehyde (C2H4O2)",
            "Mandelaldehyde (C8H8O2)",
            "Salicylaldehyde (C7H6O2)",
            "Cinnamylaldehyde (C9H10O)",
            "Cinnamal (C9H8O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Glyoxal (OCHCHO)",
                "Glutaraldehyde (C5H8O2)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Valeraldehyde (C4H9CHO)",
                "Hexanal (C5H11CHO)",
                "Heptanal (C6H13CHO)",
                "Octanal (C7H15CHO)",
                "Furfural (C5H4O2)",
                "Glycolaldehyde (C2H4O2)",
                "Salicylaldehyde (C7H6O2)"
            ],
            "mismatches": [
                "Vanillin (4-hydroxy-3-methoxybenzaldehyde)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Propionaldehyde (C2H5CHO)",
                "Butyraldehyde (C3H7CHO)",
                "Caproaldehyde (C5H11CHO)",
                "Nonanal (C8H17CHO)",
                "Decanal (C9H19CHO)",
                "Cyclohexanecarboxaldehyde (C7H12O)",
                "Mandelaldehyde (C8H8O2)",
                "Cinnamylaldehyde (C9H10O)",
                "Cinnamal (C9H8O)"
            ],
            "true_referents": [
                "(4S,5S)-4,5-dihydroxy-2,6-dioxohexanoic acid",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(S)-methylmalonaldehydic acid",
                "2,6,6-Trimethyl-1-cyclohexen-1-acetaldehyde",
                "2-Nonylheneicosanal",
                "2-hydroxy caproaldehyde",
                "3,4-dihydroxymandelaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "4,6-heptadienal",
                "4-(Diethylamino)salicylaldehyde",
                "4-acetoxy-3,5-dimethoxy-trans-cinnamaldehyde",
                "4-hydroxy caproaldehyde",
                "5-Methoxymethyl furfural",
                "5-hydroxy caproaldehyde",
                "5-hydroxymethylfurfural",
                "Decanoyl acetaldehyde",
                "L-glutamic 5-semialdehyde",
                "acetaldehyde",
                "aliphatic aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "chloroacetaldehyde",
                "crotonaldehyde",
                "cyclohexanecarbaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glyoxal",
                "heptanal",
                "hexanal",
                "isobutyraldehyde",
                "methylglyoxal",
                "non-2-enal",
                "o-tolualdehyde",
                "octanal",
                "phenylacetaldehyde",
                "salicylaldehyde",
                "valeraldehyde",
                "vanillin acetate",
                "veratraldehyde"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (Methanolal)",
            "Acetaldehyde",
            "Propionaldehyde",
            "Butyraldehyde",
            "Benzaldehyde",
            "Cinnamaldehyde",
            "Glutaraldehyde",
            "Glyceraldehyde",
            "Isobutyraldehyde",
            "Isovaleraldehyde",
            "Malonaldehyde",
            "Methyl glyoxal",
            "Phenylacetaldehyde",
            "Retinal",
            "Vanillin",
            "4-Hydroxy-2-nonenal",
            "4-Hydroxyhexenal",
            "4-Oxo-2-nonenal",
            "Acrolein",
            "Crotonaldehyde",
            "Furfural",
            "Hexanal",
            "Heptanal",
            "Nonanal"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaldehyde",
                "Benzaldehyde",
                "Glutaraldehyde",
                "Isobutyraldehyde",
                "Malonaldehyde",
                "Phenylacetaldehyde",
                "Retinal",
                "Vanillin",
                "Acrolein",
                "Crotonaldehyde",
                "Furfural",
                "Hexanal",
                "Heptanal",
                "Nonanal"
            ],
            "mismatches": [
                "Formaldehyde (Methanolal)",
                "Propionaldehyde",
                "Butyraldehyde",
                "Cinnamaldehyde",
                "Glyceraldehyde",
                "Isovaleraldehyde",
                "Methyl glyoxal",
                "4-Hydroxy-2-nonenal",
                "4-Hydroxyhexenal",
                "4-Oxo-2-nonenal"
            ],
            "true_referents": [
                "(E)-4-oxonon-2-enal",
                "(E)-cinnamaldehyde",
                "(R)-alpha-campholenaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "4-Hydroperoxy-2-nonenal",
                "4-hydroxyhexenal",
                "4-hydroxyindole-3-carbaldehyde",
                "4-hydroxynonanal",
                "4-hydroxynonenal",
                "4-oxo-2E-Decenal",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "D-glyceraldehyde 3-phosphate",
                "L-glyceraldehyde 3-phosphate",
                "acetaldehyde",
                "acrolein",
                "aldehydic acid",
                "benzaldehyde",
                "benzaldehydes",
                "cinnamaldehydes",
                "crotonaldehyde",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glyoxal",
                "heptanal",
                "hexanal",
                "isobutyraldehyde",
                "lactaldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "nonanal",
                "phenylacetaldehyde",
                "pyrrole-2-carboxaldehyde",
                "retinal",
                "retinals",
                "tolualdehyde",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propanal (CH3CH2CHO)",
            "Butanal (CH3CH2CH2CHO)",
            "Pentanal (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Glyoxal (OHC-CHO)",
            "Glycolaldehyde (HOCH2CHO)",
            "Furfural (2-Furaldehyde)",
            "Sinapaldehyde (3,5-Dimethoxy-4-hydroxybenzaldehyde)",
            "Citral (3,7-Dimethyl-2,6-octadienal)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Acrolein (CH2=CHCHO)",
            "Isobutyraldehyde ((CH3)2CHCHO)",
            "2-Methylbutanal (CH3CH(CH3)CH2CHO)",
            "3-Methylbutanal (CH3CH2CH(CH3)CHO)",
            "Phenylacetaldehyde (C6H5CH2CHO)",
            "Malonaldehyde (CH2(CHO)2)",
            "Glutaraldehyde (OHC(CH2)3CHO)",
            "Formylmethionine (N-Formylmethionine)",
            "Pyruvaldehyde (CH3COCHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Propanal (CH3CH2CHO)",
                "Butanal (CH3CH2CH2CHO)",
                "Pentanal (CH3(CH2)3CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Glyoxal (OHC-CHO)",
                "Glycolaldehyde (HOCH2CHO)",
                "Furfural (2-Furaldehyde)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Acrolein (CH2=CHCHO)",
                "Isobutyraldehyde ((CH3)2CHCHO)",
                "2-Methylbutanal (CH3CH(CH3)CH2CHO)",
                "3-Methylbutanal (CH3CH2CH(CH3)CHO)",
                "Phenylacetaldehyde (C6H5CH2CHO)",
                "Malonaldehyde (CH2(CHO)2)",
                "Glutaraldehyde (OHC(CH2)3CHO)"
            ],
            "mismatches": [
                "Hexanal (CH3(CH2)4CHO)",
                "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Sinapaldehyde (3,5-Dimethoxy-4-hydroxybenzaldehyde)",
                "Citral (3,7-Dimethyl-2,6-octadienal)",
                "Formylmethionine (N-Formylmethionine)",
                "Pyruvaldehyde (CH3COCHO)"
            ],
            "true_referents": [
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(E)-sinapaldehyde",
                "(S)-2-methylbutanal",
                "(S)-methylmalonaldehydic acid",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2,2-dimethylpropanal",
                "2,3,4,5-tetrahydroxypentanal",
                "2-hydroxyphenylacetaldehyde",
                "2-methylbutanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-Methylhexanal",
                "3-[(4-aminobutyl)amino]propanal",
                "3-furaldehyde",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "3-methylbutanal",
                "4-acetoxy-3,5-dimethoxy-trans-cinnamaldehyde",
                "4-formyl-2-methoxyphenyl L-menthyl glutarate",
                "4-methylpentanal",
                "5-formylcytosine",
                "5-methyl-2-furaldehyde",
                "Decanoyl acetaldehyde",
                "N-formyl-L-tyrosine",
                "acetaldehyde",
                "acrolein",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "butanal",
                "crotonaldehyde",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "formylmethyl",
                "furfural",
                "glutaraldehyde",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glyoxal",
                "hydroxypyruvaldehyde",
                "isobutyraldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "o-tolualdehyde",
                "pentanal",
                "phenylacetaldehyde",
                "propanal",
                "pyridine-3-carbaldehyde",
                "vanillin acetate"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Propionaldehyde (C2H5CHO)",
            "Butyraldehyde (C3H7CHO)",
            "Isobutyraldehyde (C3H7CHO)",
            "Valeraldehyde (C4H9CHO)",
            "Isovaleraldehyde (C4H9CHO)",
            "Caproaldehyde (C5H11CHO)",
            "Heptanal (C6H13CHO)",
            "Octanal (C7H15CHO)",
            "Nonanal (C8H17CHO)",
            "Decanal (C9H19CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Furfural (C4H3OCHO)",
            "Glyoxal (C2H2O2)",
            "Glutaraldehyde (C5H8O2)",
            "Malondialdehyde (C3H4O2)",
            "Crotonaldehyde (C4H6O)",
            "Methacrolein (C4H6O)",
            "Cinnamaldehyde (C9H8O)",
            "Vanillin (C8H8O3)",
            "Salicylaldehyde (C7H6O2)",
            "Citral (C10H16O)",
            "Pyridoxal (C8H9NO3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Isobutyraldehyde (C3H7CHO)",
                "Valeraldehyde (C4H9CHO)",
                "Heptanal (C6H13CHO)",
                "Octanal (C7H15CHO)",
                "Decanal (C9H19CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Furfural (C4H3OCHO)",
                "Glyoxal (C2H2O2)",
                "Glutaraldehyde (C5H8O2)",
                "Crotonaldehyde (C4H6O)",
                "Methacrolein (C4H6O)",
                "Vanillin (C8H8O3)",
                "Salicylaldehyde (C7H6O2)",
                "Pyridoxal (C8H9NO3)"
            ],
            "mismatches": [
                "Propionaldehyde (C2H5CHO)",
                "Butyraldehyde (C3H7CHO)",
                "Isovaleraldehyde (C4H9CHO)",
                "Caproaldehyde (C5H11CHO)",
                "Nonanal (C8H17CHO)",
                "Malondialdehyde (C3H4O2)",
                "Cinnamaldehyde (C9H8O)",
                "Citral (C10H16O)"
            ],
            "true_referents": [
                "(2E)-11-hydroxy-2-dodecenoic acid",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "2-Nonylheneicosanal",
                "2-hydroxy caproaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-furaldehyde",
                "3-oxo aldehyde",
                "4,6-heptadienal",
                "4-(Diethylamino)salicylaldehyde",
                "4-acetoxy-3,5-dimethoxy-trans-cinnamaldehyde",
                "4-aminovaleraldehyde",
                "4-hydroxy caproaldehyde",
                "5-Methoxymethyl furfural",
                "5-hydroxy caproaldehyde",
                "6-methoxy-2,6-dimethyloctanal",
                "Decanoyl acetaldehyde",
                "L-glutamic 5-semialdehyde",
                "Methacrolein",
                "acetaldehyde",
                "aliphatic aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "chloroacetaldehyde",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glyoxal",
                "heptanal",
                "isobutyraldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "non-2-enal",
                "o-tolualdehyde",
                "octanal",
                "phenylacetaldehyde",
                "pyridoxal",
                "pyridoxal 5'-phosphate",
                "salicylaldehyde",
                "valeraldehyde",
                "vanillin",
                "veratraldehyde"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propionaldehyde (C2H5CHO)",
            "Butyraldehyde (C3H7CHO)",
            "Valeraldehyde (C4H9CHO)",
            "Hexanal (C5H11CHO)",
            "Heptanal (C6H13CHO)",
            "Octanal (C7H15CHO)",
            "Nonanal (C8H17CHO)",
            "Decanal (C9H19CHO)",
            "Benzaldehyde (C7H6O)",
            "Cinnamaldehyde (C9H8O)",
            "Salicylaldehyde (C7H6O2)",
            "Furfural (C5H4O2)",
            "Acrolein (C3H4O)",
            "Isovaleraldehyde (C4H8CHO)",
            "Crotonaldehyde (C4H6O)",
            "2-Phenylacetaldehyde (C8H10O)",
            "2-Furaldehyde (C5H4O)",
            "3-Methylbutanal (C5H10O)",
            "4-Methylpentanal (C6H12O)",
            "Pentanal (C5H10O)",
            "2-Ethylhexanal (C8H16O)",
            "2-Butenal (C4H6O)",
            "2-Methylpropanal (C4H8O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "formaldehyde",
                "acetaldehyde",
                "valeraldehyde",
                "hexanal",
                "heptanal",
                "octanal",
                "benzaldehyde",
                "salicylaldehyde",
                "furfural",
                "acrolein",
                "crotonaldehyde",
                "3-methylbutanal",
                "4-methylpentanal",
                "pentanal",
                "2-ethylhexanal"
            ],
            "mismatches": [
                "propionaldehyde",
                "butyraldehyde",
                "nonanal",
                "decanal",
                "cinnamaldehyde",
                "isovaleraldehyde",
                "2-phenylacetaldehyde",
                "2-furaldehyde",
                "2-butenal",
                "2-methylpropanal"
            ],
            "true_referents": [
                "(+/-)-2-Methylhexanal",
                "(2E)-2-ethyl-2-butenal",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(S)-2-methylbutanal",
                "1-Ethyl-1H-pyrrole-2-carboxaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "2,5-diformylfuran",
                "2-Ethylidenehexanal",
                "2-Methylpentanal",
                "2-Nonylheneicosanal",
                "2-ethylhexanal",
                "2-hexenal",
                "2-hydroxyphenylacetaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-furaldehyde",
                "3-methylbutanal",
                "4,6-heptadienal",
                "4-(Diethylamino)salicylaldehyde",
                "4-aminovaleraldehyde",
                "4-methylpentanal",
                "5-Methoxymethyl furfural",
                "5-aminopentanal",
                "5-hydroxymethylfurfural",
                "5-methyl-2-furaldehyde",
                "Decanoyl acetaldehyde",
                "acetaldehyde",
                "acrolein",
                "aliphatic aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "chloroacetaldehyde",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "formaldehyde",
                "furfural",
                "heptanal",
                "hexanal",
                "isobutyraldehyde",
                "non-2-enal",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "salicylaldehyde",
                "valeraldehyde",
                "veratraldehyde"
            ],
            "TP": 15,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Propionaldehyde (C3H6O)",
            "Butyraldehyde (C4H8O)",
            "Pentanal (C5H10O)",
            "Hexanal (C6H12O)",
            "Heptanal (C7H14O)",
            "Octanal (C8H16O)",
            "Nonanal (C9H18O)",
            "Decanal (C10H20O)",
            "Glyoxal (C2H2O2)",
            "Glutaraldehyde (C5H8O2)",
            "Vanillin (C8H8O3)",
            "Anisaldehyde (C8H8O2)",
            "Cinnamaldehyde (C9H8O)",
            "Salicylaldehyde (C7H6O2)",
            "Benzaldehyde (C7H6O)",
            "Furfural (C5H4O2)",
            "Malondialdehyde (C3H4O2)",
            "Retinaldehyde (C20H28O)",
            "Phthalaldehyde (C8H6O2)",
            "Anthracene-9-carbaldehyde (C15H10O)",
            "Chloral hydrate (C2H3Cl3O2)",
            "Acrolein (C3H4O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (C2H4O)",
                "Pentanal (C5H10O)",
                "Hexanal (C6H12O)",
                "Heptanal (C7H14O)",
                "Octanal (C8H16O)",
                "Decanal (C10H20O)",
                "Glyoxal (C2H2O2)",
                "Glutaraldehyde (C5H8O2)",
                "Vanillin (C8H8O3)",
                "Salicylaldehyde (C7H6O2)",
                "Benzaldehyde (C7H6O)",
                "Furfural (C5H4O2)",
                "Malondialdehyde (C3H4O2)",
                "Phthalaldehyde (C8H6O2)",
                "Acrolein (C3H4O)"
            ],
            "mismatches": [
                "Propionaldehyde (C3H6O)",
                "Butyraldehyde (C4H8O)",
                "Nonanal (C9H18O)",
                "Anisaldehyde (C8H8O2)",
                "Cinnamaldehyde (C9H8O)",
                "Retinaldehyde (C20H28O)",
                "Anthracene-9-carbaldehyde (C15H10O)",
                "Chloral hydrate (C2H3Cl3O2)"
            ],
            "true_referents": [
                "(2E)-11-hydroxy-2-dodecenoic acid",
                "(3R)-all-trans-3-hydroxyretinal",
                "(3S)-all-trans-3-hydroxyretinal",
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "1-Ethyl-1H-pyrrole-2-carboxaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "1-naphthaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "2-Nonylheneicosanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3,6-anhydro-D-galactose",
                "3,6-anhydro-D-glucose",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-chloroacrylaldehyde",
                "3-hydroxy-1-methoxy-9,10-dioxo-2-anthracenecarboxaldehyde",
                "3-oxo aldehyde",
                "4,6-heptadienal",
                "4-(Diethylamino)salicylaldehyde",
                "5-Methoxymethyl furfural",
                "5-aminopentanal",
                "5-hydroxymethylfurfural",
                "8-heptadecenal",
                "L-glutamic 5-semialdehyde",
                "acetaldehyde",
                "acrolein",
                "benzaldehyde",
                "benzaldehydes",
                "chloroacetaldehyde",
                "cyclohexanecarbaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glyoxal",
                "heptanal",
                "hexanal",
                "isobutyraldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "non-2-enal",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "phthalaldehyde",
                "salicylaldehyde",
                "trichloroacetaldehyde",
                "vanillin"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Propionaldehyde (CH3CH2CHO)",
            "Butyraldehyde (CH3(CH2)2CHO)",
            "Valeraldehyde (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Cinnamaldehyde (C9H8O)",
            "Glyceraldehyde (C3H6O3)",
            "Glutaraldehyde (C5H8O2)",
            "Malondialdehyde (C3H4O2)",
            "Acrolein (C3H4O)",
            "Crotonaldehyde (C4H6O)",
            "Furfural (C5H4O2)",
            "Vanillin (C8H8O3)",
            "Anisaldehyde (C8H8O2)",
            "Citral (C10H16O)",
            "Glyoxal (C2H2O2)",
            "Methylglyoxal (C3H4O2)",
            "Pentanal (CH3(CH2)3CHO)",
            "Phenylacetaldehyde (C8H8O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Valeraldehyde (CH3(CH2)3CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Glutaraldehyde (C5H8O2)",
                "Acrolein (C3H4O)",
                "Crotonaldehyde (C4H6O)",
                "Furfural (C5H4O2)",
                "Vanillin (C8H8O3)",
                "Glyoxal (C2H2O2)",
                "Methylglyoxal (C3H4O2)",
                "Pentanal (CH3(CH2)3CHO)",
                "Phenylacetaldehyde (C8H8O)"
            ],
            "mismatches": [
                "Propionaldehyde (CH3CH2CHO)",
                "Butyraldehyde (CH3(CH2)2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Cinnamaldehyde (C9H8O)",
                "Glyceraldehyde (C3H6O3)",
                "Malondialdehyde (C3H4O2)",
                "Anisaldehyde (C8H8O2)",
                "Citral (C10H16O)"
            ],
            "true_referents": [
                "(2E)-11-hydroxy-2-dodecenoic acid",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(E)-non-2-enal",
                "(S)-2-methylbutanal",
                "1-Ethyl-1H-pyrrole-2-carboxaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3,6-anhydro-D-galactose",
                "3,6-anhydro-D-glucose",
                "3-Methylhexanal",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "3-oxo aldehyde",
                "4,6-heptadienal",
                "4-acetoxy-3,5-dimethoxy-trans-cinnamaldehyde",
                "4-methylpentanal",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "L-glutamic 5-semialdehyde",
                "L-glyceraldehyde 3-phosphate",
                "Phenylglyoxal",
                "acetaldehyde",
                "acrolein",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glyceraldehyde 3-phosphate",
                "glyoxal",
                "malonaldehyde",
                "methylglyoxal",
                "o-tolualdehyde",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "trans-3-Chloroallyl aldehyde",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 15,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Propionaldehyde (CH3CH2CHO)",
            "Butyraldehyde (CH3CH2CH2CHO)",
            "Valeraldehyde (pentanal, C4H9CHO)",
            "Hexanal (C5H11CHO)",
            "Heptanal (C6H13CHO)",
            "Octanal (C7H15CHO)",
            "Nonanal (C8H17CHO)",
            "Decanal (C9H19CHO)",
            "Cinnamaldehyde (3-Phenyl-2-propenal)",
            "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
            "Salicylaldehyde (2-Hydroxybenzaldehyde)",
            "Glycolaldehyde (HOCH2CHO)",
            "Pyridoxal (Vitamin B6 aldehyde)",
            "Retinal (Vitamin A aldehyde)",
            "Furfural (C4H3OCHO)",
            "Hydraldehyde (also known as 3,4-dihydroxybenzaldehyde)",
            "Glyceraldehyde (CH2OHCHOHCHO)",
            "Malondialdehyde (CH2(CHO)2)",
            "p-Tolualdehyde (4-Methylbenzaldehyde)",
            "Furfuraldehyde (C5H4O2)",
            "Mercaptaldehyde (HSCH2CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Valeraldehyde (pentanal, C4H9CHO)",
                "Hexanal (C5H11CHO)",
                "Heptanal (C6H13CHO)",
                "Octanal (C7H15CHO)",
                "Salicylaldehyde (2-Hydroxybenzaldehyde)",
                "Glycolaldehyde (HOCH2CHO)",
                "Pyridoxal (Vitamin B6 aldehyde)",
                "Furfural (C4H3OCHO)",
                "p-Tolualdehyde (4-Methylbenzaldehyde)"
            ],
            "mismatches": [
                "Propionaldehyde (CH3CH2CHO)",
                "Butyraldehyde (CH3CH2CH2CHO)",
                "Nonanal (C8H17CHO)",
                "Decanal (C9H19CHO)",
                "Cinnamaldehyde (3-Phenyl-2-propenal)",
                "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
                "Retinal (Vitamin A aldehyde)",
                "Hydraldehyde (also known as 3,4-dihydroxybenzaldehyde)",
                "Glyceraldehyde (CH2OHCHOHCHO)",
                "Malondialdehyde (CH2(CHO)2)",
                "Furfuraldehyde (C5H4O2)",
                "Mercaptaldehyde (HSCH2CHO)"
            ],
            "true_referents": [
                "(4-Methylphenyl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(S)-methylmalonaldehydic acid",
                "(methylsulfanyl)acetaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "2-Nonylheneicosanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-furaldehyde",
                "4,6-heptadienal",
                "5-Methoxymethyl furfural",
                "5-methyl-2-furaldehyde",
                "9-cis-retinal",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "L-glyceraldehyde 3-phosphate",
                "acetaldehyde",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "heptanal",
                "hexanal",
                "hydroxyaldehyde",
                "hydroxybenzaldehyde",
                "isobutyraldehyde",
                "m-tolualdehyde",
                "malonaldehyde",
                "non-2-enal",
                "o-tolualdehyde",
                "octanal",
                "p-tolualdehyde",
                "phenylacetaldehyde",
                "pyridoxal",
                "pyridoxal 5'-phosphate",
                "retinals",
                "salicylaldehyde",
                "sulfoacetaldehyde",
                "valeraldehyde",
                "vanillin acetate",
                "veratraldehyde"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propanal",
            "Butyraldehyde",
            "Benzaldehyde",
            "Glyceraldehyde",
            "Acrolein",
            "Crotonaldehyde",
            "Glutaraldehyde",
            "Cinnamaldehyde",
            "Vanillin",
            "Retinal",
            "Malondialdehyde",
            "Glyoxal",
            "Salicylaldehyde",
            "Pyridoxal",
            "Furfural",
            "Citral",
            "Hydroxymethylfurfural (HMF)",
            "4-Hydroxynonenal (4-HNE)",
            "3-Deoxyglucosone",
            "Methylglyoxal",
            "Propionaldehyde",
            "Isobutyraldehyde"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde",
                "Acetaldehyde",
                "Propanal",
                "Benzaldehyde",
                "Acrolein",
                "Crotonaldehyde",
                "Glutaraldehyde",
                "Vanillin",
                "Retinal",
                "Glyoxal",
                "Salicylaldehyde",
                "Pyridoxal",
                "Furfural",
                "Methylglyoxal",
                "Isobutyraldehyde"
            ],
            "mismatches": [
                "Butyraldehyde",
                "Glyceraldehyde",
                "Cinnamaldehyde",
                "Malondialdehyde",
                "Citral",
                "Hydroxymethylfurfural (HMF)",
                "4-Hydroxynonenal (4-HNE)",
                "3-Deoxyglucosone",
                "Propionaldehyde"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "(R)-alpha-campholenaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "3,6-anhydro-D-glucose",
                "3-methylsalicylaldehyde",
                "4-(Diethylamino)salicylaldehyde",
                "4-hydroxyhexenal",
                "4-hydroxyindole-3-carbaldehyde",
                "4-hydroxynonenal",
                "5-Methoxymethyl furfural",
                "5-Sulfoxymethylfurfural",
                "5-hydroxymethylfurfural",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "L-glyceraldehyde 3-phosphate",
                "acetaldehyde",
                "acrolein",
                "aldehydic acid",
                "benzaldehyde",
                "benzaldehydes",
                "cinnamaldehydes",
                "citronellal",
                "crotonaldehyde",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glyoxal",
                "isobutyraldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "phenylacetaldehyde",
                "propanal",
                "propanals",
                "pyridoxal",
                "pyridoxal 5'-phosphate",
                "pyrrole-2-carboxaldehyde",
                "retinal",
                "retinals",
                "salicylaldehyde",
                "tolualdehyde",
                "vanillin"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "formaldehyde (methanal)",
            "acetaldehyde (ethanal)",
            "propanal",
            "butanal",
            "pentanal",
            "hexanal",
            "heptanal",
            "octanal",
            "nonanal",
            "decanal",
            "undecanal",
            "dodecanal",
            "acrolein (propenal)",
            "crotonaldehyde",
            "benzaldehyde",
            "salicylaldehyde",
            "cinnamaldehyde",
            "glyceraldehyde",
            "retinal",
            "vanillin",
            "citral",
            "glutaraldehyde",
            "glyoxal",
            "malondialdehyde (MDA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "formaldehyde (methanal)",
                "acetaldehyde (ethanal)",
                "propanal",
                "butanal",
                "pentanal",
                "hexanal",
                "heptanal",
                "octanal",
                "nonanal",
                "decanal",
                "undecanal",
                "dodecanal",
                "acrolein (propenal)",
                "crotonaldehyde",
                "benzaldehyde",
                "salicylaldehyde",
                "cinnamaldehyde",
                "retinal",
                "vanillin",
                "glutaraldehyde",
                "glyoxal"
            ],
            "mismatches": [
                "glyceraldehyde",
                "citral",
                "malondialdehyde (MDA)"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "(R)-alpha-campholenaldehyde",
                "(S)-methylmalonaldehydic acid",
                "2-Methyl-3-phenyl-2-propenal",
                "3-(4-Methylphenyl)-2-propenal",
                "3-methylsalicylaldehyde",
                "4-(Diethylamino)salicylaldehyde",
                "4-hydroxyindole-3-carbaldehyde",
                "4-oxopentanal",
                "5-aminopentanal",
                "D-glyceraldehyde 3-phosphate",
                "L-glyceraldehyde 3-phosphate",
                "acetaldehyde",
                "acrolein",
                "benzaldehyde",
                "benzaldehydes",
                "butanal",
                "butanals",
                "cinnamaldehydes",
                "citronellal",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "dodecanal",
                "ethyl vanillin",
                "formaldehyde",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glyoxal",
                "heptanal",
                "hexanal",
                "lactaldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "nonanal",
                "octanal",
                "pentanal",
                "propanal",
                "propanals",
                "retinal",
                "retinals",
                "salicylaldehyde",
                "tolualdehyde",
                "undecanal",
                "vanillin"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Propionaldehyde (CH3CH2CHO)",
            "Butyraldehyde (CH3CH2CH2CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Glyceraldehyde (C3H6O3)",
            "Furfural (C5H4O2)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Vanillin (C8H8O3)",
            "Retinal (C20H28O)",
            "Paraldehyde [(CH3CHO)3]",
            "Citral (C10H16O)",
            "Acrolein (CH2=CHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Glutaraldehyde (OHC(CH2)3CHO)",
            "Isovaleraldehyde [(CH3)2CHCH2CHO]",
            "Salicylaldehyde (C6H4(OH)CHO)",
            "Anisaldehyde (C6H4(OCH3)CHO)",
            "Pyridine-3-carboxaldehyde (C6H5NO)",
            "Glyoxal (OHC-CHO)",
            "Succinaldehyde (OHC(CH2)2CHO)",
            "Malonaldehyde (OHC-CH2-CHO)",
            "Glycolaldehyde (HOCH2CHO)",
            "Phenylacetaldehyde (C6H5CH2CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Furfural (C5H4O2)",
                "Vanillin (C8H8O3)",
                "Acrolein (CH2=CHCHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Glutaraldehyde (OHC(CH2)3CHO)",
                "Salicylaldehyde (C6H4(OH)CHO)",
                "Glyoxal (OHC-CHO)",
                "Glycolaldehyde (HOCH2CHO)",
                "Phenylacetaldehyde (C6H5CH2CHO)"
            ],
            "mismatches": [
                "Propionaldehyde (CH3CH2CHO)",
                "Butyraldehyde (CH3CH2CH2CHO)",
                "Glyceraldehyde (C3H6O3)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Retinal (C20H28O)",
                "Paraldehyde [(CH3CHO)3]",
                "Citral (C10H16O)",
                "Isovaleraldehyde [(CH3)2CHCH2CHO]",
                "Anisaldehyde (C6H4(OCH3)CHO)",
                "Pyridine-3-carboxaldehyde (C6H5NO)",
                "Succinaldehyde (OHC(CH2)2CHO)",
                "Malonaldehyde (OHC-CH2-CHO)"
            ],
            "true_referents": [
                "(2E)-11-hydroxy-2-dodecenoic acid",
                "(3E)-2,3,4-trimethylhex-3-enal",
                "(3R)-all-trans-3-hydroxyretinal",
                "(3S)-all-trans-3-hydroxyretinal",
                "(4-Methylphenyl)acetaldehyde",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2-hydroxyphenylacetaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,6-anhydro-D-galactose",
                "3,6-anhydro-D-glucose",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-(2,2,3-trimethylcyclopent-3-enyl)cyclohex-3-enecarbaldehyde",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "4-(2,2,3-trimethylcyclopent-3-enyl)cyclohex-3-enecarbaldehyde",
                "4-(Diethylamino)salicylaldehyde",
                "4-acetoxy-3,5-dimethoxy-trans-cinnamaldehyde",
                "4-formyl-2-methoxyphenyl L-menthyl glutarate",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "L-glyceraldehyde 3-phosphate",
                "Succinic aldehyde",
                "acetaldehyde",
                "acrolein",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "alpha-hydroxymethyl succinate semialdehyde",
                "benzaldehyde",
                "benzaldehydes",
                "crotonaldehyde",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutaraldehyde",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glyoxal",
                "isobutyraldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "o-tolualdehyde",
                "phenylacetaldehyde",
                "pyridine-3-carbaldehyde",
                "pyridine-3-carbaldehyde oxime",
                "pyridinecarbaldehyde",
                "salicylaldehyde",
                "succinic semialdehyde",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propionaldehyde (CH3CH2CHO)",
            "Butyraldehyde (CH3CH2CH2CHO)",
            "Valeraldehyde (CH3CH2CH2CH2CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Glycolaldehyde (HOCH2CHO)",
            "Glyoxal (OCHCHO)",
            "Succinaldehyde (OHC(CH2)2CHO)",
            "Glutaraldehyde (OHC(CH2)3CHO)",
            "Adipaldehyde (OHC(CH2)4CHO)",
            "Piperonal (C6H5CH2O2)",
            "Vanillin (C6H3(OH)(OCH3)CHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Citronellal (C10H18O)",
            "Retinal (C20H28O)",
            "Malonaldehyde (CH2(CHO)2)",
            "Phenylacetaldehyde (C6H5CH2CHO)",
            "Isovaleraldehyde ((CH3)2CHCH2CHO)",
            "Salicylaldehyde (C6H4(OH)CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Valeraldehyde (CH3CH2CH2CH2CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Glycolaldehyde (HOCH2CHO)",
                "Glyoxal (OCHCHO)",
                "Glutaraldehyde (OHC(CH2)3CHO)",
                "Adipaldehyde (OHC(CH2)4CHO)",
                "Piperonal (C6H5CH2O2)",
                "Malonaldehyde (CH2(CHO)2)",
                "Phenylacetaldehyde (C6H5CH2CHO)",
                "Salicylaldehyde (C6H4(OH)CHO)"
            ],
            "mismatches": [
                "Propionaldehyde (CH3CH2CHO)",
                "Butyraldehyde (CH3CH2CH2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Succinaldehyde (OHC(CH2)2CHO)",
                "Vanillin (C6H3(OH)(OCH3)CHO)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Citronellal (C10H18O)",
                "Retinal (C20H28O)",
                "Isovaleraldehyde ((CH3)2CHCH2CHO)"
            ],
            "true_referents": [
                "(3R)-all-trans-3-hydroxyretinal",
                "(3S)-all-trans-3-hydroxyretinal",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-2-methylpropanal oxime",
                "(E)-cinnamaldehyde",
                "(E)-non-2-enal",
                "(R)-(+)-citronellal",
                "(S)-(-)-citronellal",
                "(S)-methylmalonaldehydic acid",
                "1-piperazinecarboxaldehyde",
                "2-hydroxyphenylacetaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-Methylhexanal",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "4,6-heptadienal",
                "4-(Diethylamino)salicylaldehyde",
                "4-formyl-2-methoxyphenyl L-menthyl glutarate",
                "Adipaldehyde",
                "Decanoyl acetaldehyde",
                "Succinic aldehyde",
                "acetaldehyde",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "alpha-hydroxymethyl succinate semialdehyde",
                "chloroacetaldehyde",
                "citronellal",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "glutaraldehyde",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glyoxal",
                "isobutyraldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "octanal",
                "phenylacetaldehyde",
                "piperonal",
                "salicylaldehyde",
                "succinic semialdehyde",
                "valeraldehyde",
                "vanillin acetate",
                "veratraldehyde"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Vanillin (Vanillin)",
            "Cinnamaldehyde (Cinnamaldehyde)",
            "Acetaldehyde (Acetaldehyde)",
            "Benzaldehyde (Benzaldehyde)",
            "Furfuraldehyde (Furfuraldehyde)",
            "Glyoxal (Glyoxal)",
            "Methylglyoxal (Methylglyoxal)",
            "Ketamine (Ketamine)",
            "Pentanal (Pentanal)",
            "Hexanal (Hexanal)",
            "Heptanal (Heptanal)",
            "Octanal (Octanal)",
            "Nonanal (Nonanal)",
            "Decanal (Decanal)",
            "Undecanal (Undecanal)",
            "Dodecanal (Dodecanal)",
            "Tridecanal (Tridecanal)",
            "Tetradecanal (Tetradecanal)",
            "Pentadecanal (Pentadecanal)",
            "Hexadecanal (Hexadecanal)",
            "Heptadecanal (Heptadecanal)",
            "Octadecanal (Octadecanal)",
            "Nonadecanal (Nonadecanal)",
            "Eicosanal (Eicosanal)",
            "Docosanal (Docosanal)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Vanillin",
                "Cinnamaldehyde",
                "Acetaldehyde",
                "Benzaldehyde",
                "Glyoxal",
                "Methylglyoxal",
                "Pentanal",
                "Hexanal",
                "Heptanal",
                "Octanal",
                "Nonanal",
                "Decanal",
                "Undecanal",
                "Dodecanal",
                "Tridecanal",
                "Tetradecanal",
                "Pentadecanal",
                "Hexadecanal",
                "Heptadecanal",
                "Octadecanal",
                "Nonadecanal",
                "Eicosanal"
            ],
            "mismatches": [
                "Furfuraldehyde",
                "Ketamine",
                "Docosanal"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "11Z-Eicosenal",
                "2-methylpentadecanal",
                "2E,4E,6E,8E,10E,12E,14E,16E,18E-eicosanonaenal",
                "3-furaldehyde",
                "4-oxopentanal",
                "5-aminopentanal",
                "5-methyl-2-furaldehyde",
                "Eicosanal",
                "L-tyrosine",
                "Nonadecanal",
                "acetaldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "cinnamaldehydes",
                "crotonaldehyde",
                "decanal",
                "dodecanal",
                "ethyl vanillin",
                "furfural",
                "glyoxal",
                "heptadecanal",
                "heptanal",
                "hexadecanal",
                "hexanal",
                "lactaldehyde",
                "methylglyoxal",
                "nonanal",
                "octadecanal",
                "octanal",
                "p-aminobenzaldehyde",
                "pentadecanal",
                "pentanal",
                "tetradecanal",
                "tolualdehyde",
                "tridecanal",
                "undecanal",
                "vanillin"
            ],
            "TP": 22,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (FA)",
            "Acetaldehyde (AcH)",
            "Benzaldehyde (BzH)",
            "Glutaraldehyde (GTA)",
            "Retinaldehyde (RAL)",
            "Vanillin (VLN)",
            "Cinnamaldehyde (CIN)",
            "Salicylaldehyde (SAL)",
            "Furfural (FUR)",
            "Propionaldehyde (PPA)",
            "Butyraldehyde (BTA)",
            "Hexanal (HXA)",
            "Heptanal (HPT)",
            "Octanal (OCT)",
            "Nonanal (NON)",
            "Decanal (DEC)",
            "Undecanal (UND)",
            "Dodecanal (DOD)",
            "Tolualdehyde (TLA)",
            "Phenylacetaldehyde (PAA)",
            "4-Hydroxy-3-methoxybenzaldehyde (HMB)",
            "3,4-Dimethoxybenzaldehyde (DMB)",
            "Piperonal (PIP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (FA)",
                "Acetaldehyde (AcH)",
                "Benzaldehyde (BzH)",
                "Glutaraldehyde (GTA)",
                "Vanillin (VLN)",
                "Cinnamaldehyde (CIN)",
                "Salicylaldehyde (SAL)",
                "Furfural (FUR)",
                "Hexanal (HXA)",
                "Heptanal (HPT)",
                "Octanal (OCT)",
                "Nonanal (NON)",
                "Decanal (DEC)",
                "Undecanal (UND)",
                "Dodecanal (DOD)",
                "Tolualdehyde (TLA)",
                "Phenylacetaldehyde (PAA)",
                "Piperonal (PIP)"
            ],
            "mismatches": [
                "Retinaldehyde (RAL)",
                "Propionaldehyde (PPA)",
                "Butyraldehyde (BTA)",
                "4-Hydroxy-3-methoxybenzaldehyde (HMB)",
                "3,4-Dimethoxybenzaldehyde (DMB)"
            ],
            "true_referents": [
                "(+)-pisiferal",
                "(3R)-all-trans-3-hydroxyretinal",
                "(E)-cinnamaldehyde",
                "2,4-dihydroxy-5,6-dimethylbenzaldehyde",
                "2,4-dihydroxy-6-methylbenzaldehyde",
                "2,5-diformylfuran",
                "2-Hydroxy-4-methylbenzaldehyde",
                "3,5-dimethylbenzaldehyde",
                "3-methoxybenzaldehyde",
                "3-methylsalicylaldehyde",
                "4-Hydroxy-3-methylbenzaldehyde",
                "4-piperidin-4-ylbutanal",
                "acetaldehyde",
                "acetaldehyde (Z)-hydrazone",
                "all-trans-13,14-dihydroretinal",
                "benzaldehyde",
                "benzaldehydes",
                "cinnamaldehydes",
                "decanal",
                "dialdehyde",
                "dodecanal",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "gamma-glutamyl-gamma-aminobutyraldehyde",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "heptanal",
                "hexanal",
                "isobutyraldehyde",
                "ketoaldehyde",
                "lactaldehyde",
                "m-tolualdehyde",
                "nonanal",
                "o-tolualdehyde",
                "octanal",
                "p-tolualdehyde",
                "phenylacetaldehyde",
                "phenylacetaldehydes",
                "piperonal",
                "salicylaldehyde",
                "tolualdehyde",
                "undecanal",
                "vanillin"
            ],
            "TP": 18,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde",
            "Acrolein",
            "Butanal",
            "Butanone",
            "Crotonaldehyde",
            "Formaldehyde",
            "Glycolaldehyde",
            "Glyceraldehyde",
            "Itaconic aldehyde",
            "Isobutyraldehyde",
            "Isovaleraldehyde",
            "Methacrolein",
            "Methanal",
            "Methyldialdehyde",
            "Oxirane",
            "Pyruvaldehyde",
            "Tetrahydrofuran",
            "Valeraldehyde",
            "Vinylaldehyde",
            "Vinylformaldehyde"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaldehyde",
                "Acrolein",
                "Butanal",
                "Crotonaldehyde",
                "Formaldehyde",
                "Glycolaldehyde",
                "Isobutyraldehyde",
                "Methacrolein",
                "Valeraldehyde"
            ],
            "mismatches": [
                "Butanone",
                "Glyceraldehyde",
                "Itaconic aldehyde",
                "Isovaleraldehyde",
                "Methanal",
                "Methyldialdehyde",
                "Oxirane",
                "Pyruvaldehyde",
                "Tetrahydrofuran",
                "Vinylaldehyde",
                "Vinylformaldehyde"
            ],
            "true_referents": [
                "(R)-alpha-campholenaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2,5-diformylfuran",
                "2-(dihydroxymethyl)-5-formylfuran",
                "2-oxo aldehyde",
                "3,5-Dimethylhexanal",
                "3-(3-Ethyloxiranyl)-2-propenal",
                "4-hydroxyindole-3-carbaldehyde",
                "6'-oxoparomamine",
                "6-methoxy-2,6-dimethyloctanal",
                "D-glyceraldehyde 3-phosphate",
                "L-glyceraldehyde 3-phosphate",
                "Methacrolein",
                "Muconic dialdehyde",
                "Succinic aldehyde",
                "acetaldehyde",
                "acrolein",
                "benzaldehyde",
                "butanal",
                "butanals",
                "crotonaldehyde",
                "dialdehyde",
                "formaldehyde",
                "furfural",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glycolaldehydes",
                "hydroxypyruvaldehyde",
                "hydroxypyruvaldehyde phosphate",
                "imidazolylacetaldehyde",
                "isobutyraldehyde",
                "ketoaldehyde",
                "lactaldehyde",
                "malonaldehyde",
                "methanimine",
                "octanal",
                "pyrrole-2-carboxaldehyde",
                "valeraldehyde",
                "veratraldehyde"
            ],
            "TP": 9,
            "FP": 11,
            "FN": 0
        }
    }
]