[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "formic acid (HCOOH)",
            "acetic acid (CH3COOH)",
            "propanoic acid (C2H5COOH)",
            "butyric acid (C3H7COOH)",
            "valeric acid (C4H9COOH)",
            "caproic acid (C5H11COOH)",
            "enanthic acid (C6H13COOH)",
            "caprylic acid (C7H15COOH)",
            "pelargonic acid (C8H17COOH)",
            "capric acid (C9H19COOH)",
            "undecylenic acid (C10H21COOH)",
            "lauric acid (C11H23COOH)",
            "tridecylenic acid (C12H25COOH)",
            "myristic acid (C13H27COOH)",
            "palmic acid (C14H29COOH)",
            "palmitoleic acid (C16H31COOH)",
            "stearic acid (C17H35COOH)",
            "elaidic acid (C18H35COOH)",
            "oleic acid (C18H33COOH)",
            "linoleic acid (C18H30COOH)",
            "linolenic acid (C18H29COOH)",
            "arachidic acid (C20H41COOH)",
            "gadoleic acid (C20H39COOH)",
            "erucic acid (C22H43COOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "formic acid (HCOOH)",
                "acetic acid (CH3COOH)",
                "propanoic acid (C2H5COOH)",
                "butyric acid (C3H7COOH)",
                "valeric acid (C4H9COOH)",
                "caproic acid (C5H11COOH)",
                "enanthic acid (C6H13COOH)",
                "caprylic acid (C7H15COOH)",
                "pelargonic acid (C8H17COOH)",
                "capric acid (C9H19COOH)",
                "undecylenic acid (C10H21COOH)",
                "lauric acid (C11H23COOH)",
                "tridecylenic acid (C12H25COOH)",
                "myristic acid (C13H27COOH)",
                "palmic acid (C14H29COOH)",
                "palmitoleic acid (C16H31COOH)",
                "stearic acid (C17H35COOH)",
                "elaidic acid (C18H35COOH)",
                "oleic acid (C18H33COOH)",
                "linoleic acid (C18H30COOH)",
                "linolenic acid (C18H29COOH)",
                "arachidic acid (C20H41COOH)",
                "gadoleic acid (C20H39COOH)",
                "erucic acid (C22H43COOH)"
            ],
            "true_referents": [
                "(2E)-11-hydroxy-2-dodecenoic acid",
                "(2R)-2-methyl-3-oxopropanoic acid",
                "(E)-2-tridecylnonadec-2-enal",
                "(E)-hept-2-enoic acid",
                "(R)-4-dehydropantoic acid",
                "12-oxo-cis-dodec-9-enoic acid",
                "2,6,10-trimethyldodeca-2,4,6,8,10-pentaenedial",
                "2-formylbenzoic acid",
                "2-methyl-3-oxopropanoic acid",
                "3,6-anhydro-D-glucose",
                "3-oxopropanoic acid",
                "4-formylbenzoic acid",
                "4-oxo-4-(pyridin-3-yl)butanoic acid",
                "6-oxohexanoic acid",
                "N-(fatty acyl)-L-tyrosine(1-)",
                "aldehydic acid",
                "all-trans-1,6-seco-1,2-didehydroretinal",
                "isobutyraldehyde",
                "trans,trans-2,4-dihydroxy-5-methyl-6-oxo-2,4-hexadienoic acid",
                "trans-2-dodecenoic acid",
                "valeraldehyde"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formic acid (HCOOH)",
            "Acetic acid (CH3COOH)",
            "Propionic acid (C2H5COOH)",
            "Butyric acid (C3H7COOH)",
            "Valeric acid (C4H9COOH)",
            "Caproic acid (C5H11COOH)",
            "Enanthic acid (C6H13COOH)",
            "Heptanoic acid (C6H13COOH)",
            "Caprylic acid (C7H15COOH)",
            "Pelargonic acid (C8H17COOH)",
            "Nonanoic acid (C8H17COOH)",
            "Capric acid (C9H19COOH)",
            "Decanoic acid (C9H19COOH)",
            "Undecylic acid (C10H21COOH)",
            "Undecanoic acid (C10H21COOH)",
            "Lauric acid (C11H23COOH)",
            "Dodecanoic acid (C11H23COOH)",
            "Tridecylic acid (C12H25COOH)",
            "Tridecanoic acid (C12H25COOH)",
            "Myristic acid (C13H27COOH)",
            "Tetradecanoic acid (C13H27COOH)",
            "Pentadecylic acid (C14H29COOH)",
            "Pentadecanoic acid (C14H29COOH)",
            "Palmitic acid (C15H31COOH)",
            "Hexadecanoic acid (C15H31COOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Acetic acid (CH3COOH)",
                "Propionic acid (C2H5COOH)",
                "Butyric acid (C3H7COOH)",
                "Valeric acid (C4H9COOH)",
                "Caproic acid (C5H11COOH)",
                "Enanthic acid (C6H13COOH)",
                "Heptanoic acid (C6H13COOH)",
                "Caprylic acid (C7H15COOH)",
                "Pelargonic acid (C8H17COOH)",
                "Nonanoic acid (C8H17COOH)",
                "Capric acid (C9H19COOH)",
                "Decanoic acid (C9H19COOH)",
                "Undecylic acid (C10H21COOH)",
                "Undecanoic acid (C10H21COOH)",
                "Lauric acid (C11H23COOH)",
                "Dodecanoic acid (C11H23COOH)",
                "Tridecylic acid (C12H25COOH)",
                "Tridecanoic acid (C12H25COOH)",
                "Myristic acid (C13H27COOH)",
                "Tetradecanoic acid (C13H27COOH)",
                "Pentadecylic acid (C14H29COOH)",
                "Pentadecanoic acid (C14H29COOH)",
                "Palmitic acid (C15H31COOH)",
                "Hexadecanoic acid (C15H31COOH)"
            ],
            "true_referents": [
                "(2E)-11-hydroxy-2-dodecenoic acid",
                "(2E,4E)-deca-2,4-dienal",
                "(2R)-2-methyl-3-oxopropanoic acid",
                "(E)-2-pentadecylheptadec-2-enal",
                "(E)-hept-2-enoic acid",
                "(R)-4-dehydropantoic acid",
                "12-oxo-cis-dodec-9-enoic acid",
                "12-oxododecanoic acid",
                "2,5-dioxopentanoic acid",
                "2-formylbenzoic acid",
                "2-methyl-3-oxopropanoic acid",
                "2-methylpentadecanal",
                "3,6-anhydro-D-glucose",
                "3-oxopropanoic acid",
                "4,6-heptadienal",
                "4-formylbenzoic acid",
                "4-oxo-4-(pyridin-3-yl)butanoic acid",
                "6-oxohexanoic acid",
                "Decanoyl acetaldehyde",
                "N-(fatty acyl)-L-tyrosine(1-)",
                "aldehydic acid",
                "aliphatic aldehyde",
                "isobutyraldehyde",
                "pentadecanal",
                "trans,trans-2,4-dihydroxy-5-methyl-6-oxo-2,4-hexadienoic acid",
                "trans-2-dodecenoic acid",
                "valeraldehyde"
            ],
            "TP": 0,
            "FP": 25,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (Ethanal)",
            "Propionaldehyde (Propanal)",
            "Butyraldehyde (Butanal)",
            "Isobutyraldehyde (2-Methylpropanal)",
            "Valeraldehyde (Pentanal)",
            "Isovaleraldehyde (3-Methylbutanal)",
            "Hexanal",
            "Heptanal",
            "Octanal",
            "Nonanal",
            "Decanal",
            "Undecanal",
            "Dodecanal",
            "Tridecanal",
            "Tetradecanal",
            "Pentadecanal",
            "Hexadecanal",
            "Heptadecanal",
            "Octadecanal",
            "Nonadecanal",
            "Eicosanal",
            "Docosanal",
            "Tricosanal",
            "Tetracosanal"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaldehyde (Ethanal)",
                "Propionaldehyde (Propanal)",
                "Butyraldehyde (Butanal)",
                "Isobutyraldehyde (2-Methylpropanal)",
                "Valeraldehyde (Pentanal)",
                "Isovaleraldehyde (3-Methylbutanal)",
                "Hexanal",
                "Heptanal",
                "Octanal",
                "Nonanal",
                "Decanal",
                "Undecanal",
                "Dodecanal",
                "Tridecanal",
                "Tetradecanal",
                "Pentadecanal",
                "Hexadecanal",
                "Heptadecanal",
                "Octadecanal",
                "Nonadecanal",
                "Eicosanal",
                "Tetracosanal"
            ],
            "mismatches": [
                "Docosanal",
                "Tricosanal"
            ],
            "true_referents": [
                "(S)-2-methylbutanal",
                "11Z-Eicosenal",
                "2-hydroxy-2-methylpropanal",
                "2E,4E,6E,8E,10E,12E,14E,16E,18E-eicosanonaenal",
                "3-methylbutanal",
                "Eicosanal",
                "Nonadecanal",
                "acetaldehyde",
                "butanal",
                "decanal",
                "dodecanal",
                "heptadecanal",
                "heptanal",
                "hexacosanal",
                "hexadecanal",
                "hexanal",
                "isobutyraldehyde",
                "lactaldehyde",
                "nonanal",
                "octadecanal",
                "octanal",
                "pentadecanal",
                "pentanal",
                "propanal",
                "propanals",
                "tetracosanal",
                "tetradecanal",
                "tridecanal",
                "undecanal",
                "valeraldehyde",
                "veratraldehyde"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propanal (CH3CH2CHO)",
            "Butyraldehyde (CH3CH2CH2CHO)",
            "Isobutyraldehyde ((CH3)2CHCHO)",
            "Pentanal (CH3(CH2)3CHO)",
            "2-Methylbutanal (CH3CH(CH3)CH2CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Phenylacetaldehyde (C6H5CH2CHO)",
            "Acrylaldehyde (CH2=CHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
            "Citral (3,7-Dimethyl-2,6-octadienal)",
            "Glyoxal (OHC-CHO)",
            "Methylglyoxal (CH3COCHO)",
            "Glycolaldehyde (HOCH2CHO)",
            "Formylpiperidine (C6H11NO)",
            "2-Formylfuran (C5H4O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Propanal (CH3CH2CHO)",
                "Isobutyraldehyde ((CH3)2CHCHO)",
                "Pentanal (CH3(CH2)3CHO)",
                "2-Methylbutanal (CH3CH(CH3)CH2CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Phenylacetaldehyde (C6H5CH2CHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Glyoxal (OHC-CHO)",
                "Methylglyoxal (CH3COCHO)",
                "Glycolaldehyde (HOCH2CHO)"
            ],
            "mismatches": [
                "Butyraldehyde (CH3CH2CH2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Acrylaldehyde (CH2=CHCHO)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Vanillin (4-Hydroxy-3-methoxybenzaldehyde)",
                "Citral (3,7-Dimethyl-2,6-octadienal)",
                "Formylpiperidine (C6H11NO)",
                "2-Formylfuran (C5H4O2)"
            ],
            "true_referents": [
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(E)-non-2-enal",
                "(S)-2-methylbutanal",
                "2,3,4,5-tetrahydroxypentanal",
                "2,5-diformylfuran",
                "2-(dihydroxymethyl)-5-formylfuran",
                "2-formylphenylformamide",
                "2-formylpyridine",
                "2-hydroxyphenylacetaldehyde",
                "2-methylbutanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-Methylhexanal",
                "3-[(4-aminobutyl)amino]propanal",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "4,6-heptadienal",
                "4-acetoxy-3,5-dimethoxy-trans-cinnamaldehyde",
                "4-methylpentanal",
                "Decanoyl acetaldehyde",
                "acetaldehyde",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "formylmethyl",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glyoxal",
                "isobutyraldehyde",
                "methyl phenylglyoxalate",
                "methylglyoxal",
                "o-tolualdehyde",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "propanal",
                "vanillin acetate"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propionaldehyde (C2H5CHO)",
            "Butyraldehyde (C3H7CHO)",
            "Isobutyraldehyde ((CH3)2CHCHO)",
            "Valeraldehyde (C4H9CHO)",
            "Isovaleraldehyde ((CH3)2CHCH2CHO)",
            "Pivaldehyde ((CH3)3CCHO)",
            "Caproaldehyde (C5H11CHO)",
            "Heptanal (C6H13CHO)",
            "Octanal (C7H15CHO)",
            "Nonanal (C8H17CHO)",
            "Decanal (C9H19CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Phenylacetaldehyde (C6H5CH2CHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Salicylaldehyde (C6H4(OH)CHO)",
            "Vanillin (C8H8O3)",
            "Furfural (C4H3OCHO)",
            "Glyoxal (OCHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Methacrolein (CH2=C(CH3)CHO)",
            "Acrolein (CH2=CHCHO)",
            "2-Methylbutyraldehyde (C4H9CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Isobutyraldehyde ((CH3)2CHCHO)",
                "Valeraldehyde (C4H9CHO)",
                "Heptanal (C6H13CHO)",
                "Octanal (C7H15CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Phenylacetaldehyde (C6H5CH2CHO)",
                "Salicylaldehyde (C6H4(OH)CHO)",
                "Vanillin (C8H8O3)",
                "Furfural (C4H3OCHO)",
                "Glyoxal (OCHCHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Methacrolein (CH2=C(CH3)CHO)",
                "Acrolein (CH2=CHCHO)"
            ],
            "mismatches": [
                "Propionaldehyde (C2H5CHO)",
                "Butyraldehyde (C3H7CHO)",
                "Isovaleraldehyde ((CH3)2CHCH2CHO)",
                "Pivaldehyde ((CH3)3CCHO)",
                "Caproaldehyde (C5H11CHO)",
                "Nonanal (C8H17CHO)",
                "Decanal (C9H19CHO)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "2-Methylbutyraldehyde (C4H9CHO)"
            ],
            "true_referents": [
                "(2E,4E)-2,4-Dodecadienal",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(S)-2-methylbutanal",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2-Nonylheneicosanal",
                "2-hydroxy caproaldehyde",
                "2-hydroxyphenylacetaldehyde",
                "2-methyl-branched fatty aldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5-Dimethylhexanal",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-furaldehyde",
                "4,6-heptadienal",
                "4-(Diethylamino)salicylaldehyde",
                "4-hydroxy caproaldehyde",
                "5-Methoxymethyl furfural",
                "5-hydroxy caproaldehyde",
                "Decanoyl acetaldehyde",
                "Methacrolein",
                "acetaldehyde",
                "acrolein",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "chloroacetaldehyde",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glyoxal",
                "heptanal",
                "isobutyraldehyde",
                "methylglyoxal",
                "non-2-enal",
                "o-tolualdehyde",
                "octanal",
                "phenylacetaldehyde",
                "salicylaldehyde",
                "valeraldehyde",
                "vanillin",
                "veratraldehyde"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (CH3CHO)",
            "Formaldehyde (HCHO)",
            "Propionaldehyde (C2H5CHO)",
            "Butyraldehyde (C3H7CHO)",
            "Valeraldehyde (C4H9CHO)",
            "Caproaldehyde (C5H11CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Cinnamaldehyde (C9H8O)",
            "Isovaleraldehyde (C5H10CHO)",
            "Crotonaldehyde (C4H8CHO)",
            "Furfural (C5H4O2)",
            "Salicylaldehyde (C7H6O2)",
            "Acrolein (C3H4O)",
            "Octanal (C8H16CHO)",
            "Nonanal (C9H18CHO)",
            "Decanal (C10H20CHO)",
            "2-Phenylacetaldehyde (C8H10O)",
            "2-Methylbutanal (C5H12O)",
            "3-Methylbutanal (C5H12O)",
            "2-Butenal (C4H6O)",
            "3-Butenal (C4H6O)",
            "Trans-2-Hexenal (C6H10O)",
            "Cis-3-Hexenal (C6H10O)",
            "2-Furaldehyde (C5H4O)",
            "2,3-Butanedione (C4H6O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetaldehyde",
                "formaldehyde",
                "valeraldehyde",
                "benzaldehyde",
                "crotonaldehyde",
                "furfural",
                "salicylaldehyde",
                "acrolein",
                "octanal",
                "2-methylbutanal",
                "3-methylbutanal",
                "cis-3-hexenal"
            ],
            "mismatches": [
                "propionaldehyde",
                "butyraldehyde",
                "caproaldehyde",
                "cinnamaldehyde",
                "isovaleraldehyde",
                "nonanal",
                "decanal",
                "2-phenylacetaldehyde",
                "2-butenal",
                "3-butenal",
                "trans-2-hexenal",
                "2-furaldehyde",
                "2,3-butanedione"
            ],
            "true_referents": [
                "(2E)-2-ethyl-2-butenal",
                "(E)-cinnamaldehyde",
                "(R)-alpha-campholenaldehyde",
                "(S)-2-methylbutanal",
                "(Z)-4-Hexenal",
                "1-Ethyl-1H-pyrrole-2-carboxaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2,5-diformylfuran",
                "2-Nonylheneicosanal",
                "2-ethyl-2-hexenal",
                "2-hexenal",
                "2-hydroxy caproaldehyde",
                "2-hydroxyphenylacetaldehyde",
                "2-methylbutanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-[4-Hydroxy-3-(3-methyl-2-butenyl)phenyl]-2-propenal",
                "3-furaldehyde",
                "3-hexenal",
                "3-methylbutanal",
                "4-(Diethylamino)salicylaldehyde",
                "4-hydroxy caproaldehyde",
                "4-hydroxyindole-3-carbaldehyde",
                "5-Methoxymethyl furfural",
                "5-hydroxy caproaldehyde",
                "5-hydroxymethylfurfural",
                "5-methyl-2-furaldehyde",
                "Decanoyl acetaldehyde",
                "acetaldehyde",
                "acrolein",
                "aliphatic aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "chloroacetaldehyde",
                "cis-3-hexenal",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "formaldehyde",
                "furfural",
                "isobutyraldehyde",
                "non-2-enal",
                "o-tolualdehyde",
                "octanal",
                "phenylacetaldehyde",
                "salicylaldehyde",
                "valeraldehyde",
                "veratraldehyde"
            ],
            "TP": 12,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (CH3CHO)",
            "Formaldehyde (HCHO)",
            "Propionaldehyde (CH3CH2CHO)",
            "Butyraldehyde (CH3(CH2)2CHO)",
            "Valeraldehyde (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Undecanal (CH3(CH2)9CHO)",
            "Dodecanal (CH3(CH2)10CHO)",
            "Tridecanal (CH3(CH2)11CHO)",
            "Tetradecanal (CH3(CH2)12CHO)",
            "Pentadecanal (CH3(CH2)13CHO)",
            "Hexadecanal (CH3(CH2)14CHO)",
            "Heptadecanal (CH3(CH2)15CHO)",
            "Octadecanal (CH3(CH2)16CHO)",
            "Nonadecanal (CH3(CH2)17CHO)",
            "Eicosanal (CH3(CH2)18CHO)",
            "Heneicosanal (CH3(CH2)19CHO)",
            "Docosanal (CH3(CH2)20CHO)",
            "Tricosanal (CH3(CH2)21CHO)",
            "Tetracosanal (CH3(CH2)22CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaldehyde (CH3CHO)",
                "Formaldehyde (HCHO)",
                "Valeraldehyde (CH3(CH2)3CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Undecanal (CH3(CH2)9CHO)",
                "Dodecanal (CH3(CH2)10CHO)",
                "Tetradecanal (CH3(CH2)12CHO)",
                "Pentadecanal (CH3(CH2)13CHO)",
                "Hexadecanal (CH3(CH2)14CHO)",
                "Heptadecanal (CH3(CH2)15CHO)",
                "Eicosanal (CH3(CH2)18CHO)",
                "Tetracosanal (CH3(CH2)22CHO)"
            ],
            "mismatches": [
                "Propionaldehyde (CH3CH2CHO)",
                "Butyraldehyde (CH3(CH2)2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Tridecanal (CH3(CH2)11CHO)",
                "Octadecanal (CH3(CH2)16CHO)",
                "Nonadecanal (CH3(CH2)17CHO)",
                "Heneicosanal (CH3(CH2)19CHO)",
                "Docosanal (CH3(CH2)20CHO)",
                "Tricosanal (CH3(CH2)21CHO)"
            ],
            "true_referents": [
                "(2E,4E)-2,4-Dodecadienal",
                "(E)-non-2-enal",
                "(R)-2-hydroxyhexadecanal",
                "(S)-2-methylbutanal",
                "(S)-3-acetyloctanal",
                "16-heptadecenal",
                "2,4,6,8,10-dodecapentaenal",
                "2-Nonylheneicosanal",
                "2E,4E,6E,8E,10E,12E,14E,16E,18E-eicosanonaenal",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-Methylhexanal",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "3-oxo-Tetradecanal",
                "4,6-heptadienal",
                "5-Oxohexanal",
                "Decanoyl acetaldehyde",
                "Eicosanal",
                "acetaldehyde",
                "aliphatic aldehyde",
                "all-trans-1,6-seco-1,2-didehydroretinal",
                "alpha-CH2-containing aldehyde",
                "decanal",
                "dialdehyde",
                "dodecanal",
                "formaldehyde",
                "heptadecanal",
                "hexacosanal",
                "hexadecanal",
                "nona-2,4,6-trienal",
                "octadecanal",
                "octanal",
                "pentadecanal",
                "phenylacetaldehyde",
                "tetracosanal",
                "tetradecanal",
                "trans-3-Chloroallyl aldehyde",
                "tridecanal",
                "undecanal",
                "valeraldehyde"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Acetone (CH3COCH3)",
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Propionaldehyde (CH3CH2CHO)",
            "Butyraldehyde (CH3(CH2)2CHO)",
            "Valeraldehyde (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Undecanal (CH3(CH2)9CHO)",
            "Dodecanal (CH3(CH2)10CHO)",
            "Tridecanal (CH3(CH2)11CHO)",
            "Tetradecanal (CH3(CH2)12CHO)",
            "Pentadecanal (CH3(CH2)13CHO)",
            "Hexadecanal (CH3(CH2)14CHO)",
            "Heptadecanal (CH3(CH2)15CHO)",
            "Octadecanal (CH3(CH2)16CHO)",
            "Nonadecanal (CH3(CH2)17CHO)",
            "Eicosanal (CH3(CH2)18CHO)",
            "Docosanal (CH3(CH2)20CHO)",
            "Tetracosanal (CH3(CH2)22CHO)",
            "Hexacosanal (CH3(CH2)24CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Valeraldehyde (CH3(CH2)3CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Undecanal (CH3(CH2)9CHO)",
                "Dodecanal (CH3(CH2)10CHO)",
                "Tridecanal (CH3(CH2)11CHO)",
                "Tetradecanal (CH3(CH2)12CHO)",
                "Pentadecanal (CH3(CH2)13CHO)",
                "Hexadecanal (CH3(CH2)14CHO)",
                "Heptadecanal (CH3(CH2)15CHO)",
                "Octadecanal (CH3(CH2)16CHO)",
                "Eicosanal (CH3(CH2)18CHO)",
                "Tetracosanal (CH3(CH2)22CHO)",
                "Hexacosanal (CH3(CH2)24CHO)"
            ],
            "mismatches": [
                "Acetone (CH3COCH3)",
                "Propionaldehyde (CH3CH2CHO)",
                "Butyraldehyde (CH3(CH2)2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Nonadecanal (CH3(CH2)17CHO)",
                "Docosanal (CH3(CH2)20CHO)"
            ],
            "true_referents": [
                "(2E,4E)-2,4-Dodecadienal",
                "(4-Methylphenyl)acetaldehyde",
                "(E)-non-2-enal",
                "(R)-2-hydroxyhexadecanal",
                "(S)-2-methylbutanal",
                "(S)-3-acetyloctanal",
                "16-heptadecenal",
                "2,4,6,8,10-dodecapentaenal",
                "2E,4E,6E,8E,10E,12E,14E,16E,18E-eicosanonaenal",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-Methylhexanal",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "3-oxo-Tetradecanal",
                "4,6-heptadienal",
                "5-Oxohexanal",
                "Decanoyl acetaldehyde",
                "Eicosanal",
                "acetaldehyde",
                "aliphatic aldehyde",
                "all-trans-1,6-seco-1,2-didehydroretinal",
                "alpha-CH2-containing aldehyde",
                "decanal",
                "dialdehyde",
                "dodecanal",
                "formaldehyde",
                "heptadecanal",
                "hexacosanal",
                "hexadecanal",
                "nona-2,4,6-trienal",
                "octadecanal",
                "octanal",
                "pentadecanal",
                "phenylacetaldehyde",
                "tetracosanal",
                "tetradecanal",
                "trans-3-Chloroallyl aldehyde",
                "tridecanal",
                "undecanal",
                "valeraldehyde"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Methanal (Formaldehyde)",
            "Ethanal (Acetaldehyde)",
            "Propanal (Propionaldehyde)",
            "Butanal (Butyraldehyde)",
            "Pentanal (Valeraldehyde)",
            "Hexanal",
            "Heptanal",
            "Octanal",
            "Nonanal",
            "Decanal",
            "Benzaldehyde",
            "Cinnamaldehyde",
            "Crotonaldehyde",
            "Furfural",
            "Isovaleraldehyde",
            "Glycolaldehyde",
            "Malonaldehyde",
            "Pivaldehyde",
            "Salicylaldehyde",
            "Anisaldehyde",
            "Nicotinaldehyde",
            "Vanillin",
            "Hydrocinnamaldehyde",
            "3-Furaldehyde",
            "4-Hydroxybenzaldehyde"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexanal",
                "Heptanal",
                "Octanal",
                "Nonanal",
                "Decanal",
                "Benzaldehyde",
                "Crotonaldehyde",
                "Furfural",
                "Glycolaldehyde",
                "Malonaldehyde",
                "Salicylaldehyde",
                "Vanillin",
                "3-Furaldehyde"
            ],
            "mismatches": [
                "Methanal (Formaldehyde)",
                "Ethanal (Acetaldehyde)",
                "Propanal (Propionaldehyde)",
                "Butanal (Butyraldehyde)",
                "Pentanal (Valeraldehyde)",
                "Cinnamaldehyde",
                "Isovaleraldehyde",
                "Pivaldehyde",
                "Anisaldehyde",
                "Nicotinaldehyde",
                "Hydrocinnamaldehyde",
                "4-Hydroxybenzaldehyde"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "(E)-sinapaldehyde",
                "(R)-alpha-campholenaldehyde",
                "1-naphthaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "2,5-diformylfuran",
                "3,6-anhydro-D-glucose",
                "3-furaldehyde",
                "3-methylsalicylaldehyde",
                "4-(Diethylamino)salicylaldehyde",
                "4-hydroxycinnamaldehyde",
                "4-hydroxyindole-3-carbaldehyde",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "5-methyl-2-furaldehyde",
                "Adipaldehyde",
                "Pimelic dialdehyde",
                "acetaldehyde",
                "acetaldehyde (E)-hydrazone",
                "benzaldehyde",
                "benzaldehydes",
                "butanal",
                "butanals",
                "butynal",
                "cinnamaldehydes",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glycolaldehydes",
                "heptanal",
                "hexanal",
                "hydroxyaldehyde",
                "isobutyraldehyde",
                "malonaldehyde",
                "methanimine",
                "naphthaldehyde",
                "nonanal",
                "octanal",
                "pentanal",
                "propanal",
                "propanals",
                "salicylaldehyde",
                "tolualdehyde",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 13,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Propionaldehyde (CH3CH2CHO)",
            "Butyraldehyde (CH3CH2CH2CHO)",
            "Valeraldehyde (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Acrolein (CH2=CHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Glycolaldehyde (HOCH2CHO)",
            "Glyoxal (OHCCHO)",
            "Malondialdehyde (OHC-CH2-CHO)",
            "Glutaraldehyde (OHC-(CH2)3-CHO)",
            "Salicylaldehyde (2-hydroxybenzaldehyde)",
            "Vanillin (4-hydroxy-3-methoxybenzaldehyde)",
            "Citral (3,7-dimethyl-2,6-octadienal)",
            "Retinal (vitamin A aldehyde)",
            "Pyridoxal (vitamin B6 aldehyde)",
            "Furfural (2-furaldehyde)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Valeraldehyde (CH3(CH2)3CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Acrolein (CH2=CHCHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Glycolaldehyde (HOCH2CHO)",
                "Glyoxal (OHCCHO)",
                "Glutaraldehyde (OHC-(CH2)3-CHO)",
                "Salicylaldehyde (2-hydroxybenzaldehyde)",
                "Furfural (2-furaldehyde)",
                "Pyridoxal (vitamin B6 aldehyde)"
            ],
            "mismatches": [
                "Propionaldehyde (CH3CH2CHO)",
                "Butyraldehyde (CH3CH2CH2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Malondialdehyde (OHC-CH2-CHO)",
                "Vanillin (4-hydroxy-3-methoxybenzaldehyde)",
                "Citral (3,7-dimethyl-2,6-octadienal)",
                "Retinal (vitamin A aldehyde)"
            ],
            "true_referents": [
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "(E)-non-2-enal",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2-oxo aldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3,6-dihydroxy-2-propylbenzaldehyde",
                "3-Methylhexanal",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-furaldehyde",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "4,6-heptadienal",
                "4-acetoxy-3,5-dimethoxy-trans-cinnamaldehyde",
                "4-formyl-2-methoxyphenyl L-menthyl glutarate",
                "5-methyl-2-furaldehyde",
                "9-cis-retinal",
                "Decanoyl acetaldehyde",
                "acetaldehyde",
                "acrolein",
                "aliphatic aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutaraldehyde",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "glyoxal",
                "hydroxybenzaldehyde",
                "isobutyraldehyde",
                "malonaldehyde",
                "methylglyoxal",
                "o-tolualdehyde",
                "octanal",
                "phenylacetaldehyde",
                "pyridoxal",
                "pyridoxal 5'-phosphate",
                "retinals",
                "salicylaldehyde",
                "trans-3-Chloroallyl aldehyde",
                "valeraldehyde",
                "vanillin acetate"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Propanal (CH3CH2CHO)",
            "Butanal (CH3(CH2)2CHO)",
            "Pentanal (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Undecanal (CH3(CH2)9CHO)",
            "Dodecanal (CH3(CH2)10CHO)",
            "Tridecanal (CH3(CH2)11CHO)",
            "Tetradecanal (CH3(CH2)12CHO)",
            "Pentadecanal (CH3(CH2)13CHO)",
            "Hexadecanal (CH3(CH2)14CHO)",
            "Heptadecanal (CH3(CH2)15CHO)",
            "Octadecanal (CH3(CH2)16CHO)",
            "Nonadecanal (CH3(CH2)17CHO)",
            "Eicosanal (CH3(CH2)18CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Cinnamaldehyde (C6H5CH=CHCHO)",
            "Furfural (C4H3OCHO)",
            "Glyceraldehyde (HOCH2CH(OH)CHO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Propanal (CH3CH2CHO)",
                "Pentanal (CH3(CH2)3CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Undecanal (CH3(CH2)9CHO)",
                "Dodecanal (CH3(CH2)10CHO)",
                "Tridecanal (CH3(CH2)11CHO)",
                "Tetradecanal (CH3(CH2)12CHO)",
                "Pentadecanal (CH3(CH2)13CHO)",
                "Hexadecanal (CH3(CH2)14CHO)",
                "Heptadecanal (CH3(CH2)15CHO)",
                "Octadecanal (CH3(CH2)16CHO)",
                "Eicosanal (CH3(CH2)18CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Furfural (C4H3OCHO)"
            ],
            "mismatches": [
                "Butanal (CH3(CH2)2CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Nonadecanal (CH3(CH2)17CHO)",
                "Cinnamaldehyde (C6H5CH=CHCHO)",
                "Glyceraldehyde (HOCH2CH(OH)CHO)"
            ],
            "true_referents": [
                "(2E,4E)-2,4-Dodecadienal",
                "(E)-cinnamaldehyde",
                "(E)-non-2-enal",
                "(R)-2-hydroxyhexadecanal",
                "(S)-3-acetyloctanal",
                "16-heptadecenal",
                "2,2-dimethylpropanal",
                "2,3,4,5-tetrahydroxypentanal",
                "2,4,6,8,10-dodecapentaenal",
                "2E,4E,6E,8E,10E,12E,14E,16E,18E-eicosanonaenal",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-Methylhexanal",
                "3-[(4-aminobutyl)amino]propanal",
                "3-furaldehyde",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "3-oxo-Tetradecanal",
                "4,6-heptadienal",
                "4-methylpentanal",
                "5-Methoxymethyl furfural",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "Eicosanal",
                "acetaldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "decanal",
                "dialdehyde",
                "dodecanal",
                "formaldehyde",
                "furfural",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "heptadecanal",
                "hexadecanal",
                "nona-2,4,6-trienal",
                "o-tolualdehyde",
                "octadecanal",
                "octanal",
                "pentadecanal",
                "pentanal",
                "phenylacetaldehyde",
                "propanal",
                "tetradecanal",
                "tridecanal",
                "undecanal"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (H2C=O)",
            "Acetaldehyde (CH3CH=O)",
            "Propionaldehyde (CH3CH2CH=O)",
            "Butyraldehyde (CH3CH2CH2CH=O)",
            "Isobutyraldehyde ((CH3)2CHCH=O)",
            "Valeraldehyde (CH3CH2CH2CH2CH=O)",
            "Isovaleraldehyde ((CH3)2CHCH2CH=O)",
            "Hexanal (CH3CH2CH2CH2CH2CH=O)",
            "Heptanal (CH3(CH2)5CH=O)",
            "Octanal (CH3(CH2)6CH=O)",
            "Nonanal (CH3(CH2)7CH=O)",
            "Decanal (CH3(CH2)8CH=O)",
            "Undecanal (CH3(CH2)9CH=O)",
            "Dodecanal (CH3(CH2)10CH=O)",
            "Tridecanal (CH3(CH2)11CH=O)",
            "Tetradecanal (CH3(CH2)12CH=O)",
            "Pentadecanal (CH3(CH2)13CH=O)",
            "Hexadecanal (CH3(CH2)14CH=O)",
            "Heptadecanal (CH3(CH2)15CH=O)",
            "Octadecanal (CH3(CH2)16CH=O)",
            "Nonadecanal (CH3(CH2)17CH=O)",
            "Eicosanal (CH3(CH2)18CH=O)",
            "Heneicosanal (CH3(CH2)19CH=O)",
            "Docosanal (CH3(CH2)20CH=O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (H2C=O)",
                "Acetaldehyde (CH3CH=O)",
                "Isobutyraldehyde ((CH3)2CHCH=O)",
                "Valeraldehyde (CH3CH2CH2CH2CH=O)",
                "Decanal (CH3(CH2)8CH=O)",
                "Dodecanal (CH3(CH2)10CH=O)",
                "Tridecanal (CH3(CH2)11CH=O)",
                "Tetradecanal (CH3(CH2)12CH=O)",
                "Pentadecanal (CH3(CH2)13CH=O)",
                "Hexadecanal (CH3(CH2)14CH=O)",
                "Heptadecanal (CH3(CH2)15CH=O)",
                "Octadecanal (CH3(CH2)16CH=O)",
                "Eicosanal (CH3(CH2)18CH=O)"
            ],
            "mismatches": [
                "Propionaldehyde (CH3CH2CH=O)",
                "Butyraldehyde (CH3CH2CH2CH=O)",
                "Isovaleraldehyde ((CH3)2CHCH2CH=O)",
                "Hexanal (CH3CH2CH2CH2CH2CH=O)",
                "Heptanal (CH3(CH2)5CH=O)",
                "Octanal (CH3(CH2)6CH=O)",
                "Nonanal (CH3(CH2)7CH=O)",
                "Undecanal (CH3(CH2)9CH=O)",
                "Nonadecanal (CH3(CH2)17CH=O)",
                "Heneicosanal (CH3(CH2)19CH=O)",
                "Docosanal (CH3(CH2)20CH=O)"
            ],
            "true_referents": [
                "(2E)-5-isopropyl-2-methylhexa-2,5-dienal",
                "(2E,4E)-2,4-Dodecadienal",
                "(2E,4E)-deca-2,4-dienal",
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxy-3-methoxyphenyl)acetaldehyde",
                "(E)-2-tridecylnonadec-2-enal",
                "(E)-dodec-2-enal",
                "(E)-non-2-enal",
                "(E,E)-2,4-Octadienal",
                "(E,E)-2,4-hexadienal",
                "(E,E)-2,6-Octadienal",
                "(E,E)-hepta-2,4-dienal",
                "(R)-2-hydroxyhexadecanal",
                "(S)-2-methylbutanal",
                "(all-Z)-8,11,14-Heptadecatrienal",
                "2,4,6,8,10-dodecapentaenal",
                "2,4,6-octatrienal",
                "2,4-Undecadienal",
                "2-Nonylheneicosanal",
                "2-methyl-Undecanal",
                "2E,4E,6E,8E,10E,12E,14E,16E,18E-eicosanonaenal",
                "3,5-Dimethylhexanal",
                "3-[(4-[(3-oxopropyl)amino]butyl)amino]propionaldehyde",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "3-oxo-Tetradecanal",
                "4-methylocta-2,4,6-trienedial",
                "5-Oxohexanal",
                "6,8,10,12-Pentadecatetraenal",
                "Eicosanal",
                "acetaldehyde",
                "aliphatic aldehyde",
                "all-trans-1,6-seco-1,2-didehydroretinal",
                "alpha-CH2-containing aldehyde",
                "decanal",
                "dialdehyde",
                "dodecanal",
                "formaldehyde",
                "heptadecanal",
                "hexadecanal",
                "isobutyraldehyde",
                "nona-2,4,6-trienal",
                "octadecanal",
                "pentadecanal",
                "tetradecanal",
                "tridecanal",
                "valeraldehyde",
                "veratraldehyde"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (Ac)",
            "Acetone (Ac)",
            "Acetophenone (Ac)",
            "Benzaldehyde (Bz)",
            "Butyraldehyde (Bu)",
            "Crotonaldehyde (Cr)",
            "Cyclohexanone (Cy)",
            "Ethanal (Et)",
            "Formaldehyde (Fo)",
            "Heptanal (He)",
            "Hexanal (Hx)",
            "Isobutyraldehyde (Ib)",
            "Isovaleraldehyde (Iv)",
            "Methyl ethyl ketone (Mk)",
            "Nonanal (No)",
            "Octanal (Oc)",
            "Pentanal (Pe)",
            "Phenylacetaldehyde (Pa)",
            "Propionaldehyde (Pr)",
            "Propiophenone (Pp)",
            "Propanal (Pn)",
            "Propanone (Pt)",
            "Valeraldehyde (Va)",
            "Valerophenone (Vp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetaldehyde",
                "benzaldehyde",
                "crotonaldehyde",
                "formaldehyde",
                "heptanal",
                "hexanal",
                "isobutyraldehyde",
                "nonanal",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "propanal",
                "valeraldehyde"
            ],
            "mismatches": [
                "acetone",
                "acetophenone",
                "butyraldehyde",
                "cyclohexanone",
                "ethanal",
                "isovaleraldehyde",
                "methyl ethyl ketone",
                "propionaldehyde",
                "propiophenone",
                "propanone",
                "valerophenone"
            ],
            "true_referents": [
                "(2S,3S,4S)-4-(hydroxymethyl)-2-methoxy-3-[(2E)-6-methylhept-2-en-2-yl]cyclohexanone",
                "(4-Methylphenyl)acetaldehyde",
                "(R)-alpha-campholenaldehyde",
                "(R)-lactaldehyde",
                "(S)-2-methylbutanal",
                "1-acetylindole-3-carboxaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "2-Ethylidenehexanal",
                "2-Methyl-3-phenyl-2-propenal",
                "2-ethylhexanal",
                "2-pentenal",
                "2-pentyl-3-phenyl-2-propenal",
                "3,5,5-Trimethylhexanal",
                "3-(4-Methylphenyl)-2-propenal",
                "3-O-acetyl-4'-O-demethylpapaveroxine",
                "3-O-ethylentacapone",
                "4-aminovaleraldehyde",
                "4-methylocta-2,4,6-trienedial",
                "5-aminopentanal",
                "6-oxohexanoic acid",
                "L-dopa",
                "acetaldehyde",
                "acetaldehyde (Z)-hydrazone",
                "acetyl",
                "benzaldehyde",
                "benzaldehydes",
                "chloroacetaldehyde",
                "crotonaldehyde",
                "cyclohexanecarbaldehyde",
                "dialdehyde",
                "formaldehyde",
                "heptanal",
                "hexanal",
                "imidazole-4-acetaldehyde",
                "isobutyraldehyde",
                "lactaldehyde",
                "nonanal",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "phenylacetaldehydes",
                "propanal",
                "propanals",
                "valeraldehyde",
                "veratraldehyde"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Aldehyde (RCHO)",
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Acrolein (CH2CHCHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Furfural (C5H4OCHO)",
            "Glyceraldehyde (C3H6O3)",
            "Dihydroxyacetone (C3H6O3)",
            "Lactaldehyde (C6H12O4)",
            "Glycolaldehyde (C2H4O2)",
            "Ethanal (C2H5OH)",
            "Propanal (C3H7OH)",
            "Butanal (C4H9OH)",
            "Pentanal (C5H11OH)",
            "Hexanal (C6H13OH)",
            "Heptanal (C7H15OH)",
            "Octanal (C8H17OH)",
            "Nonanal (C9H19OH)",
            "Decanal (C10H21OH)",
            "Undecanal (C11H23OH)",
            "Dodecanal (C12H25OH)",
            "Tridecanal (C13H27OH)",
            "Tetradecanal (C14H29OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (HCHO)",
                "Acetaldehyde (CH3CHO)",
                "Acrolein (CH2CHCHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Furfural (C5H4OCHO)",
                "Lactaldehyde (C6H12O4)",
                "Glycolaldehyde (C2H4O2)",
                "Propanal (C3H7OH)",
                "Butanal (C4H9OH)",
                "Pentanal (C5H11OH)",
                "Hexanal (C6H13OH)",
                "Octanal (C8H17OH)",
                "Decanal (C10H21OH)",
                "Undecanal (C11H23OH)",
                "Dodecanal (C12H25OH)",
                "Tetradecanal (C14H29OH)"
            ],
            "mismatches": [
                "Aldehyde (RCHO)",
                "Glyceraldehyde (C3H6O3)",
                "Dihydroxyacetone (C3H6O3)",
                "Ethanal (C2H5OH)",
                "Heptanal (C7H15OH)",
                "Nonanal (C9H19OH)",
                "Tridecanal (C13H27OH)"
            ],
            "true_referents": [
                "(2E,4E)-2,4-Dodecadienal",
                "(4S,5S)-4,5-dihydroxy-2,6-dioxohexanoic acid",
                "(5-hydroxyindol-3-yl)acetaldehyde",
                "(E)-non-2-enal",
                "(R)-lactaldehyde",
                "1-Methyl-2-pyrrolecarboxaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "3,5-Dimethylhexanal",
                "3-[(4-aminobutyl)amino]propanal",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "3-oxo-Tetradecanal",
                "4,6-heptadienal",
                "4-methylpentanal",
                "4-oxo-4-(pyridin-3-yl)butanal",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "6-oxohexanoic acid",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "L-glyceraldehyde 3-phosphate",
                "acetaldehyde",
                "acetaldehyde (E)-hydrazone",
                "acrolein",
                "butanal",
                "crotonaldehyde",
                "decanal",
                "dialdehyde",
                "dodecanal",
                "formaldehyde",
                "furfural",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "hexanal",
                "hydroxyaldehyde",
                "lactaldehyde",
                "non-2-enal",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "propanal",
                "pyrrole-2-carboxaldehyde",
                "tetradecanal",
                "tridecanal",
                "undecanal"
            ],
            "TP": 16,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            "Acetaldehyde (AcH)",
            "Acetone (Ac)",
            "Benzaldehyde (BzH)",
            "Butyraldehyde (BuH)",
            "Crotonaldehyde (CrH)",
            "Formaldehyde (FmH)",
            "Furfural (FfH)",
            "Glutaraldehyde (GlH)",
            "Glyoxal (GxH)",
            "Heptaldehyde (HpH)",
            "Hexanal (HxH)",
            "Hyacinthin (HynH)",
            "Isovaleraldehyde (IvH)",
            "Methacrolein (McH)",
            "Nonanal (NnH)",
            "Octanal (OcH)",
            "Pentanal (PnH)",
            "Phenylacetaldehyde (PhAcH)",
            "Propionaldehyde (PrH)",
            "Salicylaldehyde (SlH)",
            "Tolualdehyde (TlH)",
            "Undecanal (UnH)",
            "Valeraldehyde (VlH)",
            "Vanillin (VnH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzaldehyde (BzH)",
                "Crotonaldehyde (CrH)",
                "Formaldehyde (FmH)",
                "Furfural (FfH)",
                "Glutaraldehyde (GlH)",
                "Glyoxal (GxH)",
                "Hexanal (HxH)",
                "Methacrolein (McH)",
                "Octanal (OcH)",
                "Pentanal (PnH)",
                "Phenylacetaldehyde (PhAcH)",
                "Salicylaldehyde (SlH)",
                "Tolualdehyde (TlH)",
                "Undecanal (UnH)",
                "Valeraldehyde (VlH)",
                "Vanillin (VnH)"
            ],
            "mismatches": [
                "Acetaldehyde (AcH)",
                "Acetone (Ac)",
                "Butyraldehyde (BuH)",
                "Heptaldehyde (HpH)",
                "Hyacinthin (HynH)",
                "Isovaleraldehyde (IvH)",
                "Nonanal (NnH)",
                "Propionaldehyde (PrH)"
            ],
            "true_referents": [
                "(E)-sinapaldehyde",
                "(R)-alpha-campholenaldehyde",
                "2,3,4,5-tetrahydroxypentanal",
                "3,5-Dimethylhexanal",
                "3-methylsalicylaldehyde",
                "3-nonenal",
                "4-aminovaleraldehyde",
                "4-hydroxyindole-3-carbaldehyde",
                "5-Methoxymethyl furfural",
                "5-aminopentanal",
                "5-hydroxymethylfurfural",
                "L-glutamic 5-semialdehyde",
                "Methacrolein",
                "acetaldehyde",
                "acetaldehyde (Z)-hydrazone",
                "acetyl",
                "aldehydic acid",
                "benzaldehyde",
                "benzaldehydes",
                "crotonaldehyde",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutamic semialdehyde",
                "glutaraldehyde",
                "glyoxal",
                "helicin",
                "hexanal",
                "hydroxyaldehyde",
                "isobutyraldehyde",
                "k-Strophanthin-beta",
                "lactaldehyde",
                "m-tolualdehyde",
                "methylglyoxal",
                "non-2-enal",
                "o-tolualdehyde",
                "octanal",
                "pentanal",
                "phenylacetaldehyde",
                "phenylacetaldehydes",
                "salicylaldehyde",
                "tolualdehyde",
                "undecanal",
                "valeraldehyde",
                "vanillin",
                "veratraldehyde"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "aldehyde",
        "domain": "chemistry",
        "response": [
            [
                "Referent",
                "formic acid"
            ],
            [
                "Referent",
                "acetic acid"
            ],
            [
                "Referent",
                "propanoic acid"
            ]
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "formic acid",
                "acetic acid",
                "propanoic acid"
            ],
            "true_referents": [
                "(S)-citronellic acid",
                "2-methyl-3-oxopropanoic acid",
                "3-oxopropanoic acid",
                "4-formylbenzoic acid",
                "5-formyl-2-furoic acid",
                "acetyl",
                "aldehydic acid"
            ],
            "TP": 0,
            "FP": 3,
            "FN": 0
        }
    }
]