[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "acetaldehyde (CH3CHO)",
            "acetone (CH3COCH3)",
            "benzaldehyde (C6H5CHO)",
            "butyraldehyde (CH3CH2CH2CHO)",
            "formaldehyde (HCHO)",
            "glyoxal (OHCCHO)",
            "methyl ethyl ketone (CH3COCH2CH3)",
            "pyruvic acid (CH3COCOOH)",
            "acetic anhydride (C4H6O3)",
            "benzophenone (C13H10O)",
            "cinnamaldehyde (C9H8O)",
            "coumarin (C9H6O2)",
            "crotonaldehyde (CH3CH=CHCHO)",
            "ethyl acetate (C4H8O2)",
            "furfural (C5H4O2)",
            "glycerol acetoacetate (C7H12O4)",
            "isobutyraldehyde (CH3CH(CH3)CHO)",
            "lactide (C6H8O3)",
            "maleic anhydride (C4H4O3)",
            "methyl salicylate (C8H8O3)",
            "nicotinamide (C6H5NO2)",
            "octanal (C8H16O)",
            "vanillin (C8H8O3)",
            "valeric aldehyde (C5H10O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetaldehyde",
                "acetone",
                "benzaldehyde",
                "formaldehyde",
                "glyoxal",
                "pyruvic acid",
                "acetic anhydride",
                "benzophenone",
                "crotonaldehyde",
                "ethyl acetate",
                "furfural",
                "isobutyraldehyde",
                "maleic anhydride",
                "methyl salicylate",
                "nicotinamide",
                "octanal",
                "vanillin"
            ],
            "mismatches": [
                "butyraldehyde",
                "methyl ethyl ketone",
                "cinnamaldehyde",
                "coumarin",
                "glycerol acetoacetate",
                "lactide",
                "valeric aldehyde"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "(R)-glycerol 1-acetate",
                "(S)-3-phenyllactic acid",
                "2,3',4,6-tetrahydroxybenzophenone",
                "3,13-Dihydroxy-8-oxo-2(9),6-lactaradien-5-oic acid y-lactone",
                "3,4-dimethoxy cinnamaldehyde",
                "5-Methoxymethyl furfural",
                "5-aceto valeric acid",
                "5-hydroxymethylfurfural",
                "6,8-dimethoxy-3-hydroxymethylisocoumarin",
                "7,8-diacetoxy-4-methylcoumarin",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "Ethyl isopropyl ketone",
                "Glycerol 1,2-diacetate",
                "Glycerol 1-propanoate diacetate",
                "acetaldehyde",
                "acetic anhydride",
                "acetic benzoic anhydride",
                "acetone",
                "aldehyde",
                "alpha-CH2-containing aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzophenone",
                "benzophenones",
                "beta,beta-dimethyl valeric acid",
                "carboxylic anhydride",
                "coumaric acid",
                "crotonaldehyde",
                "crotonic acid",
                "dialdehyde",
                "ethyl acetate",
                "ethyl butyrate",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glyoxal",
                "isobutyraldehyde",
                "maleic acid",
                "maleic anhydride",
                "methyl ketone",
                "methyl propenyl ketone",
                "methyl salicylate",
                "methylglyoxal",
                "nicotinamide",
                "nicotinic acid",
                "octanal",
                "pyruvic acid",
                "triacetate lactone",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (C3H6O)",
            "Acetaldehyde (CH3CHO)",
            "Formaldehyde (CH2O)",
            "Acetic acid (CH3COOH)",
            "Benzaldehyde (C6H5CHO)",
            "Cinnamaldehyde (C9H8O)",
            "Acetophenone (C8H8O)",
            "Butyraldehyde (C4H8O)",
            "Propionic acid (C3H6O2)",
            "Methyl ethyl ketone (MEK, C4H8O)",
            "Vanillin (C8H8O3)",
            "Salicylaldehyde (C7H6O2)",
            "Cyclohexanone (C6H10O)",
            "Glyoxal (C2H2O2)",
            "Phenylacetaldehyde (C8H8O)",
            "Ethyl acetate (C4H8O2)",
            "Methyl benzoate (C8H8O2)",
            "Acrolein (C3H4O)",
            "Glutaraldehyde (C5H8O2)",
            "Furfural (C5H4O2)",
            "Phthalic anhydride (C8H4O3)",
            "Maleic anhydride (C4H2O3)",
            "Acrolein (C3H4O)",
            "Crotonaldehyde (C4H6O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (C3H6O)",
                "Acetaldehyde (CH3CHO)",
                "Formaldehyde (CH2O)",
                "Acetic acid (CH3COOH)",
                "Benzaldehyde (C6H5CHO)",
                "Acetophenone (C8H8O)",
                "Propionic acid (C3H6O2)",
                "Vanillin (C8H8O3)",
                "Salicylaldehyde (C7H6O2)",
                "Cyclohexanone (C6H10O)",
                "Glyoxal (C2H2O2)",
                "Phenylacetaldehyde (C8H8O)",
                "Ethyl acetate (C4H8O2)",
                "Methyl benzoate (C8H8O2)",
                "Acrolein (C3H4O)",
                "Glutaraldehyde (C5H8O2)",
                "Furfural (C5H4O2)",
                "Phthalic anhydride (C8H4O3)",
                "Maleic anhydride (C4H2O3)",
                "Crotonaldehyde (C4H6O)"
            ],
            "mismatches": [
                "Cinnamaldehyde (C9H8O)",
                "Butyraldehyde (C4H8O)",
                "Methyl ethyl ketone (MEK, C4H8O)"
            ],
            "true_referents": [
                "(3R,5S)-1-pyrroline-3-hydroxy-5-carboxylic acid",
                "(4-Methylphenyl)acetaldehyde",
                "(E)-2-Butenyl butyrate",
                "(E)-cinnamaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3-Hexenyl salicylic acid",
                "4-(O-methyl-aci-nitro)crotonic acid",
                "4-hydroxy-crotonic acid",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "Ethyl isopropyl ketone",
                "Phthalic acid, di(3-methylphenyl) ester",
                "acetaldehyde",
                "acetic acid",
                "acetoacetic acid",
                "acetone",
                "acetophenone",
                "acetophenones",
                "acrolein",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "crotonaldehyde",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "ethyl acetate",
                "ethyl propionate",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutaraldehyde",
                "glutaric anhydride",
                "glyoxal",
                "hydroxycyclohexanone",
                "maleic acid",
                "maleic anhydride",
                "methyl benzoate",
                "methyl ketone",
                "methyl propenyl ketone",
                "methylglyoxal",
                "phenylacetaldehyde",
                "phthalic acid",
                "phthalic anhydride",
                "propionic acid",
                "salicylaldehyde",
                "vanillin"
            ],
            "TP": 20,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (Propanone)",
            "Formaldehyde (Methanal)",
            "Acetaldehyde (Ethanal)",
            "Propionaldehyde (Propanal)",
            "Benzaldehyde",
            "Butyraldehyde (Butanal)",
            "Valeraldehyde (Pentanal)",
            "Hexanal",
            "Heptanal",
            "Octanal",
            "Nonanal",
            "Decanal",
            "Acetic acid (Ethanoic acid)",
            "Propionic acid (Propanoic acid)",
            "Butyric acid (Butanoic acid)",
            "Valeric acid (Pentanoic acid)",
            "Caproic acid (Hexanoic acid)",
            "Enanthic acid (Heptanoic acid)",
            "Caprylic acid (Octanoic acid)",
            "Pelargonic acid (Nonanoic acid)",
            "Capric acid (Decanoic acid)",
            "Acetyl chloride",
            "Acetyl bromide",
            "Acetyl iodide"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (Propanone)",
                "Formaldehyde (Methanal)",
                "Acetaldehyde (Ethanal)",
                "Propionaldehyde (Propanal)",
                "Benzaldehyde",
                "Hexanal",
                "Heptanal",
                "Octanal",
                "Nonanal",
                "Decanal",
                "Acetic acid (Ethanoic acid)",
                "Propionic acid (Propanoic acid)",
                "Butyric acid (Butanoic acid)",
                "Valeric acid (Pentanoic acid)",
                "Acetyl chloride"
            ],
            "mismatches": [
                "Butyraldehyde (Butanal)",
                "Valeraldehyde (Pentanal)",
                "Caproic acid (Hexanoic acid)",
                "Enanthic acid (Heptanoic acid)",
                "Caprylic acid (Octanoic acid)",
                "Pelargonic acid (Nonanoic acid)",
                "Capric acid (Decanoic acid)",
                "Acetyl bromide",
                "Acetyl iodide"
            ],
            "true_referents": [
                "3-capryl propionic acid",
                "3-iodo-octadecanoic acid",
                "6-Methyl caprylic acid",
                "6-propionyl n-caproic acid",
                "7-hydroxy pelargonic acid",
                "8-propionyl caprylic acid",
                "9-acetyl pelargonic acid",
                "Acetyl citrate",
                "BenzAldehyde, 3-ethyl-",
                "Pelargonyl acetic acid",
                "acetaldehyde",
                "acetic acid",
                "acetic acid-d4",
                "acetoacetic acid",
                "acetone",
                "acetyl",
                "acetyl chloride",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "butyl butanoate",
                "butyric acid",
                "decanal",
                "decanoic acid",
                "dialdehyde",
                "dibromoacetic acid",
                "ethyl propionate",
                "formaldehyde",
                "heptanal",
                "heptanoic acid",
                "hexanal",
                "hexanoic acid",
                "iodoacetic acid",
                "lactaldehyde",
                "nonanal",
                "octanal",
                "pentanal",
                "propanal",
                "propanals",
                "propionic acid",
                "valeraldehyde",
                "valeric acid",
                "veratraldehyde"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (HCHO)",
            "Acetaldehyde (CH3CHO)",
            "Acetone (CH3COCH3)",
            "Propanal (CH3CH2CHO)",
            "Propanone (CH3COCH2CH3)",
            "Butanal (CH3CH2CH2CHO)",
            "Butanone (CH3COCH2CH3)",
            "Benzaldehyde (C6H5CHO)",
            "Acetophenone (C6H5COCH3)",
            "Benzoic acid (C6H5COOH)",
            "Formic acid (HCOOH)",
            "Acetic acid (CH3COOH)",
            "Pyruvic acid (CH3COCOOH)",
            "Glycolaldehyde (HOCH2CHO)",
            "Glyceraldehyde (HOCH2CHOHCHO)",
            "Ribose (C5H10O5)",
            "Fructose (C6H12O6)",
            "Glucose (C6H12O6)",
            "Dimethylformamide (DMF, (CH3)2NCHO)",
            "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
            "Vanillin (C8H8O3)",
            "Camphor (C10H16O)",
            "Cortisol (C21H30O5)",
            "Testosterone (C19H28O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde",
                "Acetaldehyde",
                "Acetone",
                "Propanal",
                "Butanal",
                "Butanone",
                "Benzaldehyde",
                "Acetophenone",
                "Benzoic acid",
                "Formic acid",
                "Acetic acid",
                "Pyruvic acid",
                "Glycolaldehyde",
                "Vanillin",
                "Camphor"
            ],
            "mismatches": [
                "Propanone",
                "Glyceraldehyde",
                "Ribose",
                "Fructose",
                "Glucose",
                "Dimethylformamide",
                "Dimethyl sulfoxide",
                "Cortisol",
                "Testosterone"
            ],
            "true_referents": [
                "(E)-testosterone-3-CMO-Hist",
                "1,5-anhydro-D-fructose",
                "2,2-dimethylpropanal",
                "2,2-dimethylsuccinic acid",
                "2-(3-methylbenzoyl)benzoic acid",
                "2-Propenyl 2-ethylbutanoate",
                "2-[(sulfooxy)methyl]butanoic acid",
                "3,6-anhydro-D-glucose",
                "3-Methyl-1-phenyl-1-butanone",
                "3-[(4-aminobutyl)amino]propanal",
                "3-deoxy-keto-D-fructose",
                "3-hydroxy-2-(hydroxymethyl)-2-[(sulfooxy)methyl]propanoic acid",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "4,5alpha-Dihydro-2-(hydroxymethylene)testosterone",
                "4-propoxy-2-butanone",
                "6-phospho-2-dehydro-D-gluconic acid",
                "BenzAldehyde, 3-ethyl-",
                "D-glyceraldehyde 3-phosphate",
                "Decanoyl acetaldehyde",
                "L-glyceraldehyde 3-phosphate",
                "L-ribonic acid",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-methylformamide",
                "acetaldehyde",
                "acetic acid",
                "acetoacetic acid",
                "acetone",
                "acetophenone",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzoic acid",
                "benzoic acids",
                "butanal",
                "butanone",
                "camphor",
                "cortisol 21-acetate",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "formic acid",
                "formic acid-d2",
                "glyceraldehyde 3-phosphate",
                "glycolaldehyde",
                "glycolaldehyde phosphate",
                "p-Ethylacetophenone",
                "propanal",
                "pyruvic acid",
                "ribonic acid",
                "vanillin"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (C3H6O)",
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Benzaldehyde (C7H6O)",
            "Acetic Acid (C2H4O2)",
            "Butanone (C4H8O)",
            "Propionaldehyde (C3H6O)",
            "Butyraldehyde (C4H8O)",
            "Valeraldehyde (C5H10O)",
            "Hexanal (C6H12O)",
            "Heptanal (C7H14O)",
            "Octanal (C8H16O)",
            "Nonanal (C9H18O)",
            "Decanal (C10H20O)",
            "Cyclohexanone (C6H10O)",
            "Methyl Ethyl Ketone (C4H8O)",
            "Benzophenone (C13H10O)",
            "Camphor (C10H16O)",
            "Phenylacetaldehyde (C8H8O)",
            "Vanillin (C8H8O3)",
            "Salicylaldehyde (C7H6O2)",
            "Cinnamaldehyde (C9H8O)",
            "Caproaldehyde (C6H12O)",
            "Isovaleraldehyde (C5H10O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (C3H6O)",
                "Formaldehyde (CH2O)",
                "Acetaldehyde (C2H4O)",
                "Benzaldehyde (C7H6O)",
                "Acetic Acid (C2H4O2)",
                "Butanone (C4H8O)",
                "Valeraldehyde (C5H10O)",
                "Hexanal (C6H12O)",
                "Octanal (C8H16O)",
                "Decanal (C10H20O)",
                "Cyclohexanone (C6H10O)",
                "Benzophenone (C13H10O)",
                "Camphor (C10H16O)",
                "Phenylacetaldehyde (C8H8O)",
                "Vanillin (C8H8O3)",
                "Salicylaldehyde (C7H6O2)"
            ],
            "mismatches": [
                "Propionaldehyde (C3H6O)",
                "Butyraldehyde (C4H8O)",
                "Heptanal (C7H14O)",
                "Nonanal (C9H18O)",
                "Methyl Ethyl Ketone (C4H8O)",
                "Cinnamaldehyde (C9H8O)",
                "Caproaldehyde (C6H12O)",
                "Isovaleraldehyde (C5H10O)"
            ],
            "true_referents": [
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-2-Butenyl butyrate",
                "(E)-cinnamaldehyde",
                "1-hydroxy-3-propoxyacetone",
                "2,3',4,6-tetrahydroxybenzophenone",
                "2-Nonylheneicosanal",
                "2-hydroxy caproaldehyde",
                "3,4-dimethoxy cinnamaldehyde",
                "3-Hexenyl salicylic acid",
                "4,4-Dimethoxy-2-butanone",
                "4,6-heptadienal",
                "4-hydroxy caproaldehyde",
                "4-propoxy-2-butanone",
                "5-aceto valeric acid",
                "5-hydroxy caproaldehyde",
                "7-phenyl heptanoic acid",
                "BenzAldehyde, 3-ethyl-",
                "Ethyl isopropyl ketone",
                "acetaldehyde",
                "acetic acid",
                "acetic acid-d4",
                "acetoacetic acid",
                "acetone",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzophenone",
                "benzophenones",
                "beta,beta-dimethyl valeric acid",
                "butanone",
                "camphor",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "decanal",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "hexanal",
                "hydroxycyclohexanone",
                "isovaleric acid",
                "methyl ketone",
                "methyl propenyl ketone",
                "non-2-enal",
                "octanal",
                "phenylacetaldehyde",
                "salicylaldehyde",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (C3H6O)",
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Benzaldehyde (C7H6O)",
            "Propionaldehyde (C3H6O)",
            "Butyraldehyde (C4H8O)",
            "Valeraldehyde (C5H10O)",
            "Caproaldehyde (C6H12O)",
            "Cinnamaldehyde (C9H8O)",
            "Furfural (C5H4O2)",
            "Cyclopentanone (C5H8O)",
            "Cyclohexanone (C6H10O)",
            "2-Pentanone (C5H10O)",
            "3-Pentanone (C5H10O)",
            "2-Butanone (C4H8O)",
            "4-Hydroxy-4-methylpentan-2-one (C6H12O2)",
            "Acetophenone (C8H8O)",
            "2-Phenylpropan-2-one (C9H10O)",
            "Diethyl ketone (C6H12O)",
            "3-Hexanone (C6H12O)",
            "2-Nonanone (C9H18O)",
            "2-Undecanone (C11H22O)",
            "Isovaleraldehyde (C5H10O)",
            "Hexanal (C6H12O)",
            "Octanal (C8H16O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (C3H6O)",
                "Formaldehyde (CH2O)",
                "Acetaldehyde (C2H4O)",
                "Benzaldehyde (C7H6O)",
                "Valeraldehyde (C5H10O)",
                "Furfural (C5H4O2)",
                "Cyclopentanone (C5H8O)",
                "Cyclohexanone (C6H10O)",
                "2-Pentanone (C5H10O)",
                "3-Pentanone (C5H10O)",
                "2-Butanone (C4H8O)",
                "4-Hydroxy-4-methylpentan-2-one (C6H12O2)",
                "Acetophenone (C8H8O)",
                "3-Hexanone (C6H12O)",
                "2-Nonanone (C9H18O)",
                "Hexanal (C6H12O)"
            ],
            "mismatches": [
                "Propionaldehyde (C3H6O)",
                "Butyraldehyde (C4H8O)",
                "Caproaldehyde (C6H12O)",
                "Cinnamaldehyde (C9H8O)",
                "2-Phenylpropan-2-one (C9H10O)",
                "Diethyl ketone (C6H12O)",
                "2-Undecanone (C11H22O)",
                "Isovaleraldehyde (C5H10O)",
                "Octanal (C8H16O)"
            ],
            "true_referents": [
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-2-Butenyl butyrate",
                "(E)-cinnamaldehyde",
                "1-Hydroxy-2-pentanone",
                "1-hydroxy-1-phenylpropan-2-one",
                "1-hydroxy-3-propoxyacetone",
                "1-hydroxy-4-methylpentan-2-one",
                "2,4-dimethyl-3-pentanone",
                "2-(trans-2-Pentenyl)cyclopentanone",
                "2-Dodecanone",
                "2-Methyl-3-hexanone",
                "2-Nonadecanone",
                "2-butyl-1-octanol",
                "2-hydroxy caproaldehyde",
                "2-phenylpropanal",
                "3,4-dimethoxy cinnamaldehyde",
                "3-Hydroxy-2-pentanone",
                "3-hexanone",
                "4-hydroxy caproaldehyde",
                "4-propoxy-2-butanone",
                "5-Methoxymethyl furfural",
                "5-aceto valeric acid",
                "5-hydroxy caproaldehyde",
                "5-hydroxymethylfurfural",
                "6-Undecanone",
                "BenzAldehyde, 3-ethyl-",
                "Cyclopentanone, 3-methyl-2-(2-pentenyl)-",
                "Nonan-2,5-dione",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "acetophenones",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "beta,beta-dimethyl valeric acid",
                "butanone",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "cyclopentanone",
                "dialdehyde",
                "dialkyl ketone",
                "formaldehyde",
                "furfural",
                "hexanal",
                "hexanone",
                "hydroxycyclohexanone",
                "isovaleric acid",
                "ketone",
                "methyl ketone",
                "nonanone",
                "pentanone",
                "undecan-2-ol",
                "valeraldehyde"
            ],
            "TP": 16,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (C3H6O)",
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Propanone (C3H6O)",
            "Butanone (C4H8O)",
            "Acetic acid (C2H4O2)",
            "Propionic acid (C3H6O2)",
            "Butanoic acid (C4H8O2)",
            "Acetophenone (C8H8O)",
            "Benzophenone (C13H10O)",
            "Flavone (C15H10O2)",
            "Isobutyraldehyde (C4H8O)",
            "Valeraldehyde (C5H10O)",
            "Hexanal (C6H12O)",
            "Heptanal (C7H14O)",
            "Octanal (C8H16O)",
            "Nonanal (C9H18O)",
            "Decanal (C10H20O)",
            "Cinnamaldehyde (C9H8O)",
            "Vanillin (C8H8O3)",
            "Salicylaldehyde (C7H6O2)",
            "Anisaldehyde (C8H8O2)",
            "Furfural (C5H4O2)",
            "Glutaraldehyde (C5H8O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (C3H6O)",
                "Formaldehyde (CH2O)",
                "Acetaldehyde (C2H4O)",
                "Butanone (C4H8O)",
                "Acetic acid (C2H4O2)",
                "Propionic acid (C3H6O2)",
                "Acetophenone (C8H8O)",
                "Benzophenone (C13H10O)",
                "Isobutyraldehyde (C4H8O)",
                "Valeraldehyde (C5H10O)",
                "Hexanal (C6H12O)",
                "Octanal (C8H16O)",
                "Decanal (C10H20O)",
                "Vanillin (C8H8O3)",
                "Salicylaldehyde (C7H6O2)",
                "Furfural (C5H4O2)",
                "Glutaraldehyde (C5H8O2)"
            ],
            "mismatches": [
                "Propanone (C3H6O)",
                "Butanoic acid (C4H8O2)",
                "Flavone (C15H10O2)",
                "Heptanal (C7H14O)",
                "Nonanal (C9H18O)",
                "Cinnamaldehyde (C9H8O)",
                "Anisaldehyde (C8H8O2)"
            ],
            "true_referents": [
                "(2R,3R)-dihydroflavonol",
                "(2S)-flavanone",
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-cinnamaldehyde",
                "2,3',4,6-tetrahydroxybenzophenone",
                "2-Nonylheneicosanal",
                "2-hydroxy-1-phenyl-1-propanone",
                "3,4-dimethoxy cinnamaldehyde",
                "3-Chloro-anisaldehyde",
                "3-Hexenyl salicylic acid",
                "4,4-Dimethoxy-2-butanone",
                "4,6-heptadienal",
                "4-propoxy-2-butanone",
                "5-Methoxymethyl furfural",
                "5-aceto valeric acid",
                "5-hydroxymethylfurfural",
                "7-phenyl heptanoic acid",
                "8-undecenoic acid",
                "acetaldehyde",
                "acetic acid",
                "acetic acid-d4",
                "acetoacetic acid",
                "acetone",
                "acetophenone",
                "acetophenones",
                "aldehyde",
                "benzophenone",
                "benzophenones",
                "beta,beta-dimethyl valeric acid",
                "butanone",
                "butyric acid",
                "decanal",
                "dialdehyde",
                "ethyl propionate",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glutaraldehyde",
                "glutaric anhydride",
                "hexanal",
                "isobutyl acetate",
                "isobutyraldehyde",
                "non-2-enal",
                "octanal",
                "propionic acid",
                "salicylaldehyde",
                "valeraldehyde",
                "vanillin"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone",
            "Formaldehyde",
            "Acetaldehyde",
            "Acetophenone",
            "Acrolein",
            "Butanone",
            "Butyraldehyde",
            "Cinnamaldehyde",
            "Decanal",
            "Ethanal",
            "Furfural",
            "Hexanal",
            "Isobutanal",
            "Isobutyraldehyde",
            "Methanal",
            "Methyl ethyl ketone",
            "Nonanal",
            "Octanal",
            "Propionaldehyde",
            "Propyl aldehyde",
            "Tolualdehyde",
            "Tridecanal",
            "Valeraldehyde",
            "Vanillin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone",
                "Formaldehyde",
                "Acetaldehyde",
                "Acetophenone",
                "Acrolein",
                "Butanone",
                "Decanal",
                "Furfural",
                "Hexanal",
                "Isobutyraldehyde",
                "Nonanal",
                "Octanal",
                "Tolualdehyde",
                "Tridecanal",
                "Valeraldehyde",
                "Vanillin"
            ],
            "mismatches": [
                "Butyraldehyde",
                "Cinnamaldehyde",
                "Ethanal",
                "Isobutanal",
                "Methanal",
                "Methyl ethyl ketone",
                "Propionaldehyde",
                "Propyl aldehyde"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "(Z)-Oak lactone",
                "1-pyrrolinecarboxylic acid",
                "4-methyl-2-pyrroline-5-carboxylic acid",
                "Ethyl isopropyl ketone",
                "Furfuryl pentanoate",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "acetophenones",
                "acrolein",
                "aldehyde",
                "aminobutanal",
                "butanone",
                "butyl butanoate",
                "cinnamaldehydes",
                "decanal",
                "decyl butyrate",
                "dialdehyde",
                "ethyl butyrate",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "hexanal",
                "hydroxybutanal",
                "isobutyl acetate",
                "isobutyraldehyde",
                "lactaldehyde",
                "m-tolualdehyde",
                "methanimine",
                "methyl formate",
                "methyl ketone",
                "methyl propenyl ketone",
                "methylbutanal",
                "nonanal",
                "o-tolualdehyde",
                "octanal",
                "propyl acetate",
                "tolualdehyde",
                "tridecanal",
                "undecyl butyrate",
                "urethane",
                "valeraldehyde",
                "vanillin",
                "veratraldehyde"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (Methanal)",
            "Acetaldehyde (Ethanal)",
            "Acetone (Propanone)",
            "Benzaldehyde",
            "Cyclohexanone",
            "Methyl Ethyl Ketone (MEK)",
            "2-Pentanone",
            "Butanone",
            "N,N-Dimethylformamide (DMF)",
            "Acetyl Chloride",
            "Acetylsalicylic Acid (Aspirin)",
            "Benzoic Acid",
            "Acetamide",
            "Ethyl Acetate",
            "Propanoic Acid",
            "Succinic Acid",
            "Oxalic Acid",
            "Lactic Acid",
            "Stearic Acid",
            "Malonic Acid",
            "Methyl Formate",
            "Ethyl Formate",
            "Acetic Anhydride",
            "Pyridine-2-carboxaldehyde"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (Methanal)",
                "Acetaldehyde (Ethanal)",
                "Acetone (Propanone)",
                "Benzaldehyde",
                "Cyclohexanone",
                "Butanone",
                "N,N-Dimethylformamide (DMF)",
                "Acetyl Chloride",
                "Acetylsalicylic Acid (Aspirin)",
                "Benzoic Acid",
                "Ethyl Acetate",
                "Succinic Acid",
                "Oxalic Acid",
                "Malonic Acid",
                "Methyl Formate",
                "Ethyl Formate",
                "Acetic Anhydride"
            ],
            "mismatches": [
                "Methyl Ethyl Ketone (MEK)",
                "2-Pentanone",
                "Acetamide",
                "Propanoic Acid",
                "Lactic Acid",
                "Stearic Acid",
                "Pyridine-2-carboxaldehyde"
            ],
            "true_referents": [
                "(R)-lactic acid",
                "1-Hydroxy-2-pentanone",
                "2-Pentanol acetate",
                "Acetyl citrate",
                "BenzAldehyde, 3-ethyl-",
                "Butyl pyridine-2-carboxylate",
                "Methyl isobutyl ketone",
                "Monoethyl malonic acid",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-methylformamide",
                "Oxalicine A",
                "acetaldehyde",
                "acetic anhydride",
                "acetic benzoic anhydride",
                "acetoacetamide",
                "acetone",
                "acetyl",
                "acetyl chloride",
                "acetylsalicylic acid",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzilic acid",
                "benzoic acid",
                "benzoic acids",
                "butanone",
                "butyl butanoate",
                "carboxylic anhydride",
                "cyclohexanone",
                "cyclohexanones",
                "dialdehyde",
                "ethyl acetate",
                "ethyl formate",
                "formaldehyde",
                "hydroxycyclohexanone",
                "lactaldehyde",
                "malonic acid",
                "methyl formate",
                "methyl ketone",
                "methyl propenyl ketone",
                "octanoic acid",
                "oxalic acid",
                "pentanone",
                "propionic acid",
                "pyridine-3-carbaldehyde",
                "pyridinecarbaldehyde",
                "stearolic acid",
                "succinic acid"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (CH3COCH3)",
            "Formaldehyde (CH2O)",
            "Acetaldehyde (CH3CHO)",
            "Benzaldehyde (C6H5CHO)",
            "Acrolein (CH2CHCHO)",
            "Butanone (CH3CH2COCH3)",
            "Propanal (CH3CH2CHO)",
            "Cyclohexanone (C6H10O)",
            "Acetophenone (C6H5COCH3)",
            "Glyoxal (OCHCHO)",
            "Furfural (C5H4O2)",
            "Propanone (CH3COCH3)",
            "Butanal (CH3CH2CH2CHO)",
            "Pentanal (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Crotonaldehyde (CH3CH=CHCHO)",
            "Methyl ethyl ketone (CH3CH2COCH3)",
            "Diethyl ketone (CH3CH2COCH2CH3)",
            "Methyl vinyl ketone (CH3COCH=CH2)",
            "Acetylacetone (CH3COCH2COCH3)",
            "Benzophenone (C6H5COC6H5)",
            "Vanillin (C8H8O3)",
            "Cinnamaldehyde (C9H8O)",
            "Camphor (C10H16O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (CH3COCH3)",
                "Formaldehyde (CH2O)",
                "Acetaldehyde (CH3CHO)",
                "Benzaldehyde (C6H5CHO)",
                "Acrolein (CH2CHCHO)",
                "Butanone (CH3CH2COCH3)",
                "Propanal (CH3CH2CHO)",
                "Cyclohexanone (C6H10O)",
                "Acetophenone (C6H5COCH3)",
                "Glyoxal (OCHCHO)",
                "Furfural (C5H4O2)",
                "Propanone (CH3COCH3)",
                "Butanal (CH3CH2CH2CHO)",
                "Pentanal (CH3(CH2)3CHO)",
                "Crotonaldehyde (CH3CH=CHCHO)",
                "Acetylacetone (CH3COCH2COCH3)",
                "Benzophenone (C6H5COC6H5)",
                "Vanillin (C8H8O3)"
            ],
            "mismatches": [
                "Hexanal (CH3(CH2)4CHO)",
                "Methyl ethyl ketone (CH3CH2COCH3)",
                "Diethyl ketone (CH3CH2COCH2CH3)",
                "Methyl vinyl ketone (CH3COCH=CH2)",
                "Cinnamaldehyde (C9H8O)",
                "Camphor (C10H16O)"
            ],
            "true_referents": [
                "(E)-cinnamaldehyde",
                "2',6'-dimethoxy-4'-hydroxyacetophenone",
                "2,2-dimethylpropanal",
                "2,3,4,5-tetrahydroxypentanal",
                "2-ethyl-3-ketopentanoic acid",
                "3,4-dimethoxy cinnamaldehyde",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-Methyl-1-phenyl-1-butanone",
                "3-[(4-aminobutyl)amino]propanal",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "3-propylidene-delta1-pyrroline-5-carboxylic acid",
                "4-acetyl-4-(ethoxycarbonyl)heptanedioic acid",
                "4-methyl-2-pyrroline-5-carboxylic acid",
                "4-propoxy-2-butanone",
                "5-Methoxymethyl furfural",
                "5-hydroxymethylfurfural",
                "BenzAldehyde, 3-ethyl-",
                "Decanoyl acetaldehyde",
                "Ethyl isopropyl ketone",
                "N-(2-benzoyl-1-methylvinyl)acetamide",
                "acetaldehyde",
                "acetone",
                "acetophenone",
                "acetylacetonate",
                "acetylacetone",
                "acrolein",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzophenone",
                "benzophenones",
                "butanal",
                "butanone",
                "camphor",
                "crotonaldehyde",
                "crotonic acid",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "ethyl vanillin",
                "formaldehyde",
                "furfural",
                "glyoxal",
                "hydroxycyclohexanone",
                "methyl ketone",
                "methyl propenyl ketone",
                "methylglyoxal",
                "p-Ethylacetophenone",
                "pentanal",
                "propanal",
                "propanoate ester",
                "propanones",
                "vanillin"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (CH3COCH3)",
            "Acetaldehyde (CH3CHO)",
            "Formaldehyde (CH2O)",
            "Acetic acid (CH3COOH)",
            "Methyl ethyl ketone (CH3C(O)CH2CH3)",
            "Cyclohexanone (C6H10O)",
            "Benzaldehyde (C6H5CHO)",
            "Propanal (CH3CH2CHO)",
            "Butanal (CH3(CH2)2CHO)",
            "Pentanal (CH3(CH2)3CHO)",
            "Hexanal (CH3(CH2)4CHO)",
            "Heptanal (CH3(CH2)5CHO)",
            "Octanal (CH3(CH2)6CHO)",
            "Nonanal (CH3(CH2)7CHO)",
            "Decanal (CH3(CH2)8CHO)",
            "Propanone (CH3C(O)CH3)",
            "Butanone (CH3C(O)CH2CH3)",
            "Cyclopentanone (C5H8O)",
            "Acetophenone (C6H5C(O)CH3)",
            "Benzophenone ((C6H5)2CO)",
            "Pyruvic acid (CH3C(O)COOH)",
            "Oxaloacetic acid (HOOC-CO-CH2-COOH)",
            "\u03b1-Ketoglutaric acid (HOOC-CO-(CH2)2-COOH)",
            "Phenylglyoxylic acid (C6H5-CO-COOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (CH3COCH3)",
                "Acetaldehyde (CH3CHO)",
                "Formaldehyde (CH2O)",
                "Acetic acid (CH3COOH)",
                "Cyclohexanone (C6H10O)",
                "Benzaldehyde (C6H5CHO)",
                "Propanal (CH3CH2CHO)",
                "Octanal (CH3(CH2)6CHO)",
                "Butanone (CH3C(O)CH2CH3)",
                "Cyclopentanone (C5H8O)",
                "Acetophenone (C6H5C(O)CH3)",
                "Benzophenone ((C6H5)2CO)",
                "Pyruvic acid (CH3C(O)COOH)",
                "Oxaloacetic acid (HOOC-CO-CH2-COOH)",
                "Phenylglyoxylic acid (C6H5-CO-COOH)"
            ],
            "mismatches": [
                "Methyl ethyl ketone (CH3C(O)CH2CH3)",
                "Butanal (CH3(CH2)2CHO)",
                "Pentanal (CH3(CH2)3CHO)",
                "Hexanal (CH3(CH2)4CHO)",
                "Heptanal (CH3(CH2)5CHO)",
                "Nonanal (CH3(CH2)7CHO)",
                "Decanal (CH3(CH2)8CHO)",
                "Propanone (CH3C(O)CH3)",
                "\u03b1-Ketoglutaric acid (HOOC-CO-(CH2)2-COOH)"
            ],
            "true_referents": [
                "(2E)-hex-2-en-1-yl propanoate",
                "2,2-dimethylpropanal",
                "2,3',4,6-tetrahydroxybenzophenone",
                "2,3,4,5-tetrahydroxypentanal",
                "2,3-Dimethyl-3-hydroxyglutaric acid",
                "2-(ethoxyimino)-2-ketoglutaric acid",
                "2-(trans-2-Pentenyl)cyclopentanone",
                "2-ethyl-3-ketopentanoic acid",
                "24-Nor-5-cholane-3,6,23-triol",
                "24-Nor-5beta-cholane-3alpha,7alpha,23-triol",
                "3,5,5-Trimethylhexanal",
                "3,5-Dimethylhexanal",
                "3-Methyl-1-phenyl-1-butanone",
                "3-Nonanon-1-yl acetate",
                "3-[(4-aminobutyl)amino]propanal",
                "3-hydroxy-2-(phenylmethylidene)octanal",
                "3-methoxy-3-methyl-2-butanone",
                "3-methyl-4-\\{[(1Z)-prop-1-en-1-yl]oxy\\}butanal",
                "4'-Methoxybenzophenone-2-carboxylic acid",
                "4,6-heptadienal",
                "BenzAldehyde, 3-ethyl-",
                "Benzyl 2,3-dimethyl-2-butenoate",
                "Cyclopentanone, 3-methyl-2-(2-pentenyl)-",
                "Decanoyl acetaldehyde",
                "acetaldehyde",
                "acetic acid",
                "acetoacetic acid",
                "acetone",
                "acetophenone",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzophenone",
                "butanone",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "cyclopentanone",
                "decanal",
                "dialdehyde",
                "formaldehyde",
                "hydroxycyclohexanone",
                "methyl ketone",
                "methyl propenyl ketone",
                "octanal",
                "oxaloacetic acid",
                "p-Ethylacetophenone",
                "pentanal",
                "phenylglyoxylic acid",
                "propanal",
                "pyruvic acid"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Formaldehyde (CH2O)",
            "Acetone (C3H6O)",
            "Acetaldehyde (C2H4O)",
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Propionaldehyde (C3H6O)",
            "Butyraldehyde (C4H8O)",
            "Benzaldehyde (C7H6O)",
            "Acetophenone (C8H8O)",
            "Cyclohexanone (C6H10O)",
            "Ethyl acetate (C4H8O2)",
            "Methyl methacrylate (C5H8O2)",
            "Diethyl ketone (C5H10O)",
            "Acetylacetone (C5H8O2)",
            "Isophorone (C9H14O)",
            "Methyl ethyl ketone (C4H8O)",
            "Diacetyl (C4H6O2)",
            "Glyoxal (C2H2O2)",
            "Pyruvic acid (C3H4O3)",
            "Oxaloacetic acid (C4H4O5)",
            "Alpha-ketoglutaric acid (C5H6O5)",
            "Succinyl-CoA (C25H40N7O19P3S)",
            "Acetyl-CoA (C23H38N7O17P3S)",
            "Oxaloacetate (C4H4O5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde (CH2O)",
                "Acetone (C3H6O)",
                "Acetaldehyde (C2H4O)",
                "Acetic acid (CH3COOH)",
                "Formic acid (HCOOH)",
                "Benzaldehyde (C7H6O)",
                "Acetophenone (C8H8O)",
                "Cyclohexanone (C6H10O)",
                "Ethyl acetate (C4H8O2)",
                "Methyl methacrylate (C5H8O2)",
                "Acetylacetone (C5H8O2)",
                "Isophorone (C9H14O)",
                "Glyoxal (C2H2O2)",
                "Pyruvic acid (C3H4O3)",
                "Oxaloacetic acid (C4H4O5)",
                "Succinyl-CoA (C25H40N7O19P3S)",
                "Acetyl-CoA (C23H38N7O17P3S)"
            ],
            "mismatches": [
                "Propionaldehyde (C3H6O)",
                "Butyraldehyde (C4H8O)",
                "Diethyl ketone (C5H10O)",
                "Methyl ethyl ketone (C4H8O)",
                "Diacetyl (C4H6O2)",
                "Alpha-ketoglutaric acid (C5H6O5)",
                "Oxaloacetate (C4H4O5)"
            ],
            "true_referents": [
                "(4-Methylphenyl)acetaldehyde",
                "(4-hydroxyphenyl)acetaldehyde",
                "(E)-2-Butenyl butyrate",
                "1,3-Diacetylpropane",
                "1-hydroxy-3-propoxyacetone",
                "5-Keto-D-gluconate",
                "Acetyl coenzyme A (Acetyl-CoA)",
                "BenzAldehyde, 3-ethyl-",
                "Ethyl isopropyl ketone",
                "Glycerol 1,2-diacetate",
                "Glycerol 1-propanoate diacetate",
                "acetaldehyde",
                "acetic acid",
                "acetoacetic acid",
                "acetone",
                "acetophenone",
                "acetophenones",
                "acetyl-CoA",
                "acetylacetonate",
                "acetylacetone",
                "aldehyde",
                "alpha-ketoglutaric acid-13C4",
                "benzaldehyde",
                "benzaldehydes",
                "cyclohexanone",
                "cyclohexanone, 4-(1,1-dimethylethyl)-",
                "dialdehyde",
                "dialkyl ketone",
                "ethyl acetate",
                "formaldehyde",
                "formic acid",
                "formic acid-d2",
                "glyoxal",
                "hydroxyacetone",
                "hydroxycyclohexanone",
                "isophorone",
                "isophorone diisocyanate",
                "ketone",
                "methyl ketone",
                "methyl methacrylate",
                "methyl propenyl ketone",
                "methylglyoxal",
                "oxaloacetic acid",
                "pyruvic acid",
                "succinyl-CoA"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (ACT)",
            "Formaldehyde (FORM)",
            "Acetaldehyde (ACTA)",
            "Benzaldehyde (BENZ)",
            "Acetophenone (ACTP)",
            "Propanone (PRON)",
            "Butanone (BUTA)",
            "Acetyl chloride (ACLR)",
            "Benzoyl chloride (BCHL)",
            "Acetic acid (ACAC)",
            "Formic acid (FOAC)",
            "Propionic acid (PRAC)",
            "Butyric acid (BUAC)",
            "Acetamide (ACAM)",
            "Formamide (FOAM)",
            "Acetanilide (ACAN)",
            "Aspirin (ASP)",
            "Ibuprofen (IBU)",
            "Naproxen (NAP)",
            "Diethyl ketone (DIEK)",
            "Methyl ethyl ketone (MEK)",
            "Cyclohexanone (CHON)",
            "Phenyl acetone (PHAC)",
            "Furfural (FURA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (ACT)",
                "Formaldehyde (FORM)",
                "Acetaldehyde (ACTA)",
                "Benzaldehyde (BENZ)",
                "Acetophenone (ACTP)",
                "Butanone (BUTA)",
                "Acetyl chloride (ACLR)",
                "Benzoyl chloride (BCHL)",
                "Acetic acid (ACAC)",
                "Formic acid (FOAC)",
                "Propionic acid (PRAC)",
                "Butyric acid (BUAC)",
                "Formamide (FOAM)",
                "Ibuprofen (IBU)",
                "Naproxen (NAP)",
                "Cyclohexanone (CHON)",
                "Furfural (FURA)"
            ],
            "mismatches": [
                "Propanone (PRON)",
                "Acetamide (ACAM)",
                "Acetanilide (ACAN)",
                "Aspirin (ASP)",
                "Diethyl ketone (DIEK)",
                "Methyl ethyl ketone (MEK)",
                "Phenyl acetone (PHAC)"
            ],
            "true_referents": [
                "(S)-bethanechol chloride",
                "1-propanoylproline",
                "2-formylphenylformamide",
                "3-hydroxycyclohexanone",
                "4'-(chloroacetyl)acetanilide",
                "BenzAldehyde, 3-ethyl-",
                "Methyl furfuracrylate",
                "Methyl isobutyl ketone",
                "N-methylformamide",
                "acacic acid",
                "acetaldehyde",
                "acetic acid",
                "acetoacetamide",
                "acetone",
                "acetophenone",
                "acetophenones",
                "acetyl chloride",
                "acetyl-L-lysine",
                "acetylsalicylic acid",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "benzoyl chloride",
                "bethanechol chloride",
                "butanone",
                "butyl butanoate",
                "butyric acid",
                "cyclohexanone",
                "cyclohexanones",
                "dialdehyde",
                "dialkenyl ketone",
                "dialkyl ketone",
                "formaldehyde",
                "formamide",
                "formic acid",
                "formic acid-d2",
                "formic anhydride",
                "furfural",
                "ibuprofen",
                "lactaldehyde",
                "methyl ketone",
                "methyl propenyl ketone",
                "naproxen",
                "phenyl acetate",
                "phenylacetic acid",
                "propan-1-ol",
                "propanones",
                "propionic acid",
                "propionyl-CoA"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": "[\n  \"Acetone (Acetone)\", \n  \"Acetoacetate (Acetoacetate)\", \n  \"Acetyl-CoA (Acetyl-CoA)\", \n  \"Acetylcholine (Acetylcholine)\", \n  \"Acetylsalicylic acid (Aspirin)\", \n  \"Aldehydes\", \n  \"Alizarin (Alizarin)\", \n  \"Alloxan (Alloxan)\", \n  \"Aminoacetone (Aminoacetone)\", \n  \"Aminoacetonitrile (Aminoacetonitrile)\", \n  \"Aminoacetophenone (Aminoacetophenone)\", \n  \"Aminoacrolein (Aminoacrolein)\", \n  \"Aminoacrylic acid (Aminoacrylic acid)\", \n  \"Aminoacetone (Aminoacetone)\", \n  \"Aminoacetonitrile (Aminoacetonitrile)\", \n  \"Aminoacetophenone (Aminoacetophenone)\", \n  \"Aminoacrolein (Aminoacrolein)\", \n  \"Aminoacrylic acid (Aminoacrylic acid)\", \n  \"Benzoic acid (Benzoic acid)\", \n  \"Benzoin (Benzoin)\", \n  \"Benzophenone (Benzophenone)\", \n  \"Benzyl acetone (Benzyl acetone)\", \n  \"Benzylideneacetone (Benzylideneacetone)\", \n  \"Biotin (Biotin)\", \n  \"Citral (Citral)\", \n  \"Citric acid (Citric acid)\", \n  \"Coumarin (Coumarin)\", \n  \"Cyclohexanone (Cyclohexanone)\", \n  \"Cyclohexanone oxime (Cyclohexanone oxime)\", \n  \"Cyclohexenone (Cyclohexenone)\", \n  \"Cyclohexenone oxime (Cyclohexenone oxime)\", \n  \"Cycloheximide (Cycloheximide)\", \n  \"Cyclohexyl acetone (Cyclohexyl acetone)\", \n  \"Cyclohexylideneacetone (Cyclohexylideneacetone)\", \n  \"Cyclopentanone (Cyclopentanone)\", \n  \"Cyclopentenone (Cyclopentenone)\", \n  \"Cyclophosphamide (Cyclophosphamide)\", \n  \"Cyclopropanone (Cyclopropanone)\", \n  \"Cyclopropenone (Cyclopropenone)\", \n  \"Dehydroacetic acid (Dehydroacetic acid)\", \n  \"Diacetyl (Diacetyl)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone alcohol (Diacetone alcohol)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacetone)\", \n  \"Diacetone (Diacet",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            "Acetone (ACN)",
            "Acetic acid (ACOH)",
            "Benzaldehyde (BZD)",
            "Butyraldehyde (BTD)",
            "Formaldehyde (FA)",
            "Glutaraldehyde (GTA)",
            "Acetaldehyde (ACD)",
            "Propionaldehyde (PTD)",
            "Pyruvate (PYR)",
            "Oxalic acid (OX)",
            "Malonic acid (MAL)",
            "Succinic acid (SUCC)",
            "Glutaric acid (GLT)",
            "Adipic acid (ADI)",
            "Sebacic acid (SEB)",
            "Urea (UR)",
            "Biuret (BT)",
            "Thiourea (TU)",
            "Carbamate (CBM)",
            "Chloral (CHL)",
            "Chloroformate (CLF)",
            "Fluoroacetate (FAC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (ACOH)",
                "Benzaldehyde (BZD)",
                "Formaldehyde (FA)",
                "Glutaraldehyde (GTA)",
                "Acetaldehyde (ACD)",
                "Oxalic acid (OX)",
                "Malonic acid (MAL)",
                "Succinic acid (SUCC)",
                "Glutaric acid (GLT)",
                "Adipic acid (ADI)",
                "Sebacic acid (SEB)"
            ],
            "mismatches": [
                "Acetone (ACN)",
                "Butyraldehyde (BTD)",
                "Propionaldehyde (PTD)",
                "Pyruvate (PYR)",
                "Urea (UR)",
                "Biuret (BT)",
                "Thiourea (TU)",
                "Carbamate (CBM)",
                "Chloral (CHL)",
                "Chloroformate (CLF)",
                "Fluoroacetate (FAC)"
            ],
            "true_referents": [
                "(+)-ureidoglycolic acid",
                "1-[[2-(3-methylanilino)-1-oxoethyl]amino]-3-phenylthiourea",
                "1-[[oxo(3-pyridinyl)methyl]amino]-3-(4-propan-2-ylphenyl)thiourea",
                "2,3-butanediol diacetate",
                "3,4-Dichlorophenyl 2-fluorobenzoate",
                "3-(4-acetylphenyl)-1-(phenylmethyl)-1-propylthiourea",
                "Atropaldehyde",
                "BenzAldehyde, 3-ethyl-",
                "Bybetaochlamic acid",
                "Chlorthal",
                "Oxalicine A",
                "Urea glutamate",
                "acetaldehyde",
                "acetic acid",
                "acetoacetic acid",
                "acetone",
                "adipic acid",
                "aldehyde",
                "benzaldehyde",
                "benzaldehydes",
                "butyl benzoate",
                "carbamate ester",
                "dialdehyde",
                "fluoroacetic acid",
                "fluoroacetyl-CoA",
                "fluorobenzoic acid",
                "formaldehyde",
                "glutaraldehyde",
                "glutaramic acid",
                "glutaric acid",
                "lactaldehyde",
                "malonic acid",
                "methyl carbamate",
                "methyl pyruvate",
                "methyl(phenyl)malonic acid",
                "methylcarbamoylcarbamate",
                "oxalic acid",
                "p-tolualdehyde",
                "sebacic acid",
                "succinic acid",
                "tridecyl butyrate"
            ],
            "TP": 11,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "carbonyl compound",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Formaldehyde",
                "Canonical name": "(CH2O)",
                "Notes": "The simplest aldehyde"
            },
            {
                "Referent": "Acetaldehyde",
                "Canonical name": "(CH3CHO)",
                "Notes": "The second simplest aldehyde"
            },
            {
                "Referent": "Propionaldehyde",
                "Canonical name": "(CH3CH2CHO)",
                "Notes": "The third simplest aldehyde"
            },
            {
                "Referent": "Butyraldehyde",
                "Canonical name": "(CH3)2CHCHO",
                "Notes": "The fourth simplest aldehyde"
            },
            {
                "Referent": "Valeraldehyde",
                "Canonical name": "(CH3)3CHO",
                "Notes": "The fifth simplest aldehyde"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Formaldehyde",
                "Valeraldehyde"
            ],
            "mismatches": [
                "Acetaldehyde",
                "Propionaldehyde",
                "Butyraldehyde"
            ],
            "true_referents": [
                "(+)-abscisic aldehyde",
                "(S)-lactaldehyde",
                "(Z)-7-Dodecenyl butyrate",
                "Butanoic acid, 2,2-dimethyl-3-oxo-, methyl ester",
                "aldehyde",
                "beta,beta-dimethyl valeric acid",
                "dialdehyde",
                "formaldehyde",
                "valeraldehyde"
            ],
            "TP": 2,
            "FP": 3,
            "FN": 0
        }
    }
]