[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetone (CH3COCH3)",
            "Acetaldehyde (CH3CHO)",
            "Acrolein (CH2=CH-CHO)",
            "Benzoquinone (C6H4(O=)C=O)",
            "Butanone (CH3COCH2CH3)",
            "Cinnamaldehyde (C9H8O)",
            "Cyclohexanone (C6H10O)",
            "Formaldehyde (CH2O)",
            "Glyoxal (OHCCHO)",
            "Hexanal (C6H12O)",
            "Methyl ethyl ketone (CH3COCH2CH3)",
            "Methyl isobutyl ketone (CH3COCH(CH3)CH2CH3)",
            "Methyl vinyl ketone (CH2=C(CH3)-CO-CH3)",
            "Phenylacetone (C9H10O)",
            "Pyruvic acid (CH3COCOOH)",
            "Sorbic acid (CH3CH=CHCH=CHCOOH)",
            "Valeraldehyde (C5H10O)",
            "Valeric acid (C5H10COOH)",
            "Vanillin (C8H8O3)",
            "Acetophenone (C9H10O)",
            "Acetanilide (C8H9NO)",
            "Acetamide (CH3CONH2)",
            "Acetanilide (C8H11NO)",
            "Acetic anhydride (C4H6O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (CH3COCH3)",
                "Acetaldehyde (CH3CHO)",
                "Acrolein (CH2=CH-CHO)",
                "Benzoquinone (C6H4(O=)C=O)",
                "Butanone (CH3COCH2CH3)",
                "Cinnamaldehyde (C9H8O)",
                "Cyclohexanone (C6H10O)",
                "Formaldehyde (CH2O)",
                "Glyoxal (OHCCHO)",
                "Hexanal (C6H12O)",
                "Methyl ethyl ketone (CH3COCH2CH3)",
                "Methyl isobutyl ketone (CH3COCH(CH3)CH2CH3)",
                "Methyl vinyl ketone (CH2=C(CH3)-CO-CH3)",
                "Phenylacetone (C9H10O)",
                "Pyruvic acid (CH3COCOOH)",
                "Sorbic acid (CH3CH=CHCH=CHCOOH)",
                "Valeraldehyde (C5H10O)",
                "Valeric acid (C5H10COOH)",
                "Vanillin (C8H8O3)",
                "Acetophenone (C9H10O)",
                "Acetanilide (C8H9NO)",
                "Acetamide (CH3CONH2)",
                "Acetanilide (C8H11NO)",
                "Acetic anhydride (C4H6O3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaldehyde (CH3CHO)",
                "Acetamide (CH3CONH2)",
                "Acetanilide (C8H11NO)",
                "Acetanilide (C8H9NO)",
                "Acetic anhydride (C4H6O3)",
                "Acetone (CH3COCH3)",
                "Acetophenone (C9H10O)",
                "Acrolein (CH2=CH-CHO)",
                "Benzoquinone (C6H4(O=)C=O)",
                "Butanone (CH3COCH2CH3)",
                "Cinnamaldehyde (C9H8O)",
                "Cyclohexanone (C6H10O)",
                "Formaldehyde (CH2O)",
                "Glyoxal (OHCCHO)",
                "Hexanal (C6H12O)",
                "Methyl ethyl ketone (CH3COCH2CH3)",
                "Methyl isobutyl ketone (CH3COCH(CH3)CH2CH3)",
                "Methyl vinyl ketone (CH2=C(CH3)-CO-CH3)",
                "Phenylacetone (C9H10O)",
                "Pyruvic acid (CH3COCOOH)",
                "Sorbic acid (CH3CH=CHCH=CHCOOH)",
                "Valeraldehyde (C5H10O)",
                "Valeric acid (C5H10COOH)",
                "Vanillin (C8H8O3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic anhydride (Ac2O)",
            "Benzoic anhydride (Bz2O)",
            "Phthalic anhydride (C6H4(CO)2O)",
            "Maleic anhydride (C4H2O3)",
            "Succinic anhydride (C4H4O3)",
            "Glutaric anhydride (C5H6O3)",
            "Pimelic anhydride (C7H12O3)",
            "Adipic anhydride (C6H8O3)",
            "Suberic anhydride (C8H12O3)",
            "Sebacic anhydride (C10H16O3)",
            "Azelaic anhydride (C9H14O3)",
            "Behenic anhydride (C32H62O3)",
            "Oxalic anhydride (C2O3)",
            "Malonic anhydride (C3H2O3)",
            "Succinimide (C4H5NO2)",
            "Phthalimide (C8H5NO2)",
            "Maleimide (C4H3NO2)",
            "Acetyl chloride (CH3COCl)",
            "Benzenesulfonyl chloride (C6H5SO2Cl)",
            "Acetyl fluoride (CH3COF)",
            "Acetyl bromide (CH3COBr)",
            "Acetyl iodide (CH3COI)",
            "Acetic chloride (CH3COCl)",
            "Trichloroacetic anhydride (CCl3C(O)OC(O)CCl3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic anhydride (Ac2O)",
                "Benzoic anhydride (Bz2O)",
                "Phthalic anhydride (C6H4(CO)2O)",
                "Maleic anhydride (C4H2O3)",
                "Succinic anhydride (C4H4O3)",
                "Glutaric anhydride (C5H6O3)",
                "Pimelic anhydride (C7H12O3)",
                "Adipic anhydride (C6H8O3)",
                "Suberic anhydride (C8H12O3)",
                "Sebacic anhydride (C10H16O3)",
                "Azelaic anhydride (C9H14O3)",
                "Behenic anhydride (C32H62O3)",
                "Oxalic anhydride (C2O3)",
                "Malonic anhydride (C3H2O3)",
                "Succinimide (C4H5NO2)",
                "Phthalimide (C8H5NO2)",
                "Maleimide (C4H3NO2)",
                "Acetyl chloride (CH3COCl)",
                "Benzenesulfonyl chloride (C6H5SO2Cl)",
                "Acetyl fluoride (CH3COF)",
                "Acetyl bromide (CH3COBr)",
                "Acetyl iodide (CH3COI)",
                "Acetic chloride (CH3COCl)",
                "Trichloroacetic anhydride (CCl3C(O)OC(O)CCl3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic anhydride (Ac2O)",
                "Acetic chloride (CH3COCl)",
                "Acetyl bromide (CH3COBr)",
                "Acetyl chloride (CH3COCl)",
                "Acetyl fluoride (CH3COF)",
                "Acetyl iodide (CH3COI)",
                "Adipic anhydride (C6H8O3)",
                "Azelaic anhydride (C9H14O3)",
                "Behenic anhydride (C32H62O3)",
                "Benzenesulfonyl chloride (C6H5SO2Cl)",
                "Benzoic anhydride (Bz2O)",
                "Glutaric anhydride (C5H6O3)",
                "Maleic anhydride (C4H2O3)",
                "Maleimide (C4H3NO2)",
                "Malonic anhydride (C3H2O3)",
                "Oxalic anhydride (C2O3)",
                "Phthalic anhydride (C6H4(CO)2O)",
                "Phthalimide (C8H5NO2)",
                "Pimelic anhydride (C7H12O3)",
                "Sebacic anhydride (C10H16O3)",
                "Suberic anhydride (C8H12O3)",
                "Succinic anhydride (C4H4O3)",
                "Succinimide (C4H5NO2)",
                "Trichloroacetic anhydride (CCl3C(O)OC(O)CCl3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetyl chloride (CH3COCl)",
            "Acetic anhydride (CH3CO)2O)",
            "Benzoyl chloride (C6H5COCl)",
            "Propanoyl chloride (C2H5COCl)",
            "Butanoyl chloride (C3H7COCl)",
            "Pentanoyl chloride (C4H9COCl)",
            "Hexanoyl chloride (C5H11COCl)",
            "Heptanoyl chloride (C6H13COCl)",
            "Octanoyl chloride (C7H15COCl)",
            "Nonanoyl chloride (C8H17COCl)",
            "Decanoyl chloride (C9H19COCl)",
            "Stearoyl chloride (C17H35COCl)",
            "Palmitoyl chloride (C15H31COCl)",
            "Myristoyl chloride (C13H27COCl)",
            "Lauryl chloride (C11H23COCl)",
            "Oleoyl chloride (C17H33COCl)",
            "Lauroyl chloride (C11H23COCl)",
            "Acetic anhydride (CH3CO)2O",
            "Propionic anhydride (C2H5CO)2O",
            "Butyric anhydride (C3H7CO)2O",
            "Valeric anhydride (C4H9CO)2O",
            "Isobutyric anhydride ((CH3)2CHCO)2O",
            "Isovaleric anhydride ((CH3)2CHCH2CO)2O",
            "Acetyl bromide (CH3COBr)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetyl chloride (CH3COCl)",
                "Acetic anhydride (CH3CO)2O)",
                "Benzoyl chloride (C6H5COCl)",
                "Propanoyl chloride (C2H5COCl)",
                "Butanoyl chloride (C3H7COCl)",
                "Pentanoyl chloride (C4H9COCl)",
                "Hexanoyl chloride (C5H11COCl)",
                "Heptanoyl chloride (C6H13COCl)",
                "Octanoyl chloride (C7H15COCl)",
                "Nonanoyl chloride (C8H17COCl)",
                "Decanoyl chloride (C9H19COCl)",
                "Stearoyl chloride (C17H35COCl)",
                "Palmitoyl chloride (C15H31COCl)",
                "Myristoyl chloride (C13H27COCl)",
                "Lauryl chloride (C11H23COCl)",
                "Oleoyl chloride (C17H33COCl)",
                "Lauroyl chloride (C11H23COCl)",
                "Acetic anhydride (CH3CO)2O",
                "Propionic anhydride (C2H5CO)2O",
                "Butyric anhydride (C3H7CO)2O",
                "Valeric anhydride (C4H9CO)2O",
                "Isobutyric anhydride ((CH3)2CHCO)2O",
                "Isovaleric anhydride ((CH3)2CHCH2CO)2O",
                "Acetyl bromide (CH3COBr)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic anhydride (CH3CO)2O",
                "Acetic anhydride (CH3CO)2O)",
                "Acetyl bromide (CH3COBr)",
                "Acetyl chloride (CH3COCl)",
                "Benzoyl chloride (C6H5COCl)",
                "Butanoyl chloride (C3H7COCl)",
                "Butyric anhydride (C3H7CO)2O",
                "Decanoyl chloride (C9H19COCl)",
                "Heptanoyl chloride (C6H13COCl)",
                "Hexanoyl chloride (C5H11COCl)",
                "Isobutyric anhydride ((CH3)2CHCO)2O",
                "Isovaleric anhydride ((CH3)2CHCH2CO)2O",
                "Lauroyl chloride (C11H23COCl)",
                "Lauryl chloride (C11H23COCl)",
                "Myristoyl chloride (C13H27COCl)",
                "Nonanoyl chloride (C8H17COCl)",
                "Octanoyl chloride (C7H15COCl)",
                "Oleoyl chloride (C17H33COCl)",
                "Palmitoyl chloride (C15H31COCl)",
                "Pentanoyl chloride (C4H9COCl)",
                "Propanoyl chloride (C2H5COCl)",
                "Propionic anhydride (C2H5CO)2O",
                "Stearoyl chloride (C17H35COCl)",
                "Valeric anhydride (C4H9CO)2O"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic Anhydride (Ac2O)",
            "Propionic Anhydride",
            "Butyric Anhydride",
            "Formic Anhydride",
            "Succinic Anhydride (SA)",
            "Glutaric Anhydride",
            "Adipic Anhydride",
            "Phthalic Anhydride (PA)",
            "Maleic Anhydride (MA)",
            "Fumaric Anhydride",
            "Citric Anhydride",
            "Itaconic Anhydride",
            "Trimellitic Anhydride (TMA)",
            "Pyromellitic Dianhydride (PMDA)",
            "Benzoyl Formic Anhydride",
            "Acetylsalicylic Anhydride",
            "Benzoyl Chloride",
            "Acetyl Chloride (AcCl)",
            "Propionyl Chloride",
            "Butyryl Chloride",
            "Formyl Chloride",
            "Carbonyl Chloride (Phosgene)",
            "Oxalyl Chloride",
            "Thionyl Chloride (SOCl2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic Anhydride (Ac2O)",
                "Propionic Anhydride",
                "Butyric Anhydride",
                "Formic Anhydride",
                "Succinic Anhydride (SA)",
                "Glutaric Anhydride",
                "Adipic Anhydride",
                "Phthalic Anhydride (PA)",
                "Maleic Anhydride (MA)",
                "Fumaric Anhydride",
                "Citric Anhydride",
                "Itaconic Anhydride",
                "Trimellitic Anhydride (TMA)",
                "Pyromellitic Dianhydride (PMDA)",
                "Benzoyl Formic Anhydride",
                "Acetylsalicylic Anhydride",
                "Benzoyl Chloride",
                "Acetyl Chloride (AcCl)",
                "Propionyl Chloride",
                "Butyryl Chloride",
                "Formyl Chloride",
                "Carbonyl Chloride (Phosgene)",
                "Oxalyl Chloride",
                "Thionyl Chloride (SOCl2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic Anhydride (Ac2O)",
                "Acetyl Chloride (AcCl)",
                "Acetylsalicylic Anhydride",
                "Adipic Anhydride",
                "Benzoyl Chloride",
                "Benzoyl Formic Anhydride",
                "Butyric Anhydride",
                "Butyryl Chloride",
                "Carbonyl Chloride (Phosgene)",
                "Citric Anhydride",
                "Formic Anhydride",
                "Formyl Chloride",
                "Fumaric Anhydride",
                "Glutaric Anhydride",
                "Itaconic Anhydride",
                "Maleic Anhydride (MA)",
                "Oxalyl Chloride",
                "Phthalic Anhydride (PA)",
                "Propionic Anhydride",
                "Propionyl Chloride",
                "Pyromellitic Dianhydride (PMDA)",
                "Succinic Anhydride (SA)",
                "Thionyl Chloride (SOCl2)",
                "Trimellitic Anhydride (TMA)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic Anhydride (Ac2O)",
            "Benzoic Anhydride",
            "Propionic Anhydride",
            "Butyric Anhydride",
            "Valeric Anhydride",
            "Caproic Anhydride",
            "Phthalic Anhydride",
            "Succinic Anhydride",
            "Maleic Anhydride",
            "Glutaric Anhydride",
            "Adipic Anhydride",
            "Citric Anhydride",
            "Lauric Anhydride",
            "Stearic Anhydride",
            "Palmitic Anhydride",
            "Oxalic Anhydride",
            "Tartaric Anhydride",
            "Malonic Anhydride",
            "Pimelic Anhydride",
            "Suberic Anhydride",
            "Azelaic Anhydride",
            "Sebacic Anhydride",
            "Isobutyric Anhydride",
            "Trimellitic Anhydride"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic Anhydride (Ac2O)",
                "Benzoic Anhydride",
                "Propionic Anhydride",
                "Butyric Anhydride",
                "Valeric Anhydride",
                "Caproic Anhydride",
                "Phthalic Anhydride",
                "Succinic Anhydride",
                "Maleic Anhydride",
                "Glutaric Anhydride",
                "Adipic Anhydride",
                "Citric Anhydride",
                "Lauric Anhydride",
                "Stearic Anhydride",
                "Palmitic Anhydride",
                "Oxalic Anhydride",
                "Tartaric Anhydride",
                "Malonic Anhydride",
                "Pimelic Anhydride",
                "Suberic Anhydride",
                "Azelaic Anhydride",
                "Sebacic Anhydride",
                "Isobutyric Anhydride",
                "Trimellitic Anhydride"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic Anhydride (Ac2O)",
                "Adipic Anhydride",
                "Azelaic Anhydride",
                "Benzoic Anhydride",
                "Butyric Anhydride",
                "Caproic Anhydride",
                "Citric Anhydride",
                "Glutaric Anhydride",
                "Isobutyric Anhydride",
                "Lauric Anhydride",
                "Maleic Anhydride",
                "Malonic Anhydride",
                "Oxalic Anhydride",
                "Palmitic Anhydride",
                "Phthalic Anhydride",
                "Pimelic Anhydride",
                "Propionic Anhydride",
                "Sebacic Anhydride",
                "Stearic Anhydride",
                "Suberic Anhydride",
                "Succinic Anhydride",
                "Tartaric Anhydride",
                "Trimellitic Anhydride",
                "Valeric Anhydride"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic anhydride (AA)",
            "Propionic anhydride (PA)",
            "Butyric anhydride (BA)",
            "Valeric anhydride (VA)",
            "Caproic anhydride (CA)",
            "Formic anhydride (FA)",
            "Succinic anhydride (SA)",
            "Glutaric anhydride (GA)",
            "Adipic anhydride (AA)",
            "Phthalic anhydride (PA)",
            "Maleic anhydride (MA)",
            "Fumaric anhydride (FA)",
            "Terephthalic anhydride (TA)",
            "Cyclopentane-1,2-dicarboxylic anhydride (CPDCA)",
            "Citric anhydride (CA)",
            "Salicylic anhydride (SA)",
            "Lactic anhydride (LA)",
            "Malonic anhydride (MA)",
            "Acrylic anhydride (AA)",
            "Stearic anhydride (SA)",
            "Benzoic anhydride (BA)",
            "Isobutyric anhydride (IA)",
            "Chloroacetic anhydride (CAA)",
            "2-Methylpropanoic anhydride (2-MPA)",
            "3-Hydroxybutyric anhydride (3-HBA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic anhydride (AA)",
                "Propionic anhydride (PA)",
                "Butyric anhydride (BA)",
                "Valeric anhydride (VA)",
                "Caproic anhydride (CA)",
                "Formic anhydride (FA)",
                "Succinic anhydride (SA)",
                "Glutaric anhydride (GA)",
                "Adipic anhydride (AA)",
                "Phthalic anhydride (PA)",
                "Maleic anhydride (MA)",
                "Fumaric anhydride (FA)",
                "Terephthalic anhydride (TA)",
                "Cyclopentane-1,2-dicarboxylic anhydride (CPDCA)",
                "Citric anhydride (CA)",
                "Salicylic anhydride (SA)",
                "Lactic anhydride (LA)",
                "Malonic anhydride (MA)",
                "Acrylic anhydride (AA)",
                "Stearic anhydride (SA)",
                "Benzoic anhydride (BA)",
                "Isobutyric anhydride (IA)",
                "Chloroacetic anhydride (CAA)",
                "2-Methylpropanoic anhydride (2-MPA)"
            ],
            "mismatches": [
                "3-Hydroxybutyric anhydride (3-HBA)"
            ],
            "true_referents": [
                "2-Methylpropanoic anhydride (2-MPA)",
                "Acetic anhydride (AA)",
                "Acrylic anhydride (AA)",
                "Adipic anhydride (AA)",
                "Benzoic anhydride (BA)",
                "Butyric anhydride (BA)",
                "Caproic anhydride (CA)",
                "Chloroacetic anhydride (CAA)",
                "Citric anhydride (CA)",
                "Cyclopentane-1,2-dicarboxylic anhydride (CPDCA)",
                "Formic anhydride (FA)",
                "Fumaric anhydride (FA)",
                "Glutaric anhydride (GA)",
                "Isobutyric anhydride (IA)",
                "Lactic anhydride (LA)",
                "Maleic anhydride (MA)",
                "Malonic anhydride (MA)",
                "Phthalic anhydride (PA)",
                "Propionic anhydride (PA)",
                "Salicylic anhydride (SA)",
                "Stearic anhydride (SA)",
                "Succinic anhydride (SA)",
                "Terephthalic anhydride (TA)",
                "Valeric anhydride (VA)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic Anhydride (AA)",
            "Phthalic Anhydride (PA)",
            "Maleic Anhydride (MA)",
            "Succinic Anhydride (SA)",
            "Acrylic Anhydride (ACA)",
            "Methacrylic Anhydride (MAA)",
            "Propionic Anhydride (PRA)",
            "Butyric Anhydride (BA)",
            "Isobutyric Anhydride (IBA)",
            "Valeric Anhydride (VA)",
            "Caproic Anhydride (CA)",
            "Heptanoic Anhydride (HA)",
            "Octanoic Anhydride (OA)",
            "Nonanoic Anhydride (NA)",
            "Decanoic Anhydride (DA)",
            "Undecanoic Anhydride (UA)",
            "Dodecanoic Anhydride (DDA)",
            "Tridecanoic Anhydride (TA)",
            "Tetradecanoic Anhydride (TTA)",
            "Pentadecanoic Anhydride (PTA)",
            "Hexadecanoic Anhydride (HDA)",
            "Heptadecanoic Anhydride (HHA)",
            "Octadecanoic Anhydride (OHA)",
            "Nonadecanoic Anhydride (NAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic Anhydride (AA)",
                "Phthalic Anhydride (PA)",
                "Maleic Anhydride (MA)",
                "Succinic Anhydride (SA)",
                "Acrylic Anhydride (ACA)",
                "Methacrylic Anhydride (MAA)",
                "Propionic Anhydride (PRA)",
                "Butyric Anhydride (BA)",
                "Isobutyric Anhydride (IBA)",
                "Valeric Anhydride (VA)",
                "Caproic Anhydride (CA)",
                "Heptanoic Anhydride (HA)",
                "Octanoic Anhydride (OA)",
                "Nonanoic Anhydride (NA)",
                "Decanoic Anhydride (DA)",
                "Undecanoic Anhydride (UA)",
                "Dodecanoic Anhydride (DDA)",
                "Tridecanoic Anhydride (TA)",
                "Tetradecanoic Anhydride (TTA)",
                "Pentadecanoic Anhydride (PTA)",
                "Hexadecanoic Anhydride (HDA)",
                "Heptadecanoic Anhydride (HHA)",
                "Octadecanoic Anhydride (OHA)",
                "Nonadecanoic Anhydride (NAA)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic Anhydride (AA)",
                "Acrylic Anhydride (ACA)",
                "Butyric Anhydride (BA)",
                "Caproic Anhydride (CA)",
                "Decanoic Anhydride (DA)",
                "Dodecanoic Anhydride (DDA)",
                "Heptadecanoic Anhydride (HHA)",
                "Heptanoic Anhydride (HA)",
                "Hexadecanoic Anhydride (HDA)",
                "Isobutyric Anhydride (IBA)",
                "Maleic Anhydride (MA)",
                "Methacrylic Anhydride (MAA)",
                "Nonadecanoic Anhydride (NAA)",
                "Nonanoic Anhydride (NA)",
                "Octadecanoic Anhydride (OHA)",
                "Octanoic Anhydride (OA)",
                "Pentadecanoic Anhydride (PTA)",
                "Phthalic Anhydride (PA)",
                "Propionic Anhydride (PRA)",
                "Succinic Anhydride (SA)",
                "Tetradecanoic Anhydride (TTA)",
                "Tridecanoic Anhydride (TA)",
                "Undecanoic Anhydride (UA)",
                "Valeric Anhydride (VA)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Ester (ethyl acetate)",
            "Ester (methyl butyrate)",
            "Ester (isopropyl propanoate)",
            "Ester (butyl valerate)",
            "Ester (pentyl hexanoate)",
            "Ester (hexyl heptanoate)",
            "Ester (heptyl octanoate)",
            "Ester (octyl nonanoate)",
            "Ester (nonyl decanoate)",
            "Ester (decyl undecanoate)",
            "Ester (undecyl dodecanoate)",
            "Ester (dodecyl tridecanoate)",
            "Ester (tridecyl tetradecanoate)",
            "Ester (tetradecyl pentadecanoate)",
            "Ester (pentadecyl hexadecanoate)",
            "Ester (hexadecyl heptadecanoate)",
            "Ester (heptadecyl octadecanoate)",
            "Ester (octadecyl nonadecanoate)",
            "Ester (nonadecyl eicosanoate)",
            "Ester (eicosyl heneicosanoate)",
            "Ester (heneicosyl docosanoate)",
            "Ester (docosyl tricosanoate)",
            "Ester (tricosyl tetracosanoate)",
            "Ester (tetracosyl pentacosanoate)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ester (ethyl acetate)",
                "Ester (methyl butyrate)",
                "Ester (isopropyl propanoate)",
                "Ester (butyl valerate)",
                "Ester (pentyl hexanoate)",
                "Ester (hexyl heptanoate)",
                "Ester (heptyl octanoate)",
                "Ester (octyl nonanoate)",
                "Ester (nonyl decanoate)",
                "Ester (decyl undecanoate)",
                "Ester (undecyl dodecanoate)",
                "Ester (dodecyl tridecanoate)",
                "Ester (tridecyl tetradecanoate)",
                "Ester (tetradecyl pentadecanoate)",
                "Ester (pentadecyl hexadecanoate)",
                "Ester (hexadecyl heptadecanoate)",
                "Ester (heptadecyl octadecanoate)",
                "Ester (octadecyl nonadecanoate)",
                "Ester (nonadecyl eicosanoate)",
                "Ester (eicosyl heneicosanoate)",
                "Ester (heneicosyl docosanoate)",
                "Ester (docosyl tricosanoate)",
                "Ester (tricosyl tetracosanoate)",
                "Ester (tetracosyl pentacosanoate)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ester (butyl valerate)",
                "Ester (decyl undecanoate)",
                "Ester (docosyl tricosanoate)",
                "Ester (dodecyl tridecanoate)",
                "Ester (eicosyl heneicosanoate)",
                "Ester (ethyl acetate)",
                "Ester (heneicosyl docosanoate)",
                "Ester (heptadecyl octadecanoate)",
                "Ester (heptyl octanoate)",
                "Ester (hexadecyl heptadecanoate)",
                "Ester (hexyl heptanoate)",
                "Ester (isopropyl propanoate)",
                "Ester (methyl butyrate)",
                "Ester (nonadecyl eicosanoate)",
                "Ester (nonyl decanoate)",
                "Ester (octadecyl nonadecanoate)",
                "Ester (octyl nonanoate)",
                "Ester (pentadecyl hexadecanoate)",
                "Ester (pentyl hexanoate)",
                "Ester (tetracosyl pentacosanoate)",
                "Ester (tetradecyl pentadecanoate)",
                "Ester (tricosyl tetracosanoate)",
                "Ester (tridecyl tetradecanoate)",
                "Ester (undecyl dodecanoate)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Methyl acetate (MeOAc)",
            "Ethyl acetate (EtOAc)",
            "Propyl acetate",
            "Butyl acetate",
            "Isopropyl acetate",
            "Isobutyl acetate",
            "Pentyl acetate",
            "Hexyl acetate",
            "Heptyl acetate",
            "Octyl acetate",
            "Nonyl acetate",
            "Decyl acetate",
            "Benzyl acetate",
            "Methyl benzoate",
            "Ethyl benzoate",
            "Phenethyl acetate",
            "Vinyl acetate",
            "Butyl butyrate",
            "Methyl formate",
            "Ethyl formate",
            "Methyl propionate",
            "Ethyl propionate",
            "Methyl butyrate",
            "Ethyl butyrate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methyl acetate (MeOAc)",
                "Ethyl acetate (EtOAc)",
                "Propyl acetate",
                "Butyl acetate",
                "Isopropyl acetate",
                "Isobutyl acetate",
                "Pentyl acetate",
                "Hexyl acetate",
                "Heptyl acetate",
                "Octyl acetate",
                "Nonyl acetate",
                "Decyl acetate",
                "Benzyl acetate",
                "Methyl benzoate",
                "Ethyl benzoate",
                "Phenethyl acetate",
                "Vinyl acetate",
                "Butyl butyrate",
                "Methyl formate",
                "Ethyl formate",
                "Methyl propionate",
                "Ethyl propionate",
                "Methyl butyrate",
                "Ethyl butyrate"
            ],
            "mismatches": [],
            "true_referents": [
                "Benzyl acetate",
                "Butyl acetate",
                "Butyl butyrate",
                "Decyl acetate",
                "Ethyl acetate (EtOAc)",
                "Ethyl benzoate",
                "Ethyl butyrate",
                "Ethyl formate",
                "Ethyl propionate",
                "Heptyl acetate",
                "Hexyl acetate",
                "Isobutyl acetate",
                "Isopropyl acetate",
                "Methyl acetate (MeOAc)",
                "Methyl benzoate",
                "Methyl butyrate",
                "Methyl formate",
                "Methyl propionate",
                "Nonyl acetate",
                "Octyl acetate",
                "Pentyl acetate",
                "Phenethyl acetate",
                "Propyl acetate",
                "Vinyl acetate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Ethyl acetate (EtOAc)",
            "Methyl formate",
            "Butyl butyrate",
            "Isopropyl myristate",
            "Benzyl benzoate",
            "Methyl salicylate",
            "Propyl propionate",
            "Isoamyl acetate",
            "Octyl octanoate",
            "Ethyl butyrate",
            "Methyl acetate",
            "Pentyl valerate",
            "Glyceryl triacetate (triacetin)",
            "Cellulose acetate",
            "Ethyl formate",
            "Methyl benzoate",
            "Propyl acetate",
            "Butyl acetate",
            "Ethyl propionate",
            "Isobutyl formate",
            "Hexyl hexanoate",
            "Methyl stearate",
            "Ethyl palmitate",
            "Vinyl acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl acetate (EtOAc)",
                "Methyl formate",
                "Butyl butyrate",
                "Isopropyl myristate",
                "Benzyl benzoate",
                "Methyl salicylate",
                "Propyl propionate",
                "Isoamyl acetate",
                "Octyl octanoate",
                "Ethyl butyrate",
                "Methyl acetate",
                "Pentyl valerate",
                "Glyceryl triacetate (triacetin)",
                "Cellulose acetate",
                "Ethyl formate",
                "Methyl benzoate",
                "Propyl acetate",
                "Butyl acetate",
                "Ethyl propionate",
                "Isobutyl formate",
                "Hexyl hexanoate",
                "Methyl stearate",
                "Ethyl palmitate",
                "Vinyl acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Benzyl benzoate",
                "Butyl acetate",
                "Butyl butyrate",
                "Cellulose acetate",
                "Ethyl acetate (EtOAc)",
                "Ethyl butyrate",
                "Ethyl formate",
                "Ethyl palmitate",
                "Ethyl propionate",
                "Glyceryl triacetate (triacetin)",
                "Hexyl hexanoate",
                "Isoamyl acetate",
                "Isobutyl formate",
                "Isopropyl myristate",
                "Methyl acetate",
                "Methyl benzoate",
                "Methyl formate",
                "Methyl salicylate",
                "Methyl stearate",
                "Octyl octanoate",
                "Pentyl valerate",
                "Propyl acetate",
                "Propyl propionate",
                "Vinyl acetate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic anhydride",
            "Acetyl chloride",
            "Benzoic anhydride",
            "Benzoyl chloride",
            "Butyric anhydride",
            "Maleic anhydride",
            "Phthalic anhydride",
            "Propionic anhydride",
            "Succinic anhydride",
            "Trifluoroacetic anhydride",
            "Acetic-benzoic anhydride",
            "Acetic-formic anhydride",
            "Acetic-propionic anhydride",
            "Benzoic-formic anhydride",
            "Benzoic-propionic anhydride",
            "Butyric-acetic anhydride",
            "Butyric-benzoic anhydride",
            "Butyric-formic anhydride",
            "Butyric-propionic anhydride",
            "Formic-propionic anhydride",
            "Glutaric anhydride",
            "Itaconic anhydride",
            "Methacrylic anhydride",
            "Valeric anhydride"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic anhydride",
                "Acetyl chloride",
                "Benzoic anhydride",
                "Benzoyl chloride",
                "Butyric anhydride",
                "Maleic anhydride",
                "Phthalic anhydride",
                "Propionic anhydride",
                "Succinic anhydride",
                "Trifluoroacetic anhydride",
                "Acetic-benzoic anhydride",
                "Acetic-formic anhydride",
                "Acetic-propionic anhydride",
                "Benzoic-formic anhydride",
                "Benzoic-propionic anhydride",
                "Butyric-acetic anhydride",
                "Butyric-benzoic anhydride",
                "Butyric-formic anhydride",
                "Butyric-propionic anhydride",
                "Formic-propionic anhydride",
                "Glutaric anhydride",
                "Itaconic anhydride",
                "Methacrylic anhydride",
                "Valeric anhydride"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic anhydride",
                "Acetic-benzoic anhydride",
                "Acetic-formic anhydride",
                "Acetic-propionic anhydride",
                "Acetyl chloride",
                "Benzoic anhydride",
                "Benzoic-formic anhydride",
                "Benzoic-propionic anhydride",
                "Benzoyl chloride",
                "Butyric anhydride",
                "Butyric-acetic anhydride",
                "Butyric-benzoic anhydride",
                "Butyric-formic anhydride",
                "Butyric-propionic anhydride",
                "Formic-propionic anhydride",
                "Glutaric anhydride",
                "Itaconic anhydride",
                "Maleic anhydride",
                "Methacrylic anhydride",
                "Phthalic anhydride",
                "Propionic anhydride",
                "Succinic anhydride",
                "Trifluoroacetic anhydride",
                "Valeric anhydride"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic anhydride (C4H6O3)",
            "Propionic anhydride (C6H10O3)",
            "Butyric anhydride (C8H14O3)",
            "Valeric anhydride (C10H18O3)",
            "Hexanoic anhydride (C12H22O3)",
            "Heptanoic anhydride (C14H26O3)",
            "Octanoic anhydride (C16H30O3)",
            "Nonanoic anhydride (C18H34O3)",
            "Decanoic anhydride (C20H38O3)",
            "Undecanoic anhydride (C22H42O3)",
            "Dodecanoic anhydride (C24H46O3)",
            "Tridecanoic anhydride (C26H50O3)",
            "Tetradecanoic anhydride (C28H54O3)",
            "Pentadecanoic anhydride (C30H58O3)",
            "Hexadecanoic anhydride (C32H62O3)",
            "Heptadecanoic anhydride (C34H66O3)",
            "Octadecanoic anhydride (C36H70O3)",
            "Nonadecanoic anhydride (C38H74O3)",
            "Eicosanoic anhydride (C40H78O3)",
            "Heneicosanoic anhydride (C42H82O3)",
            "Docosanoic anhydride (C44H86O3)",
            "Tricosanoic anhydride (C46H90O3)",
            "Tetracosanoic anhydride (C48H94O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic anhydride (C4H6O3)",
                "Propionic anhydride (C6H10O3)",
                "Butyric anhydride (C8H14O3)",
                "Valeric anhydride (C10H18O3)",
                "Hexanoic anhydride (C12H22O3)",
                "Heptanoic anhydride (C14H26O3)",
                "Octanoic anhydride (C16H30O3)",
                "Nonanoic anhydride (C18H34O3)",
                "Decanoic anhydride (C20H38O3)",
                "Undecanoic anhydride (C22H42O3)",
                "Dodecanoic anhydride (C24H46O3)",
                "Tridecanoic anhydride (C26H50O3)",
                "Tetradecanoic anhydride (C28H54O3)",
                "Pentadecanoic anhydride (C30H58O3)",
                "Hexadecanoic anhydride (C32H62O3)",
                "Heptadecanoic anhydride (C34H66O3)",
                "Octadecanoic anhydride (C36H70O3)",
                "Nonadecanoic anhydride (C38H74O3)",
                "Eicosanoic anhydride (C40H78O3)",
                "Heneicosanoic anhydride (C42H82O3)",
                "Docosanoic anhydride (C44H86O3)",
                "Tricosanoic anhydride (C46H90O3)",
                "Tetracosanoic anhydride (C48H94O3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic anhydride (C4H6O3)",
                "Butyric anhydride (C8H14O3)",
                "Decanoic anhydride (C20H38O3)",
                "Docosanoic anhydride (C44H86O3)",
                "Dodecanoic anhydride (C24H46O3)",
                "Eicosanoic anhydride (C40H78O3)",
                "Heneicosanoic anhydride (C42H82O3)",
                "Heptadecanoic anhydride (C34H66O3)",
                "Heptanoic anhydride (C14H26O3)",
                "Hexadecanoic anhydride (C32H62O3)",
                "Hexanoic anhydride (C12H22O3)",
                "Nonadecanoic anhydride (C38H74O3)",
                "Nonanoic anhydride (C18H34O3)",
                "Octadecanoic anhydride (C36H70O3)",
                "Octanoic anhydride (C16H30O3)",
                "Pentadecanoic anhydride (C30H58O3)",
                "Propionic anhydride (C6H10O3)",
                "Tetracosanoic anhydride (C48H94O3)",
                "Tetradecanoic anhydride (C28H54O3)",
                "Tricosanoic anhydride (C46H90O3)",
                "Tridecanoic anhydride (C26H50O3)",
                "Undecanoic anhydride (C22H42O3)",
                "Valeric anhydride (C10H18O3)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetic Acid (CH3COOH)",
            "Formic Acid (HCOOH)",
            "Propionic Acid (CH3CH2COOH)",
            "Butyric Acid (CH3CH2CH2COOH)",
            "Valeric Acid (CH3CH2CH2CH2COOH)",
            "Caproic Acid (CH3CH2CH2CH2CH2COOH)",
            "Heptanoic Acid (CH3CH2CH2CH2CH2CH2COOH)",
            "Octanoic Acid (CH3CH2CH2CH2CH2CH2CH2COOH)",
            "Nonanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Decanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Undecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Dodecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Tridecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Tetradecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Pentadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Hexadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Heptadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
            "Octadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic Acid (CH3COOH)",
                "Formic Acid (HCOOH)",
                "Propionic Acid (CH3CH2COOH)",
                "Butyric Acid (CH3CH2CH2COOH)",
                "Valeric Acid (CH3CH2CH2CH2COOH)",
                "Caproic Acid (CH3CH2CH2CH2CH2COOH)",
                "Heptanoic Acid (CH3CH2CH2CH2CH2CH2COOH)",
                "Octanoic Acid (CH3CH2CH2CH2CH2CH2CH2COOH)",
                "Nonanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Decanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Undecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Dodecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Tridecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Tetradecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Pentadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Hexadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Heptadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Octadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetic Acid (CH3COOH)",
                "Butyric Acid (CH3CH2CH2COOH)",
                "Caproic Acid (CH3CH2CH2CH2CH2COOH)",
                "Decanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Dodecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Formic Acid (HCOOH)",
                "Heptadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Heptanoic Acid (CH3CH2CH2CH2CH2CH2COOH)",
                "Hexadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Nonanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Octadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Octanoic Acid (CH3CH2CH2CH2CH2CH2CH2COOH)",
                "Pentadecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Propionic Acid (CH3CH2COOH)",
                "Tetradecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Tridecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Undecanoic Acid (CH3CH2CH2CH2CH2CH2CH2CH2CH2CH2COOH)",
                "Valeric Acid (CH3CH2CH2CH2COOH)"
            ],
            "TP": 18,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Anhydride (Acetic anhydride)",
            "Anhydride (Maleic anhydride)",
            "Anhydride (Terephthalic anhydride)",
            "Anhydride (Phthalic anhydride)",
            "Anhydride (Benzoic anhydride)",
            "Anhydride (Oxalic anhydride)",
            "Anhydride (Citric anhydride)",
            "Anhydride (Lactic anhydride)",
            "Anhydride (Malic anhydride)",
            "Anhydride (Tartaric anhydride)",
            "Anhydride (Fumaric anhydride)",
            "Anhydride (Ascorbic anhydride)",
            "Anhydride (Gluconic anhydride)",
            "Anhydride (Glutaric anhydride)",
            "Anhydride (Adipic anhydride)",
            "Anhydride (Succinic anhydride)",
            "Anhydride (Malonic anhydride)",
            "Anhydride (Citramalic anhydride)",
            "Anhydride (Isocitric anhydride)",
            "Anhydride (Aconitic anhydride)",
            "Anhydride (Phenylacetic anhydride)",
            "Anhydride (Cinnamic anhydride)",
            "Anhydride (Cumaric anhydride)",
            "Anhydride (Caffeic anhydride)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Anhydride (Acetic anhydride)",
                "Anhydride (Maleic anhydride)",
                "Anhydride (Terephthalic anhydride)",
                "Anhydride (Phthalic anhydride)",
                "Anhydride (Benzoic anhydride)",
                "Anhydride (Oxalic anhydride)",
                "Anhydride (Citric anhydride)",
                "Anhydride (Lactic anhydride)",
                "Anhydride (Malic anhydride)",
                "Anhydride (Tartaric anhydride)",
                "Anhydride (Fumaric anhydride)",
                "Anhydride (Ascorbic anhydride)",
                "Anhydride (Gluconic anhydride)",
                "Anhydride (Glutaric anhydride)",
                "Anhydride (Adipic anhydride)",
                "Anhydride (Succinic anhydride)",
                "Anhydride (Malonic anhydride)",
                "Anhydride (Citramalic anhydride)",
                "Anhydride (Isocitric anhydride)",
                "Anhydride (Aconitic anhydride)",
                "Anhydride (Phenylacetic anhydride)",
                "Anhydride (Cinnamic anhydride)",
                "Anhydride (Cumaric anhydride)",
                "Anhydride (Caffeic anhydride)"
            ],
            "mismatches": [],
            "true_referents": [
                "Anhydride (Acetic anhydride)",
                "Anhydride (Aconitic anhydride)",
                "Anhydride (Adipic anhydride)",
                "Anhydride (Ascorbic anhydride)",
                "Anhydride (Benzoic anhydride)",
                "Anhydride (Caffeic anhydride)",
                "Anhydride (Cinnamic anhydride)",
                "Anhydride (Citramalic anhydride)",
                "Anhydride (Citric anhydride)",
                "Anhydride (Cumaric anhydride)",
                "Anhydride (Fumaric anhydride)",
                "Anhydride (Gluconic anhydride)",
                "Anhydride (Glutaric anhydride)",
                "Anhydride (Isocitric anhydride)",
                "Anhydride (Lactic anhydride)",
                "Anhydride (Maleic anhydride)",
                "Anhydride (Malic anhydride)",
                "Anhydride (Malonic anhydride)",
                "Anhydride (Oxalic anhydride)",
                "Anhydride (Phenylacetic anhydride)",
                "Anhydride (Phthalic anhydride)",
                "Anhydride (Succinic anhydride)",
                "Anhydride (Tartaric anhydride)",
                "Anhydride (Terephthalic anhydride)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": [
            "Acetanilide (C10H11NO)",
            "Acetophenone (C8H8O)",
            "Benzamide (C7H7NO)",
            "Benzoic anhydride (C14H10O3)",
            "Cyclohexanecarbonyl chloride (C7H11ClO)",
            "Ethyl acetate (C4H8O2)",
            "Ethyl benzoate (C9H10O2)",
            "Methyl benzoate (C8H8O2)",
            "Methyl salicylate (C8H8O3)",
            "Naphthyl acetate (C12H10O2)",
            "Phenyl acetate (C8H8O2)",
            "Phenyl benzoate (C13H10O2)",
            "Propyl benzoate (C10H12O2)",
            "Salicylic anhydride (C14H10O5)",
            "Terephthalic anhydride (C8H4O3)",
            "Toluene-2,4-diisocyanate (C9H6N2O2)",
            "Valeryl chloride (C5H9ClO)",
            "Acetyl chloride (C2H3ClO)",
            "Pivaloyl chloride (C5H9ClO)",
            "Hexanoyl chloride (C6H11ClO)",
            "Heptanoyl chloride (C7H13ClO)",
            "Octanoyl chloride (C8H15ClO)",
            "Nonanoyl chloride (C9H17ClO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetanilide (C10H11NO)",
                "Acetophenone (C8H8O)",
                "Benzamide (C7H7NO)",
                "Benzoic anhydride (C14H10O3)",
                "Cyclohexanecarbonyl chloride (C7H11ClO)",
                "Ethyl acetate (C4H8O2)",
                "Ethyl benzoate (C9H10O2)",
                "Methyl benzoate (C8H8O2)",
                "Methyl salicylate (C8H8O3)",
                "Naphthyl acetate (C12H10O2)",
                "Phenyl acetate (C8H8O2)",
                "Phenyl benzoate (C13H10O2)",
                "Propyl benzoate (C10H12O2)",
                "Salicylic anhydride (C14H10O5)",
                "Terephthalic anhydride (C8H4O3)",
                "Toluene-2,4-diisocyanate (C9H6N2O2)",
                "Valeryl chloride (C5H9ClO)",
                "Acetyl chloride (C2H3ClO)",
                "Pivaloyl chloride (C5H9ClO)",
                "Hexanoyl chloride (C6H11ClO)",
                "Heptanoyl chloride (C7H13ClO)",
                "Octanoyl chloride (C8H15ClO)",
                "Nonanoyl chloride (C9H17ClO)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetanilide (C10H11NO)",
                "Acetophenone (C8H8O)",
                "Acetyl chloride (C2H3ClO)",
                "Benzamide (C7H7NO)",
                "Benzoic anhydride (C14H10O3)",
                "Cyclohexanecarbonyl chloride (C7H11ClO)",
                "Ethyl acetate (C4H8O2)",
                "Ethyl benzoate (C9H10O2)",
                "Heptanoyl chloride (C7H13ClO)",
                "Hexanoyl chloride (C6H11ClO)",
                "Methyl benzoate (C8H8O2)",
                "Methyl salicylate (C8H8O3)",
                "Naphthyl acetate (C12H10O2)",
                "Nonanoyl chloride (C9H17ClO)",
                "Octanoyl chloride (C8H15ClO)",
                "Phenyl acetate (C8H8O2)",
                "Phenyl benzoate (C13H10O2)",
                "Pivaloyl chloride (C5H9ClO)",
                "Propyl benzoate (C10H12O2)",
                "Salicylic anhydride (C14H10O5)",
                "Terephthalic anhydride (C8H4O3)",
                "Toluene-2,4-diisocyanate (C9H6N2O2)",
                "Valeryl chloride (C5H9ClO)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "carboxylic anhydride",
        "domain": "chemistry",
        "response": {
            "1": "acetic acid",
            "2": "ethanoic acid",
            "3": "ethanoic anhydride",
            "4": "acetamide",
            "5": "acetic anhydride",
            "6": "acetyl chloride",
            "7": "acetyl bromide",
            "8": "acetyl iodide",
            "9": "acetyl nitrate",
            "10": "acetyl phosphate",
            "11": "acetyl sulfide",
            "12": "acetyl sulfate",
            "13": "acetyl thiocyanate",
            "14": "acetylurea",
            "15": "acetylurethanol",
            "16": "acetylurea",
            "17": "acetylurethanol",
            "18": "acetylurethanol",
            "19": "acetylurethanol",
            "20": "acetylurethanol",
            "21": "acetylurethanol",
            "22": "acetylurethanol",
            "23": "acetylurethanol",
            "24": "acetylurethanol"
        },
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetic acid",
                "ethanoic acid",
                "ethanoic anhydride",
                "acetamide",
                "acetic anhydride",
                "acetyl chloride",
                "acetyl bromide",
                "acetyl iodide",
                "acetyl nitrate",
                "acetyl phosphate",
                "acetyl sulfide",
                "acetyl sulfate",
                "acetyl thiocyanate",
                "acetylurea",
                "acetylurethanol"
            ],
            "mismatches": [],
            "true_referents": [
                "10: acetyl phosphate",
                "11: acetyl sulfide",
                "12: acetyl sulfate",
                "13: acetyl thiocyanate",
                "14: acetylurea",
                "15: acetylurethanol",
                "16: acetylurea",
                "17: acetylurethanol",
                "18: acetylurethanol",
                "19: acetylurethanol",
                "1: acetic acid",
                "20: acetylurethanol",
                "21: acetylurethanol",
                "22: acetylurethanol",
                "23: acetylurethanol",
                "24: acetylurethanol",
                "2: ethanoic acid",
                "3: ethanoic anhydride",
                "4: acetamide",
                "5: acetic anhydride",
                "6: acetyl chloride",
                "7: acetyl bromide",
                "8: acetyl iodide",
                "9: acetyl nitrate"
            ],
            "TP": 15,
            "FP": 0,
            "FN": 0
        }
    }
]