[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Lactic acid (C3H6O3)",
            "Citric acid (C6H8O7)",
            "Oxalic acid (C2H2O4)",
            "Malic acid (C4H6O5)",
            "Tartaric acid (C4H6O6)",
            "Benzoic acid (C7H6O2)",
            "Fumaric acid (C4H4O4)",
            "Ascorbic acid (vitamin C, C6H8O6)",
            "Glutamic acid (C5H9NO4)",
            "Glycolic acid (C2H4O3)",
            "Terephthalic acid (C8H6O4)",
            "Adipic acid (C6H10O4)",
            "Succinic acid (C4H6O4)",
            "Ethyl acetate (ester, CH3COOC2H5)",
            "Propionic acid (C3H6O2)",
            "Salicylic acid (C7H6O3)",
            "Acrylic acid (C3H4O2)",
            "Phthalic acid (C8H6O4)",
            "Maleic acid (C4H4O4)",
            "Itaconic acid (C5H6O4)",
            "Lysine (amino acid, C6H14N2O2)",
            "Pyruvic acid (C3H4O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Lactic acid (C3H6O3)",
                "Citric acid (C6H8O7)",
                "Oxalic acid (C2H2O4)",
                "Benzoic acid (C7H6O2)",
                "Ascorbic acid (vitamin C, C6H8O6)",
                "Succinic acid (C4H6O4)",
                "Salicylic acid (C7H6O3)",
                "Pyruvic acid (C3H4O3)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Malic acid (C4H6O5)",
                "Tartaric acid (C4H6O6)",
                "Fumaric acid (C4H4O4)",
                "Glutamic acid (C5H9NO4)",
                "Glycolic acid (C2H4O3)",
                "Terephthalic acid (C8H6O4)",
                "Adipic acid (C6H10O4)",
                "Ethyl acetate (ester, CH3COOC2H5)",
                "Propionic acid (C3H6O2)",
                "Acrylic acid (C3H4O2)",
                "Phthalic acid (C8H6O4)",
                "Maleic acid (C4H4O4)",
                "Itaconic acid (C5H6O4)",
                "Lysine (amino acid, C6H14N2O2)"
            ],
            "true_referents": [
                "2-Aminoadipic Acid",
                "Acetates",
                "Acetic Acid",
                "Acrylates",
                "Aminooxyacetic Acid",
                "Ascorbic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Citric Acid",
                "Dehydroascorbic Acid",
                "Dibutyl Phthalate",
                "Diethylhexyl Phthalate",
                "Dimethyl Fumarate",
                "Glucaric Acid",
                "Glucuronic Acid",
                "Lactic Acid",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Phthalic Acids",
                "Phthalic Anhydrides",
                "Propionates",
                "Pyruvic Acid",
                "Quinic Acid",
                "Salicylic Acid",
                "Sodium Acetate",
                "Sodium Salicylate",
                "Succinic Acid",
                "o-Phthalaldehyde"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Benzoic acid (C6H5COOH)",
            "Citric acid (C6H8O7)",
            "Formic acid (HCOOH)",
            "Glutaric acid (C5H8O4)",
            "Lactic acid (C3H6O3)",
            "Malic acid (C4H6O5)",
            "Oxalic acid (C2H2O4)",
            "Palmitic acid (C16H32O2)",
            "Stearic acid (C18H36O2)",
            "Tartaric acid (C4H6O6)",
            "Uric acid (C5H4N4O3)",
            "Ascorbic acid (C6H8O6)",
            "Butyric acid (C4H8O2)",
            "Fumaric acid (C4H4O4)",
            "Glycolic acid (C2H4O3)",
            "Hydrochloric acid (HCl, not a carboxylic acid, but often confused)",
            "Isocitric acid (C6H8O7)",
            "Lignoceric acid (C24H48O2)",
            "Myristic acid (C14H28O2)",
            "Nonanoic acid (C9H18O2)",
            "Pentanoic acid (C5H10O2)",
            "Propionic acid (C3H6O2)",
            "Succinic acid (C4H6O4)",
            "Valeric acid (C5H10O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Benzoic acid (C6H5COOH)",
                "Citric acid (C6H8O7)",
                "Lactic acid (C3H6O3)",
                "Oxalic acid (C2H2O4)",
                "Ascorbic acid (C6H8O6)",
                "Butyric acid (C4H8O2)",
                "Pentanoic acid (C5H10O2)",
                "Succinic acid (C4H6O4)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Glutaric acid (C5H8O4)",
                "Malic acid (C4H6O5)",
                "Palmitic acid (C16H32O2)",
                "Stearic acid (C18H36O2)",
                "Tartaric acid (C4H6O6)",
                "Uric acid (C5H4N4O3)",
                "Fumaric acid (C4H4O4)",
                "Glycolic acid (C2H4O3)",
                "Hydrochloric acid (HCl, not a carboxylic acid, but often confused)",
                "Isocitric acid (C6H8O7)",
                "Lignoceric acid (C24H48O2)",
                "Myristic acid (C14H28O2)",
                "Nonanoic acid (C9H18O2)",
                "Propionic acid (C3H6O2)",
                "Valeric acid (C5H10O2)"
            ],
            "true_referents": [
                "Acetic Acid",
                "Ascorbic Acid",
                "Benzoic Acid",
                "Butyrates",
                "Butyric Acid",
                "Citric Acid",
                "Dehydroascorbic Acid",
                "Dichloroacetic Acid",
                "Dimethyl Fumarate",
                "Glucaric Acid",
                "Glucuronic Acid",
                "Glyceric Acids",
                "Isocitrates",
                "Lactic Acid",
                "Muramic Acids",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Pentanoic Acids",
                "Pentetic Acid",
                "Propionates",
                "Pyruvic Acid",
                "Sodium Acetate",
                "Succinic Acid",
                "Trichloroacetic Acid",
                "Urocanic Acid"
            ],
            "TP": 9,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Methanoic acid (Formic acid)",
            "Ethanoic acid (Acetic acid)",
            "Propanoic acid (Propionic acid)",
            "Butanoic acid (Butyric acid)",
            "Pentanoic acid (Valeric acid)",
            "Hexanoic acid (Caproic acid)",
            "Heptanoic acid (Enanthic acid)",
            "Octanoic acid (Caprylic acid)",
            "Nonanoic acid (Pelargonic acid)",
            "Decanoic acid (Capric acid)",
            "Undecanoic acid",
            "Dodecanoic acid (Lauric acid)",
            "Tridecanoic acid",
            "Tetradecanoic acid (Myristic acid)",
            "Pentadecanoic acid",
            "Hexadecanoic acid (Palmitic acid)",
            "Heptadecanoic acid",
            "Octadecanoic acid (Stearic acid)",
            "9-Octadecanoic acid (Oleic acid)",
            "9,12-Octadecadienoic acid (Linoleic acid)",
            "9,12,15-Octadecatrienoic acid (Linolenic acid)",
            "Benzoic acid",
            "Salicylic acid",
            "Citrulline"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic Acid",
                "Propionates",
                "Butyric Acid",
                "Benzoic Acid",
                "Salicylic Acid"
            ],
            "mismatches": [
                "Methanoic acid (Formic acid)",
                "Pentanoic acid (Valeric acid)",
                "Hexanoic acid (Caproic acid)",
                "Heptanoic acid (Enanthic acid)",
                "Octanoic acid (Caprylic acid)",
                "Nonanoic acid (Pelargonic acid)",
                "Decanoic acid (Capric acid)",
                "Undecanoic acid",
                "Dodecanoic acid (Lauric acid)",
                "Tridecanoic acid",
                "Tetradecanoic acid (Myristic acid)",
                "Pentadecanoic acid",
                "Hexadecanoic acid (Palmitic acid)",
                "Heptadecanoic acid",
                "Octadecanoic acid (Stearic acid)",
                "9-Octadecanoic acid (Oleic acid)",
                "9,12-Octadecadienoic acid (Linoleic acid)",
                "9,12,15-Octadecatrienoic acid (Linolenic acid)",
                "Citrulline"
            ],
            "true_referents": [
                "1-Carboxyglutamic Acid",
                "3-Hydroxybutyric Acid",
                "Acetic Acid",
                "Aconitic Acid",
                "Acrylamide",
                "Aminocaproic Acid",
                "Benzoic Acid",
                "Butyrates",
                "Butyric Acid",
                "Caprylates",
                "Citric Acid",
                "Glyceric Acids",
                "Homogentisic Acid",
                "Metrizoic Acid",
                "Pentanoic Acids",
                "Pentetic Acid",
                "Peracetic Acid",
                "Propionates",
                "Quinic Acid",
                "Salicylic Acid",
                "Sodium Salicylate",
                "Succinic Acid",
                "Tricarboxylic Acids"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Benzoic acid (C6H5COOH)",
            "Citric acid (C6H8O7)",
            "Oxalic acid (C2H2O4)",
            "Lactic acid (C3H6O3)",
            "Glycolic acid (C2H4O3)",
            "Propionic acid (C3H6O2)",
            "Butyric acid (C4H8O2)",
            "Valeric acid (C5H10O2)",
            "Caproic acid (C6H12O2)",
            "Caprylic acid (C8H16O2)",
            "Capric acid (C10H20O2)",
            "Lauric acid (C12H24O2)",
            "Myristic acid (C14H28O2)",
            "Palmitic acid (C16H32O2)",
            "Stearic acid (C18H36O2)",
            "Oleic acid (C18H34O2)",
            "Linoleic acid (C18H32O2)",
            "Alpha-ketoglutaric acid (C5H6O5)",
            "Pyruvic acid (C3H4O3)",
            "Uric acid (C5H4N4O3)",
            "Ascorbic acid (C6H8O6)",
            "Taurocholic acid (C26H44NO7S)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Benzoic acid (C6H5COOH)",
                "Citric acid (C6H8O7)",
                "Oxalic acid (C2H2O4)",
                "Lactic acid (C3H6O3)",
                "Butyric acid (C4H8O2)",
                "Valeric acid (C5H10O2)",
                "Caprylic acid (C8H16O2)",
                "Pyruvic acid (C3H4O3)",
                "Ascorbic acid (C6H8O6)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Glycolic acid (C2H4O3)",
                "Propionic acid (C3H6O2)",
                "Caproic acid (C6H12O2)",
                "Capric acid (C10H20O2)",
                "Lauric acid (C12H24O2)",
                "Myristic acid (C14H28O2)",
                "Palmitic acid (C16H32O2)",
                "Stearic acid (C18H36O2)",
                "Oleic acid (C18H34O2)",
                "Linoleic acid (C18H32O2)",
                "Alpha-ketoglutaric acid (C5H6O5)",
                "Uric acid (C5H4N4O3)",
                "Taurocholic acid (C26H44NO7S)"
            ],
            "true_referents": [
                "1-Carboxyglutamic Acid",
                "Acetic Acid",
                "Aminocaproic Acid",
                "Ascorbic Acid",
                "Benzoic Acid",
                "Butyrates",
                "Butyric Acid",
                "Caprylates",
                "Citric Acid",
                "Dehydroascorbic Acid",
                "Glucaric Acid",
                "Glucuronic Acid",
                "Glyceric Acids",
                "Ketoglutaric Acids",
                "Lactic Acid",
                "Mevalonic Acid",
                "Muramic Acids",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Polylactic Acid-Polyglycolic Acid Copolymer",
                "Propionates",
                "Propyl Gallate",
                "Pyruvic Acid",
                "Sodium Acetate",
                "Succinic Acid",
                "Thioctic Acid",
                "Urocanic Acid",
                "Valerates"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Formic acid (HCOOH)",
            "Acetic acid (CH3COOH)",
            "Propionic acid (C2H5COOH)",
            "Butyric acid (C3H7COOH)",
            "Valeric acid (C4H9COOH)",
            "Caproic acid (C5H11COOH)",
            "Caprylic acid (C7H15COOH)",
            "Capric acid (C9H19COOH)",
            "Lauric acid (C11H23COOH)",
            "Myristic acid (C13H27COOH)",
            "Palmitic acid (C15H31COOH)",
            "Stearic acid (C17H35COOH)",
            "Oleic acid (C18H34O2)",
            "Linoleic acid (C18H32O2)",
            "Arachidic acid (C19H39COOH)",
            "Behenic acid (C21H43COOH)",
            "Lignoceric acid (C23H47COOH)",
            "Cerotic acid (C25H51COOH)",
            "Malonic acid (C3H4O4)",
            "Succinic acid (C4H6O4)",
            "Glutaric acid (C5H8O4)",
            "Adipic acid (C6H10O4)",
            "Pimelic acid (C7H12O4)",
            "Suberic acid (C8H14O4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Butyric acid (C3H7COOH)",
                "Succinic acid (C4H6O4)",
                "Pimelic acid (C7H12O4)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Propionic acid (C2H5COOH)",
                "Valeric acid (C4H9COOH)",
                "Caproic acid (C5H11COOH)",
                "Caprylic acid (C7H15COOH)",
                "Capric acid (C9H19COOH)",
                "Lauric acid (C11H23COOH)",
                "Myristic acid (C13H27COOH)",
                "Palmitic acid (C15H31COOH)",
                "Stearic acid (C17H35COOH)",
                "Oleic acid (C18H34O2)",
                "Linoleic acid (C18H32O2)",
                "Arachidic acid (C19H39COOH)",
                "Behenic acid (C21H43COOH)",
                "Lignoceric acid (C23H47COOH)",
                "Cerotic acid (C25H51COOH)",
                "Malonic acid (C3H4O4)",
                "Glutaric acid (C5H8O4)",
                "Adipic acid (C6H10O4)",
                "Suberic acid (C8H14O4)"
            ],
            "true_referents": [
                "2-Aminoadipic Acid",
                "Acetic Acid",
                "Aconitic Acid",
                "Aminocaproic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Caprylates",
                "Citric Acid",
                "Edetic Acid",
                "Glucaric Acid",
                "Glyceric Acids",
                "Methylmalonic Acid",
                "Mevalonic Acid",
                "Peracetic Acid",
                "Phthalic Acids",
                "Pimelic Acids",
                "Polylactic Acid-Polyglycolic Acid Copolymer",
                "Propionates",
                "Sodium Acetate",
                "Succinic Acid",
                "Valerates"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Propionic acid (C2H5COOH)",
            "Butyric acid (C3H7COOH)",
            "Valeric acid (C4H9COOH)",
            "Caproic acid (C5H11COOH)",
            "Caprylic acid (C7H15COOH)",
            "Capric acid (C9H19COOH)",
            "Lauric acid (C11H23COOH)",
            "Myristic acid (C13H27COOH)",
            "Palmitic acid (C15H31COOH)",
            "Stearic acid (C17H35COOH)",
            "Arachidic acid (C19H39COOH)",
            "Benzoic acid (C6H5COOH)",
            "Salicylic acid (C7H6O3)",
            "Citric acid (C6H8O7)",
            "Lactic acid (C3H6O3)",
            "Malic acid (C4H6O5)",
            "Tartaric acid (C4H6O6)",
            "Oxalic acid (C2H2O4)",
            "Fumaric acid (C4H4O4)",
            "Succinic acid (C4H6O4)",
            "Glutaric acid (C5H8O4)",
            "Adipic acid (C6H10O4)",
            "Ascorbic acid (C6H8O6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Butyric acid (C3H7COOH)",
                "Benzoic acid (C6H5COOH)",
                "Salicylic acid (C7H6O3)",
                "Citric acid (C6H8O7)",
                "Lactic acid (C3H6O3)",
                "Oxalic acid (C2H2O4)",
                "Succinic acid (C4H6O4)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Propionic acid (C2H5COOH)",
                "Valeric acid (C4H9COOH)",
                "Caproic acid (C5H11COOH)",
                "Caprylic acid (C7H15COOH)",
                "Capric acid (C9H19COOH)",
                "Lauric acid (C11H23COOH)",
                "Myristic acid (C13H27COOH)",
                "Palmitic acid (C15H31COOH)",
                "Stearic acid (C17H35COOH)",
                "Arachidic acid (C19H39COOH)",
                "Malic acid (C4H6O5)",
                "Tartaric acid (C4H6O6)",
                "Fumaric acid (C4H4O4)",
                "Glutaric acid (C5H8O4)",
                "Adipic acid (C6H10O4)",
                "Ascorbic acid (C6H8O6)"
            ],
            "true_referents": [
                "2-Aminoadipic Acid",
                "Acetic Acid",
                "Aminocaproic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Caprylates",
                "Citric Acid",
                "Dimethyl Fumarate",
                "Glucaric Acid",
                "Glyceric Acids",
                "Lactic Acid",
                "Mevalonic Acid",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Propionates",
                "Pyruvic Acid",
                "Salicylic Acid",
                "Sodium Acetate",
                "Sodium Salicylate",
                "Succinic Acid",
                "Valerates"
            ],
            "TP": 8,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Propionic acid (C2H5COOH)",
            "Butyric acid (C3H7COOH)",
            "Valeric acid (C4H9COOH)",
            "Caproic acid (C5H11COOH)",
            "Enanthic acid (C6H13COOH)",
            "Caprylic acid (C7H15COOH)",
            "Pelargonic acid (C8H17COOH)",
            "Capric acid (C9H19COOH)",
            "Undecylic acid (C10H21COOH)",
            "Lauric acid (C12H24O2)",
            "Myristic acid (C14H28O2)",
            "Palmitic acid (C16H32O2)",
            "Stearic acid (C18H36O2)",
            "Arachidic acid (C20H40O2)",
            "Behenic acid (C22H44O2)",
            "Lignoceric acid (C24H48O2)",
            "Cerotic acid (C26H52O2)",
            "Montanic acid (C28H56O2)",
            "Melissic acid (C30H60O2)",
            "Oxalic acid (C2H2O4)",
            "Malonic acid (C3H4O4)",
            "Succinic acid (C4H6O4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Butyric acid (C3H7COOH)",
                "Oxalic acid (C2H2O4)",
                "Succinic acid (C4H6O4)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Propionic acid (C2H5COOH)",
                "Valeric acid (C4H9COOH)",
                "Caproic acid (C5H11COOH)",
                "Enanthic acid (C6H13COOH)",
                "Caprylic acid (C7H15COOH)",
                "Pelargonic acid (C8H17COOH)",
                "Capric acid (C9H19COOH)",
                "Undecylic acid (C10H21COOH)",
                "Lauric acid (C12H24O2)",
                "Myristic acid (C14H28O2)",
                "Palmitic acid (C16H32O2)",
                "Stearic acid (C18H36O2)",
                "Arachidic acid (C20H40O2)",
                "Behenic acid (C22H44O2)",
                "Lignoceric acid (C24H48O2)",
                "Cerotic acid (C26H52O2)",
                "Montanic acid (C28H56O2)",
                "Melissic acid (C30H60O2)",
                "Malonic acid (C3H4O4)"
            ],
            "true_referents": [
                "2,4-Dichlorophenoxyacetic Acid",
                "3,4-Dihydroxyphenylacetic Acid",
                "Acetic Acid",
                "Aconitic Acid",
                "Aminocaproic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Caprylates",
                "Citric Acid",
                "Glucaric Acid",
                "Glyceric Acids",
                "Methylmalonic Acid",
                "Muramic Acids",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Pentetic Acid",
                "Propionates",
                "Sodium Acetate",
                "Succinic Acid",
                "Valerates",
                "Vanillic Acid"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Propionic acid (CH3CH2COOH)",
            "Butyric acid (CH3(CH2)2COOH)",
            "Valeric acid (CH3(CH2)3COOH)",
            "Caproic acid (CH3(CH2)4COOH)",
            "Enanthic acid (CH3(CH2)5COOH)",
            "Caprylic acid (CH3(CH2)6COOH)",
            "Pelargonic acid (CH3(CH2)7COOH)",
            "Capric acid (CH3(CH2)8COOH)",
            "Lauric acid (CH3(CH2)10COOH)",
            "Myristic acid (CH3(CH2)12COOH)",
            "Palmitic acid (CH3(CH2)14COOH)",
            "Stearic acid (CH3(CH2)16COOH)",
            "Arachidic acid (CH3(CH2)18COOH)",
            "Behenic acid (CH3(CH2)20COOH)",
            "Lignoceric acid (CH3(CH2)22COOH)",
            "Oleic acid (CH3(CH2)7CH=CH(CH2)7COOH)",
            "Linoleic acid (CH3(CH2)4CH=CHCH2CH=CH(CH2)7COOH)",
            "Linolenic acid (CH3CH2CH=CHCH2CH=CHCH2CH=CH(CH2)7COOH)",
            "Arachidonic acid (CH3(CH2)4CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)3COOH)",
            "Docosahexaenoic acid (CH3(CH2)4CH=CHCH2CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)3COOH)",
            "Terephthalic acid (C6H4(COOH)2)",
            "Phthalic acid (C6H4(COOH)2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Butyric acid (CH3(CH2)2COOH)",
                "Valeric acid (CH3(CH2)3COOH)",
                "Phthalic acid (C6H4(COOH)2)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Propionic acid (CH3CH2COOH)",
                "Caproic acid (CH3(CH2)4COOH)",
                "Enanthic acid (CH3(CH2)5COOH)",
                "Caprylic acid (CH3(CH2)6COOH)",
                "Pelargonic acid (CH3(CH2)7COOH)",
                "Capric acid (CH3(CH2)8COOH)",
                "Lauric acid (CH3(CH2)10COOH)",
                "Myristic acid (CH3(CH2)12COOH)",
                "Palmitic acid (CH3(CH2)14COOH)",
                "Stearic acid (CH3(CH2)16COOH)",
                "Arachidic acid (CH3(CH2)18COOH)",
                "Behenic acid (CH3(CH2)20COOH)",
                "Lignoceric acid (CH3(CH2)22COOH)",
                "Oleic acid (CH3(CH2)7CH=CH(CH2)7COOH)",
                "Linoleic acid (CH3(CH2)4CH=CHCH2CH=CH(CH2)7COOH)",
                "Linolenic acid (CH3CH2CH=CHCH2CH=CHCH2CH=CH(CH2)7COOH)",
                "Arachidonic acid (CH3(CH2)4CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)3COOH)",
                "Docosahexaenoic acid (CH3(CH2)4CH=CHCH2CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)3COOH)",
                "Terephthalic acid (C6H4(COOH)2)"
            ],
            "true_referents": [
                "2,4,5-Trichlorophenoxyacetic Acid",
                "3,4-Dihydroxyphenylacetic Acid",
                "3-Hydroxybutyric Acid",
                "Acetates",
                "Acetic Acid",
                "Aminocaproic Acid",
                "Butyrates",
                "Butyric Acid",
                "Caprylates",
                "Citric Acid",
                "Diethylhexyl Phthalate",
                "Esters",
                "Glyceric Acids",
                "Mevalonic Acid",
                "Pentetic Acid",
                "Peracetic Acid",
                "Phthalic Acids",
                "Phthalic Anhydrides",
                "Propionates",
                "Sodium Acetate",
                "Succinic Acid",
                "Valerates",
                "o-Phthalaldehyde"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH\u2083COOH)",
            "Formic acid (HCOOH)",
            "Benzoic acid (C\u2086H\u2085COOH)",
            "Citric acid (C\u2086H\u2088O\u2087)",
            "Lactic acid (C\u2083H\u2086O\u2083)",
            "Oxalic acid (C\u2082H\u2082O\u2084)",
            "Tartaric acid (C\u2084H\u2086O\u2086)",
            "Malic acid (C\u2084H\u2086O\u2085)",
            "Succinic acid (C\u2084H\u2086O\u2084)",
            "Butyric acid (C\u2083H\u2087COOH)",
            "Propionic acid (C\u2082H\u2085COOH)",
            "Palmitic acid (C\u2081\u2086H\u2083\u2082O\u2082)",
            "Stearic acid (C\u2081\u2088H\u2083\u2086O\u2082)",
            "Oleic acid (C\u2081\u2087H\u2083\u2083COOH)",
            "Ascorbic acid (C\u2086H\u2088O\u2086)",
            "Phthalic acid (C\u2086H\u2084(CO)\u2082OH)",
            "Adipic acid (C\u2086H\u2081\u2080O\u2084)",
            "Fumaric acid (C\u2084H\u2084O\u2084)",
            "Maleic acid (C\u2084H\u2084O\u2084)",
            "Caproic acid (C\u2085H\u2081\u2081COOH)",
            "Heptanoic acid (C\u2087H\u2081\u2085COOH)",
            "Nonanoic acid (C\u2089H\u2081\u2089COOH)",
            "Decanoic acid (C\u2081\u2080H\u2081\u2089COOH)",
            "Eicosanoic acid (C\u2082\u2080H\u2083\u2089COOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH\u2083COOH)",
                "Benzoic acid (C\u2086H\u2085COOH)",
                "Citric acid (C\u2086H\u2088O\u2087)",
                "Lactic acid (C\u2083H\u2086O\u2083)",
                "Oxalic acid (C\u2082H\u2082O\u2084)",
                "Succinic acid (C\u2084H\u2086O\u2084)",
                "Butyric acid (C\u2083H\u2087COOH)",
                "Ascorbic acid (C\u2086H\u2088O\u2086)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Tartaric acid (C\u2084H\u2086O\u2086)",
                "Malic acid (C\u2084H\u2086O\u2085)",
                "Propionic acid (C\u2082H\u2085COOH)",
                "Palmitic acid (C\u2081\u2086H\u2083\u2082O\u2082)",
                "Stearic acid (C\u2081\u2088H\u2083\u2086O\u2082)",
                "Oleic acid (C\u2081\u2087H\u2083\u2083COOH)",
                "Phthalic acid (C\u2086H\u2084(CO)\u2082OH)",
                "Adipic acid (C\u2086H\u2081\u2080O\u2084)",
                "Fumaric acid (C\u2084H\u2084O\u2084)",
                "Maleic acid (C\u2084H\u2084O\u2084)",
                "Caproic acid (C\u2085H\u2081\u2081COOH)",
                "Heptanoic acid (C\u2087H\u2081\u2085COOH)",
                "Nonanoic acid (C\u2089H\u2081\u2089COOH)",
                "Decanoic acid (C\u2081\u2080H\u2081\u2089COOH)",
                "Eicosanoic acid (C\u2082\u2080H\u2083\u2089COOH)"
            ],
            "true_referents": [
                "3-Hydroxybutyric Acid",
                "Acetic Acid",
                "Aminocaproic Acid",
                "Ascorbic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Citric Acid",
                "Dehydroascorbic Acid",
                "Dibutyl Phthalate",
                "Dimethyl Fumarate",
                "Fumarates",
                "Glucaric Acid",
                "Glyceric Acids",
                "Lactic Acid",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Peracetic Acid",
                "Phthalic Acids",
                "Propionates",
                "Pyruvic Acid",
                "Quinic Acid",
                "Sodium Acetate",
                "Succinic Acid",
                "Tricarboxylic Acids",
                "o-Phthalaldehyde"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Propionic acid (C2H5COOH)",
            "Butyric acid (C3H7COOH)",
            "Valeric acid (C4H9COOH)",
            "Caproic acid (C5H11COOH)",
            "Benzoic acid (C6H5COOH)",
            "Oxalic acid (HOOCCOOH)",
            "Lactic acid (CH3CH(OH)COOH)",
            "Citric acid (C6H8O7)",
            "Malic acid (C4H6O5)",
            "Tartaric acid (C4H6O6)",
            "Salicylic acid (C7H6O3)",
            "Palmitic acid (C15H31COOH)",
            "Stearic acid (C17H35COOH)",
            "Oleic acid (C17H33COOH)",
            "Linoleic acid (C17H31COOH)",
            "Arachidonic acid (C19H31COOH)",
            "Docosahexaenoic acid (DHA)",
            "Eicosapentaenoic acid (EPA)",
            "Ascorbic acid (Vitamin C)",
            "Folic acid (Vitamin B9)",
            "Retinoic acid (Vitamin A acid)",
            "Acetylsalicylic acid (Aspirin)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Butyric acid (C3H7COOH)",
                "Benzoic acid (C6H5COOH)",
                "Oxalic acid (HOOCCOOH)",
                "Lactic acid (CH3CH(OH)COOH)",
                "Citric acid (C6H8O7)",
                "Salicylic acid (C7H6O3)",
                "Ascorbic acid (Vitamin C)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Propionic acid (C2H5COOH)",
                "Valeric acid (C4H9COOH)",
                "Caproic acid (C5H11COOH)",
                "Malic acid (C4H6O5)",
                "Tartaric acid (C4H6O6)",
                "Palmitic acid (C15H31COOH)",
                "Stearic acid (C17H35COOH)",
                "Oleic acid (C17H33COOH)",
                "Linoleic acid (C17H31COOH)",
                "Arachidonic acid (C19H31COOH)",
                "Docosahexaenoic acid (DHA)",
                "Eicosapentaenoic acid (EPA)",
                "Folic acid (Vitamin B9)",
                "Retinoic acid (Vitamin A acid)",
                "Acetylsalicylic acid (Aspirin)"
            ],
            "true_referents": [
                "3-Hydroxybutyric Acid",
                "Acetic Acid",
                "Aminocaproic Acid",
                "Ascorbic Acid",
                "Benzoic Acid",
                "Butyrates",
                "Butyric Acid",
                "Citric Acid",
                "Dehydroascorbic Acid",
                "Glucaric Acid",
                "Glyceric Acids",
                "Lactates",
                "Lactic Acid",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Propionates",
                "Retinyl Esters",
                "Salicylates",
                "Salicylic Acid",
                "Sodium Acetate",
                "Sodium Lactate",
                "Sodium Salicylate",
                "Succinic Acid",
                "Valerates"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Citric acid (C6H8O7)",
            "Lactic acid (C3H6O3)",
            "Benzoic acid (C6H5COOH)",
            "Oxalic acid (H2C2O4)",
            "Succinic acid (C4H6O4)",
            "Fumaric acid (C4H4O4)",
            "Malic acid (C4H6O5)",
            "Tartaric acid (C4H6O6)",
            "Salicylic acid (C7H6O3)",
            "Acetylsalicylic acid (Aspirin, C9H8O4)",
            "Palmitic acid (C16H32O2)",
            "Stearic acid (C18H36O2)",
            "Oleic acid (C18H34O2)",
            "Linoleic acid (C18H32O2)",
            "\u03b1-Linolenic acid (C18H30O2)",
            "Arachidonic acid (C20H32O2)",
            "Eicosapentaenoic acid (EPA, C20H30O2)",
            "Docosahexaenoic acid (DHA, C22H32O2)",
            "Valproic acid (C8H16O2)",
            "Retinoic acid (C20H28O2)",
            "Indole-3-acetic acid (IAA, C10H9NO2)",
            "Gibberellic acid (GA3, C19H22O6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Citric acid (C6H8O7)",
                "Lactic acid (C3H6O3)",
                "Benzoic acid (C6H5COOH)",
                "Oxalic acid (H2C2O4)",
                "Succinic acid (C4H6O4)",
                "Salicylic acid (C7H6O3)",
                "Valproic acid (C8H16O2)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Fumaric acid (C4H4O4)",
                "Malic acid (C4H6O5)",
                "Tartaric acid (C4H6O6)",
                "Acetylsalicylic acid (Aspirin, C9H8O4)",
                "Palmitic acid (C16H32O2)",
                "Stearic acid (C18H36O2)",
                "Oleic acid (C18H34O2)",
                "Linoleic acid (C18H32O2)",
                "\u03b1-Linolenic acid (C18H30O2)",
                "Arachidonic acid (C20H32O2)",
                "Eicosapentaenoic acid (EPA, C20H30O2)",
                "Docosahexaenoic acid (DHA, C22H32O2)",
                "Retinoic acid (C20H28O2)",
                "Indole-3-acetic acid (IAA, C10H9NO2)",
                "Gibberellic acid (GA3, C19H22O6)"
            ],
            "true_referents": [
                "2,4,5-Trichlorophenoxyacetic Acid",
                "3,4-Dihydroxyphenylacetic Acid",
                "3-Hydroxybutyric Acid",
                "Abscisic Acid",
                "Acetic Acid",
                "Aurintricarboxylic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Citric Acid",
                "Diethylhexyl Phthalate",
                "Dimethyl Fumarate",
                "Glucaric Acid",
                "Glucuronic Acid",
                "Glyceric Acids",
                "Lactic Acid",
                "Meprobamate",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Polylactic Acid-Polyglycolic Acid Copolymer",
                "Propionates",
                "Pyruvic Acid",
                "Retinyl Esters",
                "Salicylates",
                "Salicylic Acid",
                "Sodium Acetate",
                "Sodium Salicylate",
                "Succinic Acid",
                "Valproic Acid",
                "Vigabatrin"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Propanoic acid (CH3CH2COOH)",
            "Butanoic acid (CH3(CH2)2COOH)",
            "Benzoic acid (C6H5COOH)",
            "Citric acid (C6H8O7)",
            "Oxalic acid (C2H2O4)",
            "Lactic acid (CH3CH(OH)COOH)",
            "Malic acid (C4H6O5)",
            "Tartaric acid (C4H6O6)",
            "Succinic acid (C4H6O4)",
            "Fumaric acid (C4H4O4)",
            "Maleic acid (C4H4O4)",
            "Glutaric acid (C5H8O4)",
            "Adipic acid (C6H10O4)",
            "Pimelic acid (C7H12O4)",
            "Stearic acid (CH3(CH2)16COOH)",
            "Palmitic acid (CH3(CH2)14COOH)",
            "Oleic acid (CH3(CH2)7CH=CH(CH2)7COOH)",
            "Linoleic acid (C18H32O2)",
            "Arachidonic acid (C20H32O2)",
            "Salicylic acid (C7H6O3)",
            "Acetylsalicylic acid (C9H8O4)",
            "Valproic acid (C8H16O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Benzoic acid (C6H5COOH)",
                "Citric acid (C6H8O7)",
                "Oxalic acid (C2H2O4)",
                "Lactic acid (CH3CH(OH)COOH)",
                "Succinic acid (C4H6O4)",
                "Pimelic acid (C7H12O4)",
                "Salicylic acid (C7H6O3)",
                "Valproic acid (C8H16O2)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Propanoic acid (CH3CH2COOH)",
                "Butanoic acid (CH3(CH2)2COOH)",
                "Malic acid (C4H6O5)",
                "Tartaric acid (C4H6O6)",
                "Fumaric acid (C4H4O4)",
                "Maleic acid (C4H4O4)",
                "Glutaric acid (C5H8O4)",
                "Adipic acid (C6H10O4)",
                "Stearic acid (CH3(CH2)16COOH)",
                "Palmitic acid (CH3(CH2)14COOH)",
                "Oleic acid (CH3(CH2)7CH=CH(CH2)7COOH)",
                "Linoleic acid (C18H32O2)",
                "Arachidonic acid (C20H32O2)",
                "Acetylsalicylic acid (C9H8O4)"
            ],
            "true_referents": [
                "2-Aminoadipic Acid",
                "3-Hydroxybutyric Acid",
                "Acetates",
                "Acetic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Citric Acid",
                "Dimethyl Fumarate",
                "Glucaric Acid",
                "Lactates",
                "Lactic Acid",
                "Meprobamate",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Phthalic Acids",
                "Pimelic Acids",
                "Polylactic Acid-Polyglycolic Acid Copolymer",
                "Salicylic Acid",
                "Sodium Acetate",
                "Sodium Lactate",
                "Sodium Salicylate",
                "Succinic Acid",
                "Valproic Acid",
                "Vigabatrin"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic Acid (CH3COOH)",
            "Formic Acid (HCOOH)",
            "Benzoic Acid (C6H5COOH)",
            "Lactic Acid (CH3CHOHCOOH)",
            "Oxalic Acid (HOOCCOOH)",
            "Citric Acid (C6H8O7)",
            "Salicylic Acid (C6H4(OH)COOH)",
            "Butyric Acid (CH3CH2CH2COOH)",
            "Valeric Acid (CH3CH2CH2CH2COOH)",
            "Succinic Acid (HOOCCH2CH2COOH)",
            "Glutaric Acid (HOOCCH2CH2CH2COOH)",
            "Adipic Acid (HOOC(CH2)4COOH)",
            "Maleic Acid (HOOCCH=CHCOOH)",
            "Fumaric Acid (HOOC-CH=CH-COOH)",
            "Malic Acid (HOOCCH2CHOHCOOH)",
            "Tartaric Acid (HOOCCHOHCHOHCOOH)",
            "Pyruvic Acid (CH3COCOOH)",
            "Ascorbic Acid (C6H8O6)",
            "Stearic Acid (CH3(CH2)16COOH)",
            "Oleic Acid (CH3(CH2)7CH=CH(CH2)7COOH)",
            "Linoleic Acid (CH3(CH2)4CH=CHCH2CH=CH(CH2)7COOH)",
            "Linolenic Acid (CH3CH2CH=CHCH2CH=CHCH2CH=CH(CH2)7COOH)",
            "Arachidonic Acid (CH3(CH2)4CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)3COOH)",
            "Docosahexaenoic Acid (CH3CH2CH=CHCH2CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)2COOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic Acid (CH3COOH)",
                "Benzoic Acid (C6H5COOH)",
                "Lactic Acid (CH3CHOHCOOH)",
                "Oxalic Acid (HOOCCOOH)",
                "Citric Acid (C6H8O7)",
                "Salicylic Acid (C6H4(OH)COOH)",
                "Butyric Acid (CH3CH2CH2COOH)",
                "Valeric Acid (CH3CH2CH2CH2COOH)",
                "Succinic Acid (HOOCCH2CH2COOH)",
                "Pyruvic Acid (CH3COCOOH)",
                "Ascorbic Acid (C6H8O6)"
            ],
            "mismatches": [
                "Formic Acid (HCOOH)",
                "Glutaric Acid (HOOCCH2CH2CH2COOH)",
                "Adipic Acid (HOOC(CH2)4COOH)",
                "Maleic Acid (HOOCCH=CHCOOH)",
                "Fumaric Acid (HOOC-CH=CH-COOH)",
                "Malic Acid (HOOCCH2CHOHCOOH)",
                "Tartaric Acid (HOOCCHOHCHOHCOOH)",
                "Stearic Acid (CH3(CH2)16COOH)",
                "Oleic Acid (CH3(CH2)7CH=CH(CH2)7COOH)",
                "Linoleic Acid (CH3(CH2)4CH=CHCH2CH=CH(CH2)7COOH)",
                "Linolenic Acid (CH3CH2CH=CHCH2CH=CHCH2CH=CH(CH2)7COOH)",
                "Arachidonic Acid (CH3(CH2)4CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)3COOH)",
                "Docosahexaenoic Acid (CH3CH2CH=CHCH2CH=CHCH2CH=CHCH2CH=CHCH2CH=CH(CH2)2COOH)"
            ],
            "true_referents": [
                "3-Hydroxybutyric Acid",
                "Acetates",
                "Acetic Acid",
                "Ascorbic Acid",
                "Benzoic Acid",
                "Butyrates",
                "Butyric Acid",
                "Citric Acid",
                "Dehydroascorbic Acid",
                "Dimethyl Fumarate",
                "Esters",
                "Glucaric Acid",
                "Lactates",
                "Lactic Acid",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Pyruvic Acid",
                "Salicylic Acid",
                "Sodium Acetate",
                "Sodium Lactate",
                "Sodium Salicylate",
                "Succinic Acid",
                "Valerates"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH) (AcA)",
            "Benzoic acid (C6H5COOH) (BzA)",
            "Formic acid (HCOOH) (FA)",
            "Oxalic acid (C2H2O4) (OA)",
            "Malonic acid (C3H4O4) (MA)",
            "Malic acid (C4H6O5) (MLA)",
            "Succinic acid (C4H6O4) (SA)",
            "Fumaric acid (C4H4O4) (FA)",
            "Maleic acid (C4H4O4) (MLA)",
            "Glutaric acid (C5H8O4) (GA)",
            "Adipic acid (C6H10O4) (ADA)",
            "Sebacic acid (C10H16O4) (SA)",
            "Undecanoic acid (C11H22O2) (UDA)",
            "Lauroyl acid (C12H24O2) (LA)",
            "Oleic acid (C18H34O2) (OA)",
            "Linoleic acid (C18H32O2) (LNA)",
            "Linolenic acid (C18H30O2) (LNA)",
            "Stearic acid (C18H36O2) (SA)",
            "Palmitic acid (C16H32O2) (PA)",
            "Capric acid (C10H20O2) (CA)",
            "Caprylic acid (C8H16O2) (CA)",
            "Caproic acid (C6H12O2) (CA)",
            "Gluconic acid (C6H12O7) (GA)",
            "Tartronic acid (C4H6O6) (TA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH) (AcA)",
                "Benzoic acid (C6H5COOH) (BzA)",
                "Oxalic acid (C2H2O4) (OA)",
                "Succinic acid (C4H6O4) (SA)"
            ],
            "mismatches": [
                "Formic acid (HCOOH) (FA)",
                "Malonic acid (C3H4O4) (MA)",
                "Malic acid (C4H6O5) (MLA)",
                "Fumaric acid (C4H4O4) (FA)",
                "Maleic acid (C4H4O4) (MLA)",
                "Glutaric acid (C5H8O4) (GA)",
                "Adipic acid (C6H10O4) (ADA)",
                "Sebacic acid (C10H16O4) (SA)",
                "Undecanoic acid (C11H22O2) (UDA)",
                "Lauroyl acid (C12H24O2) (LA)",
                "Oleic acid (C18H34O2) (OA)",
                "Linoleic acid (C18H32O2) (LNA)",
                "Linolenic acid (C18H30O2) (LNA)",
                "Stearic acid (C18H36O2) (SA)",
                "Palmitic acid (C16H32O2) (PA)",
                "Capric acid (C10H20O2) (CA)",
                "Caprylic acid (C8H16O2) (CA)",
                "Caproic acid (C6H12O2) (CA)",
                "Gluconic acid (C6H12O7) (GA)",
                "Tartronic acid (C4H6O6) (TA)"
            ],
            "true_referents": [
                "2-Aminoadipic Acid",
                "3,4-Dihydroxyphenylacetic Acid",
                "3-Hydroxybutyric Acid",
                "4-Aminobenzoic Acid",
                "Acetic Acid",
                "Aminocaproic Acid",
                "Aurintricarboxylic Acid",
                "Benzoic Acid",
                "Butyric Acid",
                "Calcium Citrate",
                "Caprylates",
                "Citric Acid",
                "Dicarboxylic Acids",
                "Dimethyl Fumarate",
                "Fumarates",
                "Glucaric Acid",
                "Glucuronic Acid",
                "Glyceric Acids",
                "Methylmalonic Acid",
                "Metrizoic Acid",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Pentetic Acid",
                "Polylactic Acid-Polyglycolic Acid Copolymer",
                "Sodium Acetate",
                "Succinates",
                "Succinic Acid"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Oxalic acid (C2H2O4)",
            "Citric acid (C6H8O7)",
            "Tartaric acid (C4H6O6)",
            "Malic acid (C4H6O5)",
            "Fumaric acid (C4H4O4)",
            "Succinic acid (C4H6O4)",
            "Glutaric acid (C5H8O4)",
            "Adipic acid (C6H10O4)",
            "Pimelic acid (C7H12O4)",
            "Salicylic acid (C7H6O3)",
            "Benzoic acid (C7H6O2)",
            "Hexylresorcinic acid (C12H18O3)",
            "Ursolic acid (C30H48O3)",
            "Cholic acid (C24H40O5)",
            "Deoxycholic acid (C24H40O4)",
            "Chenodeoxycholic acid (C24H40O4)",
            "Lithocholic acid (C24H40O3)",
            "Hyodeoxycholic acid (C24H42O4)",
            "Hyocholic acid (C24H42O5)",
            "Taurocholic acid (C26H45NO7S)",
            "Glycocholic acid (C26H43NO8S)",
            "Taurolithocholic acid (C26H45NO7S)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Oxalic acid (C2H2O4)",
                "Citric acid (C6H8O7)",
                "Succinic acid (C4H6O4)",
                "Pimelic acid (C7H12O4)",
                "Salicylic acid (C7H6O3)",
                "Benzoic acid (C7H6O2)"
            ],
            "mismatches": [
                "Formic acid (HCOOH)",
                "Tartaric acid (C4H6O6)",
                "Malic acid (C4H6O5)",
                "Fumaric acid (C4H4O4)",
                "Glutaric acid (C5H8O4)",
                "Adipic acid (C6H10O4)",
                "Hexylresorcinic acid (C12H18O3)",
                "Ursolic acid (C30H48O3)",
                "Cholic acid (C24H40O5)",
                "Deoxycholic acid (C24H40O4)",
                "Chenodeoxycholic acid (C24H40O4)",
                "Lithocholic acid (C24H40O3)",
                "Hyodeoxycholic acid (C24H42O4)",
                "Hyocholic acid (C24H42O5)",
                "Taurocholic acid (C26H45NO7S)",
                "Glycocholic acid (C26H43NO8S)",
                "Taurolithocholic acid (C26H45NO7S)"
            ],
            "true_referents": [
                "2,4,5-Trichlorophenoxyacetic Acid",
                "2,4-Dichlorophenoxyacetic Acid",
                "2-Aminoadipic Acid",
                "3,4-Dihydroxyphenylacetic Acid",
                "Acetic Acid",
                "Benzoic Acid",
                "Chorismic Acid",
                "Citric Acid",
                "Dimethyl Fumarate",
                "Dioctyl Sulfosuccinic Acid",
                "Glucaric Acid",
                "Glucuronic Acid",
                "Hexuronic Acids",
                "Mevalonic Acid",
                "Oxalic Acid",
                "Oxaloacetic Acid",
                "Phthalic Acids",
                "Pimelic Acids",
                "Propyl Gallate",
                "Quinic Acid",
                "Salicylic Acid",
                "Sodium Salicylate",
                "Succinic Acid",
                "Thioctic Acid"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "carboxylic acid",
        "domain": "chemistry",
        "response": [
            "Acetic acid",
            "Benzoic acid",
            "Butanoic acid",
            "Caproic acid",
            "Caprylic acid",
            "Cinnamic acid",
            "Citric acid",
            "Cyclohexanecarboxylic acid",
            "Decanoic acid",
            "Dodecanoic acid",
            "Diethylmalonate",
            "Diethylsuccinate",
            "Dihydroxybenzoic acid",
            "Dimethylglycolic acid",
            "Dimethylmalonate",
            "Dimethylsuccinate",
            "Dipropyleneglycolic acid",
            "Dipropylsuccinate",
            "Ethylmalonate",
            "Ethylsuccinate",
            "Fumaric acid",
            "Glutaric acid",
            "Glycolic acid",
            "Heptanoic acid",
            "Hydroxybenzoic acid",
            "Isobutyric acid",
            "Isovaleric acid",
            "Lauric acid",
            "Lactic acid",
            "Maleic acid",
            "Malonic acid",
            "Mandelic acid",
            "Methacrylic acid",
            "Methylmalonate",
            "Methylsuccinate",
            "Myristic acid",
            "Naphtanoic acid",
            "Nonanoic acid",
            "Octanoic acid",
            "Palmitic acid",
            "Phenylacetic acid",
            "Phenylmalonic acid",
            "Phenylpropionic acid",
            "Propionic acid",
            "Pyruvic acid",
            "Salicylic acid",
            "Succinic acid",
            "Terephthalic acid",
            "Valeric acid",
            "Valproic acid",
            "Xenobiotic acid",
            "Xylic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid",
                "Benzoic acid",
                "Citric acid",
                "Cyclohexanecarboxylic acid",
                "Hydroxybenzoic acid",
                "Succinic acid"
            ],
            "mismatches": [
                "Butanoic acid",
                "Caproic acid",
                "Caprylic acid",
                "Cinnamic acid",
                "Decanoic acid",
                "Dodecanoic acid",
                "Diethylmalonate",
                "Diethylsuccinate",
                "Dihydroxybenzoic acid",
                "Dimethylglycolic acid",
                "Dimethylmalonate",
                "Dimethylsuccinate",
                "Dipropyleneglycolic acid",
                "Dipropylsuccinate",
                "Ethylmalonate",
                "Ethylsuccinate",
                "Fumaric acid",
                "Glutaric acid",
                "Glycolic acid",
                "Heptanoic acid",
                "Isobutyric acid",
                "Isovaleric acid",
                "Lauric acid",
                "Lactic acid",
                "Maleic acid",
                "Malonic acid",
                "Mandelic acid",
                "Methacrylic acid",
                "Methylmalonate",
                "Methylsuccinate",
                "Myristic acid",
                "Naphtanoic acid",
                "Nonanoic acid",
                "Octanoic acid",
                "Palmitic acid",
                "Phenylacetic acid",
                "Phenylmalonic acid",
                "Phenylpropionic acid",
                "Propionic acid",
                "Pyruvic acid",
                "Salicylic acid",
                "Terephthalic acid",
                "Valeric acid",
                "Valproic acid",
                "Xenobiotic acid",
                "Xylic acid"
            ],
            "true_referents": [
                "2,3-Diphosphoglycerate",
                "3,4-Dihydroxyphenylacetic Acid",
                "3-Hydroxybutyric Acid",
                "Acetic Acid",
                "Acrylamide",
                "Acrylates",
                "Aminocaproic Acid",
                "Benzoic Acid",
                "Butyrates",
                "Butyric Acid",
                "Caprylates",
                "Citric Acid",
                "Coumaric Acids",
                "Cyclohexanecarboxylic Acids",
                "Dicarboxylic Acids",
                "Diethyl Pyrocarbonate",
                "Dimethyl Fumarate",
                "Dimethyldithiocarbamate",
                "Dimethylformamide",
                "Dioctyl Sulfosuccinic Acid",
                "Diphosphoglyceric Acids",
                "Ethylmaleimide",
                "Glucaric Acid",
                "Glyceric Acids",
                "Hydroxybenzoates",
                "Methylmalonic Acid",
                "Phenylethylmalonamide",
                "Quinic Acid",
                "Succinates",
                "Succinic Acid",
                "Thioctic Acid"
            ],
            "TP": 6,
            "FP": 46,
            "FN": 0
        }
    }
]