[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "NADH (Nicotinamide adenine dinucleotide)",
            "NADPH (Nicotinamide adenine dinucleotide phosphate)",
            "Glucose",
            "Hydrogen gas (H2)",
            "Formaldehyde (HCHO)",
            "Sodium borohydride (NaBH4)",
            "Sodium cyanoborohydride (NaBH3CN)",
            "Iron (II) sulfate (FeSO4)",
            "Sodium bisulfite (NaHSO3)",
            "Sodium dithionite (Na2S2O4)",
            "Sodium hydrosulfite (Na2S2O4)",
            "Sodium sulfite (Na2SO3)",
            "Sodium sulfide (Na2S)",
            "Sodium thiosulfate (Na2S2O3)",
            "Sodium tetrahydroborate (NaBH4)",
            "Sodium triacetoxyborohydride (NaBH(OAc)3)",
            "Sodium borohydride solution",
            "Potassium ferricyanide (K3Fe(CN)6)",
            "Potassium iodide (KI)",
            "Potassium permanganate (KMnO4) in acidic solution",
            "Hydrazine (N2H4)",
            "Tetrahydrofuran (THF)",
            "Trimethylsilyl hydride (Me3SiH)",
            "Zinc (Zn)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH (Nicotinamide adenine dinucleotide)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate)",
                "Glucose",
                "Hydrogen gas (H2)",
                "Formaldehyde (HCHO)",
                "Sodium borohydride (NaBH4)",
                "Sodium cyanoborohydride (NaBH3CN)",
                "Iron (II) sulfate (FeSO4)",
                "Sodium bisulfite (NaHSO3)",
                "Sodium dithionite (Na2S2O4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Sodium sulfite (Na2SO3)",
                "Sodium sulfide (Na2S)",
                "Sodium thiosulfate (Na2S2O3)",
                "Sodium tetrahydroborate (NaBH4)",
                "Sodium triacetoxyborohydride (NaBH(OAc)3)",
                "Sodium borohydride solution",
                "Potassium ferricyanide (K3Fe(CN)6)",
                "Potassium iodide (KI)",
                "Potassium permanganate (KMnO4) in acidic solution",
                "Hydrazine (N2H4)",
                "Tetrahydrofuran (THF)",
                "Trimethylsilyl hydride (Me3SiH)",
                "Zinc (Zn)"
            ],
            "mismatches": [],
            "true_referents": [
                "Formaldehyde (HCHO)",
                "Glucose",
                "Hydrazine (N2H4)",
                "Hydrogen gas (H2)",
                "Iron (II) sulfate (FeSO4)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate)",
                "Potassium ferricyanide (K3Fe(CN)6)",
                "Potassium iodide (KI)",
                "Potassium permanganate (KMnO4) in acidic solution",
                "Sodium bisulfite (NaHSO3)",
                "Sodium borohydride (NaBH4)",
                "Sodium borohydride solution",
                "Sodium cyanoborohydride (NaBH3CN)",
                "Sodium dithionite (Na2S2O4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Sodium sulfide (Na2S)",
                "Sodium sulfite (Na2SO3)",
                "Sodium tetrahydroborate (NaBH4)",
                "Sodium thiosulfate (Na2S2O3)",
                "Sodium triacetoxyborohydride (NaBH(OAc)3)",
                "Tetrahydrofuran (THF)",
                "Trimethylsilyl hydride (Me3SiH)",
                "Zinc (Zn)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Hydrogen gas (H2)",
            "Lithium aluminium hydride (LiAlH4)",
            "Sodium borohydride (NaBH4)",
            "Ascorbic acid (Vitamin C)",
            "Glutathione",
            "NADPH (Nicotinamide adenine dinucleotide phosphate)",
            "NADH (Nicotinamide adenine dinucleotide)",
            "FADH2 (Flavin adenine dinucleotide)",
            "Carbon monoxide (CO)",
            "Formaldehyde (CH2O)",
            "Ethanol (C2H5OH)",
            "Glucose (C6H12O6)",
            "Iron(II) ions (Fe2+)",
            "Copper(I) ions (Cu+)",
            "Hydrazine (N2H4)",
            "Sulfur dioxide (SO2)",
            "Hydrogen peroxide (H2O2) in catalytic amounts",
            "Dithiothreitol (DTT)",
            "Tetrathionate (S4O6^2-)",
            "Dimethyl sulfoxide (DMSO) under certain conditions",
            "Ferrous sulfate (FeSO4)",
            "Zinc metal (Zn)",
            "Sodium hydrosulfite (Na2S2O4)",
            "Thiosulfate ions (S2O3^2-)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen gas (H2)",
                "Lithium aluminium hydride (LiAlH4)",
                "Sodium borohydride (NaBH4)",
                "Ascorbic acid (Vitamin C)",
                "Glutathione",
                "NADPH (Nicotinamide adenine dinucleotide phosphate)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "FADH2 (Flavin adenine dinucleotide)",
                "Carbon monoxide (CO)",
                "Formaldehyde (CH2O)",
                "Ethanol (C2H5OH)",
                "Glucose (C6H12O6)",
                "Iron(II) ions (Fe2+)",
                "Copper(I) ions (Cu+)",
                "Hydrazine (N2H4)",
                "Sulfur dioxide (SO2)",
                "Hydrogen peroxide (H2O2) in catalytic amounts",
                "Dithiothreitol (DTT)",
                "Tetrathionate (S4O6^2-)",
                "Dimethyl sulfoxide (DMSO) under certain conditions",
                "Ferrous sulfate (FeSO4)",
                "Zinc metal (Zn)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Thiosulfate ions (S2O3^2-)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic acid (Vitamin C)",
                "Carbon monoxide (CO)",
                "Copper(I) ions (Cu+)",
                "Dimethyl sulfoxide (DMSO) under certain conditions",
                "Dithiothreitol (DTT)",
                "Ethanol (C2H5OH)",
                "FADH2 (Flavin adenine dinucleotide)",
                "Ferrous sulfate (FeSO4)",
                "Formaldehyde (CH2O)",
                "Glucose (C6H12O6)",
                "Glutathione",
                "Hydrazine (N2H4)",
                "Hydrogen gas (H2)",
                "Hydrogen peroxide (H2O2) in catalytic amounts",
                "Iron(II) ions (Fe2+)",
                "Lithium aluminium hydride (LiAlH4)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate)",
                "Sodium borohydride (NaBH4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Sulfur dioxide (SO2)",
                "Tetrathionate (S4O6^2-)",
                "Thiosulfate ions (S2O3^2-)",
                "Zinc metal (Zn)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Sodium Borohydride (NaBH4)",
            "Lithium Aluminum Hydride (LiAlH4)",
            "Sodium Cyanoborohydride (NaBH3CN)",
            "Hydrogen (H2)",
            "Carbon Monoxide (CO)",
            "Sodium Dithionite (Na2S2O4)",
            "Formic Acid (HCOOH)",
            "Ascorbic Acid (Vitamin C)",
            "Glutathione",
            "Thiourea",
            "Dithiothreitol (DTT)",
            "Mercaptoethanol",
            "Sodium Dithionite (Na2S2O4)",
            "Sodium Bisulfite (NaHSO3)",
            "Hydrazine (N2H4)",
            "Hydroquinone",
            "Cysteine",
            "Sodium Metabisulfite (Na2S2O5)",
            "Formaldehyde (HCHO)",
            "Sodium Thiosulfate (Na2S2O3)",
            "Sodium Hydroxymethanesulfinate (Rongalite)",
            "Glucose",
            "Sodium Hydrosulfite",
            "Sodium Sulfite (Na2SO3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sodium Borohydride (NaBH4)",
                "Lithium Aluminum Hydride (LiAlH4)",
                "Sodium Cyanoborohydride (NaBH3CN)",
                "Hydrogen (H2)",
                "Carbon Monoxide (CO)",
                "Sodium Dithionite (Na2S2O4)",
                "Formic Acid (HCOOH)",
                "Ascorbic Acid (Vitamin C)",
                "Glutathione",
                "Thiourea",
                "Dithiothreitol (DTT)",
                "Mercaptoethanol",
                "Sodium Bisulfite (NaHSO3)",
                "Hydrazine (N2H4)",
                "Hydroquinone",
                "Cysteine",
                "Sodium Metabisulfite (Na2S2O5)",
                "Formaldehyde (HCHO)",
                "Sodium Thiosulfate (Na2S2O3)",
                "Sodium Hydroxymethanesulfinate (Rongalite)",
                "Glucose",
                "Sodium Sulfite (Na2SO3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic Acid (Vitamin C)",
                "Carbon Monoxide (CO)",
                "Cysteine",
                "Dithiothreitol (DTT)",
                "Formaldehyde (HCHO)",
                "Formic Acid (HCOOH)",
                "Glucose",
                "Glutathione",
                "Hydrazine (N2H4)",
                "Hydrogen (H2)",
                "Hydroquinone",
                "Lithium Aluminum Hydride (LiAlH4)",
                "Mercaptoethanol",
                "Sodium Bisulfite (NaHSO3)",
                "Sodium Borohydride (NaBH4)",
                "Sodium Cyanoborohydride (NaBH3CN)",
                "Sodium Dithionite (Na2S2O4)",
                "Sodium Hydrosulfite",
                "Sodium Hydroxymethanesulfinate (Rongalite)",
                "Sodium Metabisulfite (Na2S2O5)",
                "Sodium Sulfite (Na2SO3)",
                "Sodium Thiosulfate (Na2S2O3)",
                "Thiourea"
            ],
            "TP": 22,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Sodium Borohydride (NaBH\u2084)",
            "Lithium Aluminum Hydride (LiAlH\u2084)",
            "Ascorbic Acid (Vitamin C)",
            "Glutathione (GSH)",
            "NADH (Nicotinamide Adenine Dinucleotide, reduced form)",
            "NADPH (Nicotinamide Adenine Dinucleotide Phosphate, reduced form)",
            "Formic Acid (HCOOH)",
            "Hypophosphorous Acid (H\u2083PO\u2082)",
            "Tin(II) Chloride (SnCl\u2082)",
            "Iron(II) Sulfate (FeSO\u2084)",
            "Zinc (Zn)",
            "Hydrogen Gas (H\u2082)",
            "Hydrazine (N\u2082H\u2084)",
            "Diisobutylaluminum Hydride (DIBAL-H)",
            "Sodium Sulfite (Na\u2082SO\u2083)",
            "Sodium Thiosulfate (Na\u2082S\u2082O\u2083)",
            "Borane (BH\u2083)",
            "Titanium(III) Chloride (TiCl\u2083)",
            "Potassium Iodide (KI)",
            "Hydrogen Sulfide (H\u2082S)",
            "Ethanol (C\u2082H\u2085OH) - in certain contexts",
            "Formamide (HCONH\u2082)",
            "Dimethyl Sulfide (DMS)",
            "Triphenylphosphine (PPh\u2083)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sodium Borohydride (NaBH\u2084)",
                "Lithium Aluminum Hydride (LiAlH\u2084)",
                "Ascorbic Acid (Vitamin C)",
                "Glutathione (GSH)",
                "NADH (Nicotinamide Adenine Dinucleotide, reduced form)",
                "NADPH (Nicotinamide Adenine Dinucleotide Phosphate, reduced form)",
                "Formic Acid (HCOOH)",
                "Hypophosphorous Acid (H\u2083PO\u2082)",
                "Tin(II) Chloride (SnCl\u2082)",
                "Iron(II) Sulfate (FeSO\u2084)",
                "Zinc (Zn)",
                "Hydrogen Gas (H\u2082)",
                "Hydrazine (N\u2082H\u2084)",
                "Diisobutylaluminum Hydride (DIBAL-H)",
                "Sodium Sulfite (Na\u2082SO\u2083)",
                "Sodium Thiosulfate (Na\u2082S\u2082O\u2083)",
                "Borane (BH\u2083)",
                "Titanium(III) Chloride (TiCl\u2083)",
                "Potassium Iodide (KI)",
                "Hydrogen Sulfide (H\u2082S)",
                "Ethanol (C\u2082H\u2085OH) - in certain contexts",
                "Formamide (HCONH\u2082)",
                "Dimethyl Sulfide (DMS)",
                "Triphenylphosphine (PPh\u2083)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic Acid (Vitamin C)",
                "Borane (BH\u2083)",
                "Diisobutylaluminum Hydride (DIBAL-H)",
                "Dimethyl Sulfide (DMS)",
                "Ethanol (C\u2082H\u2085OH) - in certain contexts",
                "Formamide (HCONH\u2082)",
                "Formic Acid (HCOOH)",
                "Glutathione (GSH)",
                "Hydrazine (N\u2082H\u2084)",
                "Hydrogen Gas (H\u2082)",
                "Hydrogen Sulfide (H\u2082S)",
                "Hypophosphorous Acid (H\u2083PO\u2082)",
                "Iron(II) Sulfate (FeSO\u2084)",
                "Lithium Aluminum Hydride (LiAlH\u2084)",
                "NADH (Nicotinamide Adenine Dinucleotide, reduced form)",
                "NADPH (Nicotinamide Adenine Dinucleotide Phosphate, reduced form)",
                "Potassium Iodide (KI)",
                "Sodium Borohydride (NaBH\u2084)",
                "Sodium Sulfite (Na\u2082SO\u2083)",
                "Sodium Thiosulfate (Na\u2082S\u2082O\u2083)",
                "Tin(II) Chloride (SnCl\u2082)",
                "Titanium(III) Chloride (TiCl\u2083)",
                "Triphenylphosphine (PPh\u2083)",
                "Zinc (Zn)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Hydrogen gas (H2)",
            "Carbon monoxide (CO)",
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Diborane (B2H6)",
            "Iron(II) sulfate (FeSO4)",
            "Tin(II) chloride (SnCl2)",
            "Zinc (Zn)",
            "Copper(I) chloride (CuCl)",
            "Titanium(III) chloride (TiCl3)",
            "Nickel(II) chloride (NiCl2)",
            "Chromium(II) chloride (CrCl2)",
            "Sodium thiosulfate (Na2S2O3)",
            "Ascorbic acid (C6H8O6)",
            "Glutathione (GSH)",
            "Dithiothreitol (DTT)",
            "Mercaptoethanol (BME)",
            "Sodium hydrosulfite (Na2S2O4)",
            "Hypophosphorous acid (H3PO2)",
            "Formic acid (HCOOH)",
            "Hydrazine (N2H4)",
            "NADH (Nicotinamide adenine dinucleotide)",
            "FADH2 (Flavin adenine dinucleotide)",
            "Cysteine (Cys)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen gas (H2)",
                "Carbon monoxide (CO)",
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Diborane (B2H6)",
                "Iron(II) sulfate (FeSO4)",
                "Tin(II) chloride (SnCl2)",
                "Zinc (Zn)",
                "Copper(I) chloride (CuCl)",
                "Titanium(III) chloride (TiCl3)",
                "Nickel(II) chloride (NiCl2)",
                "Chromium(II) chloride (CrCl2)",
                "Sodium thiosulfate (Na2S2O3)",
                "Ascorbic acid (C6H8O6)",
                "Glutathione (GSH)",
                "Dithiothreitol (DTT)",
                "Mercaptoethanol (BME)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Hypophosphorous acid (H3PO2)",
                "Formic acid (HCOOH)",
                "Hydrazine (N2H4)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "FADH2 (Flavin adenine dinucleotide)",
                "Cysteine (Cys)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic acid (C6H8O6)",
                "Carbon monoxide (CO)",
                "Chromium(II) chloride (CrCl2)",
                "Copper(I) chloride (CuCl)",
                "Cysteine (Cys)",
                "Diborane (B2H6)",
                "Dithiothreitol (DTT)",
                "FADH2 (Flavin adenine dinucleotide)",
                "Formic acid (HCOOH)",
                "Glutathione (GSH)",
                "Hydrazine (N2H4)",
                "Hydrogen gas (H2)",
                "Hypophosphorous acid (H3PO2)",
                "Iron(II) sulfate (FeSO4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Mercaptoethanol (BME)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "Nickel(II) chloride (NiCl2)",
                "Sodium borohydride (NaBH4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Sodium thiosulfate (Na2S2O3)",
                "Tin(II) chloride (SnCl2)",
                "Titanium(III) chloride (TiCl3)",
                "Zinc (Zn)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Hydrogen gas (H2)",
            "Zinc dust (Zn)",
            "Iron powder (Fe)",
            "Sodium dithionite (Na2S2O4)",
            "Lactate (C3H5O3-)",
            "Ascorbic acid (Vitamin C, C6H8O6)",
            "Formic acid (HCOOH)",
            "NADH (Nicotinamide adenine dinucleotide, reduced form)",
            "Thioacetamide (C2H5NS)",
            "Sodium sulfite (Na2SO3)",
            "Tartaric acid (C4H6O6)",
            "Boron trifluoride etherate (BF3\u00b7OEt2)",
            "Ammonium thiol (R-NH3^+)",
            "Cyanoborohydride (NaCNBH3)",
            "Tellurium (Te)",
            "Calcium hydride (CaH2)",
            "Sodium metal (Na)",
            "Copper(I) oxide (Cu2O)",
            "Magnesium (Mg)",
            "Aluminum (Al)",
            "Potassium borohydride (KBH4)",
            "Ferrous sulfate (FeSO4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Hydrogen gas (H2)",
                "Zinc dust (Zn)",
                "Iron powder (Fe)",
                "Sodium dithionite (Na2S2O4)",
                "Lactate (C3H5O3-)",
                "Ascorbic acid (Vitamin C, C6H8O6)",
                "Formic acid (HCOOH)",
                "NADH (Nicotinamide adenine dinucleotide, reduced form)",
                "Thioacetamide (C2H5NS)",
                "Sodium sulfite (Na2SO3)",
                "Tartaric acid (C4H6O6)",
                "Boron trifluoride etherate (BF3\u00b7OEt2)",
                "Ammonium thiol (R-NH3^+)",
                "Cyanoborohydride (NaCNBH3)",
                "Tellurium (Te)",
                "Calcium hydride (CaH2)",
                "Sodium metal (Na)",
                "Copper(I) oxide (Cu2O)",
                "Magnesium (Mg)",
                "Aluminum (Al)",
                "Potassium borohydride (KBH4)",
                "Ferrous sulfate (FeSO4)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aluminum (Al)",
                "Ammonium thiol (R-NH3^+)",
                "Ascorbic acid (Vitamin C, C6H8O6)",
                "Boron trifluoride etherate (BF3\u00b7OEt2)",
                "Calcium hydride (CaH2)",
                "Copper(I) oxide (Cu2O)",
                "Cyanoborohydride (NaCNBH3)",
                "Ferrous sulfate (FeSO4)",
                "Formic acid (HCOOH)",
                "Hydrogen gas (H2)",
                "Iron powder (Fe)",
                "Lactate (C3H5O3-)",
                "Lithium aluminum hydride (LiAlH4)",
                "Magnesium (Mg)",
                "NADH (Nicotinamide adenine dinucleotide, reduced form)",
                "Potassium borohydride (KBH4)",
                "Sodium borohydride (NaBH4)",
                "Sodium dithionite (Na2S2O4)",
                "Sodium metal (Na)",
                "Sodium sulfite (Na2SO3)",
                "Tartaric acid (C4H6O6)",
                "Tellurium (Te)",
                "Thioacetamide (C2H5NS)",
                "Zinc dust (Zn)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H)",
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Zinc (Zn)",
            "Iron (Fe)",
            "Tin (Sn)",
            "Titanium (Ti)",
            "Hydrogen sulfide (H2S)",
            "Sodium bisulfite (NaHSO3)",
            "Potassium iodide (KI)",
            "Ammonium formate (NH4HCO2)",
            "Ascorbic acid (C6H8O6)",
            "Glutathione (GSH)",
            "Cysteine (C3H7NO2S)",
            "Dithiothreitol (DTT)",
            "Beta-mercaptoethanol (\u03b2-ME)",
            "Hydroquinone (C6H4(OH)2)",
            "Formic acid (HCOOH)",
            "Hydrazine (N2H4)",
            "Sodium hydrosulfite (Na2S2O4)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Isopropanol (C3H8O)",
            "Phosphite ion (PO3^3-)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H)",
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Zinc (Zn)",
                "Iron (Fe)",
                "Tin (Sn)",
                "Titanium (Ti)",
                "Hydrogen sulfide (H2S)",
                "Sodium bisulfite (NaHSO3)",
                "Potassium iodide (KI)",
                "Ammonium formate (NH4HCO2)",
                "Ascorbic acid (C6H8O6)",
                "Glutathione (GSH)",
                "Cysteine (C3H7NO2S)",
                "Dithiothreitol (DTT)",
                "Beta-mercaptoethanol (\u03b2-ME)",
                "Hydroquinone (C6H4(OH)2)",
                "Formic acid (HCOOH)",
                "Hydrazine (N2H4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Isopropanol (C3H8O)",
                "Phosphite ion (PO3^3-)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ammonium formate (NH4HCO2)",
                "Ascorbic acid (C6H8O6)",
                "Beta-mercaptoethanol (\u03b2-ME)",
                "Cysteine (C3H7NO2S)",
                "Dithiothreitol (DTT)",
                "Ethanol (C2H5OH)",
                "Formic acid (HCOOH)",
                "Glutathione (GSH)",
                "Hydrazine (N2H4)",
                "Hydrogen (H)",
                "Hydrogen sulfide (H2S)",
                "Hydroquinone (C6H4(OH)2)",
                "Iron (Fe)",
                "Isopropanol (C3H8O)",
                "Lithium aluminum hydride (LiAlH4)",
                "Methanol (CH3OH)",
                "Phosphite ion (PO3^3-)",
                "Potassium iodide (KI)",
                "Sodium bisulfite (NaHSO3)",
                "Sodium borohydride (NaBH4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Tin (Sn)",
                "Titanium (Ti)",
                "Zinc (Zn)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Hydrazine (N2H4)",
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Sodium hydrosulfite (Na2S2O4)",
            "Sodium bisulfite (NaHSO3)",
            "Sodium dithionite (Na2S2O4)",
            "Sodium borohydride (NaBH4)",
            "Ascorbic acid (C6H8O6)",
            "Sodium ascorbate (C6H7NaO6)",
            "Glutathione (C10H17N3O6S)",
            "NADH (Nicotinamide adenine dinucleotide)",
            "L-cysteine (C3H7NO2S)",
            "Formaldehyde (CH2O)",
            "Sodium hypophosphite (NaPO2H2)",
            "Sodium thiosulfate (Na2S2O3)",
            "Sodium amalgam (Na(Hg))",
            "Titanium(III) chloride (TiCl3)",
            "Zinc (Zn)",
            "Iron(II) sulfate (FeSO4)",
            "Hydroquinone (C6H6O2)",
            "Sodium sulfite (Na2SO3)",
            "Sodium nitrite (NaNO2)",
            "Sodium metabisulfite (Na2S2O5)",
            "Sodium cyanoborohydride (NaBH3CN)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrazine (N2H4)",
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Sodium bisulfite (NaHSO3)",
                "Sodium dithionite (Na2S2O4)",
                "Ascorbic acid (C6H8O6)",
                "Sodium ascorbate (C6H7NaO6)",
                "Glutathione (C10H17N3O6S)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "L-cysteine (C3H7NO2S)",
                "Formaldehyde (CH2O)",
                "Sodium hypophosphite (NaPO2H2)",
                "Sodium thiosulfate (Na2S2O3)",
                "Sodium amalgam (Na(Hg))",
                "Titanium(III) chloride (TiCl3)",
                "Zinc (Zn)",
                "Iron(II) sulfate (FeSO4)",
                "Hydroquinone (C6H6O2)",
                "Sodium sulfite (Na2SO3)",
                "Sodium nitrite (NaNO2)",
                "Sodium metabisulfite (Na2S2O5)",
                "Sodium cyanoborohydride (NaBH3CN)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic acid (C6H8O6)",
                "Formaldehyde (CH2O)",
                "Glutathione (C10H17N3O6S)",
                "Hydrazine (N2H4)",
                "Hydroquinone (C6H6O2)",
                "Iron(II) sulfate (FeSO4)",
                "L-cysteine (C3H7NO2S)",
                "Lithium aluminum hydride (LiAlH4)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "Sodium amalgam (Na(Hg))",
                "Sodium ascorbate (C6H7NaO6)",
                "Sodium bisulfite (NaHSO3)",
                "Sodium borohydride (NaBH4)",
                "Sodium cyanoborohydride (NaBH3CN)",
                "Sodium dithionite (Na2S2O4)",
                "Sodium hydrosulfite (Na2S2O4)",
                "Sodium hypophosphite (NaPO2H2)",
                "Sodium metabisulfite (Na2S2O5)",
                "Sodium nitrite (NaNO2)",
                "Sodium sulfite (Na2SO3)",
                "Sodium thiosulfate (Na2S2O3)",
                "Titanium(III) chloride (TiCl3)",
                "Zinc (Zn)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "NADH (Nicotinamide adenine dinucleotide, reduced form)",
            "NADPH (Nicotinamide adenine dinucleotide phosphate, reduced form)",
            "FADH2 (Flavin adenine dinucleotide, reduced form)",
            "Glutathione (GSH)",
            "Ascorbic acid (Vitamin C)",
            "Thioredoxin",
            "Dithiothreitol (DTT)",
            "Beta-mercaptoethanol",
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Hydrogen gas (H2)",
            "Carbon monoxide (CO)",
            "Ethanol",
            "Formate",
            "Resazurin",
            "Alpha-tocopherol (Vitamin E)",
            "L-Cysteine",
            "Iron(II) sulfate (FeSO4)",
            "Sodium cyanoborohydride (NaBH3CN)",
            "Tris(2-carboxyethyl)phosphine (TCEP)",
            "Sodium dithionite",
            "Methionine",
            "Hydroquinone",
            "Tin(II) chloride (SnCl2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH (Nicotinamide adenine dinucleotide, reduced form)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate, reduced form)",
                "FADH2 (Flavin adenine dinucleotide, reduced form)",
                "Glutathione (GSH)",
                "Ascorbic acid (Vitamin C)",
                "Thioredoxin",
                "Dithiothreitol (DTT)",
                "Beta-mercaptoethanol",
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Hydrogen gas (H2)",
                "Carbon monoxide (CO)",
                "Ethanol",
                "Formate",
                "Resazurin",
                "Alpha-tocopherol (Vitamin E)",
                "L-Cysteine",
                "Iron(II) sulfate (FeSO4)",
                "Sodium cyanoborohydride (NaBH3CN)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Sodium dithionite",
                "Methionine",
                "Hydroquinone",
                "Tin(II) chloride (SnCl2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-tocopherol (Vitamin E)",
                "Ascorbic acid (Vitamin C)",
                "Beta-mercaptoethanol",
                "Carbon monoxide (CO)",
                "Dithiothreitol (DTT)",
                "Ethanol",
                "FADH2 (Flavin adenine dinucleotide, reduced form)",
                "Formate",
                "Glutathione (GSH)",
                "Hydrogen gas (H2)",
                "Hydroquinone",
                "Iron(II) sulfate (FeSO4)",
                "L-Cysteine",
                "Lithium aluminum hydride (LiAlH4)",
                "Methionine",
                "NADH (Nicotinamide adenine dinucleotide, reduced form)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate, reduced form)",
                "Resazurin",
                "Sodium borohydride (NaBH4)",
                "Sodium cyanoborohydride (NaBH3CN)",
                "Sodium dithionite",
                "Thioredoxin",
                "Tin(II) chloride (SnCl2)",
                "Tris(2-carboxyethyl)phosphine (TCEP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Hydrogen gas (H2)",
            "Zinc (Zn)",
            "Iron (Fe)",
            "Sodium (Na)",
            "Magnesium (Mg)",
            "Hydrazine (N2H4)",
            "Sodium dithionite (Na2S2O4)",
            "Ascorbic acid (Vitamin C)",
            "Glutathione (GSH)",
            "NADH (Nicotinamide adenine dinucleotide)",
            "FADH2 (Flavin adenine dinucleotide)",
            "Formic acid (HCOOH)",
            "Carbon monoxide (CO)",
            "Sulfite (SO3^2-)",
            "Oxalic acid (C2H2O4)",
            "Tin(II) chloride (SnCl2)",
            "Thiosulfate (S2O3^2-)",
            "Sodium amalgam (NaHg)",
            "Dithiothreitol (DTT)",
            "\u03b2-Mercaptoethanol (BME)",
            "Tris(2-carboxyethyl)phosphine (TCEP)",
            "Borane (BH3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Hydrogen gas (H2)",
                "Zinc (Zn)",
                "Iron (Fe)",
                "Sodium (Na)",
                "Magnesium (Mg)",
                "Hydrazine (N2H4)",
                "Sodium dithionite (Na2S2O4)",
                "Ascorbic acid (Vitamin C)",
                "Glutathione (GSH)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "FADH2 (Flavin adenine dinucleotide)",
                "Formic acid (HCOOH)",
                "Carbon monoxide (CO)",
                "Sulfite (SO3^2-)",
                "Oxalic acid (C2H2O4)",
                "Tin(II) chloride (SnCl2)",
                "Thiosulfate (S2O3^2-)",
                "Sodium amalgam (NaHg)",
                "Dithiothreitol (DTT)",
                "\u03b2-Mercaptoethanol (BME)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Borane (BH3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic acid (Vitamin C)",
                "Borane (BH3)",
                "Carbon monoxide (CO)",
                "Dithiothreitol (DTT)",
                "FADH2 (Flavin adenine dinucleotide)",
                "Formic acid (HCOOH)",
                "Glutathione (GSH)",
                "Hydrazine (N2H4)",
                "Hydrogen gas (H2)",
                "Iron (Fe)",
                "Lithium aluminum hydride (LiAlH4)",
                "Magnesium (Mg)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "Oxalic acid (C2H2O4)",
                "Sodium (Na)",
                "Sodium amalgam (NaHg)",
                "Sodium borohydride (NaBH4)",
                "Sodium dithionite (Na2S2O4)",
                "Sulfite (SO3^2-)",
                "Thiosulfate (S2O3^2-)",
                "Tin(II) chloride (SnCl2)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Zinc (Zn)",
                "\u03b2-Mercaptoethanol (BME)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Ascorbic acid (vitamin C)",
            "Glutathione (GSH)",
            "Cysteine (Cys)",
            "Thioredoxin (Trx)",
            "Nicotinamide adenine dinucleotide (NADH)",
            "Nicotinamide adenine dinucleotide phosphate (NADPH)",
            "Dithiothreitol (DTT)",
            "2-Mercaptoethanol (BME)",
            "Tris(2-carboxyethyl)phosphine (TCEP)",
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Sodium dithionite (Na2S2O4)",
            "Ferrous ion (Fe2+)",
            "Stannous ion (Sn2+)",
            "Titanium(III) chloride (TiCl3)",
            "Chromium(II) chloride (CrCl2)",
            "Vanadium(II) chloride (VCl2)",
            "Samarium(II) iodide (SmI2)",
            "Hydrogen sulfide (H2S)",
            "Carbon monoxide (CO)",
            "Formic acid (HCOOH)",
            "Oxalic acid (H2C2O4)",
            "Hydrazine (N2H4)",
            "Hydroxylamine (NH2OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ascorbic acid (vitamin C)",
                "Glutathione (GSH)",
                "Cysteine (Cys)",
                "Thioredoxin (Trx)",
                "Nicotinamide adenine dinucleotide (NADH)",
                "Nicotinamide adenine dinucleotide phosphate (NADPH)",
                "Dithiothreitol (DTT)",
                "2-Mercaptoethanol (BME)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Sodium dithionite (Na2S2O4)",
                "Ferrous ion (Fe2+)",
                "Stannous ion (Sn2+)",
                "Titanium(III) chloride (TiCl3)",
                "Chromium(II) chloride (CrCl2)",
                "Vanadium(II) chloride (VCl2)",
                "Samarium(II) iodide (SmI2)",
                "Hydrogen sulfide (H2S)",
                "Carbon monoxide (CO)",
                "Formic acid (HCOOH)",
                "Oxalic acid (H2C2O4)",
                "Hydrazine (N2H4)",
                "Hydroxylamine (NH2OH)"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Mercaptoethanol (BME)",
                "Ascorbic acid (vitamin C)",
                "Carbon monoxide (CO)",
                "Chromium(II) chloride (CrCl2)",
                "Cysteine (Cys)",
                "Dithiothreitol (DTT)",
                "Ferrous ion (Fe2+)",
                "Formic acid (HCOOH)",
                "Glutathione (GSH)",
                "Hydrazine (N2H4)",
                "Hydrogen sulfide (H2S)",
                "Hydroxylamine (NH2OH)",
                "Lithium aluminum hydride (LiAlH4)",
                "Nicotinamide adenine dinucleotide (NADH)",
                "Nicotinamide adenine dinucleotide phosphate (NADPH)",
                "Oxalic acid (H2C2O4)",
                "Samarium(II) iodide (SmI2)",
                "Sodium borohydride (NaBH4)",
                "Sodium dithionite (Na2S2O4)",
                "Stannous ion (Sn2+)",
                "Thioredoxin (Trx)",
                "Titanium(III) chloride (TiCl3)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Vanadium(II) chloride (VCl2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Dithiothreitol (DTT)",
            "Tris(2-carboxyethyl)phosphine (TCEP)",
            "Ascorbic acid (Vitamin C)",
            "Glutathione (GSH)",
            "Hydrogen gas (H2)",
            "Hydrazine (N2H4)",
            "Sodium dithionite (Na2S2O4)",
            "Sodium sulfite (Na2SO3)",
            "Sodium thiosulfate (Na2S2O3)",
            "Zinc powder (Zn)",
            "Iron(II) sulfate (FeSO4)",
            "Stannous chloride (SnCl2)",
            "Sodium cyanoborohydride (NaBH3CN)",
            "Sodium triacetoxyborohydride (Na(OAc)3BH)",
            "Triphenylphosphine (PPh3)",
            "Tributyltin hydride (Bu3SnH)",
            "Sodium borohydride-methanol complex (NaBH4-MeOH)",
            "Sodium bis(2-methoxyethoxy)aluminum hydride (Red-Al)",
            "Sodium bis(2-methoxyethoxy)aluminum dihydride (SMEAH)",
            "Sodium triethylborohydride (NaEt3BH)",
            "Sodium trimethoxyborohydride (NaB(OCH3)3H)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Dithiothreitol (DTT)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Ascorbic acid (Vitamin C)",
                "Glutathione (GSH)",
                "Hydrogen gas (H2)",
                "Hydrazine (N2H4)",
                "Sodium dithionite (Na2S2O4)",
                "Sodium sulfite (Na2SO3)",
                "Sodium thiosulfate (Na2S2O3)",
                "Zinc powder (Zn)",
                "Iron(II) sulfate (FeSO4)",
                "Stannous chloride (SnCl2)",
                "Sodium cyanoborohydride (NaBH3CN)",
                "Sodium triacetoxyborohydride (Na(OAc)3BH)",
                "Triphenylphosphine (PPh3)",
                "Tributyltin hydride (Bu3SnH)",
                "Sodium borohydride-methanol complex (NaBH4-MeOH)",
                "Sodium bis(2-methoxyethoxy)aluminum hydride (Red-Al)",
                "Sodium bis(2-methoxyethoxy)aluminum dihydride (SMEAH)",
                "Sodium triethylborohydride (NaEt3BH)",
                "Sodium trimethoxyborohydride (NaB(OCH3)3H)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic acid (Vitamin C)",
                "Dithiothreitol (DTT)",
                "Glutathione (GSH)",
                "Hydrazine (N2H4)",
                "Hydrogen gas (H2)",
                "Iron(II) sulfate (FeSO4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Sodium bis(2-methoxyethoxy)aluminum dihydride (SMEAH)",
                "Sodium bis(2-methoxyethoxy)aluminum hydride (Red-Al)",
                "Sodium borohydride (NaBH4)",
                "Sodium borohydride-methanol complex (NaBH4-MeOH)",
                "Sodium cyanoborohydride (NaBH3CN)",
                "Sodium dithionite (Na2S2O4)",
                "Sodium sulfite (Na2SO3)",
                "Sodium thiosulfate (Na2S2O3)",
                "Sodium triacetoxyborohydride (Na(OAc)3BH)",
                "Sodium triethylborohydride (NaEt3BH)",
                "Sodium trimethoxyborohydride (NaB(OCH3)3H)",
                "Stannous chloride (SnCl2)",
                "Tributyltin hydride (Bu3SnH)",
                "Triphenylphosphine (PPh3)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Zinc powder (Zn)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Ascorbic Acid (Vitamin C)",
            "Glutathione",
            "Hydrogen (H2)",
            "Sodium Borohydride (NaBH4)",
            "Dithiothreitol (DTT)",
            "Tris(2-carboxyethyl)phosphine (TCEP)",
            "Hydrazine (N2H4)",
            "Lithium Aluminum Hydride (LiAlH4)",
            "Diborane (B2H6)",
            "Sodium Sulfite (Na2SO3)",
            "Formic Acid (HCOOH)",
            "Glucose",
            "Thiols (e.g., Cysteine)",
            "Phenylhydrazine",
            "Tin(II) Chloride (SnCl2)",
            "Zinc Dust",
            "Ferrous Sulfate (FeSO4)",
            "Sodium Thiosulfate (Na2S2O3)",
            "Sulfur Dioxide (SO2)",
            "Hydrogen Sulfide (H2S)",
            "Sodium Hypophosphite (NaH2PO2)",
            "Sodium Sulfide (Na2S)",
            "Sodium Borohydride Cyanoborohydride (NaBH3CN)",
            "Sodium Dithionite (Na2S2O4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ascorbic Acid (Vitamin C)",
                "Glutathione",
                "Hydrogen (H2)",
                "Sodium Borohydride (NaBH4)",
                "Dithiothreitol (DTT)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Hydrazine (N2H4)",
                "Lithium Aluminum Hydride (LiAlH4)",
                "Diborane (B2H6)",
                "Sodium Sulfite (Na2SO3)",
                "Formic Acid (HCOOH)",
                "Glucose",
                "Thiols (e.g., Cysteine)",
                "Phenylhydrazine",
                "Tin(II) Chloride (SnCl2)",
                "Zinc Dust",
                "Ferrous Sulfate (FeSO4)",
                "Sodium Thiosulfate (Na2S2O3)",
                "Sulfur Dioxide (SO2)",
                "Hydrogen Sulfide (H2S)",
                "Sodium Hypophosphite (NaH2PO2)",
                "Sodium Sulfide (Na2S)",
                "Sodium Borohydride Cyanoborohydride (NaBH3CN)",
                "Sodium Dithionite (Na2S2O4)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic Acid (Vitamin C)",
                "Diborane (B2H6)",
                "Dithiothreitol (DTT)",
                "Ferrous Sulfate (FeSO4)",
                "Formic Acid (HCOOH)",
                "Glucose",
                "Glutathione",
                "Hydrazine (N2H4)",
                "Hydrogen (H2)",
                "Hydrogen Sulfide (H2S)",
                "Lithium Aluminum Hydride (LiAlH4)",
                "Phenylhydrazine",
                "Sodium Borohydride (NaBH4)",
                "Sodium Borohydride Cyanoborohydride (NaBH3CN)",
                "Sodium Dithionite (Na2S2O4)",
                "Sodium Hypophosphite (NaH2PO2)",
                "Sodium Sulfide (Na2S)",
                "Sodium Sulfite (Na2SO3)",
                "Sodium Thiosulfate (Na2S2O3)",
                "Sulfur Dioxide (SO2)",
                "Thiols (e.g., Cysteine)",
                "Tin(II) Chloride (SnCl2)",
                "Tris(2-carboxyethyl)phosphine (TCEP)",
                "Zinc Dust"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Ascorbic acid (Vitamin C) (Asc)",
            "Hydrogen gas (H2)",
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Sodium cyanoborohydride (NaCNBH3)",
            "Triphenylphosphine (PPh3)",
            "Dithiothreitol (DTT)",
            "Cysteine",
            "Glutathione",
            "NADH",
            "NADPH",
            "Flavodoxin",
            "Benzyltriethylammonium chloride (BTEAC)",
            "Tetrahydrofuran (THF)",
            "Tri-n-butylphosphine (PnBu3)",
            "Triphenylarsine (AsPh3)",
            "Hydroxylamine",
            "Hydrogen peroxide (H2O2)",
            "Dithioerythritol (DTE)",
            "Cysteamine",
            "Cystine",
            "Benzyl mercaptan",
            "Dithiothreitol (DTT) derivative",
            "Sodium dithionite (Na2S2O4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ascorbic acid (Vitamin C) (Asc)",
                "Hydrogen gas (H2)",
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Sodium cyanoborohydride (NaCNBH3)",
                "Triphenylphosphine (PPh3)",
                "Dithiothreitol (DTT)",
                "Cysteine",
                "Glutathione",
                "NADH",
                "NADPH",
                "Flavodoxin",
                "Benzyltriethylammonium chloride (BTEAC)",
                "Tetrahydrofuran (THF)",
                "Tri-n-butylphosphine (PnBu3)",
                "Triphenylarsine (AsPh3)",
                "Hydroxylamine",
                "Hydrogen peroxide (H2O2)",
                "Dithioerythritol (DTE)",
                "Cysteamine",
                "Cystine",
                "Benzyl mercaptan",
                "Dithiothreitol (DTT) derivative",
                "Sodium dithionite (Na2S2O4)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic acid (Vitamin C) (Asc)",
                "Benzyl mercaptan",
                "Benzyltriethylammonium chloride (BTEAC)",
                "Cysteamine",
                "Cysteine",
                "Cystine",
                "Dithioerythritol (DTE)",
                "Dithiothreitol (DTT)",
                "Dithiothreitol (DTT) derivative",
                "Flavodoxin",
                "Glutathione",
                "Hydrogen gas (H2)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxylamine",
                "Lithium aluminum hydride (LiAlH4)",
                "NADH",
                "NADPH",
                "Sodium borohydride (NaBH4)",
                "Sodium cyanoborohydride (NaCNBH3)",
                "Sodium dithionite (Na2S2O4)",
                "Tetrahydrofuran (THF)",
                "Tri-n-butylphosphine (PnBu3)",
                "Triphenylarsine (AsPh3)",
                "Triphenylphosphine (PPh3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            "Hydrogen gas (H2)",
            "Sodium borohydride (NaBH4)",
            "Lithium aluminum hydride (LiAlH4)",
            "Sodium hydride (NaH)",
            "Hydrazine (N2H4)",
            "Dithiothreitol (DTT)",
            "Beta-mercaptoethanol",
            "Ascorbic acid (Vitamin C)",
            "Glutathione",
            "Cysteine",
            "Thioredoxin",
            "FADH2 (Flavin adenine dinucleotide)",
            "FMNH2 (Flavin mononucleotide)",
            "NADH (Nicotinamide adenine dinucleotide)",
            "NADPH (Nicotinamide adenine dinucleotide phosphate)",
            "FAD (Flavin adenine dinucleotide)",
            "FMN (Flavin mononucleotide)",
            "Coenzyme Q (Ubiquinone)",
            "Vitamin E (Tocopherol)",
            "Sodium dithionite (Na2S2O4)",
            "Zinc powder",
            "Tin(II) chloride (SnCl2)",
            "Sodium amalgam (Na(Hg))",
            "Formic acid (HCOOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen gas (H2)",
                "Sodium borohydride (NaBH4)",
                "Lithium aluminum hydride (LiAlH4)",
                "Sodium hydride (NaH)",
                "Hydrazine (N2H4)",
                "Dithiothreitol (DTT)",
                "Beta-mercaptoethanol",
                "Ascorbic acid (Vitamin C)",
                "Glutathione",
                "Cysteine",
                "Thioredoxin",
                "FADH2 (Flavin adenine dinucleotide)",
                "FMNH2 (Flavin mononucleotide)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate)",
                "FAD (Flavin adenine dinucleotide)",
                "FMN (Flavin mononucleotide)",
                "Coenzyme Q (Ubiquinone)",
                "Vitamin E (Tocopherol)",
                "Sodium dithionite (Na2S2O4)",
                "Zinc powder",
                "Tin(II) chloride (SnCl2)",
                "Sodium amalgam (Na(Hg))",
                "Formic acid (HCOOH)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ascorbic acid (Vitamin C)",
                "Beta-mercaptoethanol",
                "Coenzyme Q (Ubiquinone)",
                "Cysteine",
                "Dithiothreitol (DTT)",
                "FAD (Flavin adenine dinucleotide)",
                "FADH2 (Flavin adenine dinucleotide)",
                "FMN (Flavin mononucleotide)",
                "FMNH2 (Flavin mononucleotide)",
                "Formic acid (HCOOH)",
                "Glutathione",
                "Hydrazine (N2H4)",
                "Hydrogen gas (H2)",
                "Lithium aluminum hydride (LiAlH4)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate)",
                "Sodium amalgam (Na(Hg))",
                "Sodium borohydride (NaBH4)",
                "Sodium dithionite (Na2S2O4)",
                "Sodium hydride (NaH)",
                "Thioredoxin",
                "Tin(II) chloride (SnCl2)",
                "Vitamin E (Tocopherol)",
                "Zinc powder"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "reducing agent",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "A reducing agent (or reductant) is a particle (atom, ion, or molecule) that donates electrons to an oxidizing agent (or oxidant) in a redox chemical reaction."
            },
            {
                "Referent": "The most common reducing agents are metal atoms, metal ions, and certain organic molecules, such as glucose, amino acids, and some vitamins."
            },
            {
                "Referent": "Redox (reduction-oxidation) reactions are essential for many biological processes, including respiration, photosynthesis, and energy production in cells."
            },
            {
                "Referent": "In organic chemistry, reducing agents are used in various reactions, such as the synthesis of alcohols, aldehydes, and ketones."
            },
            {
                "Referent": "In medical research, reducing agents are used in the synthesis of drugs, such as aspirin and penicillin."
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "A reducing agent (or reductant) is a particle (atom, ion, or molecule) that donates electrons to an oxidizing agent (or oxidant) in a redox chemical reaction.",
                "The most common reducing agents are metal atoms, metal ions, and certain organic molecules, such as glucose, amino acids, and some vitamins.",
                "Redox (reduction-oxidation) reactions are essential for many biological processes, including respiration, photosynthesis, and energy production in cells.",
                "In organic chemistry, reducing agents are used in various reactions, such as the synthesis of alcohols, aldehydes, and ketones.",
                "In medical research, reducing agents are used in the synthesis of drugs, such as aspirin and penicillin."
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Referent\": \"A reducing agent (or reductant) is a particle (atom, ion, or molecule) that donates electrons to an oxidizing agent (or oxidant) in a redox chemical reaction.\"}",
                "{\"Referent\": \"In medical research, reducing agents are used in the synthesis of drugs, such as aspirin and penicillin.\"}",
                "{\"Referent\": \"In organic chemistry, reducing agents are used in various reactions, such as the synthesis of alcohols, aldehydes, and ketones.\"}",
                "{\"Referent\": \"Redox (reduction-oxidation) reactions are essential for many biological processes, including respiration, photosynthesis, and energy production in cells.\"}",
                "{\"Referent\": \"The most common reducing agents are metal atoms, metal ions, and certain organic molecules, such as glucose, amino acids, and some vitamins.\"}"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 0
        }
    }
]