[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide anion (O2\u2022\u2212)",
            "Hydroxyl radical (\u2022OH)",
            "Hypochlorous acid (HOCl)",
            "Singlet oxygen (1O2)",
            "Peroxynitrite (ONOO\u2212)",
            "Nitric oxide (NO\u2022)",
            "Alkoxy radical (RO\u2022)",
            "Ketohydroperoxide (ROOH)",
            "Alkoxyl radical (RO\u2022)",
            "Peroxyl radical (ROO\u2022)",
            "Peroxide anion (O2\u22122)",
            "Hydrogen peroxide (H2O2)",
            "Nitroxyl (HNO)",
            "Nitroxyl anion (HNO\u2212)",
            "Nitroxyl radical (HNO\u2022)",
            "Nitric oxide dimer (N2O3)",
            "Nitric dioxide (NO2)",
            "Nitrogen dioxide radical (NO2\u2022)",
            "Nitroxyl peroxide (N2O4)",
            "Nitroxyl dimer (N2O2\u2022\u2212)",
            "Nitroxyl peroxynitrite (N2O5)",
            "Nitroxyl peroxynitrite anion (N2O5\u2212)",
            "Nitroxyl peroxynitrite radical (N2O5\u2022\u2212)",
            "Nitroxyl peroxynitrite dimer (N2O6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide anion (O2\u2022\u2212)",
                "Hydroxyl radical (\u2022OH)",
                "Hypochlorous acid (HOCl)",
                "Singlet oxygen (1O2)",
                "Peroxynitrite (ONOO\u2212)",
                "Nitric oxide (NO\u2022)",
                "Alkoxy radical (RO\u2022)",
                "Ketohydroperoxide (ROOH)",
                "Alkoxyl radical (RO\u2022)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxide anion (O2\u22122)",
                "Hydrogen peroxide (H2O2)",
                "Nitroxyl (HNO)",
                "Nitroxyl anion (HNO\u2212)",
                "Nitroxyl radical (HNO\u2022)",
                "Nitric oxide dimer (N2O3)",
                "Nitric dioxide (NO2)",
                "Nitrogen dioxide radical (NO2\u2022)",
                "Nitroxyl peroxide (N2O4)",
                "Nitroxyl dimer (N2O2\u2022\u2212)",
                "Nitroxyl peroxynitrite (N2O5)",
                "Nitroxyl peroxynitrite anion (N2O5\u2212)",
                "Nitroxyl peroxynitrite radical (N2O5\u2022\u2212)",
                "Nitroxyl peroxynitrite dimer (N2O6)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxy radical (RO\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u2022OH)",
                "Hypochlorous acid (HOCl)",
                "Ketohydroperoxide (ROOH)",
                "Nitric dioxide (NO2)",
                "Nitric oxide (NO\u2022)",
                "Nitric oxide dimer (N2O3)",
                "Nitrogen dioxide radical (NO2\u2022)",
                "Nitroxyl (HNO)",
                "Nitroxyl anion (HNO\u2212)",
                "Nitroxyl dimer (N2O2\u2022\u2212)",
                "Nitroxyl peroxide (N2O4)",
                "Nitroxyl peroxynitrite (N2O5)",
                "Nitroxyl peroxynitrite anion (N2O5\u2212)",
                "Nitroxyl peroxynitrite dimer (N2O6)",
                "Nitroxyl peroxynitrite radical (N2O5\u2022\u2212)",
                "Nitroxyl radical (HNO\u2022)",
                "Peroxide anion (O2\u22122)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxynitrite (ONOO\u2212)",
                "Singlet oxygen (1O2)",
                "Superoxide anion (O2\u2022\u2212)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide anion (O2\u2022\u2212)",
            "Hydrogen peroxide (H2O2)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (1O2)",
            "Peroxyl radical (RO2\u2022)",
            "Alkoxy radical (RO\u2022)",
            "Nitric oxide (NO\u2022)",
            "Peroxynitrite (ONOO\u2212)",
            "Lipid hydroperoxides (LOOH)",
            "Hydroperoxyl radical (HO2\u2022)",
            "Ozone (O3)",
            "Chlorine dioxide (ClO2)",
            "Nitrogen dioxide (NO2\u2022)",
            "Dioxygenyl ion (O2+)",
            "Triplet oxygen (3O2)",
            "Hydrogen peroxide radical (HOO\u2022)",
            "Oxygen singlet (1\u0394g)",
            "Methylperoxy radical (CH3OO\u2022)",
            "Ethylperoxy radical (C2H5OO\u2022)",
            "Chlorine radical (Cl\u2022)",
            "Bromine radical (Br\u2022)",
            "Iodine radical (I\u2022)",
            "Fluorine radical (F\u2022)",
            "Carbon dioxide radical anion (CO2\u2022\u2212)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide anion (O2\u2022\u2212)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (1O2)",
                "Peroxyl radical (RO2\u2022)",
                "Alkoxy radical (RO\u2022)",
                "Nitric oxide (NO\u2022)",
                "Peroxynitrite (ONOO\u2212)",
                "Lipid hydroperoxides (LOOH)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Ozone (O3)",
                "Chlorine dioxide (ClO2)",
                "Nitrogen dioxide (NO2\u2022)",
                "Dioxygenyl ion (O2+)",
                "Triplet oxygen (3O2)",
                "Hydrogen peroxide radical (HOO\u2022)",
                "Oxygen singlet (1\u0394g)",
                "Methylperoxy radical (CH3OO\u2022)",
                "Ethylperoxy radical (C2H5OO\u2022)",
                "Chlorine radical (Cl\u2022)",
                "Bromine radical (Br\u2022)",
                "Iodine radical (I\u2022)",
                "Fluorine radical (F\u2022)",
                "Carbon dioxide radical anion (CO2\u2022\u2212)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxy radical (RO\u2022)",
                "Bromine radical (Br\u2022)",
                "Carbon dioxide radical anion (CO2\u2022\u2212)",
                "Chlorine dioxide (ClO2)",
                "Chlorine radical (Cl\u2022)",
                "Dioxygenyl ion (O2+)",
                "Ethylperoxy radical (C2H5OO\u2022)",
                "Fluorine radical (F\u2022)",
                "Hydrogen peroxide (H2O2)",
                "Hydrogen peroxide radical (HOO\u2022)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Hydroxyl radical (\u2022OH)",
                "Iodine radical (I\u2022)",
                "Lipid hydroperoxides (LOOH)",
                "Methylperoxy radical (CH3OO\u2022)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Oxygen singlet (1\u0394g)",
                "Ozone (O3)",
                "Peroxyl radical (RO2\u2022)",
                "Peroxynitrite (ONOO\u2212)",
                "Singlet oxygen (1O2)",
                "Superoxide anion (O2\u2022\u2212)",
                "Triplet oxygen (3O2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide (O2\u2212)",
            "Hydrogen peroxide (H2O2)",
            "Hydroxyl radical (\u00b7OH)",
            "Singlet oxygen (1O2)",
            "Peroxyl radical (ROO\u00b7)",
            "Alkyl peroxide (ROOH)",
            "Hypochlorous acid (HOCl)",
            "Ozone (O3)",
            "Peroxynitrite (ONOO\u2212)",
            "Nitric oxide (\u00b7NO)",
            "Nitrosyl cation (NO+)",
            "Nitrosyl anion (NO\u2212)",
            "Nitric dioxide (NO2)",
            "Dinitrogen trioxide (N2O3)",
            "Dinitrogen tetroxide (N2O4)",
            "Dinitrogen pentoxide (N2O5)",
            "Nitrosyl chloride (NOCl)",
            "Nitrosyl bromide (NOBr)",
            "Nitrosyl iodide (NOI)",
            "Nitrosyl fluoride (NOF)",
            "Nitrosyl cyanide (NOCN)",
            "Nitrosyl azide (NON3)",
            "Nitrosyl thiocyanate (NOSCN)",
            "Nitrosyl isocyanate (NOCNO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide (O2\u2212)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u00b7OH)",
                "Singlet oxygen (1O2)",
                "Peroxyl radical (ROO\u00b7)",
                "Alkyl peroxide (ROOH)",
                "Hypochlorous acid (HOCl)",
                "Ozone (O3)",
                "Peroxynitrite (ONOO\u2212)",
                "Nitric oxide (\u00b7NO)",
                "Nitrosyl cation (NO+)",
                "Nitrosyl anion (NO\u2212)",
                "Nitric dioxide (NO2)",
                "Dinitrogen trioxide (N2O3)",
                "Dinitrogen tetroxide (N2O4)",
                "Dinitrogen pentoxide (N2O5)",
                "Nitrosyl chloride (NOCl)",
                "Nitrosyl bromide (NOBr)",
                "Nitrosyl iodide (NOI)",
                "Nitrosyl fluoride (NOF)",
                "Nitrosyl cyanide (NOCN)",
                "Nitrosyl azide (NON3)",
                "Nitrosyl thiocyanate (NOSCN)",
                "Nitrosyl isocyanate (NOCNO)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkyl peroxide (ROOH)",
                "Dinitrogen pentoxide (N2O5)",
                "Dinitrogen tetroxide (N2O4)",
                "Dinitrogen trioxide (N2O3)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u00b7OH)",
                "Hypochlorous acid (HOCl)",
                "Nitric dioxide (NO2)",
                "Nitric oxide (\u00b7NO)",
                "Nitrosyl anion (NO\u2212)",
                "Nitrosyl azide (NON3)",
                "Nitrosyl bromide (NOBr)",
                "Nitrosyl cation (NO+)",
                "Nitrosyl chloride (NOCl)",
                "Nitrosyl cyanide (NOCN)",
                "Nitrosyl fluoride (NOF)",
                "Nitrosyl iodide (NOI)",
                "Nitrosyl isocyanate (NOCNO)",
                "Nitrosyl thiocyanate (NOSCN)",
                "Ozone (O3)",
                "Peroxyl radical (ROO\u00b7)",
                "Peroxynitrite (ONOO\u2212)",
                "Singlet oxygen (1O2)",
                "Superoxide (O2\u2212)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide radical (O\u2082\u207b\u2022)",
            "Hydrogen peroxide (H\u2082O\u2082)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (\u00b9O\u2082)",
            "Peroxyl radical (ROO\u2022)",
            "Hypochlorous acid (HOCl)",
            "Nitric oxide radical (\u2022NO)",
            "Peroxynitrite (ONOO\u207b)",
            "Ozone (O\u2083)",
            "Dimethyl dioxirane (DMDO)",
            "Formyl radical (HCO\u2022)",
            "Hydroperoxyl radical (HO\u2082\u2022)",
            "Glutathione radical (GS\u2022)",
            "Thiyl radical (RS\u2022)",
            "Nitrogen dioxide radical (\u2022NO\u2082)",
            "Carbonyl radical (RCO\u2022)",
            "Semiquinone radical (Q\u2022\u207b)",
            "Superoxide anion radical (O\u2082\u207b)",
            "Hydroxymethyl radical (\u2022CH\u2082OH)",
            "Peroxymonosulfate (HSO\u2085\u207b)",
            "Myeloperoxidase-derived hypochlorous acid",
            "Fenton reaction generated hydroxyl radical",
            "Haber-Weiss reaction generated hydroxyl radical",
            "Lipid peroxyl radical (LOO\u2022)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide radical (O\u2082\u207b\u2022)",
                "Hydrogen peroxide (H\u2082O\u2082)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (\u00b9O\u2082)",
                "Peroxyl radical (ROO\u2022)",
                "Hypochlorous acid (HOCl)",
                "Nitric oxide radical (\u2022NO)",
                "Peroxynitrite (ONOO\u207b)",
                "Ozone (O\u2083)",
                "Dimethyl dioxirane (DMDO)",
                "Formyl radical (HCO\u2022)",
                "Hydroperoxyl radical (HO\u2082\u2022)",
                "Glutathione radical (GS\u2022)",
                "Thiyl radical (RS\u2022)",
                "Nitrogen dioxide radical (\u2022NO\u2082)",
                "Carbonyl radical (RCO\u2022)",
                "Semiquinone radical (Q\u2022\u207b)",
                "Superoxide anion radical (O\u2082\u207b)",
                "Hydroxymethyl radical (\u2022CH\u2082OH)",
                "Peroxymonosulfate (HSO\u2085\u207b)",
                "Myeloperoxidase-derived hypochlorous acid",
                "Fenton reaction generated hydroxyl radical",
                "Haber-Weiss reaction generated hydroxyl radical",
                "Lipid peroxyl radical (LOO\u2022)"
            ],
            "mismatches": [],
            "true_referents": [
                "Carbonyl radical (RCO\u2022)",
                "Dimethyl dioxirane (DMDO)",
                "Fenton reaction generated hydroxyl radical",
                "Formyl radical (HCO\u2022)",
                "Glutathione radical (GS\u2022)",
                "Haber-Weiss reaction generated hydroxyl radical",
                "Hydrogen peroxide (H\u2082O\u2082)",
                "Hydroperoxyl radical (HO\u2082\u2022)",
                "Hydroxyl radical (\u2022OH)",
                "Hydroxymethyl radical (\u2022CH\u2082OH)",
                "Hypochlorous acid (HOCl)",
                "Lipid peroxyl radical (LOO\u2022)",
                "Myeloperoxidase-derived hypochlorous acid",
                "Nitric oxide radical (\u2022NO)",
                "Nitrogen dioxide radical (\u2022NO\u2082)",
                "Ozone (O\u2083)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxymonosulfate (HSO\u2085\u207b)",
                "Peroxynitrite (ONOO\u207b)",
                "Semiquinone radical (Q\u2022\u207b)",
                "Singlet oxygen (\u00b9O\u2082)",
                "Superoxide anion radical (O\u2082\u207b)",
                "Superoxide radical (O\u2082\u207b\u2022)",
                "Thiyl radical (RS\u2022)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide anion (O2-)",
            "Hydroxyl radical (OH\u2022)",
            "Hydrogen peroxide (H2O2)",
            "Singlet oxygen (1O2)",
            "Peroxyl radical (ROO\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Ozone (O3)",
            "Hypochlorous acid (HOCl)",
            "Hypobromous acid (HOBr)",
            "Peroxynitrite (ONOO-)",
            "Nitric oxide (NO\u2022)",
            "Nitrogen dioxide (NO2\u2022)",
            "Dioxygen (O2)",
            "Bicarbonate radical (HCO3\u2022)",
            "Carbonate radical (CO3\u2022-)",
            "Perhydroxyl radical (HO2\u2022)",
            "Thiyl radical (RS\u2022)",
            "Nitrosyl cation (NO+)",
            "Nitroxyl anion (NO-)",
            "Nitrosyl radical (HNO\u2022)",
            "Sulfite radical (SO3\u2022-)",
            "Thiosulfate radical (S2O3\u2022-)",
            "Chlorine dioxide (ClO2\u2022)",
            "Dinitrogen trioxide (N2O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide anion (O2-)",
                "Hydroxyl radical (OH\u2022)",
                "Hydrogen peroxide (H2O2)",
                "Singlet oxygen (1O2)",
                "Peroxyl radical (ROO\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Ozone (O3)",
                "Hypochlorous acid (HOCl)",
                "Hypobromous acid (HOBr)",
                "Peroxynitrite (ONOO-)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Dioxygen (O2)",
                "Bicarbonate radical (HCO3\u2022)",
                "Carbonate radical (CO3\u2022-)",
                "Perhydroxyl radical (HO2\u2022)",
                "Thiyl radical (RS\u2022)",
                "Nitrosyl cation (NO+)",
                "Nitroxyl anion (NO-)",
                "Nitrosyl radical (HNO\u2022)",
                "Sulfite radical (SO3\u2022-)",
                "Thiosulfate radical (S2O3\u2022-)",
                "Chlorine dioxide (ClO2\u2022)",
                "Dinitrogen trioxide (N2O3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Bicarbonate radical (HCO3\u2022)",
                "Carbonate radical (CO3\u2022-)",
                "Chlorine dioxide (ClO2\u2022)",
                "Dinitrogen trioxide (N2O3)",
                "Dioxygen (O2)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (OH\u2022)",
                "Hypobromous acid (HOBr)",
                "Hypochlorous acid (HOCl)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Nitrosyl cation (NO+)",
                "Nitrosyl radical (HNO\u2022)",
                "Nitroxyl anion (NO-)",
                "Ozone (O3)",
                "Perhydroxyl radical (HO2\u2022)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxynitrite (ONOO-)",
                "Singlet oxygen (1O2)",
                "Sulfite radical (SO3\u2022-)",
                "Superoxide anion (O2-)",
                "Thiosulfate radical (S2O3\u2022-)",
                "Thiyl radical (RS\u2022)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Hydrogen peroxide (H2O2)",
            "Superoxide anion (O2\u2022\u2212)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (1O2)",
            "Peroxynitrite (ONOO\u2212)",
            "Aldose reductase (AR)",
            "Nitric oxide (NO)",
            "Ozone (O3)",
            "Hypochlorous acid (HOCl)",
            "Lipid peroxides (LOOH)",
            "Dihydroxyfumarate (DHF)",
            "Mitochondrial reactive oxygen species (mtROS)",
            "Singlet oxygen species (SOS)",
            "Reactive nitrogen species (RNS)",
            "Chloramines (NH2Cl)",
            "Toxic oxygen species (TOS)",
            "Radical species (RS)",
            "Flavin-based reactive oxygen species (FB-ROS)",
            "Transition metal-catalyzed reactive oxygen species (TM-ROS)",
            "Glutathione disulfide (GSSG)",
            "Carbon-centered radicals (R\u2022)",
            "Thiyl radicals (RSH\u2022)",
            "Peroxyl radicals (ROO\u2022)",
            "Aryl hydrocarbon receptor (AhR)",
            "Cyclooxygenase (COX)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen peroxide (H2O2)",
                "Superoxide anion (O2\u2022\u2212)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (1O2)",
                "Peroxynitrite (ONOO\u2212)",
                "Aldose reductase (AR)",
                "Nitric oxide (NO)",
                "Ozone (O3)",
                "Hypochlorous acid (HOCl)",
                "Lipid peroxides (LOOH)",
                "Dihydroxyfumarate (DHF)",
                "Mitochondrial reactive oxygen species (mtROS)",
                "Singlet oxygen species (SOS)",
                "Reactive nitrogen species (RNS)",
                "Chloramines (NH2Cl)",
                "Toxic oxygen species (TOS)",
                "Radical species (RS)",
                "Flavin-based reactive oxygen species (FB-ROS)",
                "Transition metal-catalyzed reactive oxygen species (TM-ROS)",
                "Glutathione disulfide (GSSG)",
                "Carbon-centered radicals (R\u2022)",
                "Thiyl radicals (RSH\u2022)",
                "Peroxyl radicals (ROO\u2022)",
                "Aryl hydrocarbon receptor (AhR)"
            ],
            "mismatches": [
                "Cyclooxygenase (COX)"
            ],
            "true_referents": [
                "Aldose reductase (AR)",
                "Aryl hydrocarbon receptor (AhR)",
                "Carbon-centered radicals (R\u2022)",
                "Chloramines (NH2Cl)",
                "Dihydroxyfumarate (DHF)",
                "Flavin-based reactive oxygen species (FB-ROS)",
                "Glutathione disulfide (GSSG)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u2022OH)",
                "Hypochlorous acid (HOCl)",
                "Lipid peroxides (LOOH)",
                "Mitochondrial reactive oxygen species (mtROS)",
                "Nitric oxide (NO)",
                "Ozone (O3)",
                "Peroxyl radicals (ROO\u2022)",
                "Peroxynitrite (ONOO\u2212)",
                "Radical species (RS)",
                "Reactive nitrogen species (RNS)",
                "Singlet oxygen (1O2)",
                "Singlet oxygen species (SOS)",
                "Superoxide anion (O2\u2022\u2212)",
                "Thiyl radicals (RSH\u2022)",
                "Toxic oxygen species (TOS)",
                "Transition metal-catalyzed reactive oxygen species (TM-ROS)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide (O2\u2212)",
            "Hydrogen peroxide (H2O2)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (1O2)",
            "Peroxyl radical (ROO\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Hypochlorous acid (HOCl)",
            "Hypobromous acid (HOBr)",
            "Hypoiodous acid (HOI)",
            "Ozone (O3)",
            "Peroxynitrite (ONOO\u2212)",
            "Nitric oxide (NO\u2022)",
            "Nitrogen dioxide (NO2\u2022)",
            "Dioxirane (C2H4O2)",
            "Peroxy acid (RCOOH)",
            "Hydroperoxyl (HO2\u2022)",
            "Peroxide (R2O2)",
            "Hypofluorous acid (HOF)",
            "Peroxydisulfuric acid (H2S2O8)",
            "Peroxyformic acid (HCO4H)",
            "Peroxyacetic acid (CH3CO3H)",
            "Peroxymonosulfuric acid (H2SO5)",
            "Peroxydiphosphoric acid (H4P2O8)",
            "Peroxymonophosphoric acid (H3PO5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide (O2\u2212)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (1O2)",
                "Peroxyl radical (ROO\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Hypochlorous acid (HOCl)",
                "Hypobromous acid (HOBr)",
                "Hypoiodous acid (HOI)",
                "Ozone (O3)",
                "Peroxynitrite (ONOO\u2212)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Dioxirane (C2H4O2)",
                "Peroxy acid (RCOOH)",
                "Hydroperoxyl (HO2\u2022)",
                "Peroxide (R2O2)",
                "Hypofluorous acid (HOF)",
                "Peroxydisulfuric acid (H2S2O8)",
                "Peroxyformic acid (HCO4H)",
                "Peroxyacetic acid (CH3CO3H)",
                "Peroxymonosulfuric acid (H2SO5)",
                "Peroxydiphosphoric acid (H4P2O8)",
                "Peroxymonophosphoric acid (H3PO5)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Dioxirane (C2H4O2)",
                "Hydrogen peroxide (H2O2)",
                "Hydroperoxyl (HO2\u2022)",
                "Hydroxyl radical (\u2022OH)",
                "Hypobromous acid (HOBr)",
                "Hypochlorous acid (HOCl)",
                "Hypofluorous acid (HOF)",
                "Hypoiodous acid (HOI)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Ozone (O3)",
                "Peroxide (R2O2)",
                "Peroxy acid (RCOOH)",
                "Peroxyacetic acid (CH3CO3H)",
                "Peroxydiphosphoric acid (H4P2O8)",
                "Peroxydisulfuric acid (H2S2O8)",
                "Peroxyformic acid (HCO4H)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxymonophosphoric acid (H3PO5)",
                "Peroxymonosulfuric acid (H2SO5)",
                "Peroxynitrite (ONOO\u2212)",
                "Singlet oxygen (1O2)",
                "Superoxide (O2\u2212)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide radical (O2-)",
            "Hydrogen peroxide (H2O2)",
            "Hydroxyl radical (OH\u2022)",
            "Singlet oxygen (^1O2)",
            "Peroxynitrite (ONOO-)",
            "Hypochlorous acid (HOCl)",
            "Nitric oxide (NO)",
            "Nitric dioxide (NO2)",
            "Hydroperoxyl radical (HO2\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Hydroperoxide (ROOH)",
            "Lipid peroxyl radical (LOO\u2022)",
            "Peroxyl radical (ROO\u2022)",
            "Ozone (O3)",
            "Singlet oxygen (^1O2)",
            "Nitroxyl (HNO)",
            "Peroxynitrous acid (ONOOH)",
            "Dioxygen (O2)",
            "Superoxide dismutase (SOD)",
            "Catalase (CAT)",
            "Glutathione peroxidase (GPx)",
            "Myeloperoxidase (MPO)",
            "NADPH oxidase",
            "Fenton reaction"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide radical (O2-)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (OH\u2022)",
                "Singlet oxygen (^1O2)",
                "Peroxynitrite (ONOO-)",
                "Hypochlorous acid (HOCl)",
                "Nitric oxide (NO)",
                "Nitric dioxide (NO2)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Hydroperoxide (ROOH)",
                "Lipid peroxyl radical (LOO\u2022)",
                "Peroxyl radical (ROO\u2022)",
                "Ozone (O3)",
                "Nitroxyl (HNO)",
                "Peroxynitrous acid (ONOOH)",
                "Dioxygen (O2)",
                "Superoxide dismutase (SOD)",
                "Catalase (CAT)",
                "Glutathione peroxidase (GPx)",
                "Myeloperoxidase (MPO)",
                "NADPH oxidase",
                "Fenton reaction"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Catalase (CAT)",
                "Dioxygen (O2)",
                "Fenton reaction",
                "Glutathione peroxidase (GPx)",
                "Hydrogen peroxide (H2O2)",
                "Hydroperoxide (ROOH)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Hydroxyl radical (OH\u2022)",
                "Hypochlorous acid (HOCl)",
                "Lipid peroxyl radical (LOO\u2022)",
                "Myeloperoxidase (MPO)",
                "NADPH oxidase",
                "Nitric dioxide (NO2)",
                "Nitric oxide (NO)",
                "Nitroxyl (HNO)",
                "Ozone (O3)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxynitrite (ONOO-)",
                "Peroxynitrous acid (ONOOH)",
                "Singlet oxygen (^1O2)",
                "Superoxide dismutase (SOD)",
                "Superoxide radical (O2-)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Hydroxyl radical (\u2022OH)",
            "Superoxide anion (O\u2082\u207b)",
            "Hydrogen peroxide (H\u2082O\u2082)",
            "Singlet oxygen (\u00b9O\u2082)",
            "Peroxyl radical (ROO\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Hydroperoxyl radical (HO\u2082\u2022)",
            "Hypochlorous acid (HOCl)",
            "Peroxynitrite (ONOO\u207b)",
            "Peroxynitrosyl radical (ONNO\u2022)",
            "Carbonate radical (CO\u2083\u2022\u207b)",
            "Nitrogen dioxide radical (NO\u2082\u2022)",
            "Ozone (O\u2083)",
            "Lipid peroxyl radical (LOO\u2022)",
            "Ferryl ion (Fe=O)",
            "Chlorine radical (Cl\u2022)",
            "Dihydroxyl radical (HO\u2022HO\u2022)",
            "Singlet peroxynitrous acid (ONOOH*)",
            "Iodine monoxide radical (IO\u2022)",
            "Peroxymonocarbonate (HCO\u2084\u207b)",
            "Peroxydisulfate (S\u2082O\u2088\u00b2\u207b)",
            "Peroxymonosulfate (HSO\u2085\u207b)",
            "Benzoyloxy radical (PhCOO\u2022)",
            "Phenoxyl radical (PhO\u2022)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "error": "JSON decode error: Expecting value: line 1 column 1 (char 0)",
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Benzoyloxy radical (PhCOO\u2022)",
                "Carbonate radical (CO\u2083\u2022\u207b)",
                "Chlorine radical (Cl\u2022)",
                "Dihydroxyl radical (HO\u2022HO\u2022)",
                "Ferryl ion (Fe=O)",
                "Hydrogen peroxide (H\u2082O\u2082)",
                "Hydroperoxyl radical (HO\u2082\u2022)",
                "Hydroxyl radical (\u2022OH)",
                "Hypochlorous acid (HOCl)",
                "Iodine monoxide radical (IO\u2022)",
                "Lipid peroxyl radical (LOO\u2022)",
                "Nitrogen dioxide radical (NO\u2082\u2022)",
                "Ozone (O\u2083)",
                "Peroxydisulfate (S\u2082O\u2088\u00b2\u207b)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxymonocarbonate (HCO\u2084\u207b)",
                "Peroxymonosulfate (HSO\u2085\u207b)",
                "Peroxynitrite (ONOO\u207b)",
                "Peroxynitrosyl radical (ONNO\u2022)",
                "Phenoxyl radical (PhO\u2022)",
                "Singlet oxygen (\u00b9O\u2082)",
                "Singlet peroxynitrous acid (ONOOH*)",
                "Superoxide anion (O\u2082\u207b)"
            ]
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide anion (O2\u2022\u2212)",
            "Hydrogen peroxide (H2O2)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (1O2)",
            "Peroxyl radical (ROO\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Hypochlorite (OCl\u2212)",
            "Peroxynitrite (ONOO\u2212)",
            "Ozone (O3)",
            "Nitric oxide (NO\u2022)",
            "Nitrogen dioxide (NO2\u2022)",
            "Hydroperoxyl radical (HO2\u2022)",
            "Carbonate radical anion (CO3\u2022\u2212)",
            "Hypobromous acid (HOBr)",
            "Hypochlorous acid (HOCl)",
            "Organic hydroperoxide (ROOH)",
            "Lipid peroxide (LOOH)",
            "Trichloromethyl radical (\u2022CCl3)",
            "Thiyl radical (RS\u2022)",
            "Sulfenyl radical (RSO\u2022)",
            "Perhydroxyl radical (HOO\u2022)",
            "Nitroxyl anion (NO\u2212)",
            "Dinitrogen trioxide (N2O3)",
            "Dinitrogen tetroxide (N2O4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide anion (O2\u2022\u2212)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (1O2)",
                "Peroxyl radical (ROO\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Hypochlorite (OCl\u2212)",
                "Peroxynitrite (ONOO\u2212)",
                "Ozone (O3)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Carbonate radical anion (CO3\u2022\u2212)",
                "Hypobromous acid (HOBr)",
                "Hypochlorous acid (HOCl)",
                "Organic hydroperoxide (ROOH)",
                "Lipid peroxide (LOOH)",
                "Trichloromethyl radical (\u2022CCl3)",
                "Thiyl radical (RS\u2022)",
                "Sulfenyl radical (RSO\u2022)",
                "Perhydroxyl radical (HOO\u2022)",
                "Nitroxyl anion (NO\u2212)",
                "Dinitrogen trioxide (N2O3)",
                "Dinitrogen tetroxide (N2O4)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Carbonate radical anion (CO3\u2022\u2212)",
                "Dinitrogen tetroxide (N2O4)",
                "Dinitrogen trioxide (N2O3)",
                "Hydrogen peroxide (H2O2)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Hydroxyl radical (\u2022OH)",
                "Hypobromous acid (HOBr)",
                "Hypochlorite (OCl\u2212)",
                "Hypochlorous acid (HOCl)",
                "Lipid peroxide (LOOH)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Nitroxyl anion (NO\u2212)",
                "Organic hydroperoxide (ROOH)",
                "Ozone (O3)",
                "Perhydroxyl radical (HOO\u2022)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxynitrite (ONOO\u2212)",
                "Singlet oxygen (1O2)",
                "Sulfenyl radical (RSO\u2022)",
                "Superoxide anion (O2\u2022\u2212)",
                "Thiyl radical (RS\u2022)",
                "Trichloromethyl radical (\u2022CCl3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide anion radical (O2\u2022\u2212)",
            "Hydrogen peroxide (H2O2)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (1O2)",
            "Peroxyl radical (ROO\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Hydroperoxyl radical (HO2\u2022)",
            "Hypochlorous acid (HOCl)",
            "Ozone (O3)",
            "Peroxynitrite (ONOO\u2212)",
            "Nitric oxide (NO)",
            "Nitrogen dioxide (NO2)",
            "Peroxynitrous acid (ONOOH)",
            "Peroxymonocarbonate (HCO4\u2212)",
            "Carbonate radical anion (CO3\u2022\u2212)",
            "Peroxomonosulfate (HSO5\u2212)",
            "Peroxodisulfate (S2O82\u2212)",
            "Peroxodicarbonate (C2O62\u2212)",
            "Peroxomonophosphate (H2PO5\u2212)",
            "Peroxodiphosphate (P2O84\u2212)",
            "Peroxomonosulfate radical (SO4\u2022\u2212)",
            "Peroxodisulfate radical (S2O8\u2022\u2212)",
            "Peroxodicarbonate radical (C2O6\u2022\u2212)",
            "Peroxomonophosphate radical (HPO5\u2022\u2212)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide anion radical (O2\u2022\u2212)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (1O2)",
                "Peroxyl radical (ROO\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Hypochlorous acid (HOCl)",
                "Ozone (O3)",
                "Peroxynitrite (ONOO\u2212)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Peroxynitrous acid (ONOOH)",
                "Peroxymonocarbonate (HCO4\u2212)",
                "Carbonate radical anion (CO3\u2022\u2212)",
                "Peroxomonosulfate (HSO5\u2212)",
                "Peroxodisulfate (S2O82\u2212)",
                "Peroxodicarbonate (C2O62\u2212)",
                "Peroxomonophosphate (H2PO5\u2212)",
                "Peroxodiphosphate (P2O84\u2212)",
                "Peroxomonosulfate radical (SO4\u2022\u2212)",
                "Peroxodisulfate radical (S2O8\u2022\u2212)",
                "Peroxodicarbonate radical (C2O6\u2022\u2212)",
                "Peroxomonophosphate radical (HPO5\u2022\u2212)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Carbonate radical anion (CO3\u2022\u2212)",
                "Hydrogen peroxide (H2O2)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Hydroxyl radical (\u2022OH)",
                "Hypochlorous acid (HOCl)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Ozone (O3)",
                "Peroxodicarbonate (C2O62\u2212)",
                "Peroxodicarbonate radical (C2O6\u2022\u2212)",
                "Peroxodiphosphate (P2O84\u2212)",
                "Peroxodisulfate (S2O82\u2212)",
                "Peroxodisulfate radical (S2O8\u2022\u2212)",
                "Peroxomonophosphate (H2PO5\u2212)",
                "Peroxomonophosphate radical (HPO5\u2022\u2212)",
                "Peroxomonosulfate (HSO5\u2212)",
                "Peroxomonosulfate radical (SO4\u2022\u2212)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxymonocarbonate (HCO4\u2212)",
                "Peroxynitrite (ONOO\u2212)",
                "Peroxynitrous acid (ONOOH)",
                "Singlet oxygen (1O2)",
                "Superoxide anion radical (O2\u2022\u2212)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide (O2\u2022\u2212)",
            "Hydrogen peroxide (H2O2)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (1O2)",
            "Ozone (O3)",
            "Peroxyl radical (ROO\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Hydroperoxyl radical (HO2\u2022)",
            "Hypochlorous acid (HOCl)",
            "Peroxynitrite (ONOO\u2212)",
            "Nitric oxide (NO\u2022)",
            "Nitrogen dioxide (NO2\u2022)",
            "Nitroxyl anion (NO\u2212)",
            "Carbonate radical (CO3\u2022\u2212)",
            "Sulfur dioxide radical anion (SO2\u2022\u2212)",
            "Thiyl radical (RS\u2022)",
            "Disulfide radical anion (RSSR\u2022\u2212)",
            "Trichloromethyl radical (CCl3\u2022)",
            "Trichloromethylperoxyl radical (CCl3OO\u2022)",
            "Dichloromethylperoxyl radical (CH2ClOO\u2022)",
            "Chloromethylperoxyl radical (CH2ClOO\u2022)",
            "Methoxyl radical (CH3O\u2022)",
            "Ethoxyl radical (CH3CH2O\u2022)",
            "Isopropoxyl radical ((CH3)2CHO\u2022)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide (O2\u2022\u2212)",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (1O2)",
                "Ozone (O3)",
                "Peroxyl radical (ROO\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Hypochlorous acid (HOCl)",
                "Peroxynitrite (ONOO\u2212)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Nitroxyl anion (NO\u2212)",
                "Carbonate radical (CO3\u2022\u2212)",
                "Sulfur dioxide radical anion (SO2\u2022\u2212)",
                "Thiyl radical (RS\u2022)",
                "Disulfide radical anion (RSSR\u2022\u2212)",
                "Trichloromethyl radical (CCl3\u2022)",
                "Trichloromethylperoxyl radical (CCl3OO\u2022)",
                "Dichloromethylperoxyl radical (CH2ClOO\u2022)",
                "Chloromethylperoxyl radical (CH2ClOO\u2022)",
                "Methoxyl radical (CH3O\u2022)",
                "Ethoxyl radical (CH3CH2O\u2022)",
                "Isopropoxyl radical ((CH3)2CHO\u2022)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Carbonate radical (CO3\u2022\u2212)",
                "Chloromethylperoxyl radical (CH2ClOO\u2022)",
                "Dichloromethylperoxyl radical (CH2ClOO\u2022)",
                "Disulfide radical anion (RSSR\u2022\u2212)",
                "Ethoxyl radical (CH3CH2O\u2022)",
                "Hydrogen peroxide (H2O2)",
                "Hydroperoxyl radical (HO2\u2022)",
                "Hydroxyl radical (\u2022OH)",
                "Hypochlorous acid (HOCl)",
                "Isopropoxyl radical ((CH3)2CHO\u2022)",
                "Methoxyl radical (CH3O\u2022)",
                "Nitric oxide (NO\u2022)",
                "Nitrogen dioxide (NO2\u2022)",
                "Nitroxyl anion (NO\u2212)",
                "Ozone (O3)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxynitrite (ONOO\u2212)",
                "Singlet oxygen (1O2)",
                "Sulfur dioxide radical anion (SO2\u2022\u2212)",
                "Superoxide (O2\u2022\u2212)",
                "Thiyl radical (RS\u2022)",
                "Trichloromethyl radical (CCl3\u2022)",
                "Trichloromethylperoxyl radical (CCl3OO\u2022)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide (O\u2082\u2022\u207b)",
            "Hydrogen peroxide (H\u2082O\u2082)",
            "Hydroxyl radical (\u2022OH)",
            "Singlet oxygen (\u00b9O\u2082)",
            "Peroxyl radical (ROO\u2022)",
            "Alkoxyl radical (RO\u2022)",
            "Hypochlorous acid (HOCl)",
            "Ozone (O\u2083)",
            "Peroxynitrite (ONOO\u207b)",
            "Nitric oxide (\u2022NO)",
            "Nitrogen dioxide (\u2022NO\u2082)",
            "Lipid hydroperoxide (LOOH)",
            "Organic peroxide (ROOR')",
            "Nitrosoperoxycarbonate (ONOOCO\u2082\u207b)",
            "Carbonate radical (CO\u2083\u2022\u207b)",
            "Sulfur trioxide (SO\u2083)",
            "Chloramines (NHCl\u2082, NHCl)",
            "Bromine (Br\u2082)",
            "Iodine (I\u2082)",
            "Chlorine (Cl\u2082)",
            "Hypobromous acid (HOBr)",
            "Hypochlorous acid (HOCl)",
            "Chlorine dioxide (ClO\u2082)",
            "Nitrogen trioxide (N\u2082O\u2083)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide (O\u2082\u2022\u207b)",
                "Hydrogen peroxide (H\u2082O\u2082)",
                "Hydroxyl radical (\u2022OH)",
                "Singlet oxygen (\u00b9O\u2082)",
                "Peroxyl radical (ROO\u2022)",
                "Alkoxyl radical (RO\u2022)",
                "Hypochlorous acid (HOCl)",
                "Ozone (O\u2083)",
                "Peroxynitrite (ONOO\u207b)",
                "Nitric oxide (\u2022NO)",
                "Nitrogen dioxide (\u2022NO\u2082)",
                "Lipid hydroperoxide (LOOH)",
                "Organic peroxide (ROOR')",
                "Nitrosoperoxycarbonate (ONOOCO\u2082\u207b)",
                "Carbonate radical (CO\u2083\u2022\u207b)",
                "Sulfur trioxide (SO\u2083)",
                "Chloramines (NHCl\u2082, NHCl)",
                "Bromine (Br\u2082)",
                "Iodine (I\u2082)",
                "Chlorine (Cl\u2082)",
                "Hypobromous acid (HOBr)",
                "Chlorine dioxide (ClO\u2082)",
                "Nitrogen trioxide (N\u2082O\u2083)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkoxyl radical (RO\u2022)",
                "Bromine (Br\u2082)",
                "Carbonate radical (CO\u2083\u2022\u207b)",
                "Chloramines (NHCl\u2082, NHCl)",
                "Chlorine (Cl\u2082)",
                "Chlorine dioxide (ClO\u2082)",
                "Hydrogen peroxide (H\u2082O\u2082)",
                "Hydroxyl radical (\u2022OH)",
                "Hypobromous acid (HOBr)",
                "Hypochlorous acid (HOCl)",
                "Iodine (I\u2082)",
                "Lipid hydroperoxide (LOOH)",
                "Nitric oxide (\u2022NO)",
                "Nitrogen dioxide (\u2022NO\u2082)",
                "Nitrogen trioxide (N\u2082O\u2083)",
                "Nitrosoperoxycarbonate (ONOOCO\u2082\u207b)",
                "Organic peroxide (ROOR')",
                "Ozone (O\u2083)",
                "Peroxyl radical (ROO\u2022)",
                "Peroxynitrite (ONOO\u207b)",
                "Singlet oxygen (\u00b9O\u2082)",
                "Sulfur trioxide (SO\u2083)",
                "Superoxide (O\u2082\u2022\u207b)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": "[\n\"Hydrogen peroxide (H2O2)\" (1),\n\"Superoxide (O2\u2022-)\" (2),\n\"Hydroxyl radical (\u2022OH)\" (3),\n\"Perhydroxyl radical (HO2\u2022)\" (4),\n\"Singlet oxygen (1O2)\" (5),\n\"Ozone (O3)\" (6),\n\"Alkoxyl radical (RO\u2022)\" (7),\n\" Peroxyl radical (ROO\u2022)\" (8),\n\"Semiquinone radical (SQ\u2022)\" (9),\n\"Semiquinone radical anion (SQ\u2022-)\" (10),\n\"Quinone radical anion (Q\u2022-)\" (11),\n\"Quinone radical cation (Q\u2022+)\" (12),\n\"Hydroperoxyl radical (HOO\u2022)\" (13),\n\"Phenoxyl radical (PhO\u2022)\" (14),\n\"Phenoxyl radical anion (PhO\u2022-)\" (15),\n\"Phenoxyl radical cation (PhO\u2022+)\" (16),\n\"Chlorine dioxide (ClO2)\" (17),\n\"Chlorine radical (Cl\u2022)\" (18),\n\"Bromine radical (Br\u2022)\" (19),\n\"Iodine radical (I\u2022)\" (20),\n\"Hydrogen sulfide radical (HS\u2022)\" (21),\n\"Thiyl radical (RSH\u2022)\" (22),\n\"Thiyl radical anion (RSH\u2022-)\" (23),\n\"Thiyl radical cation (RSH\u2022+)\" (24)\n]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            "Superoxide (O2-)",
            "Peroxynitrite (ONOO-)",
            "Hydroxyl radical (OH)",
            "Hydrogen peroxide (H2O2)",
            "Singlet oxygen (1O2)",
            "Alkoxyl radical (RO)",
            "Peroxyl radical (ROO)",
            "Nitric oxide (NO)",
            "Nitrogen dioxide (NO2)",
            "Hypochlorous acid (HOCl)",
            "Hypobromous acid (HOBr)",
            "Chlorine gas (Cl2)",
            "Ozone (O3)",
            "Lipid peroxide (LOOH)",
            "Aldehyde (R-CHO)",
            "Ketone (R-CO-R)",
            "Tetrachloroethene oxide",
            "Dioxin",
            "Quinone",
            "Semiquinone",
            "Phenoxy radical",
            "Alkylperoxy radical",
            "Dialkylperoxide",
            "Tertiary alkylperoxy radical"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Superoxide (O2-)",
                "Peroxynitrite (ONOO-)",
                "Hydroxyl radical (OH)",
                "Hydrogen peroxide (H2O2)",
                "Singlet oxygen (1O2)",
                "Alkoxyl radical (RO)",
                "Peroxyl radical (ROO)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Hypochlorous acid (HOCl)",
                "Hypobromous acid (HOBr)",
                "Chlorine gas (Cl2)",
                "Ozone (O3)",
                "Lipid peroxide (LOOH)",
                "Aldehyde (R-CHO)",
                "Ketone (R-CO-R)",
                "Tetrachloroethene oxide",
                "Dioxin",
                "Quinone",
                "Semiquinone",
                "Phenoxy radical",
                "Alkylperoxy radical",
                "Dialkylperoxide",
                "Tertiary alkylperoxy radical"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldehyde (R-CHO)",
                "Alkoxyl radical (RO)",
                "Alkylperoxy radical",
                "Chlorine gas (Cl2)",
                "Dialkylperoxide",
                "Dioxin",
                "Hydrogen peroxide (H2O2)",
                "Hydroxyl radical (OH)",
                "Hypobromous acid (HOBr)",
                "Hypochlorous acid (HOCl)",
                "Ketone (R-CO-R)",
                "Lipid peroxide (LOOH)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Ozone (O3)",
                "Peroxyl radical (ROO)",
                "Peroxynitrite (ONOO-)",
                "Phenoxy radical",
                "Quinone",
                "Semiquinone",
                "Singlet oxygen (1O2)",
                "Superoxide (O2-)",
                "Tertiary alkylperoxy radical",
                "Tetrachloroethene oxide"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "reactive oxygen species",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "superoxide dismutase",
                "Abbreviated identifier": "SOD"
            },
            {
                "Referent": "catalase",
                "Abbreviated identifier": "CAT"
            },
            {
                "Referent": "glutathione peroxidase",
                "Abbreviated identifier": "GPx"
            },
            {
                "Referent": "thioredoxin reductase",
                "Abbreviated identifier": "TrxR"
            },
            {
                "Referent": "thioredoxin",
                "Abbreviated identifier": "Trx"
            },
            {
                "Referent": "peroxiredoxin",
                "Abbreviated identifier": "Prx"
            },
            {
                "Referent": "glutathione peroxidase",
                "Abbreviated identifier": "GPx"
            },
            {
                "Referent": "glutathione reductase",
                "Abbreviated identifier": "GR"
            },
            {
                "Referent": "thioredoxin reductase 2",
                "Abbreviated identifier": "TrxR2"
            },
            {
                "Referent": "glutathione peroxidase 4",
                "Abbreviated identifier": "GPx4"
            },
            {
                "Referent": "glutathione peroxidase 5",
                "Abbreviated identifier": "GPx5"
            },
            {
                "Referent": "glutathione peroxidase 6",
                "Abbreviated identifier": "GPx6"
            },
            {
                "Referent": "glutathione peroxidase 7",
                "Abbreviated identifier": "GPx7"
            },
            {
                "Referent": "glutathione peroxidase 8",
                "Abbreviated identifier": "GPx8"
            },
            {
                "Referent": "glutathione peroxidase 9",
                "Abbreviated identifier": "GPx9"
            },
            {
                "Referent": "glutathione peroxidase 10",
                "Abbreviated identifier": "GPx10"
            },
            {
                "Referent": "glutathione peroxidase 11",
                "Abbreviated identifier": "GPx11"
            },
            {
                "Referent": "glutathione peroxidase 12",
                "Abbreviated identifier": "GPx12"
            },
            {
                "Referent": "glutathione peroxidase 13",
                "Abbreviated identifier": "GPx13"
            },
            {
                "Referent": "glutathione peroxidase 14",
                "Abbreviated identifier": "GPx14"
            },
            {
                "Referent": "glutathione peroxidase 15",
                "Abbreviated identifier": "GPx15"
            },
            {
                "Referent": "glutathione peroxidase 16",
                "Abbreviated identifier": "GPx16"
            },
            {
                "Referent": "glutathione peroxidase 17",
                "Abbreviated identifier": "GPx17"
            },
            {
                "Referent": "glutathione peroxidase 18",
                "Abbreviated identifier": "GPx18"
            },
            {
                "Referent": "glutathione peroxidase 19",
                "Abbreviated identifier": "GPx19"
            },
            {
                "Referent": "glutathione peroxidase 20",
                "Abbreviated identifier": "GPx20"
            },
            {
                "Referent": "glutathione peroxidase 21",
                "Abbreviated identifier": "GPx21"
            },
            {
                "Referent": "glutathione peroxidase 22",
                "Abbreviated identifier": "GPx22"
            },
            {
                "Referent": "glutathione peroxidase 23",
                "Abbreviated identifier": "GPx23"
            },
            {
                "Referent": "glutathione peroxidase 24",
                "Abbreviated identifier": "GPx24"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "superoxide dismutase",
                "catalase",
                "glutathione peroxidase",
                "thioredoxin reductase",
                "thioredoxin",
                "peroxiredoxin",
                "glutathione reductase",
                "thioredoxin reductase 2",
                "glutathione peroxidase 4",
                "glutathione peroxidase 5",
                "glutathione peroxidase 6",
                "glutathione peroxidase 7",
                "glutathione peroxidase 8",
                "glutathione peroxidase 9",
                "glutathione peroxidase 10",
                "glutathione peroxidase 11",
                "glutathione peroxidase 12",
                "glutathione peroxidase 13",
                "glutathione peroxidase 14",
                "glutathione peroxidase 15",
                "glutathione peroxidase 16",
                "glutathione peroxidase 17",
                "glutathione peroxidase 18"
            ],
            "mismatches": [
                "glutathione peroxidase 19",
                "glutathione peroxidase 20",
                "glutathione peroxidase 21",
                "glutathione peroxidase 22",
                "glutathione peroxidase 23",
                "glutathione peroxidase 24"
            ],
            "true_referents": [
                "{\"Referent\": \"catalase\", \"Abbreviated identifier\": \"CAT\"}",
                "{\"Referent\": \"glutathione peroxidase 10\", \"Abbreviated identifier\": \"GPx10\"}",
                "{\"Referent\": \"glutathione peroxidase 11\", \"Abbreviated identifier\": \"GPx11\"}",
                "{\"Referent\": \"glutathione peroxidase 12\", \"Abbreviated identifier\": \"GPx12\"}",
                "{\"Referent\": \"glutathione peroxidase 13\", \"Abbreviated identifier\": \"GPx13\"}",
                "{\"Referent\": \"glutathione peroxidase 14\", \"Abbreviated identifier\": \"GPx14\"}",
                "{\"Referent\": \"glutathione peroxidase 15\", \"Abbreviated identifier\": \"GPx15\"}",
                "{\"Referent\": \"glutathione peroxidase 16\", \"Abbreviated identifier\": \"GPx16\"}",
                "{\"Referent\": \"glutathione peroxidase 17\", \"Abbreviated identifier\": \"GPx17\"}",
                "{\"Referent\": \"glutathione peroxidase 18\", \"Abbreviated identifier\": \"GPx18\"}",
                "{\"Referent\": \"glutathione peroxidase 4\", \"Abbreviated identifier\": \"GPx4\"}",
                "{\"Referent\": \"glutathione peroxidase 5\", \"Abbreviated identifier\": \"GPx5\"}",
                "{\"Referent\": \"glutathione peroxidase 6\", \"Abbreviated identifier\": \"GPx6\"}",
                "{\"Referent\": \"glutathione peroxidase 7\", \"Abbreviated identifier\": \"GPx7\"}",
                "{\"Referent\": \"glutathione peroxidase 8\", \"Abbreviated identifier\": \"GPx8\"}",
                "{\"Referent\": \"glutathione peroxidase 9\", \"Abbreviated identifier\": \"GPx9\"}",
                "{\"Referent\": \"glutathione peroxidase\", \"Abbreviated identifier\": \"GPx\"}",
                "{\"Referent\": \"glutathione reductase\", \"Abbreviated identifier\": \"GR\"}",
                "{\"Referent\": \"peroxiredoxin\", \"Abbreviated identifier\": \"Prx\"}",
                "{\"Referent\": \"superoxide dismutase\", \"Abbreviated identifier\": \"SOD\"}",
                "{\"Referent\": \"thioredoxin reductase 2\", \"Abbreviated identifier\": \"TrxR2\"}",
                "{\"Referent\": \"thioredoxin reductase\", \"Abbreviated identifier\": \"TrxR\"}",
                "{\"Referent\": \"thioredoxin\", \"Abbreviated identifier\": \"Trx\"}"
            ],
            "TP": 23,
            "FP": 6,
            "FN": 0
        }
    }
]