[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Photosynthesis (PS)",
            "Respiration (R)",
            "Combustion of fossil fuels (CF)",
            "Rust formation (RF)",
            "Corrosion of metals (CM)",
            "Browning of cut apples (BOCA)",
            "Ethanol fermentation (EF)",
            "Decomposition of organic matter (DOM)",
            "Synthesis of epoxides (SE)",
            "Nitric oxide production (NOP)",
            "Ozonolysis of alkenes (OA)",
            "Hydroxylation of aromatic compounds (HAC)",
            "Dissolution of hydrogen peroxide (DHP)",
            "Chlorination of organic compounds (COOC)",
            "Synthesis of vitamin C (SVC)",
            "Metabolism of ethanol (ME)",
            "Formation of peroxyacetyl nitrate (PAN)",
            "Synthesis of adrenaline (SA)",
            "Degradation of dyes (DD)",
            "Production of hydrogen peroxide (PHP)",
            "Synthesis of aspirin (SAI)",
            "Oxidation of cholesterol (OC)",
            "Formation of acrolein (FA)",
            "Synthesis of formaldehyde (SF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Photosynthesis (PS)",
                "Respiration (R)",
                "Combustion of fossil fuels (CF)",
                "Rust formation (RF)",
                "Corrosion of metals (CM)",
                "Browning of cut apples (BOCA)",
                "Ethanol fermentation (EF)",
                "Decomposition of organic matter (DOM)",
                "Synthesis of epoxides (SE)",
                "Nitric oxide production (NOP)",
                "Ozonolysis of alkenes (OA)",
                "Hydroxylation of aromatic compounds (HAC)",
                "Dissolution of hydrogen peroxide (DHP)",
                "Chlorination of organic compounds (COOC)",
                "Synthesis of vitamin C (SVC)",
                "Metabolism of ethanol (ME)",
                "Formation of peroxyacetyl nitrate (PAN)",
                "Synthesis of adrenaline (SA)",
                "Degradation of dyes (DD)",
                "Production of hydrogen peroxide (PHP)",
                "Synthesis of aspirin (SAI)",
                "Oxidation of cholesterol (OC)",
                "Formation of acrolein (FA)",
                "Synthesis of formaldehyde (SF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Browning of cut apples (BOCA)",
                "Chlorination of organic compounds (COOC)",
                "Combustion of fossil fuels (CF)",
                "Corrosion of metals (CM)",
                "Decomposition of organic matter (DOM)",
                "Degradation of dyes (DD)",
                "Dissolution of hydrogen peroxide (DHP)",
                "Ethanol fermentation (EF)",
                "Formation of acrolein (FA)",
                "Formation of peroxyacetyl nitrate (PAN)",
                "Hydroxylation of aromatic compounds (HAC)",
                "Metabolism of ethanol (ME)",
                "Nitric oxide production (NOP)",
                "Oxidation of cholesterol (OC)",
                "Ozonolysis of alkenes (OA)",
                "Photosynthesis (PS)",
                "Production of hydrogen peroxide (PHP)",
                "Respiration (R)",
                "Rust formation (RF)",
                "Synthesis of adrenaline (SA)",
                "Synthesis of aspirin (SAI)",
                "Synthesis of epoxides (SE)",
                "Synthesis of formaldehyde (SF)",
                "Synthesis of vitamin C (SVC)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Cellular respiration (Catabolism)",
            "Oxidative phosphorylation",
            "Krebs cycle (Citric Acid Cycle)",
            "Beta-oxidation of fatty acids",
            "Oxidative stress",
            "Lipid peroxidation",
            "Glucose oxidation in glycolysis",
            "Oxidation of ethanol to acetaldehyde",
            "Oxidation of methanol to formaldehyde",
            "Oxidative decarboxylation of pyruvate",
            "Oxidation of isocitrate to alpha-ketoglutarate",
            "Oxidation of alpha-ketoglutarate to succinyl-CoA",
            "Oxidation of succinate to fumarate",
            "Oxidation of malate to oxaloacetate",
            "Oxidation of NADH to NAD+",
            "Oxidation of FADH2 to FAD",
            "Oxidative deamination of amino acids",
            "Oxidative phosphorylation in mitochondria",
            "Oxidative burst in phagocytes",
            "Oxidative cleavage of alkenes",
            "Oxidation of alcohols to aldehydes/ketones",
            "Oxidation of aldehydes to carboxylic acids",
            "Oxidation of sulfides to sulfoxides",
            "Oxidation of sulfides to sulfones",
            "Oxidation of thiols to disulfides"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cellular respiration (Catabolism)",
                "Oxidative phosphorylation",
                "Krebs cycle (Citric Acid Cycle)",
                "Beta-oxidation of fatty acids",
                "Oxidative stress",
                "Lipid peroxidation",
                "Glucose oxidation in glycolysis",
                "Oxidation of ethanol to acetaldehyde",
                "Oxidation of methanol to formaldehyde",
                "Oxidative decarboxylation of pyruvate",
                "Oxidation of isocitrate to alpha-ketoglutarate",
                "Oxidation of alpha-ketoglutarate to succinyl-CoA",
                "Oxidation of succinate to fumarate",
                "Oxidation of malate to oxaloacetate",
                "Oxidation of NADH to NAD+",
                "Oxidation of FADH2 to FAD",
                "Oxidative deamination of amino acids",
                "Oxidative phosphorylation in mitochondria",
                "Oxidative burst in phagocytes",
                "Oxidative cleavage of alkenes",
                "Oxidation of alcohols to aldehydes/ketones",
                "Oxidation of aldehydes to carboxylic acids",
                "Oxidation of sulfides to sulfoxides",
                "Oxidation of sulfides to sulfones"
            ],
            "mismatches": [
                "Oxidation of thiols to disulfides"
            ],
            "true_referents": [
                "Beta-oxidation of fatty acids",
                "Cellular respiration (Catabolism)",
                "Glucose oxidation in glycolysis",
                "Krebs cycle (Citric Acid Cycle)",
                "Lipid peroxidation",
                "Oxidation of FADH2 to FAD",
                "Oxidation of NADH to NAD+",
                "Oxidation of alcohols to aldehydes/ketones",
                "Oxidation of aldehydes to carboxylic acids",
                "Oxidation of alpha-ketoglutarate to succinyl-CoA",
                "Oxidation of ethanol to acetaldehyde",
                "Oxidation of isocitrate to alpha-ketoglutarate",
                "Oxidation of malate to oxaloacetate",
                "Oxidation of methanol to formaldehyde",
                "Oxidation of succinate to fumarate",
                "Oxidation of sulfides to sulfones",
                "Oxidation of sulfides to sulfoxides",
                "Oxidative burst in phagocytes",
                "Oxidative cleavage of alkenes",
                "Oxidative deamination of amino acids",
                "Oxidative decarboxylation of pyruvate",
                "Oxidative phosphorylation",
                "Oxidative phosphorylation in mitochondria",
                "Oxidative stress"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion of Methane (CH4 + 2O2 \u2192 CO2 + 2H2O)",
            "Rusting of Iron (4Fe + 3O2 \u2192 2Fe2O3)",
            "Oxidation of Glucose (C6H12O6 + 6O2 \u2192 6CO2 + 6H2O)",
            "Oxidation of Alcohol to Aldehyde or Ketone (RCH2OH + [O] \u2192 RCHO + H2O)",
            "Oxidation of Alcohol to Carboxylic Acid (RCH2OH + [O] \u2192 RCOOH + H2O)",
            "Oxidation of Sulfur to Sulfur Dioxide (S + O2 \u2192 SO2)",
            "Oxidation of Sulfur Dioxide to Sulfur Trioxide (SO2 + 1/2O2 \u2192 SO3)",
            "Oxidation of Ammonia to Nitric Acid (4NH3 + 5O2 \u2192 4NO + 6H2O)",
            "Oxidation of Carbon Monoxide to Carbon Dioxide (2CO + O2 \u2192 2CO2)",
            "Oxidation of Hydrogen Peroxide to Oxygen and Water (2H2O2 \u2192 2H2O + O2)",
            "Oxidation of Ascorbic Acid (Vitamin C) (C6H8O6 \u2192 C6H6O6 + 2H+ + 2e-)",
            "Oxidation of Lactic Acid to Pyruvic Acid (C3H6O3 \u2192 C3H4O3 + 2H+ + 2e-)",
            "Oxidation of Ethanol to Acetaldehyde (C2H5OH + [O] \u2192 CH3CHO + H2O)",
            "Oxidation of Ethanol to Acetic Acid (C2H5OH + [O] \u2192 CH3COOH + H2O)",
            "Oxidation of Methanol to Formaldehyde (CH3OH + [O] \u2192 HCHO + H2O)",
            "Oxidation of Methanol to Formic Acid (CH3OH + [O] \u2192 HCOOH + H2O)",
            "Oxidation of Butane to Butanol (C4H10 + [O] \u2192 C4H9OH)",
            "Oxidation of Butanol to Butanal (C4H9OH + [O] \u2192 C4H8O)",
            "Oxidation of Butanal to Butanoic Acid (C4H8O + [O] \u2192 C4H8O2)",
            "Oxidation of Propene to Propene Oxide (C3H6 + [O] \u2192 C3H6O)",
            "Oxidation of Propene Oxide to Propionaldehyde (C3H6O + [O] \u2192 C3H6O2)",
            "Oxidation of Propionaldehyde to Propanoic Acid (C3H6O2 + [O] \u2192 C3H6O3)",
            "Oxidation of Ethene to Ethene Oxide (C2H4 + [O] \u2192 C2H4O)",
            "Oxidation of Ethene Oxide to Ethanal (C2H4O + [O] \u2192 C2H4O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion of Methane (CH4 + 2O2 \u2192 CO2 + 2H2O)",
                "Rusting of Iron (4Fe + 3O2 \u2192 2Fe2O3)",
                "Oxidation of Glucose (C6H12O6 + 6O2 \u2192 6CO2 + 6H2O)",
                "Oxidation of Alcohol to Aldehyde or Ketone (RCH2OH + [O] \u2192 RCHO + H2O)",
                "Oxidation of Alcohol to Carboxylic Acid (RCH2OH + [O] \u2192 RCOOH + H2O)",
                "Oxidation of Sulfur to Sulfur Dioxide (S + O2 \u2192 SO2)",
                "Oxidation of Sulfur Dioxide to Sulfur Trioxide (SO2 + 1/2O2 \u2192 SO3)",
                "Oxidation of Ammonia to Nitric Acid (4NH3 + 5O2 \u2192 4NO + 6H2O)",
                "Oxidation of Carbon Monoxide to Carbon Dioxide (2CO + O2 \u2192 2CO2)",
                "Oxidation of Hydrogen Peroxide to Oxygen and Water (2H2O2 \u2192 2H2O + O2)",
                "Oxidation of Ascorbic Acid (Vitamin C) (C6H8O6 \u2192 C6H6O6 + 2H+ + 2e-)",
                "Oxidation of Lactic Acid to Pyruvic Acid (C3H6O3 \u2192 C3H4O3 + 2H+ + 2e-)",
                "Oxidation of Ethanol to Acetaldehyde (C2H5OH + [O] \u2192 CH3CHO + H2O)",
                "Oxidation of Ethanol to Acetic Acid (C2H5OH + [O] \u2192 CH3COOH + H2O)",
                "Oxidation of Methanol to Formaldehyde (CH3OH + [O] \u2192 HCHO + H2O)",
                "Oxidation of Methanol to Formic Acid (CH3OH + [O] \u2192 HCOOH + H2O)",
                "Oxidation of Butane to Butanol (C4H10 + [O] \u2192 C4H9OH)",
                "Oxidation of Butanol to Butanal (C4H9OH + [O] \u2192 C4H8O)",
                "Oxidation of Butanal to Butanoic Acid (C4H8O + [O] \u2192 C4H8O2)",
                "Oxidation of Propene to Propene Oxide (C3H6 + [O] \u2192 C3H6O)",
                "Oxidation of Propene Oxide to Propionaldehyde (C3H6O + [O] \u2192 C3H6O2)",
                "Oxidation of Propionaldehyde to Propanoic Acid (C3H6O2 + [O] \u2192 C3H6O3)",
                "Oxidation of Ethene to Ethene Oxide (C2H4 + [O] \u2192 C2H4O)",
                "Oxidation of Ethene Oxide to Ethanal (C2H4O + [O] \u2192 C2H4O2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Combustion of Methane (CH4 + 2O2 \u2192 CO2 + 2H2O)",
                "Oxidation of Alcohol to Aldehyde or Ketone (RCH2OH + [O] \u2192 RCHO + H2O)",
                "Oxidation of Alcohol to Carboxylic Acid (RCH2OH + [O] \u2192 RCOOH + H2O)",
                "Oxidation of Ammonia to Nitric Acid (4NH3 + 5O2 \u2192 4NO + 6H2O)",
                "Oxidation of Ascorbic Acid (Vitamin C) (C6H8O6 \u2192 C6H6O6 + 2H+ + 2e-)",
                "Oxidation of Butanal to Butanoic Acid (C4H8O + [O] \u2192 C4H8O2)",
                "Oxidation of Butane to Butanol (C4H10 + [O] \u2192 C4H9OH)",
                "Oxidation of Butanol to Butanal (C4H9OH + [O] \u2192 C4H8O)",
                "Oxidation of Carbon Monoxide to Carbon Dioxide (2CO + O2 \u2192 2CO2)",
                "Oxidation of Ethanol to Acetaldehyde (C2H5OH + [O] \u2192 CH3CHO + H2O)",
                "Oxidation of Ethanol to Acetic Acid (C2H5OH + [O] \u2192 CH3COOH + H2O)",
                "Oxidation of Ethene Oxide to Ethanal (C2H4O + [O] \u2192 C2H4O2)",
                "Oxidation of Ethene to Ethene Oxide (C2H4 + [O] \u2192 C2H4O)",
                "Oxidation of Glucose (C6H12O6 + 6O2 \u2192 6CO2 + 6H2O)",
                "Oxidation of Hydrogen Peroxide to Oxygen and Water (2H2O2 \u2192 2H2O + O2)",
                "Oxidation of Lactic Acid to Pyruvic Acid (C3H6O3 \u2192 C3H4O3 + 2H+ + 2e-)",
                "Oxidation of Methanol to Formaldehyde (CH3OH + [O] \u2192 HCHO + H2O)",
                "Oxidation of Methanol to Formic Acid (CH3OH + [O] \u2192 HCOOH + H2O)",
                "Oxidation of Propene Oxide to Propionaldehyde (C3H6O + [O] \u2192 C3H6O2)",
                "Oxidation of Propene to Propene Oxide (C3H6 + [O] \u2192 C3H6O)",
                "Oxidation of Propionaldehyde to Propanoic Acid (C3H6O2 + [O] \u2192 C3H6O3)",
                "Oxidation of Sulfur Dioxide to Sulfur Trioxide (SO2 + 1/2O2 \u2192 SO3)",
                "Oxidation of Sulfur to Sulfur Dioxide (S + O2 \u2192 SO2)",
                "Rusting of Iron (4Fe + 3O2 \u2192 2Fe2O3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Cellular Respiration (Glycolysis, Krebs Cycle, ETC)",
            "Hemoglobin Oxidation (Hb -> MetHb)",
            "Lipid Peroxidation (LPO)",
            "Glucose Oxidation (C6H12O6 + 6O2 -> 6CO2 + 6H2O)",
            "Iron Rusting (Fe -> Fe2O3)",
            "Combustion (e.g., burning wood)",
            "Bleaching with Hydrogen Peroxide (H2O2)",
            "Wine Aging (Ethanol oxidation)",
            "Fruit Browning (Polyphenol oxidase)",
            "Formation of Free Radicals (ROS)",
            "Mitochondrial Dysfunction (increased ROS)",
            "Myeloperoxidase Activity (Neutrophil defense)",
            "Dopamine to DOPAC conversion (Monoamine Oxidase)",
            "GSH Oxidation (Glutathione)",
            "Cholesterol Oxidation (Cholesterol -> Cholestane)",
            "Aldehyde Dehydrogenase Activity (Ethanol metabolism)",
            "Xanthine Oxidase Activity (Uric acid formation)",
            "Cytochrome P450 Metabolism (Drug metabolism)",
            "Autooxidation of Unsaturated Fats",
            "Copper Oxidation (Cu -> CuO)",
            "Silver Tarnish (Ag -> Ag2S)",
            "Oxidative Stress in Neurodegenerative Diseases",
            "Photooxidation of Chlorophyll (Plant senescence)",
            "Lignin Degradation (in wood)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cellular Respiration (Glycolysis, Krebs Cycle, ETC)",
                "Hemoglobin Oxidation (Hb -> MetHb)",
                "Lipid Peroxidation (LPO)",
                "Glucose Oxidation (C6H12O6 + 6O2 -> 6CO2 + 6H2O)",
                "Iron Rusting (Fe -> Fe2O3)",
                "Combustion (e.g., burning wood)",
                "Bleaching with Hydrogen Peroxide (H2O2)",
                "Wine Aging (Ethanol oxidation)",
                "Fruit Browning (Polyphenol oxidase)",
                "Formation of Free Radicals (ROS)",
                "Mitochondrial Dysfunction (increased ROS)",
                "Myeloperoxidase Activity (Neutrophil defense)",
                "Dopamine to DOPAC conversion (Monoamine Oxidase)",
                "GSH Oxidation (Glutathione)",
                "Cholesterol Oxidation (Cholesterol -> Cholestane)",
                "Aldehyde Dehydrogenase Activity (Ethanol metabolism)",
                "Xanthine Oxidase Activity (Uric acid formation)",
                "Cytochrome P450 Metabolism (Drug metabolism)",
                "Autooxidation of Unsaturated Fats",
                "Copper Oxidation (Cu -> CuO)",
                "Silver Tarnish (Ag -> Ag2S)",
                "Oxidative Stress in Neurodegenerative Diseases",
                "Photooxidation of Chlorophyll (Plant senescence)",
                "Lignin Degradation (in wood)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldehyde Dehydrogenase Activity (Ethanol metabolism)",
                "Autooxidation of Unsaturated Fats",
                "Bleaching with Hydrogen Peroxide (H2O2)",
                "Cellular Respiration (Glycolysis, Krebs Cycle, ETC)",
                "Cholesterol Oxidation (Cholesterol -> Cholestane)",
                "Combustion (e.g., burning wood)",
                "Copper Oxidation (Cu -> CuO)",
                "Cytochrome P450 Metabolism (Drug metabolism)",
                "Dopamine to DOPAC conversion (Monoamine Oxidase)",
                "Formation of Free Radicals (ROS)",
                "Fruit Browning (Polyphenol oxidase)",
                "GSH Oxidation (Glutathione)",
                "Glucose Oxidation (C6H12O6 + 6O2 -> 6CO2 + 6H2O)",
                "Hemoglobin Oxidation (Hb -> MetHb)",
                "Iron Rusting (Fe -> Fe2O3)",
                "Lignin Degradation (in wood)",
                "Lipid Peroxidation (LPO)",
                "Mitochondrial Dysfunction (increased ROS)",
                "Myeloperoxidase Activity (Neutrophil defense)",
                "Oxidative Stress in Neurodegenerative Diseases",
                "Photooxidation of Chlorophyll (Plant senescence)",
                "Silver Tarnish (Ag -> Ag2S)",
                "Wine Aging (Ethanol oxidation)",
                "Xanthine Oxidase Activity (Uric acid formation)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion of hydrocarbons",
            "Rusting of iron",
            "Metabolism of glucose",
            "Formation of disulfide bonds",
            "Oxidation of alcohols to aldehydes",
            "Oxidation of aldehydes to carboxylic acids",
            "Oxidative phosphorylation",
            "Photosynthesis light reactions",
            "Ozone formation",
            "Bleaching with hydrogen peroxide",
            "Oxidation of NADH to NAD+",
            "Oxidation of FADH2 to FAD",
            "Respiratory burst in phagocytes",
            "Oxidation of sulfides to sulfoxides",
            "Oxidation of ammonia to nitrate",
            "Oxidation of thiols to disulfides",
            "Oxidation of ascorbic acid",
            "Oxidation of cholesterol",
            "Oxidation of lignin",
            "Oxidation of catecholamines",
            "Krebs cycle reactions",
            "Oxidation of fatty acids",
            "Oxidation of phenols",
            "Oxidation of primary amines to nitro compounds"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion of hydrocarbons",
                "Rusting of iron",
                "Metabolism of glucose",
                "Formation of disulfide bonds",
                "Oxidation of alcohols to aldehydes",
                "Oxidation of aldehydes to carboxylic acids",
                "Oxidative phosphorylation",
                "Photosynthesis light reactions",
                "Ozone formation",
                "Bleaching with hydrogen peroxide",
                "Oxidation of NADH to NAD+",
                "Oxidation of FADH2 to FAD",
                "Respiratory burst in phagocytes",
                "Oxidation of sulfides to sulfoxides",
                "Oxidation of ammonia to nitrate",
                "Oxidation of thiols to disulfides",
                "Oxidation of ascorbic acid",
                "Oxidation of cholesterol",
                "Oxidation of lignin",
                "Oxidation of catecholamines",
                "Krebs cycle reactions",
                "Oxidation of fatty acids",
                "Oxidation of phenols",
                "Oxidation of primary amines to nitro compounds"
            ],
            "mismatches": [],
            "true_referents": [
                "Bleaching with hydrogen peroxide",
                "Combustion of hydrocarbons",
                "Formation of disulfide bonds",
                "Krebs cycle reactions",
                "Metabolism of glucose",
                "Oxidation of FADH2 to FAD",
                "Oxidation of NADH to NAD+",
                "Oxidation of alcohols to aldehydes",
                "Oxidation of aldehydes to carboxylic acids",
                "Oxidation of ammonia to nitrate",
                "Oxidation of ascorbic acid",
                "Oxidation of catecholamines",
                "Oxidation of cholesterol",
                "Oxidation of fatty acids",
                "Oxidation of lignin",
                "Oxidation of phenols",
                "Oxidation of primary amines to nitro compounds",
                "Oxidation of sulfides to sulfoxides",
                "Oxidation of thiols to disulfides",
                "Oxidative phosphorylation",
                "Ozone formation",
                "Photosynthesis light reactions",
                "Respiratory burst in phagocytes",
                "Rusting of iron"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion of Hydrocarbons (CHC)",
            "Rusting of Iron (Fe-Ox)",
            "Oxidation of Glucose in Cellular Respiration (C6H12O6-CR)",
            "Hydrogen Peroxide Decomposition (H2O2-D)",
            "Oxidation of Ethanol to Acetaldehyde (C2H5OH-Ox)",
            "Oxidation of Fatty Acids in Beta-Oxidation (FA-Ox)",
            "Photosynthesis: Water Oxidation (H2O-PS)",
            "Oxidation of Lactic Acid to Pyruvate (C3H6O3-Ox)",
            "Oxidation of Sulfur Dioxide to Sulfur Trioxide (SO2-Ox)",
            "Oxidation of Ammonia to Nitrite (NH3-Ox)",
            "Oxidation of Glutamate to Alpha-Ketoglutarate (C5H9NO4-Ox)",
            "Oxidation of Cholesterol to Bile Acids (C27H46O-Ox)",
            "Oxidation of Ascorbic Acid to Dehydroascorbic Acid (C6H8O6-Ox)",
            "Oxidation of Uric Acid to Allantoin (C5H4N4O3-Ox)",
            "Oxidation of Aldehydes to Carboxylic Acids (RCHO-Ox)",
            "Oxidation of Methane to Methanol (CH4-Ox)",
            "Oxidation of Ferrous Iron to Ferric Iron (Fe2+-Fe3+)",
            "Oxidation of Organic Compounds in Ozone Layer (O3-Ox)",
            "Oxidation of Phenols to Quinones (C6H5OH-Ox)",
            "Oxidation of Glutathione to Oxidized Glutathione (GSH-Ox)",
            "Oxidation of Dihydroxyacetone Phosphate to Glyceraldehyde-3-Phosphate (DHAP-Ox)",
            "Oxidation of Hydroquinone to Benzoquinone (C6H4O2-Ox)",
            "Oxidation of Sucrose to Invert Sugar (C12H22O11-Ox)",
            "Oxidation of Acetone to Acetic Acid (C3H6O-Ox)",
            "Oxidation of 2-Propanol to Acetone (C3H8O-Ox)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion of Hydrocarbons (CHC)",
                "Rusting of Iron (Fe-Ox)",
                "Oxidation of Glucose in Cellular Respiration (C6H12O6-CR)",
                "Hydrogen Peroxide Decomposition (H2O2-D)",
                "Oxidation of Ethanol to Acetaldehyde (C2H5OH-Ox)",
                "Oxidation of Fatty Acids in Beta-Oxidation (FA-Ox)",
                "Photosynthesis: Water Oxidation (H2O-PS)",
                "Oxidation of Lactic Acid to Pyruvate (C3H6O3-Ox)",
                "Oxidation of Sulfur Dioxide to Sulfur Trioxide (SO2-Ox)",
                "Oxidation of Ammonia to Nitrite (NH3-Ox)",
                "Oxidation of Glutamate to Alpha-Ketoglutarate (C5H9NO4-Ox)",
                "Oxidation of Cholesterol to Bile Acids (C27H46O-Ox)",
                "Oxidation of Ascorbic Acid to Dehydroascorbic Acid (C6H8O6-Ox)",
                "Oxidation of Uric Acid to Allantoin (C5H4N4O3-Ox)",
                "Oxidation of Aldehydes to Carboxylic Acids (RCHO-Ox)",
                "Oxidation of Methane to Methanol (CH4-Ox)",
                "Oxidation of Ferrous Iron to Ferric Iron (Fe2+-Fe3+)",
                "Oxidation of Organic Compounds in Ozone Layer (O3-Ox)",
                "Oxidation of Phenols to Quinones (C6H5OH-Ox)",
                "Oxidation of Glutathione to Oxidized Glutathione (GSH-Ox)",
                "Oxidation of Dihydroxyacetone Phosphate to Glyceraldehyde-3-Phosphate (DHAP-Ox)",
                "Oxidation of Hydroquinone to Benzoquinone (C6H4O2-Ox)",
                "Oxidation of Sucrose to Invert Sugar (C12H22O11-Ox)",
                "Oxidation of Acetone to Acetic Acid (C3H6O-Ox)"
            ],
            "mismatches": [
                "Oxidation of 2-Propanol to Acetone (C3H8O-Ox)"
            ],
            "true_referents": [
                "Combustion of Hydrocarbons (CHC)",
                "Hydrogen Peroxide Decomposition (H2O2-D)",
                "Oxidation of Acetone to Acetic Acid (C3H6O-Ox)",
                "Oxidation of Aldehydes to Carboxylic Acids (RCHO-Ox)",
                "Oxidation of Ammonia to Nitrite (NH3-Ox)",
                "Oxidation of Ascorbic Acid to Dehydroascorbic Acid (C6H8O6-Ox)",
                "Oxidation of Cholesterol to Bile Acids (C27H46O-Ox)",
                "Oxidation of Dihydroxyacetone Phosphate to Glyceraldehyde-3-Phosphate (DHAP-Ox)",
                "Oxidation of Ethanol to Acetaldehyde (C2H5OH-Ox)",
                "Oxidation of Fatty Acids in Beta-Oxidation (FA-Ox)",
                "Oxidation of Ferrous Iron to Ferric Iron (Fe2+-Fe3+)",
                "Oxidation of Glucose in Cellular Respiration (C6H12O6-CR)",
                "Oxidation of Glutamate to Alpha-Ketoglutarate (C5H9NO4-Ox)",
                "Oxidation of Glutathione to Oxidized Glutathione (GSH-Ox)",
                "Oxidation of Hydroquinone to Benzoquinone (C6H4O2-Ox)",
                "Oxidation of Lactic Acid to Pyruvate (C3H6O3-Ox)",
                "Oxidation of Methane to Methanol (CH4-Ox)",
                "Oxidation of Organic Compounds in Ozone Layer (O3-Ox)",
                "Oxidation of Phenols to Quinones (C6H5OH-Ox)",
                "Oxidation of Sucrose to Invert Sugar (C12H22O11-Ox)",
                "Oxidation of Sulfur Dioxide to Sulfur Trioxide (SO2-Ox)",
                "Oxidation of Uric Acid to Allantoin (C5H4N4O3-Ox)",
                "Photosynthesis: Water Oxidation (H2O-PS)",
                "Rusting of Iron (Fe-Ox)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion of methane (CH4)",
            "Rusting of iron (Fe)",
            "Oxidation of hydrogen peroxide (H2O2)",
            "Oxidation of glucose in cellular respiration",
            "Oxidation of ethanol (C2H5OH) in the liver",
            "Bleaching of hair with hydrogen peroxide (H2O2)",
            "Oxidation of sodium (Na) in air",
            "Oxidation of ascorbic acid (vitamin C)",
            "Oxidation of NADH in electron transport chain",
            "Oxidation of sulfite to sulfate in wine",
            "Oxidation of nitrogen (N2) to nitrate in the nitrogen cycle",
            "Oxidation of pyruvate in Krebs cycle",
            "Oxidation of fatty acids in beta-oxidation",
            "Oxidation of ammonia (NH3) in the Ostwald process",
            "Oxidation of carbon monoxide (CO) to carbon dioxide (CO2)",
            "Oxidation of sulfur dioxide (SO2) in the production of sulfuric acid",
            "Oxidation of water (H2O) in photosynthesis",
            "Oxidation of hydrogen sulfide (H2S) in the Claus process",
            "Oxidation of benzene (C6H6) to phenol",
            "Oxidation of acetone (C3H6O) to acetic acid",
            "Oxidation of propene (C3H6) to acrylic acid",
            "Oxidation of ethylene (C2H4) to ethylene oxide",
            "Oxidation of iodide (I-) to iodine (I2) in the iodine clock reaction",
            "Oxidation of hydroquinone (C6H4(OH)2) to benzoquinone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion of methane (CH4)",
                "Rusting of iron (Fe)",
                "Oxidation of hydrogen peroxide (H2O2)",
                "Oxidation of glucose in cellular respiration",
                "Oxidation of ethanol (C2H5OH) in the liver",
                "Bleaching of hair with hydrogen peroxide (H2O2)",
                "Oxidation of sodium (Na) in air",
                "Oxidation of ascorbic acid (vitamin C)",
                "Oxidation of NADH in electron transport chain",
                "Oxidation of sulfite to sulfate in wine",
                "Oxidation of nitrogen (N2) to nitrate in the nitrogen cycle",
                "Oxidation of pyruvate in Krebs cycle",
                "Oxidation of fatty acids in beta-oxidation",
                "Oxidation of ammonia (NH3) in the Ostwald process",
                "Oxidation of carbon monoxide (CO) to carbon dioxide (CO2)",
                "Oxidation of sulfur dioxide (SO2) in the production of sulfuric acid",
                "Oxidation of water (H2O) in photosynthesis",
                "Oxidation of hydrogen sulfide (H2S) in the Claus process",
                "Oxidation of benzene (C6H6) to phenol",
                "Oxidation of acetone (C3H6O) to acetic acid",
                "Oxidation of propene (C3H6) to acrylic acid",
                "Oxidation of ethylene (C2H4) to ethylene oxide",
                "Oxidation of iodide (I-) to iodine (I2) in the iodine clock reaction",
                "Oxidation of hydroquinone (C6H4(OH)2) to benzoquinone"
            ],
            "mismatches": [],
            "true_referents": [
                "Bleaching of hair with hydrogen peroxide (H2O2)",
                "Combustion of methane (CH4)",
                "Oxidation of NADH in electron transport chain",
                "Oxidation of acetone (C3H6O) to acetic acid",
                "Oxidation of ammonia (NH3) in the Ostwald process",
                "Oxidation of ascorbic acid (vitamin C)",
                "Oxidation of benzene (C6H6) to phenol",
                "Oxidation of carbon monoxide (CO) to carbon dioxide (CO2)",
                "Oxidation of ethanol (C2H5OH) in the liver",
                "Oxidation of ethylene (C2H4) to ethylene oxide",
                "Oxidation of fatty acids in beta-oxidation",
                "Oxidation of glucose in cellular respiration",
                "Oxidation of hydrogen peroxide (H2O2)",
                "Oxidation of hydrogen sulfide (H2S) in the Claus process",
                "Oxidation of hydroquinone (C6H4(OH)2) to benzoquinone",
                "Oxidation of iodide (I-) to iodine (I2) in the iodine clock reaction",
                "Oxidation of nitrogen (N2) to nitrate in the nitrogen cycle",
                "Oxidation of propene (C3H6) to acrylic acid",
                "Oxidation of pyruvate in Krebs cycle",
                "Oxidation of sodium (Na) in air",
                "Oxidation of sulfite to sulfate in wine",
                "Oxidation of sulfur dioxide (SO2) in the production of sulfuric acid",
                "Oxidation of water (H2O) in photosynthesis",
                "Rusting of iron (Fe)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion of gasoline (combustion_gasoline)",
            "Rusting of iron (rusting_iron)",
            "Browning of an apple (browning_apple)",
            "Oxidation of glucose in cellular respiration (oxidation_glucose)",
            "Oxidation of fats in the body (oxidation_fats)",
            "Oxidation of alcohol in the liver (oxidation_alcohol)",
            "Oxidation of sulfur to form sulfur dioxide (oxidation_sulfur)",
            "Oxidation of copper to form copper oxide (oxidation_copper)",
            "Oxidation of magnesium to form magnesium oxide (oxidation_magnesium)",
            "Oxidation of hydrogen peroxide to form water and oxygen (oxidation_hydrogen_peroxide)",
            "Oxidation of ethanol to form acetic acid (oxidation_ethanol)",
            "Oxidation of carbon monoxide to form carbon dioxide (oxidation_carbon_monoxide)",
            "Oxidation of methane to form carbon dioxide and water (oxidation_methane)",
            "Oxidation of ethylene to form ethylene oxide (oxidation_ethylene)",
            "Oxidation of ammonia to form nitrogen dioxide (oxidation_ammonia)",
            "Oxidation of sulfur dioxide to form sulfur trioxide (oxidation_sulfur_dioxide)",
            "Oxidation of benzene to form phenol (oxidation_benzene)",
            "Oxidation of aldehydes to form carboxylic acids (oxidation_aldehydes)",
            "Oxidation of primary alcohols to form aldehydes (oxidation_primary_alcohols)",
            "Oxidation of secondary alcohols to form ketones (oxidation_secondary_alcohols)",
            "Oxidation of tertiary alcohols to form ketones (oxidation_tertiary_alcohols)",
            "Oxidation of alkenes to form diols (oxidation_alkenes)",
            "Oxidation of aldehydes to form carboxylic acids (oxidation_aldehydes)",
            "Oxidation of primary amines to form nitro compounds (oxidation_primary_amines)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion of gasoline (combustion_gasoline)",
                "Rusting of iron (rusting_iron)",
                "Browning of an apple (browning_apple)",
                "Oxidation of glucose in cellular respiration (oxidation_glucose)",
                "Oxidation of fats in the body (oxidation_fats)",
                "Oxidation of alcohol in the liver (oxidation_alcohol)",
                "Oxidation of sulfur to form sulfur dioxide (oxidation_sulfur)",
                "Oxidation of copper to form copper oxide (oxidation_copper)",
                "Oxidation of magnesium to form magnesium oxide (oxidation_magnesium)",
                "Oxidation of hydrogen peroxide to form water and oxygen (oxidation_hydrogen_peroxide)",
                "Oxidation of ethanol to form acetic acid (oxidation_ethanol)",
                "Oxidation of carbon monoxide to form carbon dioxide (oxidation_carbon_monoxide)",
                "Oxidation of methane to form carbon dioxide and water (oxidation_methane)",
                "Oxidation of ethylene to form ethylene oxide (oxidation_ethylene)",
                "Oxidation of ammonia to form nitrogen dioxide (oxidation_ammonia)",
                "Oxidation of sulfur dioxide to form sulfur trioxide (oxidation_sulfur_dioxide)",
                "Oxidation of benzene to form phenol (oxidation_benzene)",
                "Oxidation of aldehydes to form carboxylic acids (oxidation_aldehydes)",
                "Oxidation of primary alcohols to form aldehydes (oxidation_primary_alcohols)",
                "Oxidation of secondary alcohols to form ketones (oxidation_secondary_alcohols)",
                "Oxidation of tertiary alcohols to form ketones (oxidation_tertiary_alcohols)",
                "Oxidation of alkenes to form diols (oxidation_alkenes)",
                "Oxidation of primary amines to form nitro compounds (oxidation_primary_amines)"
            ],
            "mismatches": [],
            "true_referents": [
                "Browning of an apple (browning_apple)",
                "Combustion of gasoline (combustion_gasoline)",
                "Oxidation of alcohol in the liver (oxidation_alcohol)",
                "Oxidation of aldehydes to form carboxylic acids (oxidation_aldehydes)",
                "Oxidation of alkenes to form diols (oxidation_alkenes)",
                "Oxidation of ammonia to form nitrogen dioxide (oxidation_ammonia)",
                "Oxidation of benzene to form phenol (oxidation_benzene)",
                "Oxidation of carbon monoxide to form carbon dioxide (oxidation_carbon_monoxide)",
                "Oxidation of copper to form copper oxide (oxidation_copper)",
                "Oxidation of ethanol to form acetic acid (oxidation_ethanol)",
                "Oxidation of ethylene to form ethylene oxide (oxidation_ethylene)",
                "Oxidation of fats in the body (oxidation_fats)",
                "Oxidation of glucose in cellular respiration (oxidation_glucose)",
                "Oxidation of hydrogen peroxide to form water and oxygen (oxidation_hydrogen_peroxide)",
                "Oxidation of magnesium to form magnesium oxide (oxidation_magnesium)",
                "Oxidation of methane to form carbon dioxide and water (oxidation_methane)",
                "Oxidation of primary alcohols to form aldehydes (oxidation_primary_alcohols)",
                "Oxidation of primary amines to form nitro compounds (oxidation_primary_amines)",
                "Oxidation of secondary alcohols to form ketones (oxidation_secondary_alcohols)",
                "Oxidation of sulfur dioxide to form sulfur trioxide (oxidation_sulfur_dioxide)",
                "Oxidation of sulfur to form sulfur dioxide (oxidation_sulfur)",
                "Oxidation of tertiary alcohols to form ketones (oxidation_tertiary_alcohols)",
                "Rusting of iron (rusting_iron)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion of Methane (CH\u2084)",
            "Oxidation of Glucose (C\u2086H\u2081\u2082O\u2086) in Cellular Respiration",
            "Rusting of Iron (Fe to Fe\u2082O\u2083)",
            "Oxidation of Ethanol to Acetaldehyde",
            "Oxidation of Acetaldehyde to Acetic Acid",
            "Oxidation of Benzaldehyde to Benzoic Acid",
            "Oxidation of Ascorbic Acid (Vitamin C) to Dehydroascorbic Acid",
            "Oxidation of NADH to NAD\u207a in the Electron Transport Chain",
            "Beta-Oxidation of Palmitic Acid",
            "Oxidation of Methanol to Formaldehyde",
            "Oxidation of Formaldehyde to Formic Acid",
            "Oxidation of Secondary Alcohols to Ketones",
            "Oxidation of Sulfides to Sulfoxides",
            "Oxidation of Toluene to Benzyl Alcohol",
            "Oxidation of Citric Acid in the Citric Acid Cycle",
            "Oxidation of Fatty Acids in Lipid Metabolism",
            "Oxidation of Pyruvate to Acetyl-CoA",
            "Oxidation of NADPH in Anabolic Reactions",
            "Oxidation of Iron in Hemoglobin",
            "Oxidation of Cholesterol in the Body",
            "Oxidation of Methane by Methanotrophs",
            "Oxidation of Propanol to Propionaldehyde",
            "Oxidation of Lignin in Plant Cell Walls",
            "Oxidation of Dopamine to Dopamine Quinone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion of Methane (CH\u2084)",
                "Oxidation of Glucose (C\u2086H\u2081\u2082O\u2086) in Cellular Respiration",
                "Rusting of Iron (Fe to Fe\u2082O\u2083)",
                "Oxidation of Ethanol to Acetaldehyde",
                "Oxidation of Acetaldehyde to Acetic Acid",
                "Oxidation of Benzaldehyde to Benzoic Acid",
                "Oxidation of Ascorbic Acid (Vitamin C) to Dehydroascorbic Acid",
                "Oxidation of NADH to NAD\u207a in the Electron Transport Chain",
                "Beta-Oxidation of Palmitic Acid",
                "Oxidation of Methanol to Formaldehyde",
                "Oxidation of Formaldehyde to Formic Acid",
                "Oxidation of Secondary Alcohols to Ketones",
                "Oxidation of Sulfides to Sulfoxides",
                "Oxidation of Toluene to Benzyl Alcohol",
                "Oxidation of Citric Acid in the Citric Acid Cycle",
                "Oxidation of Fatty Acids in Lipid Metabolism",
                "Oxidation of Pyruvate to Acetyl-CoA",
                "Oxidation of NADPH in Anabolic Reactions",
                "Oxidation of Iron in Hemoglobin",
                "Oxidation of Cholesterol in the Body",
                "Oxidation of Methane by Methanotrophs",
                "Oxidation of Propanol to Propionaldehyde",
                "Oxidation of Lignin in Plant Cell Walls",
                "Oxidation of Dopamine to Dopamine Quinone"
            ],
            "mismatches": [],
            "true_referents": [
                "Beta-Oxidation of Palmitic Acid",
                "Combustion of Methane (CH\u2084)",
                "Oxidation of Acetaldehyde to Acetic Acid",
                "Oxidation of Ascorbic Acid (Vitamin C) to Dehydroascorbic Acid",
                "Oxidation of Benzaldehyde to Benzoic Acid",
                "Oxidation of Cholesterol in the Body",
                "Oxidation of Citric Acid in the Citric Acid Cycle",
                "Oxidation of Dopamine to Dopamine Quinone",
                "Oxidation of Ethanol to Acetaldehyde",
                "Oxidation of Fatty Acids in Lipid Metabolism",
                "Oxidation of Formaldehyde to Formic Acid",
                "Oxidation of Glucose (C\u2086H\u2081\u2082O\u2086) in Cellular Respiration",
                "Oxidation of Iron in Hemoglobin",
                "Oxidation of Lignin in Plant Cell Walls",
                "Oxidation of Methane by Methanotrophs",
                "Oxidation of Methanol to Formaldehyde",
                "Oxidation of NADH to NAD\u207a in the Electron Transport Chain",
                "Oxidation of NADPH in Anabolic Reactions",
                "Oxidation of Propanol to Propionaldehyde",
                "Oxidation of Pyruvate to Acetyl-CoA",
                "Oxidation of Secondary Alcohols to Ketones",
                "Oxidation of Sulfides to Sulfoxides",
                "Oxidation of Toluene to Benzyl Alcohol",
                "Rusting of Iron (Fe to Fe\u2082O\u2083)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Cellular respiration (CR)",
            "Lipid peroxidation",
            "Photosynthesis (light-dependent reactions)",
            "Rusting of iron",
            "Combustion of hydrocarbons",
            "Oxidative phosphorylation",
            "Hydroxylation reactions",
            "Dehydrogenation reactions",
            "Epoxidation",
            "Ozonolysis",
            "Alcohol oxidation",
            "Aldehyde oxidation",
            "Sulfoxidation",
            "N-oxidation",
            "Baeyer-Villiger oxidation",
            "Swern oxidation",
            "Jones oxidation",
            "Dess-Martin oxidation",
            "Oppenauer oxidation",
            "Wacker oxidation",
            "Oxidative coupling",
            "Autoxidation",
            "Electrochemical oxidation",
            "Oxidative stress in biological systems"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cellular respiration (CR)",
                "Lipid peroxidation",
                "Photosynthesis (light-dependent reactions)",
                "Rusting of iron",
                "Combustion of hydrocarbons",
                "Oxidative phosphorylation",
                "Hydroxylation reactions",
                "Dehydrogenation reactions",
                "Epoxidation",
                "Ozonolysis",
                "Alcohol oxidation",
                "Aldehyde oxidation",
                "Sulfoxidation",
                "N-oxidation",
                "Baeyer-Villiger oxidation",
                "Swern oxidation",
                "Jones oxidation",
                "Dess-Martin oxidation",
                "Oppenauer oxidation",
                "Wacker oxidation",
                "Oxidative coupling",
                "Autoxidation",
                "Electrochemical oxidation",
                "Oxidative stress in biological systems"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol oxidation",
                "Aldehyde oxidation",
                "Autoxidation",
                "Baeyer-Villiger oxidation",
                "Cellular respiration (CR)",
                "Combustion of hydrocarbons",
                "Dehydrogenation reactions",
                "Dess-Martin oxidation",
                "Electrochemical oxidation",
                "Epoxidation",
                "Hydroxylation reactions",
                "Jones oxidation",
                "Lipid peroxidation",
                "N-oxidation",
                "Oppenauer oxidation",
                "Oxidative coupling",
                "Oxidative phosphorylation",
                "Oxidative stress in biological systems",
                "Ozonolysis",
                "Photosynthesis (light-dependent reactions)",
                "Rusting of iron",
                "Sulfoxidation",
                "Swern oxidation",
                "Wacker oxidation"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion of methane (CH4)",
            "Rusting of iron (Fe)",
            "Tarnishing of silver (Ag)",
            "Browning of cut apples",
            "Alcohol metabolism in the liver",
            "Citric acid cycle (TCA cycle)",
            "Electron transport chain (ETC)",
            "Oxidation of glucose in cells",
            "Oxidation of fatty acids (beta-oxidation)",
            "Oxidation of amino acids",
            "Oxidation of hemoglobin (Hb)",
            "Oxidation of low-density lipoprotein (LDL)",
            "Oxidation of neurotransmitters (e.g., dopamine)",
            "Oxidation of vitamins (e.g., vitamin C)",
            "Oxidation of xenobiotics by cytochrome P450 enzymes (CYPs)",
            "Oxidation of sulfur dioxide (SO2) in the atmosphere",
            "Oxidation of hydrogen sulfide (H2S) in the presence of oxygen",
            "Oxidation of ammonia (NH3) to nitrite (NO2-) by nitrifying bacteria",
            "Oxidation of ferrous iron (Fe2+) to ferric iron (Fe3+)",
            "Oxidation of sulfide minerals (e.g., pyrite, FeS2)",
            "Oxidation of carbon monoxide (CO) to carbon dioxide (CO2)",
            "Oxidation of ethanol to acetaldehyde by alcohol dehydrogenase (ADH)",
            "Oxidation of lactate to pyruvate by lactate dehydrogenase (LDH)",
            "Oxidation of NADH to NAD+ in redox reactions"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion of methane (CH4)",
                "Rusting of iron (Fe)",
                "Tarnishing of silver (Ag)",
                "Browning of cut apples",
                "Alcohol metabolism in the liver",
                "Citric acid cycle (TCA cycle)",
                "Electron transport chain (ETC)",
                "Oxidation of glucose in cells",
                "Oxidation of fatty acids (beta-oxidation)",
                "Oxidation of amino acids",
                "Oxidation of hemoglobin (Hb)",
                "Oxidation of low-density lipoprotein (LDL)",
                "Oxidation of neurotransmitters (e.g., dopamine)",
                "Oxidation of vitamins (e.g., vitamin C)",
                "Oxidation of xenobiotics by cytochrome P450 enzymes (CYPs)",
                "Oxidation of sulfur dioxide (SO2) in the atmosphere",
                "Oxidation of hydrogen sulfide (H2S) in the presence of oxygen",
                "Oxidation of ammonia (NH3) to nitrite (NO2-) by nitrifying bacteria",
                "Oxidation of ferrous iron (Fe2+) to ferric iron (Fe3+)",
                "Oxidation of sulfide minerals (e.g., pyrite, FeS2)",
                "Oxidation of carbon monoxide (CO) to carbon dioxide (CO2)",
                "Oxidation of ethanol to acetaldehyde by alcohol dehydrogenase (ADH)",
                "Oxidation of lactate to pyruvate by lactate dehydrogenase (LDH)",
                "Oxidation of NADH to NAD+ in redox reactions"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol metabolism in the liver",
                "Browning of cut apples",
                "Citric acid cycle (TCA cycle)",
                "Combustion of methane (CH4)",
                "Electron transport chain (ETC)",
                "Oxidation of NADH to NAD+ in redox reactions",
                "Oxidation of amino acids",
                "Oxidation of ammonia (NH3) to nitrite (NO2-) by nitrifying bacteria",
                "Oxidation of carbon monoxide (CO) to carbon dioxide (CO2)",
                "Oxidation of ethanol to acetaldehyde by alcohol dehydrogenase (ADH)",
                "Oxidation of fatty acids (beta-oxidation)",
                "Oxidation of ferrous iron (Fe2+) to ferric iron (Fe3+)",
                "Oxidation of glucose in cells",
                "Oxidation of hemoglobin (Hb)",
                "Oxidation of hydrogen sulfide (H2S) in the presence of oxygen",
                "Oxidation of lactate to pyruvate by lactate dehydrogenase (LDH)",
                "Oxidation of low-density lipoprotein (LDL)",
                "Oxidation of neurotransmitters (e.g., dopamine)",
                "Oxidation of sulfide minerals (e.g., pyrite, FeS2)",
                "Oxidation of sulfur dioxide (SO2) in the atmosphere",
                "Oxidation of vitamins (e.g., vitamin C)",
                "Oxidation of xenobiotics by cytochrome P450 enzymes (CYPs)",
                "Rusting of iron (Fe)",
                "Tarnishing of silver (Ag)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Rusting of iron (Fe2O3)",
            "Combustion of methane (CH4 + 2O2 \u2192 CO2 + 2H2O)",
            "Cellular respiration in living organisms",
            "Oxidation of ethanol to acetic acid (CH3CH2OH + O2 \u2192 CH3COOH + H2O)",
            "Browning of fruits and vegetables due to enzymatic oxidation",
            "Corrosion of metals",
            "Metabolic breakdown of fats and carbohydrates",
            "Oxidation of glucose in glycolysis (C6H12O6 + 6O2 \u2192 6CO2 + 6H2O)",
            "Bleaching of hair and wood pulp using oxidizing agents",
            "Oxidation of iron in the presence of oxygen and moisture (4Fe + 3O2 \u2192 2Fe2O3)",
            "Oxidation of alcohols to aldehydes and ketones",
            "Oxidation of ammonia to nitric acid in the Ostwald process",
            "Oxidation of sulfur dioxide to sulfur trioxide in the Contact process",
            "Oxidation of phenols in the synthesis of dyes and pharmaceuticals",
            "Oxidation of alkenes to epoxides using peroxyacids",
            "Oxidation of primary alcohols to carboxylic acids using chromic acid",
            "Oxidation of aldehydes to carboxylic acids using mild oxidizing agents",
            "Oxidation of alkanes to alcohols and ketones using strong oxidizing agents",
            "Oxidation of amines to nitro compounds",
            "Oxidation of sulfides to sulfoxides and sulfones",
            "Oxidation of alkenes to diols using cold alkaline potassium permanganate",
            "Oxidation of glucose to gluconic acid by glucose oxidase enzyme",
            "Oxidation of ethylene to ethylene oxide in the production of polyethylene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Rusting of iron (Fe2O3)",
                "Combustion of methane (CH4 + 2O2 \u2192 CO2 + 2H2O)",
                "Cellular respiration in living organisms",
                "Oxidation of ethanol to acetic acid (CH3CH2OH + O2 \u2192 CH3COOH + H2O)",
                "Browning of fruits and vegetables due to enzymatic oxidation",
                "Corrosion of metals",
                "Metabolic breakdown of fats and carbohydrates",
                "Oxidation of glucose in glycolysis (C6H12O6 + 6O2 \u2192 6CO2 + 6H2O)",
                "Bleaching of hair and wood pulp using oxidizing agents",
                "Oxidation of iron in the presence of oxygen and moisture (4Fe + 3O2 \u2192 2Fe2O3)",
                "Oxidation of alcohols to aldehydes and ketones",
                "Oxidation of ammonia to nitric acid in the Ostwald process",
                "Oxidation of sulfur dioxide to sulfur trioxide in the Contact process",
                "Oxidation of phenols in the synthesis of dyes and pharmaceuticals",
                "Oxidation of alkenes to epoxides using peroxyacids",
                "Oxidation of primary alcohols to carboxylic acids using chromic acid",
                "Oxidation of aldehydes to carboxylic acids using mild oxidizing agents",
                "Oxidation of alkanes to alcohols and ketones using strong oxidizing agents",
                "Oxidation of amines to nitro compounds",
                "Oxidation of sulfides to sulfoxides and sulfones",
                "Oxidation of alkenes to diols using cold alkaline potassium permanganate",
                "Oxidation of glucose to gluconic acid by glucose oxidase enzyme"
            ],
            "mismatches": [
                "Oxidation of ethylene to ethylene oxide in the production of polyethylene"
            ],
            "true_referents": [
                "Bleaching of hair and wood pulp using oxidizing agents",
                "Browning of fruits and vegetables due to enzymatic oxidation",
                "Cellular respiration in living organisms",
                "Combustion of methane (CH4 + 2O2 \u2192 CO2 + 2H2O)",
                "Corrosion of metals",
                "Metabolic breakdown of fats and carbohydrates",
                "Oxidation of alcohols to aldehydes and ketones",
                "Oxidation of aldehydes to carboxylic acids using mild oxidizing agents",
                "Oxidation of alkanes to alcohols and ketones using strong oxidizing agents",
                "Oxidation of alkenes to diols using cold alkaline potassium permanganate",
                "Oxidation of alkenes to epoxides using peroxyacids",
                "Oxidation of amines to nitro compounds",
                "Oxidation of ammonia to nitric acid in the Ostwald process",
                "Oxidation of ethanol to acetic acid (CH3CH2OH + O2 \u2192 CH3COOH + H2O)",
                "Oxidation of ethylene to ethylene oxide in the production of polyethylene",
                "Oxidation of glucose in glycolysis (C6H12O6 + 6O2 \u2192 6CO2 + 6H2O)",
                "Oxidation of glucose to gluconic acid by glucose oxidase enzyme",
                "Oxidation of iron in the presence of oxygen and moisture (4Fe + 3O2 \u2192 2Fe2O3)",
                "Oxidation of phenols in the synthesis of dyes and pharmaceuticals",
                "Oxidation of primary alcohols to carboxylic acids using chromic acid",
                "Oxidation of sulfides to sulfoxides and sulfones",
                "Oxidation of sulfur dioxide to sulfur trioxide in the Contact process",
                "Rusting of iron (Fe2O3)"
            ],
            "TP": 22,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Cellular respiration (CR)",
            "Photosynthesis (PS)",
            "Combustion of hydrocarbons (CH)",
            "Rusting of iron (RI)",
            "Alcohol oxidation (AO)",
            "Lipid peroxidation (LP)",
            "Hydrogen peroxide formation (HPF)",
            "Cytochrome c oxidase reaction (CCO)",
            "Methane oxidation (MO)",
            "Glucose oxidation (GO)",
            "Ethanol oxidation (EO)",
            "Ascorbic acid oxidation (AAO)",
            "Glutathione oxidation (GO2)",
            "Hemoglobin oxidation (HO)",
            "Dopamine oxidation (DO)",
            "Uric acid oxidation (UAO)",
            "Sulfur oxidation (SO)",
            "Nitrogen fixation (NF)",
            "Methanol oxidation (MOX)",
            "Fatty acid beta-oxidation (FAO)",
            "Acetaldehyde oxidation (AcO)",
            "Xanthine oxidase reaction (XOR)",
            "Monoamine oxidase reaction (MAO)",
            "Glyceraldehyde-3-phosphate dehydrogenase (G3PD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cellular respiration (CR)",
                "Photosynthesis (PS)",
                "Combustion of hydrocarbons (CH)",
                "Rusting of iron (RI)",
                "Alcohol oxidation (AO)",
                "Lipid peroxidation (LP)",
                "Hydrogen peroxide formation (HPF)",
                "Cytochrome c oxidase reaction (CCO)",
                "Methane oxidation (MO)",
                "Glucose oxidation (GO)",
                "Ethanol oxidation (EO)",
                "Ascorbic acid oxidation (AAO)",
                "Glutathione oxidation (GO2)",
                "Hemoglobin oxidation (HO)",
                "Dopamine oxidation (DO)",
                "Uric acid oxidation (UAO)",
                "Sulfur oxidation (SO)",
                "Nitrogen fixation (NF)",
                "Methanol oxidation (MOX)",
                "Fatty acid beta-oxidation (FAO)",
                "Acetaldehyde oxidation (AcO)",
                "Xanthine oxidase reaction (XOR)",
                "Monoamine oxidase reaction (MAO)",
                "Glyceraldehyde-3-phosphate dehydrogenase (G3PD)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaldehyde oxidation (AcO)",
                "Alcohol oxidation (AO)",
                "Ascorbic acid oxidation (AAO)",
                "Cellular respiration (CR)",
                "Combustion of hydrocarbons (CH)",
                "Cytochrome c oxidase reaction (CCO)",
                "Dopamine oxidation (DO)",
                "Ethanol oxidation (EO)",
                "Fatty acid beta-oxidation (FAO)",
                "Glucose oxidation (GO)",
                "Glutathione oxidation (GO2)",
                "Glyceraldehyde-3-phosphate dehydrogenase (G3PD)",
                "Hemoglobin oxidation (HO)",
                "Hydrogen peroxide formation (HPF)",
                "Lipid peroxidation (LP)",
                "Methane oxidation (MO)",
                "Methanol oxidation (MOX)",
                "Monoamine oxidase reaction (MAO)",
                "Nitrogen fixation (NF)",
                "Photosynthesis (PS)",
                "Rusting of iron (RI)",
                "Sulfur oxidation (SO)",
                "Uric acid oxidation (UAO)",
                "Xanthine oxidase reaction (XOR)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": "[\n\"Enzymatic oxidation of cholesterol to bile acids (bile acid synthesis)\" (Bile acid synthesis),\n\"Oxidation of glucose to gluconic acid (glucose oxidation)\" (Gluconic acid synthesis),\n\"Oxidation of fatty acids to acyl-CoA (fatty acid oxidation)\" (Fatty acid metabolism),\n\"Oxidation of ethanol to acetaldehyde (alcohol metabolism)\" (Alcohol metabolism),\n\"Oxidation of tryptophan to indole-3-acetaldehyde (tryptophan metabolism)\" (Tryptophan metabolism),\n\"Oxidation of phenylalanine to tyrosine (phenylalanine metabolism)\" (Phenylalanine metabolism),\n\"Oxidation of cysteine to cystine (cysteine oxidation)\" (Cysteine metabolism),\n\"Oxidation of ascorbic acid to dehydroascorbic acid (ascorbic acid oxidation)\" (Vitamin C metabolism),\n\"Oxidation of dopamine to dopaminochrome (dopamine oxidation)\" (Dopamine metabolism),\n\"Oxidation of serotonin to 5-hydroxyindoleacetic acid (serotonin metabolism)\" (Serotonin metabolism),\n\"Oxidation of epinephrine to adrenochrome (epinephrine oxidation)\" (Epinephrine metabolism),\n\"Oxidation of tyrosine to dopachrome (tyrosine oxidation)\" (Tyrosine metabolism),\n\"Oxidation of cholesterol to 7-ketocholesterol (cholesterol oxidation)\" (Cholesterol metabolism),\n\"Oxidation of linoleic acid to 13-hydroxyoctadecadienoic acid (linoleic acid oxidation)\" (Linoleic acid metabolism),\n\"Oxidation of vitamin E to alpha-tocopherolquinone (vitamin E oxidation)\" (Vitamin E metabolism),\n\"Oxidation of glutathione to glutathione disulfide (glutathione oxidation)\" (Glutathione metabolism),\n\"Oxidation of cystine to cysteine (cystine oxidation)\" (Cystine metabolism),\n\"Oxidation of hydrogen peroxide to oxygen and water (hydrogen peroxide decomposition)\" (Hydrogen peroxide decomposition),\n\"Oxidation of nitric oxide to nitrogen dioxide (nitric oxide oxidation)\" (Nitric oxide metabolism),\n\"Oxidation of nitrite to nitrate (nitrite oxidation)\" (Nitrite metabolism),\n\"Oxidation of sulfite to sulfate (sulfite oxidation)\" (Sulfite metabolism),\n\"Oxidation of lactate to pyruvate (lactate oxidation)\" (Lactate metabolism),\n\"Oxidation of malate to oxaloacetate (malate oxidation)\" (Malate metabolism),\n\"Oxidation of succinate to fumarate (succinate oxidation)\" (Succinate metabolism),\n\"Oxidation of citrate to isocitrate (citrate oxidation)\" (Citrate metabolism),\n\"Oxidation of isocitrate to alpha-ketoglutarate (isocitrate oxidation)\" (Isocitrate metabolism)\n]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": [
            "Combustion reaction (CR)",
            "Fatty acid beta-oxidation (FAO)",
            "Citric acid cycle (CAC)",
            "Electron transport chain (ETC)",
            "Fenton reaction (FR)",
            "Haber-Weiss reaction (HWR)",
            "Lipid peroxidation (LPO)",
            "Catalase reaction (CR)",
            "Glutathione oxidation (GO)",
            "Cytochrome P450 oxidation (CYP450)",
            "Alcohol dehydrogenase reaction (ADH)",
            "Aldehyde dehydrogenase reaction (ALDH)",
            "Catecholamine oxidation (CAO)",
            "Dopamine oxidation (DO)",
            "Serotonin oxidation (SO)",
            "Tyrosinase reaction (TR)",
            "Laccase reaction (LR)",
            "Ascorbic acid oxidation (AAO)",
            "Glucose oxidation (GOX)",
            "Glycolysis (GLY)",
            "Pentose phosphate pathway (PPP)",
            "Oxidative phosphorylation (OXPHOS)",
            "Autoxidation reaction (AR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Combustion reaction (CR)",
                "Fatty acid beta-oxidation (FAO)",
                "Citric acid cycle (CAC)",
                "Electron transport chain (ETC)",
                "Fenton reaction (FR)",
                "Haber-Weiss reaction (HWR)",
                "Lipid peroxidation (LPO)",
                "Catalase reaction (CR)",
                "Glutathione oxidation (GO)",
                "Cytochrome P450 oxidation (CYP450)",
                "Alcohol dehydrogenase reaction (ADH)",
                "Aldehyde dehydrogenase reaction (ALDH)",
                "Catecholamine oxidation (CAO)",
                "Dopamine oxidation (DO)",
                "Serotonin oxidation (SO)",
                "Tyrosinase reaction (TR)",
                "Laccase reaction (LR)",
                "Ascorbic acid oxidation (AAO)",
                "Glucose oxidation (GOX)",
                "Glycolysis (GLY)",
                "Pentose phosphate pathway (PPP)",
                "Oxidative phosphorylation (OXPHOS)",
                "Autoxidation reaction (AR)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase reaction (ADH)",
                "Aldehyde dehydrogenase reaction (ALDH)",
                "Ascorbic acid oxidation (AAO)",
                "Autoxidation reaction (AR)",
                "Catalase reaction (CR)",
                "Catecholamine oxidation (CAO)",
                "Citric acid cycle (CAC)",
                "Combustion reaction (CR)",
                "Cytochrome P450 oxidation (CYP450)",
                "Dopamine oxidation (DO)",
                "Electron transport chain (ETC)",
                "Fatty acid beta-oxidation (FAO)",
                "Fenton reaction (FR)",
                "Glucose oxidation (GOX)",
                "Glutathione oxidation (GO)",
                "Glycolysis (GLY)",
                "Haber-Weiss reaction (HWR)",
                "Laccase reaction (LR)",
                "Lipid peroxidation (LPO)",
                "Oxidative phosphorylation (OXPHOS)",
                "Pentose phosphate pathway (PPP)",
                "Serotonin oxidation (SO)",
                "Tyrosinase reaction (TR)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "oxidation reaction",
        "domain": "chemistry",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]