[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Hydrogen Peroxide (H2O2 - note: this does not fit the criteria as it is a liquid at room temperature, but is included for illustrative purposes of the format)",
            "Water Vapor (H2O)",
            "Nitric Oxide (NO)",
            "Nitrogen Dioxide (NO2)",
            "Sulfur Dioxide (SO2)",
            "Carbon Monoxide (CO)",
            "Carbon Dioxide (CO2)",
            "Ozone (O3 - note: does not fully meet the criteria as it does not contain an element other than oxygen)",
            "Ammonia (NH3 - note: does not fully meet the criteria as it does not contain oxygen)",
            "Methanol Vapor (CH3OH - note: does not fully meet the criteria as it is a liquid at room temperature)",
            "Ethanol Vapor (C2H5OH - note: does not fully meet the criteria as it is a liquid at room temperature)",
            "Acetaldehyde Vapor (C2H4O - note: does not fully meet the criteria as it is a liquid at room temperature)",
            "Formaldehyde (CH2O - note: does not fully meet the criteria as it is a gas at room temperature but does not contain an element other than carbon and oxygen)",
            "Acetone Vapor (C3H6O - note: does not fully meet the criteria as it is a liquid at room temperature)",
            "Acetic Acid Vapor (C2H4O2 - note: does not fully meet the criteria as it is a liquid at room temperature)",
            "Hydrogen Cyanide (HCN - note: does not fully meet the criteria as it does not contain oxygen)",
            "Nitrous Oxide (N2O)",
            "Sulfur Trioxide (SO3)",
            "Phosgene (COCl2 - note: does not fully meet the criteria as it does not contain oxygen)",
            "Olefines (C2H4 - note: does not fully meet the criteria as it does not contain oxygen)",
            "Hydrogen Sulfide (H2S - note: does not fully meet the criteria as it does not contain oxygen)",
            "Carbon Disulfide (CS2 - note: does not fully meet the criteria as it does not contain oxygen)",
            "Ethylene Oxide (C2H4O)",
            "Methyl Ethyl Ketone Vapor (C4H8O - note: does not fully meet the criteria as it is a liquid at room temperature)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen Peroxide (H2O2 - note: this does not fit the criteria as it is a liquid at room temperature, but is included for illustrative purposes of the format)",
                "Water Vapor (H2O)",
                "Nitric Oxide (NO)",
                "Nitrogen Dioxide (NO2)",
                "Sulfur Dioxide (SO2)",
                "Carbon Monoxide (CO)",
                "Carbon Dioxide (CO2)",
                "Ozone (O3 - note: does not fully meet the criteria as it does not contain an element other than oxygen)",
                "Ammonia (NH3 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Methanol Vapor (CH3OH - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Ethanol Vapor (C2H5OH - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Acetaldehyde Vapor (C2H4O - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Formaldehyde (CH2O - note: does not fully meet the criteria as it is a gas at room temperature but does not contain an element other than carbon and oxygen)",
                "Acetone Vapor (C3H6O - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Acetic Acid Vapor (C2H4O2 - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Hydrogen Cyanide (HCN - note: does not fully meet the criteria as it does not contain oxygen)",
                "Nitrous Oxide (N2O)",
                "Sulfur Trioxide (SO3)",
                "Phosgene (COCl2 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Olefines (C2H4 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Hydrogen Sulfide (H2S - note: does not fully meet the criteria as it does not contain oxygen)",
                "Carbon Disulfide (CS2 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Ethylene Oxide (C2H4O)",
                "Methyl Ethyl Ketone Vapor (C4H8O - note: does not fully meet the criteria as it is a liquid at room temperature)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaldehyde Vapor (C2H4O - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Acetic Acid Vapor (C2H4O2 - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Acetone Vapor (C3H6O - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Ammonia (NH3 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Carbon Dioxide (CO2)",
                "Carbon Disulfide (CS2 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Carbon Monoxide (CO)",
                "Ethanol Vapor (C2H5OH - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Ethylene Oxide (C2H4O)",
                "Formaldehyde (CH2O - note: does not fully meet the criteria as it is a gas at room temperature but does not contain an element other than carbon and oxygen)",
                "Hydrogen Cyanide (HCN - note: does not fully meet the criteria as it does not contain oxygen)",
                "Hydrogen Peroxide (H2O2 - note: this does not fit the criteria as it is a liquid at room temperature, but is included for illustrative purposes of the format)",
                "Hydrogen Sulfide (H2S - note: does not fully meet the criteria as it does not contain oxygen)",
                "Methanol Vapor (CH3OH - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Methyl Ethyl Ketone Vapor (C4H8O - note: does not fully meet the criteria as it is a liquid at room temperature)",
                "Nitric Oxide (NO)",
                "Nitrogen Dioxide (NO2)",
                "Nitrous Oxide (N2O)",
                "Olefines (C2H4 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Ozone (O3 - note: does not fully meet the criteria as it does not contain an element other than oxygen)",
                "Phosgene (COCl2 - note: does not fully meet the criteria as it does not contain oxygen)",
                "Sulfur Dioxide (SO2)",
                "Sulfur Trioxide (SO3)",
                "Water Vapor (H2O)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Carbon dioxide (CO2)",
            "Sulfur dioxide (SO2)",
            "Nitrogen dioxide (NO2)",
            "Ozone (O3)",
            "Water vapor (H2O)",
            "Carbon monoxide (CO)",
            "Phosgene (COCl2)",
            "Hydrogen peroxide (H2O2, gaseous form)",
            "Sulfur trioxide (SO3)",
            "Nitrous oxide (N2O)",
            "Phosphorus pentoxide (P4O10, dimer form P2O5)",
            "Dinitrogen trioxide (N2O3)",
            "Oxides of chlorine (Cl2O, ClO2, Cl2O6, Cl2O7)",
            "Oxides of fluorine (OF2)",
            "Oxides of bromine (Br2O5)",
            "Oxides of iodine (I2O5)",
            "Selenium dioxide (SeO2)",
            "Tellurium dioxide (TeO2)",
            "Diphosphorus pentoxide (P4O10, monomer form P2O5)",
            "Dinitrogen tetroxide (N2O4)",
            "Dichlorine monoxide (Cl2O)",
            "Dichlorine trioxide (Cl2O3)",
            "Diboron trioxide (B2O3)",
            "Dysprosium(III) oxide (Dy2O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon dioxide (CO2)",
                "Sulfur dioxide (SO2)",
                "Nitrogen dioxide (NO2)",
                "Ozone (O3)",
                "Water vapor (H2O)",
                "Carbon monoxide (CO)",
                "Phosgene (COCl2)",
                "Hydrogen peroxide (H2O2, gaseous form)",
                "Sulfur trioxide (SO3)",
                "Nitrous oxide (N2O)",
                "Phosphorus pentoxide (P4O10, dimer form P2O5)",
                "Dinitrogen trioxide (N2O3)",
                "Oxides of chlorine (Cl2O, ClO2, Cl2O6, Cl2O7)",
                "Oxides of fluorine (OF2)",
                "Oxides of bromine (Br2O5)",
                "Oxides of iodine (I2O5)",
                "Selenium dioxide (SeO2)",
                "Tellurium dioxide (TeO2)",
                "Diphosphorus pentoxide (P4O10, monomer form P2O5)",
                "Dinitrogen tetroxide (N2O4)",
                "Dichlorine monoxide (Cl2O)",
                "Dichlorine trioxide (Cl2O3)",
                "Diboron trioxide (B2O3)",
                "Dysprosium(III) oxide (Dy2O3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Diboron trioxide (B2O3)",
                "Dichlorine monoxide (Cl2O)",
                "Dichlorine trioxide (Cl2O3)",
                "Dinitrogen tetroxide (N2O4)",
                "Dinitrogen trioxide (N2O3)",
                "Diphosphorus pentoxide (P4O10, monomer form P2O5)",
                "Dysprosium(III) oxide (Dy2O3)",
                "Hydrogen peroxide (H2O2, gaseous form)",
                "Nitrogen dioxide (NO2)",
                "Nitrous oxide (N2O)",
                "Oxides of bromine (Br2O5)",
                "Oxides of chlorine (Cl2O, ClO2, Cl2O6, Cl2O7)",
                "Oxides of fluorine (OF2)",
                "Oxides of iodine (I2O5)",
                "Ozone (O3)",
                "Phosgene (COCl2)",
                "Phosphorus pentoxide (P4O10, dimer form P2O5)",
                "Selenium dioxide (SeO2)",
                "Sulfur dioxide (SO2)",
                "Sulfur trioxide (SO3)",
                "Tellurium dioxide (TeO2)",
                "Water vapor (H2O)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Carbon Dioxide (CO2)",
            "Carbon Monoxide (CO)",
            "Nitrous Oxide (N2O)",
            "Ozone (O3)",
            "Sulfur Dioxide (SO2)",
            "Nitric Oxide (NO)",
            "Nitrogen Dioxide (NO2)",
            "Hydrogen Peroxide (H2O2)",
            "Methyl Chloride (CH3Cl)",
            "Methyl Bromide (CH3Br)",
            "Dinitrogen Tetroxide (N2O4)",
            "Nitrogen Monoxide (NO)",
            "Methyl Fluoride (CH3F)",
            "Methyl Iodide (CH3I)",
            "Ethyl Chloride (C2H5Cl)",
            "Ethyl Bromide (C2H5Br)",
            "Ethyl Fluoride (C2H5F)",
            "Ethyl Iodide (C2H5I)",
            "Propyl Chloride (C3H7Cl)",
            "Propyl Bromide (C3H7Br)",
            "Propyl Fluoride (C3H7F)",
            "Propyl Iodide (C3H7I)",
            "Butyl Chloride (C4H9Cl)",
            "Butyl Bromide (C4H9Br)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon Dioxide (CO2)",
                "Carbon Monoxide (CO)",
                "Nitrous Oxide (N2O)",
                "Ozone (O3)",
                "Sulfur Dioxide (SO2)",
                "Nitric Oxide (NO)",
                "Nitrogen Dioxide (NO2)",
                "Hydrogen Peroxide (H2O2)",
                "Methyl Chloride (CH3Cl)",
                "Methyl Bromide (CH3Br)",
                "Dinitrogen Tetroxide (N2O4)",
                "Nitrogen Monoxide (NO)",
                "Methyl Fluoride (CH3F)",
                "Methyl Iodide (CH3I)",
                "Ethyl Chloride (C2H5Cl)",
                "Ethyl Bromide (C2H5Br)",
                "Ethyl Fluoride (C2H5F)",
                "Ethyl Iodide (C2H5I)",
                "Propyl Chloride (C3H7Cl)",
                "Propyl Bromide (C3H7Br)",
                "Propyl Fluoride (C3H7F)",
                "Propyl Iodide (C3H7I)",
                "Butyl Chloride (C4H9Cl)",
                "Butyl Bromide (C4H9Br)"
            ],
            "mismatches": [],
            "true_referents": [
                "Butyl Bromide (C4H9Br)",
                "Butyl Chloride (C4H9Cl)",
                "Carbon Dioxide (CO2)",
                "Carbon Monoxide (CO)",
                "Dinitrogen Tetroxide (N2O4)",
                "Ethyl Bromide (C2H5Br)",
                "Ethyl Chloride (C2H5Cl)",
                "Ethyl Fluoride (C2H5F)",
                "Ethyl Iodide (C2H5I)",
                "Hydrogen Peroxide (H2O2)",
                "Methyl Bromide (CH3Br)",
                "Methyl Chloride (CH3Cl)",
                "Methyl Fluoride (CH3F)",
                "Methyl Iodide (CH3I)",
                "Nitric Oxide (NO)",
                "Nitrogen Dioxide (NO2)",
                "Nitrogen Monoxide (NO)",
                "Nitrous Oxide (N2O)",
                "Ozone (O3)",
                "Propyl Bromide (C3H7Br)",
                "Propyl Chloride (C3H7Cl)",
                "Propyl Fluoride (C3H7F)",
                "Propyl Iodide (C3H7I)",
                "Sulfur Dioxide (SO2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Oxygen Difluoride (OF2)",
            "Nitrous Oxide (N2O)",
            "Nitrogen Dioxide (NO2)",
            "Carbon Monoxide (CO)",
            "Carbon Dioxide (CO2)",
            "Sulfur Dioxide (SO2)",
            "Hydrogen Peroxide (H2O2)",
            "Ozone (O3)",
            "Dinitrogen Pentoxide (N2O5)",
            "Chlorine Monoxide (ClO)",
            "Chlorine Dioxide (ClO2)",
            "Bromine Monoxide (BrO)",
            "Hydrogen Fluoride (HF)",
            "Hydrogen Chloride (HCl)",
            "Hydrogen Bromide (HBr)",
            "Hydrogen Iodide (HI)",
            "Carbonyl Sulfide (OCS)",
            "Phosphine (PH3)",
            "Arsine (AsH3)",
            "Stibine (SbH3)",
            "Bismuth Trioxide (Bi2O3, gaseous)",
            "Tellurium Hexafluoride (TeF6)",
            "Selenium Hexafluoride (SeF6)",
            "Xenon Difluoride (XeF2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Oxygen Difluoride (OF2)",
                "Nitrous Oxide (N2O)",
                "Nitrogen Dioxide (NO2)",
                "Carbon Monoxide (CO)",
                "Carbon Dioxide (CO2)",
                "Sulfur Dioxide (SO2)",
                "Hydrogen Peroxide (H2O2)",
                "Ozone (O3)",
                "Dinitrogen Pentoxide (N2O5)",
                "Chlorine Monoxide (ClO)",
                "Chlorine Dioxide (ClO2)",
                "Bromine Monoxide (BrO)",
                "Hydrogen Fluoride (HF)",
                "Hydrogen Chloride (HCl)",
                "Hydrogen Bromide (HBr)",
                "Hydrogen Iodide (HI)",
                "Carbonyl Sulfide (OCS)",
                "Phosphine (PH3)",
                "Arsine (AsH3)",
                "Stibine (SbH3)",
                "Bismuth Trioxide (Bi2O3, gaseous)",
                "Tellurium Hexafluoride (TeF6)",
                "Selenium Hexafluoride (SeF6)",
                "Xenon Difluoride (XeF2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Arsine (AsH3)",
                "Bismuth Trioxide (Bi2O3, gaseous)",
                "Bromine Monoxide (BrO)",
                "Carbon Dioxide (CO2)",
                "Carbon Monoxide (CO)",
                "Carbonyl Sulfide (OCS)",
                "Chlorine Dioxide (ClO2)",
                "Chlorine Monoxide (ClO)",
                "Dinitrogen Pentoxide (N2O5)",
                "Hydrogen Bromide (HBr)",
                "Hydrogen Chloride (HCl)",
                "Hydrogen Fluoride (HF)",
                "Hydrogen Iodide (HI)",
                "Hydrogen Peroxide (H2O2)",
                "Nitrogen Dioxide (NO2)",
                "Nitrous Oxide (N2O)",
                "Oxygen Difluoride (OF2)",
                "Ozone (O3)",
                "Phosphine (PH3)",
                "Selenium Hexafluoride (SeF6)",
                "Stibine (SbH3)",
                "Sulfur Dioxide (SO2)",
                "Tellurium Hexafluoride (TeF6)",
                "Xenon Difluoride (XeF2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Oxygen (O2)",
            "Carbon Dioxide (CO2)",
            "Ozone (O3)",
            "Nitric Oxide (NO)",
            "Nitrogen Dioxide (NO2)",
            "Sulfur Dioxide (SO2)",
            "Carbon Monoxide (CO)",
            "Nitrous Oxide (N2O)",
            "Hydrogen Peroxide (H2O2) vapor",
            "Ethylene Oxide (C2H4O)",
            "Acetaldehyde (C2H4O) vapor",
            "Formaldehyde (CH2O) vapor",
            "Chlorine Dioxide (ClO2)",
            "Dinitrogen Tetroxide (N2O4)",
            "Sulfur Trioxide (SO3)",
            "Dichlorine Monoxide (Cl2O)",
            "Dichlorine Dioxide (Cl2O2)",
            "Carbonyl Sulfide (COS)",
            "Dimethyl Ether (C2H6O) vapor",
            "Methanol (CH3OH) vapor",
            "Ethanol (C2H5OH) vapor",
            "Propylene Oxide (C3H6O) vapor",
            "Acetone (C3H6O) vapor",
            "Methyl Ethyl Ketone (C4H8O) vapor"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Oxygen (O2)",
                "Carbon Dioxide (CO2)",
                "Ozone (O3)",
                "Nitric Oxide (NO)",
                "Nitrogen Dioxide (NO2)",
                "Sulfur Dioxide (SO2)",
                "Carbon Monoxide (CO)",
                "Nitrous Oxide (N2O)",
                "Hydrogen Peroxide (H2O2) vapor",
                "Ethylene Oxide (C2H4O)",
                "Acetaldehyde (C2H4O) vapor",
                "Formaldehyde (CH2O) vapor",
                "Chlorine Dioxide (ClO2)",
                "Dinitrogen Tetroxide (N2O4)",
                "Sulfur Trioxide (SO3)",
                "Dichlorine Monoxide (Cl2O)",
                "Dichlorine Dioxide (Cl2O2)",
                "Carbonyl Sulfide (COS)",
                "Dimethyl Ether (C2H6O) vapor",
                "Methanol (CH3OH) vapor",
                "Ethanol (C2H5OH) vapor",
                "Propylene Oxide (C3H6O) vapor",
                "Acetone (C3H6O) vapor",
                "Methyl Ethyl Ketone (C4H8O) vapor"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaldehyde (C2H4O) vapor",
                "Acetone (C3H6O) vapor",
                "Carbon Dioxide (CO2)",
                "Carbon Monoxide (CO)",
                "Carbonyl Sulfide (COS)",
                "Chlorine Dioxide (ClO2)",
                "Dichlorine Dioxide (Cl2O2)",
                "Dichlorine Monoxide (Cl2O)",
                "Dimethyl Ether (C2H6O) vapor",
                "Dinitrogen Tetroxide (N2O4)",
                "Ethanol (C2H5OH) vapor",
                "Ethylene Oxide (C2H4O)",
                "Formaldehyde (CH2O) vapor",
                "Hydrogen Peroxide (H2O2) vapor",
                "Methanol (CH3OH) vapor",
                "Methyl Ethyl Ketone (C4H8O) vapor",
                "Nitric Oxide (NO)",
                "Nitrogen Dioxide (NO2)",
                "Nitrous Oxide (N2O)",
                "Oxygen (O2)",
                "Ozone (O3)",
                "Propylene Oxide (C3H6O) vapor",
                "Sulfur Dioxide (SO2)",
                "Sulfur Trioxide (SO3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Carbon dioxide (CO2)",
            "Ozone (O3)",
            "Nitrogen dioxide (NO2)",
            "Sulfur dioxide (SO2)",
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Ethylene oxide (C2H4O)",
            "Hydrogen peroxide (H2O2)",
            "Nitrous oxide (N2O)",
            "Phosgene (COCl2)",
            "Chlorine dioxide (ClO2)",
            "Carbon monoxide (CO)",
            "Benzaldehyde (C7H6O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Acetone (C3H6O)",
            "Dimethyl ether (C2H6O)",
            "Vinyl chloride (C2H3Cl)",
            "Propylene oxide (C3H6O)",
            "Butyraldehyde (C4H8O)",
            "Cyclopropanone (C3H4O)",
            "Acrolein (C3H4O)",
            "Isopropanol (C3H8O)",
            "Ethylene glycol (C2H6O2)",
            "Acetic acid (C2H4O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon dioxide (CO2)",
                "Ozone (O3)",
                "Nitrogen dioxide (NO2)",
                "Sulfur dioxide (SO2)",
                "Formaldehyde (CH2O)",
                "Acetaldehyde (C2H4O)",
                "Ethylene oxide (C2H4O)",
                "Hydrogen peroxide (H2O2)",
                "Nitrous oxide (N2O)",
                "Phosgene (COCl2)",
                "Chlorine dioxide (ClO2)",
                "Carbon monoxide (CO)",
                "Benzaldehyde (C7H6O)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Acetone (C3H6O)",
                "Dimethyl ether (C2H6O)",
                "Vinyl chloride (C2H3Cl)",
                "Propylene oxide (C3H6O)",
                "Butyraldehyde (C4H8O)",
                "Cyclopropanone (C3H4O)",
                "Acrolein (C3H4O)",
                "Isopropanol (C3H8O)",
                "Ethylene glycol (C2H6O2)"
            ],
            "mismatches": [
                "Acetic acid (C2H4O2)"
            ],
            "true_referents": [
                "Acetaldehyde (C2H4O)",
                "Acetone (C3H6O)",
                "Acrolein (C3H4O)",
                "Benzaldehyde (C7H6O)",
                "Butyraldehyde (C4H8O)",
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Chlorine dioxide (ClO2)",
                "Cyclopropanone (C3H4O)",
                "Dimethyl ether (C2H6O)",
                "Ethanol (C2H5OH)",
                "Ethylene glycol (C2H6O2)",
                "Ethylene oxide (C2H4O)",
                "Formaldehyde (CH2O)",
                "Hydrogen peroxide (H2O2)",
                "Isopropanol (C3H8O)",
                "Methanol (CH3OH)",
                "Nitrogen dioxide (NO2)",
                "Nitrous oxide (N2O)",
                "Ozone (O3)",
                "Phosgene (COCl2)",
                "Propylene oxide (C3H6O)",
                "Sulfur dioxide (SO2)",
                "Vinyl chloride (C2H3Cl)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Oxygen difluoride (OF2)",
            "Oxygen difluoride ion (OF2-)",
            "Ozone (O3)",
            "Dinitrogen pentoxide (N2O5)",
            "Dinitrogen tetroxide (N2O4)",
            "Dinitrogen trioxide (N2O3)",
            "Carbon monoxide (CO)",
            "Carbon dioxide (CO2)",
            "Sulfur hexafluoride (SF6)",
            "Sulfur dioxide (SO2)",
            "Sulfur trioxide (SO3)",
            "Chlorine monoxide (ClO)",
            "Chlorine dioxide (ClO2)",
            "Chlorine trioxide (ClO3)",
            "Chlorine heptoxide (Cl7O2)",
            "Chlorine hexoxide (Cl6O)",
            "Phosphorus pentoxide (P4O10)",
            "Phosphorus trioxide (P4O6)",
            "Phosphorus dioxide (PO2)",
            "Phosphorus monoxide (PO)",
            "Phosphorus hexoxide (P4O12)",
            "Phosphorus heptoxide (P4O14)",
            "Phosphorus octoxide (P4O16)",
            "Phosphorus nonoxide (P4O18)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Oxygen difluoride (OF2)",
                "Oxygen difluoride ion (OF2-)",
                "Ozone (O3)",
                "Dinitrogen pentoxide (N2O5)",
                "Dinitrogen tetroxide (N2O4)",
                "Dinitrogen trioxide (N2O3)",
                "Carbon monoxide (CO)",
                "Carbon dioxide (CO2)",
                "Sulfur hexafluoride (SF6)",
                "Sulfur dioxide (SO2)",
                "Sulfur trioxide (SO3)",
                "Chlorine monoxide (ClO)",
                "Chlorine dioxide (ClO2)",
                "Chlorine trioxide (ClO3)",
                "Chlorine heptoxide (Cl7O2)",
                "Chlorine hexoxide (Cl6O)",
                "Phosphorus pentoxide (P4O10)",
                "Phosphorus trioxide (P4O6)",
                "Phosphorus dioxide (PO2)",
                "Phosphorus monoxide (PO)",
                "Phosphorus hexoxide (P4O12)",
                "Phosphorus heptoxide (P4O14)",
                "Phosphorus octoxide (P4O16)",
                "Phosphorus nonoxide (P4O18)"
            ],
            "mismatches": [],
            "true_referents": [
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Chlorine dioxide (ClO2)",
                "Chlorine heptoxide (Cl7O2)",
                "Chlorine hexoxide (Cl6O)",
                "Chlorine monoxide (ClO)",
                "Chlorine trioxide (ClO3)",
                "Dinitrogen pentoxide (N2O5)",
                "Dinitrogen tetroxide (N2O4)",
                "Dinitrogen trioxide (N2O3)",
                "Oxygen difluoride (OF2)",
                "Oxygen difluoride ion (OF2-)",
                "Ozone (O3)",
                "Phosphorus dioxide (PO2)",
                "Phosphorus heptoxide (P4O14)",
                "Phosphorus hexoxide (P4O12)",
                "Phosphorus monoxide (PO)",
                "Phosphorus nonoxide (P4O18)",
                "Phosphorus octoxide (P4O16)",
                "Phosphorus pentoxide (P4O10)",
                "Phosphorus trioxide (P4O6)",
                "Sulfur dioxide (SO2)",
                "Sulfur hexafluoride (SF6)",
                "Sulfur trioxide (SO3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Carbon dioxide (CO2)",
            "Nitrous oxide (N2O)",
            "Sulfur dioxide (SO2)",
            "Hydrogen peroxide (H2O2)",
            "Ozone (O3)",
            "Sulfur trioxide (SO3)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)",
            "Nitrogen dioxide (NO2)",
            "Hydrogen sulfide (H2S)",
            "Ammonia (NH3)",
            "Hydrogen cyanide (HCN)",
            "Chlorine gas (Cl2)",
            "Bromine gas (Br2)",
            "Iodine gas (I2)",
            "Sulfur hexafluoride (SF6)",
            "Phosphine (PH3)",
            "Boron trifluoride (BF3)",
            "Silane (SiH4)",
            "Germane (GeH4)",
            "Arsine (AsH3)",
            "Stibine (SbH3)",
            "Phosphorus trichloride (PCl3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Water (H2O)",
                "Carbon dioxide (CO2)",
                "Nitrous oxide (N2O)",
                "Sulfur dioxide (SO2)",
                "Hydrogen peroxide (H2O2)",
                "Ozone (O3)",
                "Sulfur trioxide (SO3)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Hydrogen sulfide (H2S)",
                "Ammonia (NH3)",
                "Hydrogen cyanide (HCN)",
                "Chlorine gas (Cl2)",
                "Bromine gas (Br2)",
                "Iodine gas (I2)",
                "Sulfur hexafluoride (SF6)",
                "Phosphine (PH3)",
                "Boron trifluoride (BF3)",
                "Silane (SiH4)",
                "Germane (GeH4)",
                "Arsine (AsH3)",
                "Stibine (SbH3)",
                "Phosphorus trichloride (PCl3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ammonia (NH3)",
                "Arsine (AsH3)",
                "Boron trifluoride (BF3)",
                "Bromine gas (Br2)",
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Chlorine gas (Cl2)",
                "Germane (GeH4)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen peroxide (H2O2)",
                "Hydrogen sulfide (H2S)",
                "Iodine gas (I2)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Nitrous oxide (N2O)",
                "Ozone (O3)",
                "Phosphine (PH3)",
                "Phosphorus trichloride (PCl3)",
                "Silane (SiH4)",
                "Stibine (SbH3)",
                "Sulfur dioxide (SO2)",
                "Sulfur hexafluoride (SF6)",
                "Sulfur trioxide (SO3)",
                "Water (H2O)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Carbon dioxide (CO\u2082)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)",
            "Nitrogen dioxide (NO\u2082)",
            "Sulfur dioxide (SO\u2082)",
            "Hydrogen chloride (HCl)",
            "Phosgene (COCl\u2082)",
            "Chlorine dioxide (ClO\u2082)",
            "Formaldehyde (CH\u2082O)",
            "Acetaldehyde (C\u2082H\u2084O)",
            "Dimethyl ether (CH\u2083OCH\u2083)",
            "Ethylene oxide (C\u2082H\u2084O)",
            "Diethyl ether ((C\u2082H\u2085)\u2082O)",
            "Propylene oxide (C\u2083H\u2086O)",
            "Hydrogen cyanide (HCN)",
            "Oxygen difluoride (OF\u2082)",
            "Carbonyl sulfide (OCS)",
            "Acrolein (C\u2083H\u2084O)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Carbonyl fluoride (COF\u2082)",
            "Chloromethane (CH\u2083Cl)",
            "Dichlorine monoxide (Cl\u2082O)",
            "Methyl chloroform (CH\u2083CCl\u2083)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon dioxide (CO\u2082)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO\u2082)",
                "Sulfur dioxide (SO\u2082)",
                "Hydrogen chloride (HCl)",
                "Phosgene (COCl\u2082)",
                "Chlorine dioxide (ClO\u2082)",
                "Formaldehyde (CH\u2082O)",
                "Acetaldehyde (C\u2082H\u2084O)",
                "Dimethyl ether (CH\u2083OCH\u2083)",
                "Ethylene oxide (C\u2082H\u2084O)",
                "Diethyl ether ((C\u2082H\u2085)\u2082O)",
                "Propylene oxide (C\u2083H\u2086O)",
                "Hydrogen cyanide (HCN)",
                "Oxygen difluoride (OF\u2082)",
                "Carbonyl sulfide (OCS)",
                "Acrolein (C\u2083H\u2084O)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Carbonyl fluoride (COF\u2082)",
                "Chloromethane (CH\u2083Cl)",
                "Dichlorine monoxide (Cl\u2082O)",
                "Methyl chloroform (CH\u2083CCl\u2083)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaldehyde (C\u2082H\u2084O)",
                "Acrolein (C\u2083H\u2084O)",
                "Carbon dioxide (CO\u2082)",
                "Carbon monoxide (CO)",
                "Carbonyl fluoride (COF\u2082)",
                "Carbonyl sulfide (OCS)",
                "Chlorine dioxide (ClO\u2082)",
                "Chloromethane (CH\u2083Cl)",
                "Dichlorine monoxide (Cl\u2082O)",
                "Diethyl ether ((C\u2082H\u2085)\u2082O)",
                "Dimethyl ether (CH\u2083OCH\u2083)",
                "Ethylene oxide (C\u2082H\u2084O)",
                "Formaldehyde (CH\u2082O)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen iodide (HI)",
                "Methyl chloroform (CH\u2083CCl\u2083)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO\u2082)",
                "Oxygen difluoride (OF\u2082)",
                "Phosgene (COCl\u2082)",
                "Propylene oxide (C\u2083H\u2086O)",
                "Sulfur dioxide (SO\u2082)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Carbon dioxide (CO2)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)",
            "Nitrogen dioxide (NO2)",
            "Sulfur dioxide (SO2)",
            "Nitrous oxide (N2O)",
            "Ozone (O3)",
            "Hydrogen sulfide (H2S)",
            "Chlorine dioxide (ClO2)",
            "Nitrogen trioxide (NO3)",
            "Dinitrogen tetroxide (N2O4)",
            "Sulfur trioxide (SO3)",
            "Carbonyl sulfide (COS)",
            "Phosgene (COCl2)",
            "Hydrogen cyanide (HCN)",
            "Formaldehyde (CH2O)",
            "Acetaldehyde (C2H4O)",
            "Methyl formate (C2H4O2)",
            "Ethylene oxide (C2H4O)",
            "Propylene oxide (C3H6O)",
            "Dimethyl ether (C2H6O)",
            "Methyl chloride (CH3Cl)",
            "Ethyl chloride (C2H5Cl)",
            "Vinyl chloride (C2H3Cl)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Sulfur dioxide (SO2)",
                "Nitrous oxide (N2O)",
                "Ozone (O3)",
                "Hydrogen sulfide (H2S)",
                "Chlorine dioxide (ClO2)",
                "Nitrogen trioxide (NO3)",
                "Dinitrogen tetroxide (N2O4)",
                "Sulfur trioxide (SO3)",
                "Carbonyl sulfide (COS)",
                "Phosgene (COCl2)",
                "Hydrogen cyanide (HCN)",
                "Formaldehyde (CH2O)",
                "Acetaldehyde (C2H4O)",
                "Methyl formate (C2H4O2)",
                "Ethylene oxide (C2H4O)",
                "Propylene oxide (C3H6O)",
                "Dimethyl ether (C2H6O)",
                "Methyl chloride (CH3Cl)",
                "Ethyl chloride (C2H5Cl)",
                "Vinyl chloride (C2H3Cl)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaldehyde (C2H4O)",
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Carbonyl sulfide (COS)",
                "Chlorine dioxide (ClO2)",
                "Dimethyl ether (C2H6O)",
                "Dinitrogen tetroxide (N2O4)",
                "Ethyl chloride (C2H5Cl)",
                "Ethylene oxide (C2H4O)",
                "Formaldehyde (CH2O)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen sulfide (H2S)",
                "Methyl chloride (CH3Cl)",
                "Methyl formate (C2H4O2)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Nitrogen trioxide (NO3)",
                "Nitrous oxide (N2O)",
                "Ozone (O3)",
                "Phosgene (COCl2)",
                "Propylene oxide (C3H6O)",
                "Sulfur dioxide (SO2)",
                "Sulfur trioxide (SO3)",
                "Vinyl chloride (C2H3Cl)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Carbon monoxide (CO)",
            "Carbon dioxide (CO2)",
            "Nitrogen dioxide (NO2)",
            "Nitric oxide (NO)",
            "Nitrous oxide (N2O)",
            "Sulfur dioxide (SO2)",
            "Sulfur trioxide (SO3)",
            "Hydrogen sulfide (H2S)",
            "Carbonyl sulfide (COS)",
            "Phosgene (COCl2)",
            "Phosphine (PH3)",
            "Ammonia (NH3)",
            "Hydrogen cyanide (HCN)",
            "Hydrogen chloride (HCl)",
            "Hydrogen fluoride (HF)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Chlorine monoxide (ClO)",
            "Bromine monoxide (BrO)",
            "Iodine monoxide (IO)",
            "Dinitrogen monoxide (N2O)",
            "Nitrogen trichloride (NCl3)",
            "Phosphorus pentafluoride (PF5)",
            "Sulfuryl fluoride (SO2F2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon monoxide (CO)",
                "Carbon dioxide (CO2)",
                "Nitrogen dioxide (NO2)",
                "Nitric oxide (NO)",
                "Nitrous oxide (N2O)",
                "Sulfur dioxide (SO2)",
                "Sulfur trioxide (SO3)",
                "Hydrogen sulfide (H2S)",
                "Carbonyl sulfide (COS)",
                "Phosgene (COCl2)",
                "Phosphine (PH3)",
                "Ammonia (NH3)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen chloride (HCl)",
                "Hydrogen fluoride (HF)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Chlorine monoxide (ClO)",
                "Bromine monoxide (BrO)",
                "Iodine monoxide (IO)",
                "Dinitrogen monoxide (N2O)",
                "Nitrogen trichloride (NCl3)",
                "Phosphorus pentafluoride (PF5)",
                "Sulfuryl fluoride (SO2F2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ammonia (NH3)",
                "Bromine monoxide (BrO)",
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Carbonyl sulfide (COS)",
                "Chlorine monoxide (ClO)",
                "Dinitrogen monoxide (N2O)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen fluoride (HF)",
                "Hydrogen iodide (HI)",
                "Hydrogen sulfide (H2S)",
                "Iodine monoxide (IO)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Nitrogen trichloride (NCl3)",
                "Nitrous oxide (N2O)",
                "Phosgene (COCl2)",
                "Phosphine (PH3)",
                "Phosphorus pentafluoride (PF5)",
                "Sulfur dioxide (SO2)",
                "Sulfur trioxide (SO3)",
                "Sulfuryl fluoride (SO2F2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "carbon dioxide (CO2)",
            "nitrous oxide (N2O)",
            "nitric oxide (NO)",
            "carbon monoxide (CO)",
            "sulfur dioxide (SO2)",
            "ozone (O3)",
            "nitrogen dioxide (NO2)",
            "chlorine monoxide (ClO)",
            "bromine monoxide (BrO)",
            "iodine monoxide (IO)",
            "xenon oxides (XeO3, XeO4)",
            "krypton oxides (KrO3, KrO4)",
            "radon oxides (RnO3, RnO4)",
            "nitrogen trioxide (N2O3)",
            "dinitrogen tetroxide (N2O4)",
            "nitrogen pentoxide (N2O5)",
            "sulfur trioxide (SO3)",
            "carbonyl fluoride (COF2)",
            "carbonyl chloride (COCl2)",
            "carbonyl bromide (COBr2)",
            "carbonyl iodide (COI2)",
            "hypofluorous acid (HOF)",
            "hypochlorous acid (HOCl)",
            "hypobromous acid (HOBr)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "carbon dioxide (CO2)",
                "nitrous oxide (N2O)",
                "nitric oxide (NO)",
                "carbon monoxide (CO)",
                "sulfur dioxide (SO2)",
                "ozone (O3)",
                "nitrogen dioxide (NO2)",
                "chlorine monoxide (ClO)",
                "bromine monoxide (BrO)",
                "iodine monoxide (IO)",
                "xenon oxides (XeO3, XeO4)",
                "krypton oxides (KrO3, KrO4)",
                "radon oxides (RnO3, RnO4)",
                "nitrogen trioxide (N2O3)",
                "dinitrogen tetroxide (N2O4)",
                "nitrogen pentoxide (N2O5)",
                "sulfur trioxide (SO3)",
                "carbonyl fluoride (COF2)",
                "carbonyl chloride (COCl2)",
                "carbonyl bromide (COBr2)",
                "carbonyl iodide (COI2)",
                "hypofluorous acid (HOF)",
                "hypochlorous acid (HOCl)",
                "hypobromous acid (HOBr)"
            ],
            "mismatches": [],
            "true_referents": [
                "bromine monoxide (BrO)",
                "carbon dioxide (CO2)",
                "carbon monoxide (CO)",
                "carbonyl bromide (COBr2)",
                "carbonyl chloride (COCl2)",
                "carbonyl fluoride (COF2)",
                "carbonyl iodide (COI2)",
                "chlorine monoxide (ClO)",
                "dinitrogen tetroxide (N2O4)",
                "hypobromous acid (HOBr)",
                "hypochlorous acid (HOCl)",
                "hypofluorous acid (HOF)",
                "iodine monoxide (IO)",
                "krypton oxides (KrO3, KrO4)",
                "nitric oxide (NO)",
                "nitrogen dioxide (NO2)",
                "nitrogen pentoxide (N2O5)",
                "nitrogen trioxide (N2O3)",
                "nitrous oxide (N2O)",
                "ozone (O3)",
                "radon oxides (RnO3, RnO4)",
                "sulfur dioxide (SO2)",
                "sulfur trioxide (SO3)",
                "xenon oxides (XeO3, XeO4)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Carbon dioxide (CO2)",
            "Nitrous oxide (N2O)",
            "Sulfur dioxide (SO2)",
            "Hydrogen peroxide (H2O2)",
            "Methane (CH4)",
            "Ethylene (C2H4)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Ammonia (NH3)",
            "Hydrogen sulfide (H2S)",
            "Ozone (O3)",
            "Nitrogen dioxide (NO2)",
            "Chlorine (Cl2)",
            "Fluorine (F2)",
            "Krypton (Kr)",
            "Xenon (Xe)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon dioxide (CO2)",
                "Nitrous oxide (N2O)",
                "Sulfur dioxide (SO2)",
                "Hydrogen peroxide (H2O2)",
                "Methane (CH4)",
                "Ethylene (C2H4)",
                "Propane (C3H8)",
                "Butane (C4H10)",
                "Ammonia (NH3)",
                "Hydrogen sulfide (H2S)",
                "Ozone (O3)",
                "Nitrogen dioxide (NO2)",
                "Chlorine (Cl2)",
                "Fluorine (F2)",
                "Krypton (Kr)",
                "Xenon (Xe)"
            ],
            "mismatches": [],
            "true_referents": [
                "Ammonia (NH3)",
                "Butane (C4H10)",
                "Carbon dioxide (CO2)",
                "Chlorine (Cl2)",
                "Ethylene (C2H4)",
                "Fluorine (F2)",
                "Hydrogen peroxide (H2O2)",
                "Hydrogen sulfide (H2S)",
                "Krypton (Kr)",
                "Methane (CH4)",
                "Nitrogen dioxide (NO2)",
                "Nitrous oxide (N2O)",
                "Ozone (O3)",
                "Propane (C3H8)",
                "Sulfur dioxide (SO2)",
                "Xenon (Xe)"
            ],
            "TP": 16,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "Oxygen (O2)",
            "Nitrogen dioxide (NO2)",
            "Nitric oxide (NO)",
            "Carbon dioxide (CO2)",
            "Nitrous oxide (N2O)",
            "Ozone (O3)",
            "Hydrogen peroxide (H2O2)",
            "Hydrogen sulfide (H2S)",
            "Sulfur dioxide (SO2)",
            "Phosgene (COCl2)",
            "Carbon monoxide (CO)",
            "Nitrogen trioxide (N2O3)",
            "Dinitrogen trioxide (N2O3)",
            "Nitrogen pentoxide (N2O5)",
            "Phosphorus pentoxide (P2O5)",
            "Sulfur trioxide (SO3)",
            "Sulfuric acid (H2SO4)",
            "Nitric acid (HNO3)",
            "Nitrous acid (HNO2)",
            "Hydrogen carbonate (HCO3)",
            "Carbonic acid (H2CO3)",
            "Sulfurous acid (H2SO3)",
            "Nitrous oxide (N2O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Oxygen (O2)",
                "Nitrogen dioxide (NO2)",
                "Nitric oxide (NO)",
                "Carbon dioxide (CO2)",
                "Nitrous oxide (N2O)",
                "Ozone (O3)",
                "Hydrogen peroxide (H2O2)",
                "Hydrogen sulfide (H2S)",
                "Sulfur dioxide (SO2)",
                "Phosgene (COCl2)",
                "Carbon monoxide (CO)",
                "Nitrogen trioxide (N2O3)",
                "Dinitrogen trioxide (N2O3)",
                "Nitrogen pentoxide (N2O5)",
                "Phosphorus pentoxide (P2O5)",
                "Sulfur trioxide (SO3)",
                "Sulfuric acid (H2SO4)",
                "Nitric acid (HNO3)",
                "Nitrous acid (HNO2)",
                "Hydrogen carbonate (HCO3)",
                "Carbonic acid (H2CO3)",
                "Sulfurous acid (H2SO3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Carbonic acid (H2CO3)",
                "Dinitrogen trioxide (N2O3)",
                "Hydrogen carbonate (HCO3)",
                "Hydrogen peroxide (H2O2)",
                "Hydrogen sulfide (H2S)",
                "Nitric acid (HNO3)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Nitrogen pentoxide (N2O5)",
                "Nitrogen trioxide (N2O3)",
                "Nitrous acid (HNO2)",
                "Nitrous oxide (N2O)",
                "Oxygen (O2)",
                "Ozone (O3)",
                "Phosgene (COCl2)",
                "Phosphorus pentoxide (P2O5)",
                "Sulfur dioxide (SO2)",
                "Sulfur trioxide (SO3)",
                "Sulfuric acid (H2SO4)",
                "Sulfurous acid (H2SO3)"
            ],
            "TP": 22,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": [
            "ozone (O3)",
            "nitrous oxide (N2O)",
            "sulfur dioxide (SO2)",
            "nitrogen dioxide (NO2)",
            "carbon dioxide (CO2)",
            "sulfur trioxide (SO3)",
            "dichlorine oxide (Cl2O)",
            "phosgene (COCl2)",
            "dichlorine monoxide (Cl2O)",
            "trichlorine oxide (Cl2O3)",
            "tetrachlorine oxide (Cl2O4)",
            "chlorine dioxide (ClO2)",
            "bromine dioxide (BrO2)",
            "iodine dioxide (IO2)",
            "krypton oxide (KrO2)",
            "xenon oxide (XeO2)",
            "radon oxide (RnO2)",
            "oxygen difluoride (OF2)",
            "sulfuryl fluoride (SO2F2)",
            "thionyl fluoride (SOF2)",
            "carbonyl fluoride (COF2)",
            "nitryl fluoride (NO2F)",
            "nitrosyl fluoride (NOF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ozone (O3)",
                "nitrous oxide (N2O)",
                "sulfur dioxide (SO2)",
                "nitrogen dioxide (NO2)",
                "carbon dioxide (CO2)",
                "sulfur trioxide (SO3)",
                "dichlorine oxide (Cl2O)",
                "phosgene (COCl2)",
                "dichlorine monoxide (Cl2O)",
                "trichlorine oxide (Cl2O3)",
                "tetrachlorine oxide (Cl2O4)",
                "chlorine dioxide (ClO2)",
                "bromine dioxide (BrO2)",
                "iodine dioxide (IO2)",
                "krypton oxide (KrO2)",
                "xenon oxide (XeO2)",
                "radon oxide (RnO2)",
                "oxygen difluoride (OF2)",
                "sulfuryl fluoride (SO2F2)",
                "thionyl fluoride (SOF2)",
                "carbonyl fluoride (COF2)",
                "nitryl fluoride (NO2F)",
                "nitrosyl fluoride (NOF)"
            ],
            "mismatches": [],
            "true_referents": [
                "bromine dioxide (BrO2)",
                "carbon dioxide (CO2)",
                "carbonyl fluoride (COF2)",
                "chlorine dioxide (ClO2)",
                "dichlorine monoxide (Cl2O)",
                "dichlorine oxide (Cl2O)",
                "iodine dioxide (IO2)",
                "krypton oxide (KrO2)",
                "nitrogen dioxide (NO2)",
                "nitrosyl fluoride (NOF)",
                "nitrous oxide (N2O)",
                "nitryl fluoride (NO2F)",
                "oxygen difluoride (OF2)",
                "ozone (O3)",
                "phosgene (COCl2)",
                "radon oxide (RnO2)",
                "sulfur dioxide (SO2)",
                "sulfur trioxide (SO3)",
                "sulfuryl fluoride (SO2F2)",
                "tetrachlorine oxide (Cl2O4)",
                "thionyl fluoride (SOF2)",
                "trichlorine oxide (Cl2O3)",
                "xenon oxide (XeO2)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "gaseous oxide",
        "domain": "chemistry",
        "response": "{\"name\": \"water\", \"abbrev\": \"H2O\"}\n{\"name\": \"carbon dioxide\", \"abbrev\": \"CO2\"}\n{\"name\": \"hydrogen peroxide\", \"abbrev\": \"H2O2\"}\n{\"name\": \"hydrogen sulfide\", \"abbrev\": \"H2S\"}\n{\"name\": \"nitrogen oxide\", \"abbrev\": \"NO\"}\n{\"name\": \"nitrogen dioxide\", \"abbrev\": \"NO2\"}\n{\"name\": \"sulfur dioxide\", \"abbrev\": \"SO2\"}\n{\"name\": \"hydrogen fluoride\", \"abbrev\": \"HF\"}\n{\"name\": \"hydrogen chloride\", \"abbrev\": \"HCl\"}\n{\"name\": \"hydrogen bromide\", \"abbrev\": \"HBr\"}\n{\"name\": \"hydrogen iodide\", \"abbrev\": \"HI\"}",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]