[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "hydrogen (H2)",
            "helium (He)",
            "nitrogen (N2)",
            "oxygen (O2)",
            "fluorine (F2)",
            "chlorine (Cl2)",
            "bromine (Br2)",
            "iodine (I2)",
            "neon (Ne)",
            "argon (Ar)",
            "krypton (Kr)",
            "xenon (Xe)",
            "radon (Rn)",
            "methane (CH4)",
            "ethane (C2H6)",
            "propane (C3H8)",
            "butane (C4H10)",
            "pentane (C5H12)",
            "hexane (C6H14)",
            "heptane (C7H16)",
            "octane (C8H18)",
            "nonane (C9H20)",
            "decane (C10H22)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "hydrogen (H2)",
                "helium (He)",
                "nitrogen (N2)",
                "oxygen (O2)",
                "fluorine (F2)",
                "chlorine (Cl2)",
                "bromine (Br2)",
                "iodine (I2)",
                "neon (Ne)",
                "argon (Ar)",
                "krypton (Kr)",
                "xenon (Xe)",
                "radon (Rn)",
                "methane (CH4)",
                "ethane (C2H6)",
                "propane (C3H8)",
                "butane (C4H10)",
                "pentane (C5H12)",
                "hexane (C6H14)",
                "heptane (C7H16)",
                "octane (C8H18)",
                "nonane (C9H20)",
                "decane (C10H22)"
            ],
            "mismatches": [],
            "true_referents": [
                "argon (Ar)",
                "bromine (Br2)",
                "butane (C4H10)",
                "chlorine (Cl2)",
                "decane (C10H22)",
                "ethane (C2H6)",
                "fluorine (F2)",
                "helium (He)",
                "heptane (C7H16)",
                "hexane (C6H14)",
                "hydrogen (H2)",
                "iodine (I2)",
                "krypton (Kr)",
                "methane (CH4)",
                "neon (Ne)",
                "nitrogen (N2)",
                "nonane (C9H20)",
                "octane (C8H18)",
                "oxygen (O2)",
                "pentane (C5H12)",
                "propane (C3H8)",
                "radon (Rn)",
                "xenon (Xe)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Dioxygen (O2)",
            "Dinitrogen (N2)",
            "Dihydrogen (H2)",
            "Deuterium (D2)",
            "Helium dimer (He2)",
            "Neon dimer (Ne2)",
            "Argon dimer (Ar2)",
            "Krypton dimer (Kr2)",
            "Xenon dimer (Xe2)",
            "Ozone (O3) - Note: Ozone is actually a triatomic molecule, but it contains O2 units",
            "Nitric oxide (NO)",
            "Carbon monoxide (CO)",
            "Fluorine (F2)",
            "Chlorine (Cl2)",
            "Bromine (Br2)",
            "Iodine (I2)",
            "Hydrogen chloride (HCl)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Sulfur dioxide (SO2) - Note: SO2 is a triatomic molecule, but it contains an S=O double bond similar to diatomic gases",
            "Phosphorus trifluoride (PF3) - Note: PF3 is not diatomic, but it contains a P-F bond similar to diatomic gases",
            "Sulfur monochloride (SCl)",
            "Selenium monochloride (SeCl)",
            "Boron monofluoride (BF)",
            "Silicon monofluoride (SiF)",
            "Germanium monofluoride (GeF)",
            "Carbon disulfide (CS2) - Note: CS2 is not diatomic, but it contains C=S bonds similar to diatomic gases"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dioxygen (O2)",
                "Dinitrogen (N2)",
                "Dihydrogen (H2)",
                "Deuterium (D2)",
                "Helium dimer (He2)",
                "Neon dimer (Ne2)",
                "Argon dimer (Ar2)",
                "Krypton dimer (Kr2)",
                "Xenon dimer (Xe2)",
                "Ozone (O3) - Note: Ozone is actually a triatomic molecule, but it contains O2 units",
                "Nitric oxide (NO)",
                "Carbon monoxide (CO)",
                "Fluorine (F2)",
                "Chlorine (Cl2)",
                "Bromine (Br2)",
                "Iodine (I2)",
                "Hydrogen chloride (HCl)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Sulfur dioxide (SO2) - Note: SO2 is a triatomic molecule, but it contains an S=O double bond similar to diatomic gases",
                "Phosphorus trifluoride (PF3) - Note: PF3 is not diatomic, but it contains a P-F bond similar to diatomic gases",
                "Sulfur monochloride (SCl)",
                "Selenium monochloride (SeCl)",
                "Boron monofluoride (BF)"
            ],
            "mismatches": [
                "Silicon monofluoride (SiF)",
                "Germanium monofluoride (GeF)",
                "Carbon disulfide (CS2) - Note: CS2 is not diatomic, but it contains C=S bonds similar to diatomic gases"
            ],
            "true_referents": [
                "Argon dimer (Ar2)",
                "Boron monofluoride (BF)",
                "Bromine (Br2)",
                "Carbon monoxide (CO)",
                "Chlorine (Cl2)",
                "Deuterium (D2)",
                "Dihydrogen (H2)",
                "Dinitrogen (N2)",
                "Dioxygen (O2)",
                "Fluorine (F2)",
                "Helium dimer (He2)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen iodide (HI)",
                "Iodine (I2)",
                "Krypton dimer (Kr2)",
                "Neon dimer (Ne2)",
                "Nitric oxide (NO)",
                "Ozone (O3) - Note: Ozone is actually a triatomic molecule, but it contains O2 units",
                "Phosphorus trifluoride (PF3) - Note: PF3 is not diatomic, but it contains a P-F bond similar to diatomic gases",
                "Selenium monochloride (SeCl)",
                "Sulfur dioxide (SO2) - Note: SO2 is a triatomic molecule, but it contains an S=O double bond similar to diatomic gases",
                "Sulfur monochloride (SCl)",
                "Xenon dimer (Xe2)"
            ],
            "TP": 24,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H2)",
            "Nitrogen (N2)",
            "Oxygen (O2)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)",
            "Nitrogen dioxide (NO2)",
            "Hydrogen chloride (HCl)",
            "Carbon disulfide (CS2)",
            "Hydrogen sulfide (H2S)",
            "Sulfur dioxide (SO2)",
            "Sulfur monoxide (SO)",
            "Fluorine (F2)",
            "Chlorine (Cl2)",
            "Bromine (Br2)",
            "Iodine (I2)",
            "Ozone (O3)",
            "Sulfur hexafluoride (SF6)",
            "Phosphorus trichloride (PCl3)",
            "Phosphorus pentachloride (PCl5)",
            "Dinitrogen monoxide (N2O)",
            "Dinitrogen trioxide (N2O3)",
            "Dinitrogen tetraoxide (N2O4)",
            "Dinitrogen pentoxide (N2O5)",
            "Hydrogen cyanide (HCN)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H2)",
                "Nitrogen (N2)",
                "Oxygen (O2)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Hydrogen chloride (HCl)",
                "Carbon disulfide (CS2)",
                "Hydrogen sulfide (H2S)",
                "Sulfur dioxide (SO2)",
                "Sulfur monoxide (SO)",
                "Fluorine (F2)",
                "Chlorine (Cl2)",
                "Bromine (Br2)",
                "Iodine (I2)",
                "Ozone (O3)",
                "Sulfur hexafluoride (SF6)",
                "Phosphorus trichloride (PCl3)",
                "Phosphorus pentachloride (PCl5)",
                "Dinitrogen monoxide (N2O)",
                "Dinitrogen trioxide (N2O3)",
                "Dinitrogen tetraoxide (N2O4)",
                "Dinitrogen pentoxide (N2O5)",
                "Hydrogen cyanide (HCN)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bromine (Br2)",
                "Carbon disulfide (CS2)",
                "Carbon monoxide (CO)",
                "Chlorine (Cl2)",
                "Dinitrogen monoxide (N2O)",
                "Dinitrogen pentoxide (N2O5)",
                "Dinitrogen tetraoxide (N2O4)",
                "Dinitrogen trioxide (N2O3)",
                "Fluorine (F2)",
                "Hydrogen (H2)",
                "Hydrogen chloride (HCl)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen sulfide (H2S)",
                "Iodine (I2)",
                "Nitric oxide (NO)",
                "Nitrogen (N2)",
                "Nitrogen dioxide (NO2)",
                "Oxygen (O2)",
                "Ozone (O3)",
                "Phosphorus pentachloride (PCl5)",
                "Phosphorus trichloride (PCl3)",
                "Sulfur dioxide (SO2)",
                "Sulfur hexafluoride (SF6)",
                "Sulfur monoxide (SO)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen Gas (H\u2082)",
            "Helium (He)",
            "Nitrogen Gas (N\u2082)",
            "Oxygen Gas (O\u2082)",
            "Fluorine (F\u2082)",
            "Chlorine (Cl\u2082)",
            "Bromine (Br\u2082)",
            "Iodine (I\u2082)",
            "Carbon Monoxide (CO)",
            "Nitric Oxide (NO)",
            "Hydrogen Chloride (HCl)",
            "Hydrogen Fluoride (HF)",
            "Carbonyl Difluoride (COF\u2082)",
            "Carbonyl Chloride (COCl\u2082)",
            "Hydrogen Bromide (HBr)",
            "Hydrogen Iodide (HI)",
            "Phosphorus Monofluoride (PF)",
            "Phosphorus Monochloride (PCl)",
            "Sulfur Monofluoride (SF)",
            "Sulfur Monochloride (SCl)",
            "Vanadium Monofluoride (VF)",
            "Titanium Monochloride (TiCl)",
            "Molybdenum Monoxide (MoO)",
            "Tungsten Monofluoride (WF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen Gas (H\u2082)",
                "Helium (He)",
                "Nitrogen Gas (N\u2082)",
                "Oxygen Gas (O\u2082)",
                "Fluorine (F\u2082)",
                "Chlorine (Cl\u2082)",
                "Bromine (Br\u2082)",
                "Iodine (I\u2082)",
                "Carbon Monoxide (CO)",
                "Nitric Oxide (NO)",
                "Hydrogen Chloride (HCl)",
                "Hydrogen Fluoride (HF)",
                "Carbonyl Difluoride (COF\u2082)",
                "Carbonyl Chloride (COCl\u2082)",
                "Hydrogen Bromide (HBr)",
                "Hydrogen Iodide (HI)",
                "Phosphorus Monofluoride (PF)",
                "Phosphorus Monochloride (PCl)",
                "Sulfur Monofluoride (SF)",
                "Sulfur Monochloride (SCl)",
                "Vanadium Monofluoride (VF)",
                "Titanium Monochloride (TiCl)",
                "Molybdenum Monoxide (MoO)",
                "Tungsten Monofluoride (WF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bromine (Br\u2082)",
                "Carbon Monoxide (CO)",
                "Carbonyl Chloride (COCl\u2082)",
                "Carbonyl Difluoride (COF\u2082)",
                "Chlorine (Cl\u2082)",
                "Fluorine (F\u2082)",
                "Helium (He)",
                "Hydrogen Bromide (HBr)",
                "Hydrogen Chloride (HCl)",
                "Hydrogen Fluoride (HF)",
                "Hydrogen Gas (H\u2082)",
                "Hydrogen Iodide (HI)",
                "Iodine (I\u2082)",
                "Molybdenum Monoxide (MoO)",
                "Nitric Oxide (NO)",
                "Nitrogen Gas (N\u2082)",
                "Oxygen Gas (O\u2082)",
                "Phosphorus Monochloride (PCl)",
                "Phosphorus Monofluoride (PF)",
                "Sulfur Monochloride (SCl)",
                "Sulfur Monofluoride (SF)",
                "Titanium Monochloride (TiCl)",
                "Tungsten Monofluoride (WF)",
                "Vanadium Monofluoride (VF)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H2)",
            "Oxygen (O2)",
            "Nitrogen (N2)",
            "Fluorine (F2)",
            "Chlorine (Cl2)",
            "Bromine (Br2)",
            "Iodine (I2)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)",
            "Hydrogen chloride (HCl)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Hydrogen fluoride (HF)",
            "Deuterium (D2)",
            "Phosgene (COCl2)",
            "Cyanogen (C2N2)",
            "Hydrogen sulfide (H2S)",
            "Dinitrogen monoxide (N2O)",
            "Carbonyl fluoride (COF2)",
            "Nitrosyl chloride (NOCl)",
            "Boron trifluoride (BF3)",
            "Sulfur dioxide (SO2)",
            "Sulfur hexafluoride (SF6)",
            "Phosphine (PH3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H2)",
                "Oxygen (O2)",
                "Nitrogen (N2)",
                "Fluorine (F2)",
                "Chlorine (Cl2)",
                "Bromine (Br2)",
                "Iodine (I2)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)",
                "Hydrogen chloride (HCl)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Hydrogen fluoride (HF)",
                "Deuterium (D2)",
                "Phosgene (COCl2)",
                "Cyanogen (C2N2)",
                "Hydrogen sulfide (H2S)",
                "Dinitrogen monoxide (N2O)",
                "Carbonyl fluoride (COF2)",
                "Nitrosyl chloride (NOCl)",
                "Boron trifluoride (BF3)",
                "Sulfur dioxide (SO2)",
                "Sulfur hexafluoride (SF6)",
                "Phosphine (PH3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Boron trifluoride (BF3)",
                "Bromine (Br2)",
                "Carbon monoxide (CO)",
                "Carbonyl fluoride (COF2)",
                "Chlorine (Cl2)",
                "Cyanogen (C2N2)",
                "Deuterium (D2)",
                "Dinitrogen monoxide (N2O)",
                "Fluorine (F2)",
                "Hydrogen (H2)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen fluoride (HF)",
                "Hydrogen iodide (HI)",
                "Hydrogen sulfide (H2S)",
                "Iodine (I2)",
                "Nitric oxide (NO)",
                "Nitrogen (N2)",
                "Nitrosyl chloride (NOCl)",
                "Oxygen (O2)",
                "Phosgene (COCl2)",
                "Phosphine (PH3)",
                "Sulfur dioxide (SO2)",
                "Sulfur hexafluoride (SF6)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H2)",
            "Oxygen (O2)",
            "Nitrogen (N2)",
            "Fluorine (F2)",
            "Neon (Ne)",
            "Chlorine (Cl2)",
            "Argon (Ar)",
            "Krypton (Kr)",
            "Xenon (Xe)",
            "Radon (Rn)",
            "Carbon monoxide (CO)",
            "Hydrogen chloride (HCl)",
            "Nitric oxide (NO)",
            "Sulfur dioxide (SO2)",
            "Ammonia (NH3)",
            "Ethylene (C2H4)",
            "Acetylene (C2H2)",
            "Phosgene (COCl2)",
            "Dinitrogen (N2O)",
            "Carbon disulfide (CS2)",
            "Hydrogen sulfide (H2S)",
            "Bromine (Br2)",
            "Iodine (I2)",
            "Hydrogen bromide (HBr)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H2)",
                "Oxygen (O2)",
                "Nitrogen (N2)",
                "Fluorine (F2)",
                "Neon (Ne)",
                "Chlorine (Cl2)",
                "Argon (Ar)",
                "Krypton (Kr)",
                "Xenon (Xe)",
                "Radon (Rn)",
                "Carbon monoxide (CO)",
                "Hydrogen chloride (HCl)",
                "Nitric oxide (NO)",
                "Sulfur dioxide (SO2)",
                "Ammonia (NH3)",
                "Ethylene (C2H4)",
                "Acetylene (C2H2)",
                "Phosgene (COCl2)",
                "Carbon disulfide (CS2)",
                "Hydrogen sulfide (H2S)",
                "Bromine (Br2)",
                "Iodine (I2)",
                "Hydrogen bromide (HBr)"
            ],
            "mismatches": [
                "Dinitrogen (N2O)"
            ],
            "true_referents": [
                "Acetylene (C2H2)",
                "Ammonia (NH3)",
                "Argon (Ar)",
                "Bromine (Br2)",
                "Carbon disulfide (CS2)",
                "Carbon monoxide (CO)",
                "Chlorine (Cl2)",
                "Dinitrogen (N2O)",
                "Ethylene (C2H4)",
                "Fluorine (F2)",
                "Hydrogen (H2)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen sulfide (H2S)",
                "Iodine (I2)",
                "Krypton (Kr)",
                "Neon (Ne)",
                "Nitric oxide (NO)",
                "Nitrogen (N2)",
                "Oxygen (O2)",
                "Phosgene (COCl2)",
                "Radon (Rn)",
                "Sulfur dioxide (SO2)",
                "Xenon (Xe)"
            ],
            "TP": 23,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H2)",
            "Oxygen (O2)",
            "Nitrogen (N2)",
            "Chlorine (Cl2)",
            "Fluorine (F2)",
            "Bromine (Br2)",
            "Iodine (I2)",
            "Radon (Rn)",
            "Neon (Ne)",
            "Helium (He)",
            "Argon (Ar)",
            "Krypton (Kr)",
            "Xenon (Xe)",
            "Nitric oxide (NO)",
            "Carbon monoxide (CO)",
            "Nitrous oxide (N2O)",
            "Ammonia (NH3)",
            "Methane (CH4)",
            "Ethylene (C2H4)",
            "Acetylene (C2H2)",
            "Hydrogen chloride (HCl)",
            "Sulfur dioxide (SO2)",
            "Carbon dioxide (CO2)",
            "Hydrogen sulfide (H2S)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H2)",
                "Oxygen (O2)",
                "Nitrogen (N2)",
                "Chlorine (Cl2)",
                "Fluorine (F2)",
                "Bromine (Br2)",
                "Iodine (I2)",
                "Radon (Rn)",
                "Neon (Ne)",
                "Helium (He)",
                "Argon (Ar)",
                "Krypton (Kr)",
                "Xenon (Xe)",
                "Nitric oxide (NO)",
                "Carbon monoxide (CO)",
                "Nitrous oxide (N2O)",
                "Ammonia (NH3)",
                "Methane (CH4)",
                "Ethylene (C2H4)",
                "Acetylene (C2H2)",
                "Hydrogen chloride (HCl)",
                "Sulfur dioxide (SO2)",
                "Carbon dioxide (CO2)",
                "Hydrogen sulfide (H2S)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylene (C2H2)",
                "Ammonia (NH3)",
                "Argon (Ar)",
                "Bromine (Br2)",
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Chlorine (Cl2)",
                "Ethylene (C2H4)",
                "Fluorine (F2)",
                "Helium (He)",
                "Hydrogen (H2)",
                "Hydrogen chloride (HCl)",
                "Hydrogen sulfide (H2S)",
                "Iodine (I2)",
                "Krypton (Kr)",
                "Methane (CH4)",
                "Neon (Ne)",
                "Nitric oxide (NO)",
                "Nitrogen (N2)",
                "Nitrous oxide (N2O)",
                "Oxygen (O2)",
                "Radon (Rn)",
                "Sulfur dioxide (SO2)",
                "Xenon (Xe)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H2)",
            "Oxygen (O2)",
            "Nitrogen (N2)",
            "Fluorine (F2)",
            "Chlorine (Cl2)",
            "Bromine (Br2)",
            "Iodine (I2)",
            "Carbon monoxide (CO)",
            "Sulfur dioxide (SO2)",
            "Hydrogen chloride (HCl)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Hydrogen fluoride (HF)",
            "Nitric oxide (NO)",
            "Nitrogen dioxide (NO2)",
            "Sulfur hexafluoride (SF6)",
            "Phosphine (PH3)",
            "Methane (CH4)",
            "Ethylene (C2H4)",
            "Propylene (C3H6)",
            "Acetylene (C2H2)",
            "Butadiene (C4H6)",
            "Ammonia (NH3)",
            "Carbon dioxide (CO2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H2)",
                "Oxygen (O2)",
                "Nitrogen (N2)",
                "Fluorine (F2)",
                "Chlorine (Cl2)",
                "Bromine (Br2)",
                "Iodine (I2)",
                "Carbon monoxide (CO)",
                "Sulfur dioxide (SO2)",
                "Hydrogen chloride (HCl)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Hydrogen fluoride (HF)",
                "Nitric oxide (NO)",
                "Nitrogen dioxide (NO2)",
                "Sulfur hexafluoride (SF6)",
                "Phosphine (PH3)",
                "Methane (CH4)",
                "Ethylene (C2H4)",
                "Propylene (C3H6)",
                "Acetylene (C2H2)",
                "Butadiene (C4H6)",
                "Ammonia (NH3)",
                "Carbon dioxide (CO2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylene (C2H2)",
                "Ammonia (NH3)",
                "Bromine (Br2)",
                "Butadiene (C4H6)",
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Chlorine (Cl2)",
                "Ethylene (C2H4)",
                "Fluorine (F2)",
                "Hydrogen (H2)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen fluoride (HF)",
                "Hydrogen iodide (HI)",
                "Iodine (I2)",
                "Methane (CH4)",
                "Nitric oxide (NO)",
                "Nitrogen (N2)",
                "Nitrogen dioxide (NO2)",
                "Oxygen (O2)",
                "Phosphine (PH3)",
                "Propylene (C3H6)",
                "Sulfur dioxide (SO2)",
                "Sulfur hexafluoride (SF6)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H\u2082)",
            "Nitrogen (N\u2082)",
            "Oxygen (O\u2082)",
            "Fluorine (F\u2082)",
            "Chlorine (Cl\u2082)",
            "Hydrogen chloride (HCl)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H\u2082)",
                "Nitrogen (N\u2082)",
                "Oxygen (O\u2082)",
                "Fluorine (F\u2082)",
                "Chlorine (Cl\u2082)",
                "Hydrogen chloride (HCl)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)"
            ],
            "mismatches": [],
            "true_referents": [
                "Carbon monoxide (CO)",
                "Chlorine (Cl\u2082)",
                "Fluorine (F\u2082)",
                "Hydrogen (H\u2082)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen iodide (HI)",
                "Nitric oxide (NO)",
                "Nitrogen (N\u2082)",
                "Oxygen (O\u2082)"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H\u2082)",
            "Nitrogen (N\u2082)",
            "Oxygen (O\u2082)",
            "Fluorine (F\u2082)",
            "Chlorine (Cl\u2082)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)",
            "Hydrogen chloride (HCl)",
            "Hydrogen fluoride (HF)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Nitrous oxide (N\u2082O)",
            "Nitrogen dioxide (NO\u2082)",
            "Sulfur dioxide (SO\u2082)",
            "Carbon dioxide (CO\u2082)",
            "Hydrogen sulfide (H\u2082S)",
            "Carbonyl sulfide (COS)",
            "Deuterium (D\u2082)",
            "Ozone (O\u2083)",
            "Nitric acid (HNO\u2083)",
            "Hydrogen cyanide (HCN)",
            "Acetylene (C\u2082H\u2082)",
            "Ethylene (C\u2082H\u2084)",
            "Ethane (C\u2082H\u2086)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H\u2082)",
                "Nitrogen (N\u2082)",
                "Oxygen (O\u2082)",
                "Fluorine (F\u2082)",
                "Chlorine (Cl\u2082)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)",
                "Hydrogen chloride (HCl)",
                "Hydrogen fluoride (HF)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Nitrous oxide (N\u2082O)",
                "Nitrogen dioxide (NO\u2082)",
                "Sulfur dioxide (SO\u2082)",
                "Carbon dioxide (CO\u2082)",
                "Hydrogen sulfide (H\u2082S)",
                "Carbonyl sulfide (COS)",
                "Deuterium (D\u2082)",
                "Ozone (O\u2083)",
                "Nitric acid (HNO\u2083)",
                "Hydrogen cyanide (HCN)",
                "Acetylene (C\u2082H\u2082)",
                "Ethylene (C\u2082H\u2084)",
                "Ethane (C\u2082H\u2086)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylene (C\u2082H\u2082)",
                "Carbon dioxide (CO\u2082)",
                "Carbon monoxide (CO)",
                "Carbonyl sulfide (COS)",
                "Chlorine (Cl\u2082)",
                "Deuterium (D\u2082)",
                "Ethane (C\u2082H\u2086)",
                "Ethylene (C\u2082H\u2084)",
                "Fluorine (F\u2082)",
                "Hydrogen (H\u2082)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen fluoride (HF)",
                "Hydrogen iodide (HI)",
                "Hydrogen sulfide (H\u2082S)",
                "Nitric acid (HNO\u2083)",
                "Nitric oxide (NO)",
                "Nitrogen (N\u2082)",
                "Nitrogen dioxide (NO\u2082)",
                "Nitrous oxide (N\u2082O)",
                "Oxygen (O\u2082)",
                "Ozone (O\u2083)",
                "Sulfur dioxide (SO\u2082)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H2)",
            "Nitrogen (N2)",
            "Oxygen (O2)",
            "Fluorine (F2)",
            "Chlorine (Cl2)",
            "Bromine (Br2)",
            "Iodine (I2)",
            "Carbon monoxide (CO)",
            "Nitric oxide (NO)",
            "Hydrogen chloride (HCl)",
            "Hydrogen fluoride (HF)",
            "Hydrogen bromide (HBr)",
            "Hydrogen iodide (HI)",
            "Hydrogen sulfide (H2S)",
            "Hydrogen cyanide (HCN)",
            "Carbonyl sulfide (COS)",
            "Nitrous oxide (N2O)",
            "Carbon dioxide (CO2)",
            "Sulfur dioxide (SO2)",
            "Disulfur dichloride (S2Cl2)",
            "Cyanogen (C2N2)",
            "Dioxygen difluoride (O2F2)",
            "Chlorine monoxide (ClO)",
            "Bromine monochloride (BrCl)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H2)",
                "Nitrogen (N2)",
                "Oxygen (O2)",
                "Fluorine (F2)",
                "Chlorine (Cl2)",
                "Bromine (Br2)",
                "Iodine (I2)",
                "Carbon monoxide (CO)",
                "Nitric oxide (NO)",
                "Hydrogen chloride (HCl)",
                "Hydrogen fluoride (HF)",
                "Hydrogen bromide (HBr)",
                "Hydrogen iodide (HI)",
                "Hydrogen sulfide (H2S)",
                "Hydrogen cyanide (HCN)",
                "Carbonyl sulfide (COS)",
                "Nitrous oxide (N2O)",
                "Carbon dioxide (CO2)",
                "Sulfur dioxide (SO2)",
                "Disulfur dichloride (S2Cl2)",
                "Cyanogen (C2N2)",
                "Dioxygen difluoride (O2F2)",
                "Chlorine monoxide (ClO)",
                "Bromine monochloride (BrCl)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bromine (Br2)",
                "Bromine monochloride (BrCl)",
                "Carbon dioxide (CO2)",
                "Carbon monoxide (CO)",
                "Carbonyl sulfide (COS)",
                "Chlorine (Cl2)",
                "Chlorine monoxide (ClO)",
                "Cyanogen (C2N2)",
                "Dioxygen difluoride (O2F2)",
                "Disulfur dichloride (S2Cl2)",
                "Fluorine (F2)",
                "Hydrogen (H2)",
                "Hydrogen bromide (HBr)",
                "Hydrogen chloride (HCl)",
                "Hydrogen cyanide (HCN)",
                "Hydrogen fluoride (HF)",
                "Hydrogen iodide (HI)",
                "Hydrogen sulfide (H2S)",
                "Iodine (I2)",
                "Nitric oxide (NO)",
                "Nitrogen (N2)",
                "Nitrous oxide (N2O)",
                "Oxygen (O2)",
                "Sulfur dioxide (SO2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "carbon monoxide (CO)",
            "nitric oxide (NO)",
            "hydrogen chloride (HCl)",
            "hydrogen bromide (HBr)",
            "hydrogen iodide (HI)",
            "hydrogen fluoride (HF)",
            "nitrogen dioxide (NO2)",
            "carbon dioxide (CO2)",
            "nitrous oxide (N2O)",
            "sulfur dioxide (SO2)",
            "chlorine monoxide (ClO)",
            "chlorine dioxide (OClO)",
            "silicon monoxide (SiO)",
            "silicon dioxide (SiO2)",
            "dinitrogen monoxide (N2O)",
            "dioxygen (O2)",
            "ozone (O3)",
            "dichlorine monoxide (Cl2O)",
            "dinitrogen trioxide (N2O3)",
            "sulfur monoxide (SO)",
            "seleninyl fluoride (SeOF2)",
            "tellurium dioxide (TeO2)",
            "dichlorine dioxide (Cl2O2)",
            "dichlorine heptoxide (Cl2O7)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "carbon monoxide (CO)",
                "nitric oxide (NO)",
                "hydrogen chloride (HCl)",
                "hydrogen bromide (HBr)",
                "hydrogen iodide (HI)",
                "hydrogen fluoride (HF)",
                "nitrogen dioxide (NO2)",
                "carbon dioxide (CO2)",
                "nitrous oxide (N2O)",
                "sulfur dioxide (SO2)",
                "chlorine monoxide (ClO)",
                "chlorine dioxide (OClO)",
                "silicon monoxide (SiO)",
                "silicon dioxide (SiO2)",
                "dinitrogen monoxide (N2O)",
                "dioxygen (O2)",
                "ozone (O3)",
                "dichlorine monoxide (Cl2O)",
                "dinitrogen trioxide (N2O3)",
                "sulfur monoxide (SO)",
                "seleninyl fluoride (SeOF2)",
                "tellurium dioxide (TeO2)",
                "dichlorine dioxide (Cl2O2)",
                "dichlorine heptoxide (Cl2O7)"
            ],
            "mismatches": [],
            "true_referents": [
                "carbon dioxide (CO2)",
                "carbon monoxide (CO)",
                "chlorine dioxide (OClO)",
                "chlorine monoxide (ClO)",
                "dichlorine dioxide (Cl2O2)",
                "dichlorine heptoxide (Cl2O7)",
                "dichlorine monoxide (Cl2O)",
                "dinitrogen monoxide (N2O)",
                "dinitrogen trioxide (N2O3)",
                "dioxygen (O2)",
                "hydrogen bromide (HBr)",
                "hydrogen chloride (HCl)",
                "hydrogen fluoride (HF)",
                "hydrogen iodide (HI)",
                "nitric oxide (NO)",
                "nitrogen dioxide (NO2)",
                "nitrous oxide (N2O)",
                "ozone (O3)",
                "seleninyl fluoride (SeOF2)",
                "silicon dioxide (SiO2)",
                "silicon monoxide (SiO)",
                "sulfur dioxide (SO2)",
                "sulfur monoxide (SO)",
                "tellurium dioxide (TeO2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Hydrogen (H2)",
            "Helium (He)",
            "Neon (Ne)",
            "Argon (Ar)",
            "Krypton (Kr)",
            "Xenon (Xe)",
            "Radon (Rn)",
            "Oganesson (Og)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hydrogen (H2)",
                "Helium (He)",
                "Neon (Ne)",
                "Argon (Ar)",
                "Krypton (Kr)",
                "Xenon (Xe)",
                "Radon (Rn)",
                "Oganesson (Og)"
            ],
            "mismatches": [],
            "true_referents": [
                "Argon (Ar)",
                "Helium (He)",
                "Hydrogen (H2)",
                "Krypton (Kr)",
                "Neon (Ne)",
                "Oganesson (Og)",
                "Radon (Rn)",
                "Xenon (Xe)"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "Nitrogen (N2)",
            "Oxygen (O2)",
            "Helium (He)",
            "Hydrogen (H2)",
            "Carbon dioxide (CO2)",
            "Nitrous oxide (N2O)",
            "Dinitrogen monoxide (N2O)",
            "Oxygen difluoride (OF2)",
            "Oxygen trifluoride (OF3)",
            "Oxygen pentafluoride (OF5)",
            "Dioxygen difluoride (O2F2)",
            "Nitrogen trifluoride (NF3)",
            "Nitrogen pentafluoride (NF5)",
            "Nitrogen hexafluoride (NF6)",
            "Phosphorus difluoride (PF2)",
            "Phosphorus trifluoride (PF3)",
            "Phosphorus pentafluoride (PF5)",
            "Arsenic trifluoride (AsF3)",
            "Arsenic pentafluoride (AsF5)",
            "Antimony trifluoride (SbF3)",
            "Antimony pentafluoride (SbF5)",
            "Boron trifluoride (BF3)",
            "Boron pentafluoride (BF5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nitrogen (N2)",
                "Oxygen (O2)",
                "Helium (He)",
                "Hydrogen (H2)",
                "Carbon dioxide (CO2)",
                "Nitrous oxide (N2O)",
                "Dinitrogen monoxide (N2O)",
                "Oxygen difluoride (OF2)",
                "Oxygen trifluoride (OF3)",
                "Oxygen pentafluoride (OF5)",
                "Dioxygen difluoride (O2F2)",
                "Nitrogen trifluoride (NF3)",
                "Nitrogen pentafluoride (NF5)",
                "Nitrogen hexafluoride (NF6)",
                "Phosphorus difluoride (PF2)",
                "Phosphorus trifluoride (PF3)",
                "Phosphorus pentafluoride (PF5)",
                "Arsenic trifluoride (AsF3)",
                "Arsenic pentafluoride (AsF5)",
                "Antimony trifluoride (SbF3)",
                "Antimony pentafluoride (SbF5)",
                "Boron trifluoride (BF3)",
                "Boron pentafluoride (BF5)"
            ],
            "mismatches": [],
            "true_referents": [
                "Antimony pentafluoride (SbF5)",
                "Antimony trifluoride (SbF3)",
                "Arsenic pentafluoride (AsF5)",
                "Arsenic trifluoride (AsF3)",
                "Boron pentafluoride (BF5)",
                "Boron trifluoride (BF3)",
                "Carbon dioxide (CO2)",
                "Dinitrogen monoxide (N2O)",
                "Dioxygen difluoride (O2F2)",
                "Helium (He)",
                "Hydrogen (H2)",
                "Nitrogen (N2)",
                "Nitrogen hexafluoride (NF6)",
                "Nitrogen pentafluoride (NF5)",
                "Nitrogen trifluoride (NF3)",
                "Nitrous oxide (N2O)",
                "Oxygen (O2)",
                "Oxygen difluoride (OF2)",
                "Oxygen pentafluoride (OF5)",
                "Oxygen trifluoride (OF3)",
                "Phosphorus difluoride (PF2)",
                "Phosphorus pentafluoride (PF5)",
                "Phosphorus trifluoride (PF3)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            "H2O (Water)",
            "CH4 (Methane)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "H2O (Water)",
                "CH4 (Methane)"
            ],
            "mismatches": [],
            "true_referents": [
                "CH4 (Methane)",
                "H2O (Water)"
            ],
            "TP": 2,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "diatomic gas at room temperature",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "water",
                "Canonical name": "H2O"
            },
            {
                "Referent": "carbon dioxide",
                "Canonical name": "CO2"
            },
            {
                "Referent": "hydrogen",
                "Canonical name": "H2"
            },
            {
                "Referent": "nitrogen",
                "Canonical name": "N2"
            },
            {
                "Referent": "oxygen",
                "Canonical name": "O2"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "water",
                "carbon dioxide",
                "hydrogen",
                "nitrogen",
                "oxygen"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Referent\": \"carbon dioxide\", \"Canonical name\": \"CO2\"}",
                "{\"Referent\": \"hydrogen\", \"Canonical name\": \"H2\"}",
                "{\"Referent\": \"nitrogen\", \"Canonical name\": \"N2\"}",
                "{\"Referent\": \"oxygen\", \"Canonical name\": \"O2\"}",
                "{\"Referent\": \"water\", \"Canonical name\": \"H2O\"}"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 0
        }
    }
]