[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Nitrous acid (HNO2)",
            "Benzene (C6H6)",
            "Formaldehyde (CH2O)",
            "Acrylamide",
            "Ultraviolet (UV) radiation",
            "Radiation (gamma rays)",
            "Methyl methanesulfonate (MMS)",
            "Ethyl methanesulfonate (EMS)",
            "4-Nitroquinoline 1-oxide (4-NQO)",
            "N-Methyl-N-nitrosourea (MNU)",
            "Benzo[a]pyrene",
            "Vinyl chloride",
            "Aflatoxin B1",
            "Arsenic trioxide (As2O3)",
            "Nickel compounds",
            "Cadmium compounds",
            "Bis(chloromethyl)ether",
            "Sulfur mustard (Mustard gas)",
            "Tobacco smoke",
            "Asbestos",
            "Polychlorinated biphenyls (PCBs)",
            "Nitrosamines",
            "Diethylstilbestrol (DES)",
            "Ionizing radiation"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acrylamide",
                "Methyl methanesulfonate (MMS)",
                "Ethyl methanesulfonate (EMS)",
                "N-Methyl-N-nitrosourea (MNU)",
                "Bis(chloromethyl)ether"
            ],
            "mismatches": [
                "Nitrous acid (HNO2)",
                "Benzene (C6H6)",
                "Formaldehyde (CH2O)",
                "Ultraviolet (UV) radiation",
                "Radiation (gamma rays)",
                "Benzo[a]pyrene",
                "Vinyl chloride",
                "Aflatoxin B1",
                "Arsenic trioxide (As2O3)",
                "Nickel compounds",
                "Cadmium compounds",
                "Sulfur mustard (Mustard gas)",
                "Tobacco smoke",
                "Asbestos",
                "Polychlorinated biphenyls (PCBs)",
                "Nitrosamines",
                "Diethylstilbestrol (DES)",
                "Ionizing radiation"
            ],
            "true_referents": [
                "(E)-4-nitrostilbene",
                "(S)-colchicine",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "2,5-bis(aziridin-1-yl)-1,4-benzoquinone",
                "2-aminoacrylic acid",
                "3,6-diamino-10-methylacridinium chloride.HCl",
                "4-methylquinoline",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N-nitrosourea",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acrylamide",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(2-chloroethyl)amine",
                "bis(chloromethyl) ether",
                "chloromethane",
                "cisplatin",
                "dibenz[a,h]anthracene",
                "dibenzo[a,l]pyrene",
                "diethyl sulfate",
                "dimethyl sulfoxide",
                "ethyl methanesulfonate",
                "iron(III) nitrilotriacetate",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "ozone",
                "phosphoramide mustard",
                "styrene"
            ],
            "TP": 5,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet Radiation (UV)",
            "Ionizing Radiation (IR)",
            "Benzene (C6H6)",
            "Formaldehyde (HCHO)",
            "Aflatoxins (AF)",
            "Nitrosamines (NOC)",
            "Polycyclic Aromatic Hydrocarbons (PAHs)",
            "Vinyl Chloride (VCM)",
            "Ethylene Oxide (EtO)",
            "X-rays",
            "Gamma Rays",
            "Alpha Particles",
            "Beta Particles",
            "Cyclophosphamide (CY)",
            "Mustard Gas (HD)",
            "Acridine Dyes",
            "5-Bromouracil (5BU)",
            "2-Acetylaminofluorene (2-AAF)",
            "Cisplatin (CDDP)",
            "Arsenic (As)",
            "Tobacco Smoke",
            "Alkylating Agents",
            "N-Nitroso Compounds (NOCs)",
            "Chloramphenicol (CAP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "5-bromouracil",
                "cisplatin",
                "cyclophosphamide",
                "alkylating agent"
            ],
            "mismatches": [
                "Ultraviolet Radiation (UV)",
                "Ionizing Radiation (IR)",
                "Benzene (C6H6)",
                "Formaldehyde (HCHO)",
                "Aflatoxins (AF)",
                "Nitrosamines (NOC)",
                "Polycyclic Aromatic Hydrocarbons (PAHs)",
                "Vinyl Chloride (VCM)",
                "Ethylene Oxide (EtO)",
                "X-rays",
                "Gamma Rays",
                "Alpha Particles",
                "Beta Particles",
                "Mustard Gas (HD)",
                "Acridine Dyes",
                "2-Acetylaminofluorene (2-AAF)",
                "Arsenic (As)",
                "Tobacco Smoke",
                "N-Nitroso Compounds (NOCs)",
                "Chloramphenicol (CAP)"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1,2:7,8-diepoxyoctane",
                "1-methylphenanthrene",
                "2-acetamidofluorene",
                "2-nitrofluorene",
                "4-hydroxycyclophosphamide",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "9-aminoacridine",
                "ICR-170",
                "N-acetoxy-2-acetamidofluorene",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "Nitrogen mustard N-oxide hydrochloride",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "alkylating agent",
                "anthramycin",
                "benzo[e]pyrene",
                "bis(2-chloroethyl)amine",
                "bis(chloromethyl) ether",
                "bromoethane",
                "carboplatin",
                "chlorambucil",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "dimethylmyleran",
                "ethidium",
                "ethidium bromide",
                "iodoacetic acid",
                "methoxyacetic acid",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "UV radiation (UVR)",
            "X-rays",
            "Gamma rays",
            "Alpha particles",
            "Beta particles",
            "Ethyl methanesulfonate (EMS)",
            "N-Ethyl-N-nitrosourea (ENU)",
            "Nitrosoguanidine (NTG)",
            "Methyl methanesulfonate (MMS)",
            "Acridine dyes",
            "Benzo[a]pyrene",
            "Dimethyl sulfate",
            "Hydroxylamine",
            "5-Bromouracil",
            "2-Aminopurine",
            "Mitomycin C",
            "Chlorambucil",
            "Cyclophosphamide",
            "Nitrosodiethylamine",
            "Nitrosodimethylamine",
            "Dibenz[a,h]anthracene",
            "Ionizing radiation",
            "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "Diepoxybutane"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl methanesulfonate",
                "N-Ethyl-N-nitrosourea",
                "Methyl methanesulfonate",
                "Dimethyl sulfate",
                "5-Bromouracil",
                "Mitomycin C",
                "Chlorambucil",
                "Cyclophosphamide",
                "Nitrosodiethylamine",
                "Nitrosodimethylamine",
                "Dibenz[a,h]anthracene",
                "N-Methyl-N'-nitro-N-nitrosoguanidine",
                "Diepoxybutane"
            ],
            "mismatches": [
                "UV radiation (UVR)",
                "X-rays",
                "Gamma rays",
                "Alpha particles",
                "Beta particles",
                "Nitrosoguanidine (NTG)",
                "Acridine dyes",
                "Benzo[a]pyrene",
                "Hydroxylamine",
                "2-Aminopurine",
                "Ionizing radiation"
            ],
            "true_referents": [
                "(R,R)-diepoxybutane",
                "(S)-colchicine",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "9-aminoacridine",
                "N(6)-hydroxyadenine",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "anthramycin",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "chlorambucil",
                "cisplatin",
                "cyclophosphamide",
                "dibenz[a,h]anthracene",
                "dibenzo[a,l]pyrene",
                "diepoxybutane",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethidium",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "meso-diepoxybutane",
                "methyl methanesulfonate",
                "mitomycin B",
                "mitomycin C",
                "ozone"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet Radiation (UV)",
            "Ethyl Methanesulfonate (EMS)",
            "Nitrous Acid (HNO2)",
            "Benzo[a]pyrene (BaP)",
            "Aflatoxin B1 (AFB1)",
            "2-Amino-1-methyl-6-phenylimidazo[4,5-b]quinoline (PhIP)",
            "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "Acridine Orange (AO)",
            "Ethidium Bromide (EtBr)",
            "Potassium Dichromate (K2Cr2O7)",
            "Methyl Mercury (CH3Hg+)",
            "Vinyl Chloride (VC)",
            "Diazomethane (CH2N2)",
            "Cyclobutane Pyrimidine Dimers (CPDs)",
            "6-4 Photoproducts (6-4PPs)",
            "Ionizing Radiation (e.g., X-rays, Gamma rays)",
            "Hydrogen Peroxide (H2O2)",
            "Sodium Azide (NaN3)",
            "Mustard Gas (HN2C(CH2Cl)2)",
            "Tris(2-chloroethyl)phosphine (TCEP)",
            "Procarbazine (PCZ)",
            "Temozolomide (TMZ)",
            "Busulfan (BSF)",
            "Mitomycin C (MMC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl Methanesulfonate (EMS)",
                "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
                "Ethidium Bromide (EtBr)",
                "Diazomethane (CH2N2)",
                "Sodium Azide (NaN3)",
                "Temozolomide (TMZ)",
                "Busulfan (BSF)",
                "Mitomycin C (MMC)"
            ],
            "mismatches": [
                "Ultraviolet Radiation (UV)",
                "Nitrous Acid (HNO2)",
                "Benzo[a]pyrene (BaP)",
                "Aflatoxin B1 (AFB1)",
                "2-Amino-1-methyl-6-phenylimidazo[4,5-b]quinoline (PhIP)",
                "Acridine Orange (AO)",
                "Potassium Dichromate (K2Cr2O7)",
                "Methyl Mercury (CH3Hg+)",
                "Vinyl Chloride (VC)",
                "Cyclobutane Pyrimidine Dimers (CPDs)",
                "6-4 Photoproducts (6-4PPs)",
                "Ionizing Radiation (e.g., X-rays, Gamma rays)",
                "Hydrogen Peroxide (H2O2)",
                "Mustard Gas (HN2C(CH2Cl)2)",
                "Tris(2-chloroethyl)phosphine (TCEP)",
                "Procarbazine (PCZ)"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "(R*,R*)-diepoxybutane",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1,2-dimethylhydrazine",
                "1,2:7,8-diepoxyoctane",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2,5-bis(2-hydroxyethylamino)-3,6-diaziridinylbenzoquinone",
                "3,N(4)-ethenocytosine",
                "4-Ketocyclophosphamide",
                "4-hydroxycyclophosphamide",
                "4-methylquinoline",
                "9-aminoacridine",
                "Carboxyphosphamide",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodimethylamine",
                "Nitrogen mustard N-oxide hydrochloride",
                "S-(chloromethyl)glutathione",
                "acridine half-mustard",
                "acrylamide",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[b]fluoranthene",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "busulfan",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "dacarbazine",
                "diazomethane",
                "dibenzo[a,l]pyrene",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "hexane-2,5-diyl bis(methanesulfonate)",
                "methyl methanesulfonate",
                "mitomycin A",
                "mitomycin C",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "perfosfamide",
                "sodium azide",
                "temozolomide",
                "trimethyl phosphite"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet Radiation (UV)",
            "X-rays",
            "Gamma Rays",
            "Benzene",
            "Formaldehyde",
            "Asbestos",
            "Arsenic",
            "Ethidium Bromide (EtBr)",
            "Aflatoxin B1",
            "Benzo[a]pyrene",
            "Nitrosamines",
            "Mustard Gas",
            "Vinyl Chloride",
            "Polycyclic Aromatic Hydrocarbons (PAHs)",
            "Ethylene Oxide",
            "Chlorambucil",
            "Cyclophosphamide",
            "Mitomycin C",
            "Cisplatin",
            "5-Bromouracil",
            "2-Aminopurine",
            "Proflavine",
            "Hydrogen Peroxide (H2O2)",
            "Acridine Orange"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethidium Bromide",
                "Chlorambucil",
                "Cyclophosphamide",
                "Mitomycin C",
                "Cisplatin",
                "5-Bromouracil"
            ],
            "mismatches": [
                "Ultraviolet Radiation (UV)",
                "X-rays",
                "Gamma Rays",
                "Benzene",
                "Formaldehyde",
                "Asbestos",
                "Arsenic",
                "Aflatoxin B1",
                "Benzo[a]pyrene",
                "Nitrosamines",
                "Mustard Gas",
                "Vinyl Chloride",
                "Polycyclic Aromatic Hydrocarbons (PAHs)",
                "Ethylene Oxide",
                "2-Aminopurine",
                "Proflavine",
                "Hydrogen Peroxide (H2O2)",
                "Acridine Orange"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "9-aminoacridine",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "Porfiromycine",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "anthramycin",
                "apholate",
                "aziridine",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "carboplatin",
                "chlorambucil",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "dimethyl sulfoxide",
                "ethidium",
                "ethidium bromide",
                "iodoacetic acid",
                "methoxyacetic acid",
                "mitomycin B",
                "mitomycin C",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard",
                "quinacrine mustard",
                "sodium fluoride",
                "styrene"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Aflatoxin B1 (AFB1)",
            "Benzene (C6H6)",
            "Cyclophosphamide (CP)",
            "Ethyl methanesulfonate (EMS)",
            "Formaldehyde (HCHO)",
            "Gamma radiation (\u03b3-radiation)",
            "Hydrazine (N2H4)",
            "Ionizing radiation (IR)",
            "Methyl methanesulfonate (MMS)",
            "Nitrosamines (NAs)",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Radiation from radioactive isotopes (e.g., Cesium-137)",
            "Sodium azide (NaN3)",
            "Sulfur mustard (Mustard gas)",
            "Tobacco smoke (TS)",
            "Ultraviolet light (UV)",
            "Vinyl chloride (C2H3Cl)",
            "Acrylamide (ACR)",
            "Benzo[a]pyrene (BaP)",
            "2-Amino-3-methylimidazo[4,5-f]quinoline (IQ)",
            "4-Nitroquinoline 1-oxide (4NQO)",
            "Dimethyl sulfate (DMS)",
            "Ethylene oxide (EO)",
            "Nitrogen mustard (NM)",
            "Tetracycline (TC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cyclophosphamide (CP)",
                "Ethyl methanesulfonate (EMS)",
                "Methyl methanesulfonate (MMS)",
                "Sodium azide (NaN3)",
                "Dimethyl sulfate (DMS)",
                "Nitrogen mustard (NM)",
                "Acrylamide (ACR)"
            ],
            "mismatches": [
                "Aflatoxin B1 (AFB1)",
                "Benzene (C6H6)",
                "Formaldehyde (HCHO)",
                "Gamma radiation (\u03b3-radiation)",
                "Hydrazine (N2H4)",
                "Ionizing radiation (IR)",
                "Nitrosamines (NAs)",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Radiation from radioactive isotopes (e.g., Cesium-137)",
                "Sulfur mustard (Mustard gas)",
                "Tobacco smoke (TS)",
                "Ultraviolet light (UV)",
                "Vinyl chloride (C2H3Cl)",
                "Benzo[a]pyrene (BaP)",
                "2-Amino-3-methylimidazo[4,5-f]quinoline (IQ)",
                "4-Nitroquinoline 1-oxide (4NQO)",
                "Ethylene oxide (EO)",
                "Tetracycline (TC)"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1,2-dimethylhydrazine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2,5-bis(2-hydroxyethylamino)-3,6-diaziridinylbenzoquinone",
                "2,5-bis(aziridin-1-yl)-1,4-benzoquinone",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "4-methylquinoline",
                "4-nitrophenylhydrazine",
                "ICR-170",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "Nitrogen mustard N-oxide",
                "Nitrogen mustard N-oxide hydrochloride",
                "acetaldehyde",
                "acrylamide",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bromoethane",
                "chloromethane",
                "cyclophosphamide",
                "diazomethane",
                "dibenzo[a,l]pyrene",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methoxyacetic acid",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "ozone",
                "phosphoramide mustard",
                "sodium azide"
            ],
            "TP": 7,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet Radiation (UV)",
            "Ionizing Radiation (IR)",
            "Tobacco Smoke",
            "Alcohol",
            "Processed Meats",
            "Aflatoxin B1 (AFB1)",
            "Polychlorinated Biphenyls (PCBs)",
            "Vinyl Chloride",
            "Formaldehyde",
            "Benzene",
            "Arsenic",
            "Lead",
            "Mercury",
            "Polycyclic Aromatic Hydrocarbons (PAHs)",
            "Acrylamide",
            "Ethidium Bromide",
            "5-Bromouracil",
            "2-Aminopurine",
            "Nitrous Acid",
            "Hydroxylamine",
            "Alkylating Agents",
            "Base Analogs",
            "Intercalating Agents",
            "Transposons"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acrylamide",
                "Ethidium Bromide",
                "5-Bromouracil",
                "Alkylating Agents"
            ],
            "mismatches": [
                "Ultraviolet Radiation (UV)",
                "Ionizing Radiation (IR)",
                "Tobacco Smoke",
                "Alcohol",
                "Processed Meats",
                "Aflatoxin B1 (AFB1)",
                "Polychlorinated Biphenyls (PCBs)",
                "Vinyl Chloride",
                "Formaldehyde",
                "Benzene",
                "Arsenic",
                "Lead",
                "Mercury",
                "Polycyclic Aromatic Hydrocarbons (PAHs)",
                "2-Aminopurine",
                "Nitrous Acid",
                "Hydroxylamine",
                "Base Analogs",
                "Intercalating Agents",
                "Transposons"
            ],
            "true_referents": [
                "(S,S)-diepoxybutane",
                "(Z)-tomaymycin",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "Bandrowski's base",
                "ICR-170",
                "N(6)-hydroxyadenine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acrylamide",
                "alkylating agent",
                "anthramycin",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "caffeine",
                "chloromethane",
                "dibenzo[a,l]pyrene",
                "dimethylmyleran",
                "ethidium",
                "ethidium bromide",
                "ifosfamide",
                "intercalator",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "ozone",
                "propidium",
                "sodium fluoride",
                "styrene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Benzene (C6H6)",
            "UV radiation",
            "Tobacco smoke",
            "Formaldehyde (HCHO)",
            "Asbestos",
            "Nitrosamines",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Aflatoxins",
            "Bisphenol A (BPA)",
            "Acrylamide (C3H5NO)",
            "Vinyl chloride (C2H3Cl)",
            "Lead (Pb)",
            "Arsenic (As)",
            "Mercury (Hg)",
            "Cadmium (Cd)",
            "Ethylene oxide (C2H4O)",
            "Dioxins",
            "X-rays",
            "Gamma rays",
            "Mustard gas (C4H8Cl2S)",
            "Chlorine gas (Cl2)",
            "Sulfur mustard (C4H8Cl2S)",
            "Hydrogen cyanide (HCN)",
            "Ethylene dibromide (C2H4Br2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acrylamide (C3H5NO)",
                "Sulfur mustard (C4H8Cl2S)",
                "Ethylene dibromide (C2H4Br2)"
            ],
            "mismatches": [
                "Benzene (C6H6)",
                "UV radiation",
                "Tobacco smoke",
                "Formaldehyde (HCHO)",
                "Asbestos",
                "Nitrosamines",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Aflatoxins",
                "Bisphenol A (BPA)",
                "Vinyl chloride (C2H3Cl)",
                "Lead (Pb)",
                "Arsenic (As)",
                "Mercury (Hg)",
                "Cadmium (Cd)",
                "Ethylene oxide (C2H4O)",
                "Dioxins",
                "X-rays",
                "Gamma rays",
                "Mustard gas (C4H8Cl2S)",
                "Chlorine gas (Cl2)",
                "Hydrogen cyanide (HCN)"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1,1-dichloroethene",
                "1,2-dibromoethane",
                "1,2-dichloroethane",
                "1-methylphenanthrene",
                "2-aminoacrylic acid",
                "3,4-epoxy-1-cyclohexene",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acrylamide",
                "anthramycin",
                "benz[a]anthracene 5,6-oxide",
                "benzo[e]pyrene",
                "bis(2-chloroethyl) sulfide",
                "bromoethane",
                "chloromethane",
                "dibenzo[a,l]pyrene",
                "dimethyl sulfoxide",
                "ethidium bromide",
                "iodoacetic acid",
                "methoxyacetic acid",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "ozone",
                "phosphoramide mustard",
                "styrene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet radiation (UV)",
            "Ionizing radiation",
            "X-rays",
            "Gamma rays",
            "Benzene",
            "Asbestos",
            "Ethidium bromide",
            "Aflatoxin B1 (AFB1)",
            "2-Aminofluorene (2-AF)",
            "Nitrosamines",
            "Cisplatin",
            "N-Methyl-N\u2019-nitro-N-nitrosoguanidine (MNNG)",
            "Acrylamide",
            "Sodium azide",
            "Mitomycin C",
            "Trp-P-1",
            "Ethyl nitrosourea (ENU)",
            "Formaldehyde",
            "Vinyl chloride",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Acridine Orange",
            "Ethyl methanesulfonate (EMS)",
            "Bleomycin",
            "4-Nitroquinoline 1-oxide (4-NQO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethidium bromide",
                "Cisplatin",
                "N-Methyl-N\u2019-nitro-N-nitrosoguanidine (MNNG)",
                "Acrylamide",
                "Sodium azide",
                "Mitomycin C",
                "Ethyl methanesulfonate (EMS)"
            ],
            "mismatches": [
                "Ultraviolet radiation (UV)",
                "Ionizing radiation",
                "X-rays",
                "Gamma rays",
                "Benzene",
                "Asbestos",
                "Aflatoxin B1 (AFB1)",
                "2-Aminofluorene (2-AF)",
                "Nitrosamines",
                "Trp-P-1",
                "Ethyl nitrosourea (ENU)",
                "Formaldehyde",
                "Vinyl chloride",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Acridine Orange",
                "Bleomycin",
                "4-Nitroquinoline 1-oxide (4-NQO)"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methylphenanthrene",
                "2,5-bis(aziridin-1-yl)-1,4-benzoquinone",
                "2-acetamidofluorene",
                "2-aminoacrylic acid",
                "2-nitrofluorene",
                "3,N(4)-ethenocytosine",
                "4-methylquinoline",
                "9-aminoacridine",
                "N-acetoxy-2-acetamidofluorene",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "caffeine",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "dibenzo[a,l]pyrene",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "mitomycin",
                "mitomycin B",
                "mitomycin C",
                "myosmine",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phleomycin",
                "sodium azide",
                "sodium fluoride",
                "sodium hydrogensulfite",
                "styrene"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet radiation (UV)",
            "X-rays",
            "Gamma rays",
            "Ethidium bromide (EtBr)",
            "Nitrous acid (HNO2)",
            "Ethyl methanesulfonate (EMS)",
            "N-methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "Benzo[a]pyrene",
            "Aflatoxin B1",
            "Acridine orange",
            "5-Bromouracil",
            "2-Aminopurine",
            "Hydroxylamine",
            "Sodium azide",
            "Mitomycin C",
            "Cisplatin",
            "Cyclophosphamide",
            "Formaldehyde",
            "Mustard gas",
            "Vinyl chloride",
            "Benzene",
            "Radon",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "N-Nitroso compounds"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethidium bromide",
                "Ethyl methanesulfonate",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "5-Bromouracil",
                "Sodium azide",
                "Mitomycin C",
                "Cisplatin",
                "Cyclophosphamide"
            ],
            "mismatches": [
                "Ultraviolet radiation (UV)",
                "X-rays",
                "Gamma rays",
                "Nitrous acid (HNO2)",
                "Benzo[a]pyrene",
                "Aflatoxin B1",
                "Acridine orange",
                "2-Aminopurine",
                "Hydroxylamine",
                "Formaldehyde",
                "Mustard gas",
                "Vinyl chloride",
                "Benzene",
                "Radon",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "N-Nitroso compounds"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "9-aminoacridine",
                "N(6)-hydroxyadenine",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "mitomycin B",
                "mitomycin C",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard",
                "quinacrine mustard",
                "sodium azide",
                "sodium fluoride",
                "sodium hydrogensulfite",
                "styrene"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ethidium bromide (EtBr)",
            "Methylnitronitrosoguanidine (MNNG)",
            "Ethyl methanesulfonate (EMS)",
            "Nitrous acid (HNO2)",
            "Hydroxylamine (HA)",
            "2-Aminopurine (2-AP)",
            "Benzene",
            "Aflatoxins",
            "Benzo[a]pyrene",
            "Dimethyl sulfate (DMS)",
            "Diethyl sulfate (DES)",
            "Acridine orange (AO)",
            "Proflavine",
            "Nitrogen mustards",
            "Vinyl chloride",
            "Formaldehyde",
            "Aristolochic acids",
            "Ultraviolet (UV) radiation",
            "Ionizing radiation (X-rays, gamma rays)",
            "Asbestos",
            "Chromium (VI) compounds",
            "Nickel compounds",
            "Cadmium compounds",
            "Arsenic compounds"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethidium bromide",
                "Ethyl methanesulfonate",
                "Dimethyl sulfate",
                "Diethyl sulfate",
                "Benzo[a]pyrene",
                "Nitrogen mustard"
            ],
            "mismatches": [
                "Methylnitronitrosoguanidine (MNNG)",
                "Nitrous acid (HNO2)",
                "Hydroxylamine (HA)",
                "2-Aminopurine (2-AP)",
                "Benzene",
                "Aflatoxins",
                "Acridine orange (AO)",
                "Proflavine",
                "Vinyl chloride",
                "Formaldehyde",
                "Aristolochic acids",
                "Ultraviolet (UV) radiation",
                "Ionizing radiation (X-rays, gamma rays)",
                "Asbestos",
                "Chromium (VI) compounds",
                "Nickel compounds",
                "Cadmium compounds",
                "Arsenic compounds"
            ],
            "true_referents": [
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "3,6-diamino-10-methylacridinium chloride.HCl",
                "9-aminoacridine",
                "N(6)-hydroxyadenine",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodimethylamine",
                "Nitrogen mustard N-oxide",
                "Nitrogen mustard N-oxide hydrochloride",
                "Porfiromycine",
                "acetaldehyde",
                "acridine half-mustard",
                "acrylamide",
                "anthramycin",
                "apholate",
                "aristolochic acid A",
                "aristolochic acid B",
                "aristolochic acid C",
                "aziridine",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "chloromethane",
                "dibenz[a,h]anthracene",
                "dibenzo[a,l]pyrene",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "iron(III) nitrilotriacetate",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "ozone",
                "p-N,N-bis(2-chloroethyl)aminophenylvaleric acid",
                "styrene"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet radiation (UV)",
            "Ionizing radiation (e.g. X-rays, gamma rays)",
            "Reactive oxygen species (ROS)",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Nitrosamines",
            "Aflatoxins",
            "Benzene",
            "Vinyl chloride",
            "Ethylene oxide",
            "Mustard gas (Bis(2-chloroethyl) sulfide)",
            "Cisplatin",
            "Ethidium bromide",
            "Acridine dyes",
            "Nitrous acid",
            "Alkylating agents (e.g. Ethyl methanesulfonate, Methyl methanesulfonate)",
            "Intercalating agents (e.g. Ethidium bromide, Acridine orange)",
            "Benzo[a]pyrene",
            "N-Nitrosodiethylamine",
            "Formaldehyde",
            "Asbestos fibers",
            "Tobacco smoke",
            "Grilled meat (containing heterocyclic amines)",
            "Radon gas",
            "Certain viruses (e.g. Human papillomavirus, Hepatitis B virus)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "bis(2-chloroethyl) sulfide",
                "cisplatin",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "N-nitrosodiethylamine"
            ],
            "mismatches": [
                "Ultraviolet radiation (UV)",
                "Ionizing radiation (e.g. X-rays, gamma rays)",
                "Reactive oxygen species (ROS)",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Nitrosamines",
                "Aflatoxins",
                "Benzene",
                "Vinyl chloride",
                "Ethylene oxide",
                "Mustard gas (Bis(2-chloroethyl) sulfide)",
                "Acridine dyes",
                "Nitrous acid",
                "Alkylating agents (e.g. Ethyl methanesulfonate, Methyl methanesulfonate)",
                "Intercalating agents (e.g. Ethidium bromide, Acridine orange)",
                "Benzo[a]pyrene",
                "Formaldehyde",
                "Asbestos fibers",
                "Tobacco smoke",
                "Grilled meat (containing heterocyclic amines)",
                "Radon gas",
                "Certain viruses (e.g. Human papillomavirus, Hepatitis B virus)"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "8-oxo-dGTP",
                "9-aminoacridine",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "S-(chloromethyl)glutathione",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "alkylating agent",
                "anthramycin",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(2-chloroethyl) sulfide",
                "bis(chloromethyl) ether",
                "bromoethane",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "dibenzo[a,l]pyrene",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "methoxyacetic acid",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard",
                "styrene",
                "urethane"
            ],
            "TP": 5,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet Radiation (UV)",
            "Ionizing Radiation (IR)",
            "Polycyclic Aromatic Hydrocarbons (PAHs)",
            "Nitrosamines",
            "Aflatoxins",
            "Alkylating Agents",
            "Intercalating Agents",
            "Topoisomerase Inhibitors",
            "Reactive Oxygen Species (ROS)",
            "Cigarette Smoke",
            "Asbestos",
            "Benzene",
            "Vinyl Chloride",
            "Ethylene Oxide",
            "Formaldehyde",
            "Acrylamide",
            "Bisphenol A (BPA)",
            "Phthalates",
            "Pesticides",
            "Heavy Metals (e.g., Lead, Mercury, Cadmium)",
            "Heterocyclic Amines (HCAs)",
            "Polychlorinated Biphenyls (PCBs)",
            "Dioxins",
            "Furans"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acrylamide",
                "alkylating agent",
                "intercalator"
            ],
            "mismatches": [
                "Ultraviolet Radiation (UV)",
                "Ionizing Radiation (IR)",
                "Polycyclic Aromatic Hydrocarbons (PAHs)",
                "Nitrosamines",
                "Aflatoxins",
                "Intercalating Agents",
                "Topoisomerase Inhibitors",
                "Reactive Oxygen Species (ROS)",
                "Cigarette Smoke",
                "Asbestos",
                "Benzene",
                "Vinyl Chloride",
                "Ethylene Oxide",
                "Formaldehyde",
                "Bisphenol A (BPA)",
                "Phthalates",
                "Pesticides",
                "Heavy Metals (e.g., Lead, Mercury, Cadmium)",
                "Heterocyclic Amines (HCAs)",
                "Polychlorinated Biphenyls (PCBs)",
                "Dioxins",
                "Furans"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "1-methylphenanthrene",
                "2-aminoacrylic acid",
                "3,6-diamino-10-methylacridinium chloride.HCl",
                "3,6-diaminoacridine",
                "3-acetyl-2,5-dimethylfuran",
                "8-oxo-dGTP",
                "ICR-170",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "S-(chloromethyl)glutathione",
                "acetaldehyde",
                "acrylamide",
                "alkylating agent",
                "anthramycin",
                "benzo[b]fluoranthene",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "chloromethane",
                "dibenzo[a,l]pyrene",
                "dimethylmyleran",
                "ifosfamide",
                "intercalator",
                "methoxyacetic acid",
                "mitomycin A",
                "nitrosocarbaryl",
                "ozone",
                "styrene",
                "urethane"
            ],
            "TP": 3,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Alkylating agents (e.g. mustard gas, cyclophosphamide) (1)",
            "Aromatic amines (e.g. 2-naphthylamine, 4-aminobiphenyl) (2)",
            "Benzo(a)pyrene (3)",
            "Bis(chloromethyl)ether (4)",
            "Cisplatin (5)",
            "Ethylene oxide (6)",
            "Ethyl methanesulfonate (EMS) (7)",
            "Furan (8)",
            "Ionizing radiation (e.g. X-rays, gamma rays) (9)",
            "Methyl methanesulfonate (MMS) (10)",
            "N-nitroso compounds (e.g. N-nitrosodiethylamine, N-nitrosodimethylamine) (11)",
            "Nitrosamines (e.g. N-nitrosodimethylamine) (12)",
            "Polycyclic aromatic hydrocarbons (PAHs) (e.g. benzo(a)pyrene) (13)",
            "Propylene oxide (14)",
            "Sulfur mustard (15)",
            "Tetrachloroethylene (16)",
            "Ultraviolet (UV) light (17)",
            "Vinyl chloride (18)",
            "X-rays (19)",
            "\u03b3-Radiation (20)",
            "Acrylamide (21)",
            "Asbestos (22)",
            "Cadmium (23)",
            "Cycasin (24)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alkylating agents (e.g. mustard gas, cyclophosphamide) (1)",
                "Benzo(a)pyrene (3)",
                "Bis(chloromethyl)ether (4)",
                "Cisplatin (5)",
                "Ethyl methanesulfonate (EMS) (7)",
                "Methyl methanesulfonate (MMS) (10)",
                "N-nitroso compounds (e.g. N-nitrosodiethylamine, N-nitrosodimethylamine) (11)",
                "Nitrosamines (e.g. N-nitrosodimethylamine) (12)",
                "Polycyclic aromatic hydrocarbons (PAHs) (e.g. benzo(a)pyrene) (13)",
                "Acrylamide (21)"
            ],
            "mismatches": [
                "Aromatic amines (e.g. 2-naphthylamine, 4-aminobiphenyl) (2)",
                "Ethylene oxide (6)",
                "Furan (8)",
                "Ionizing radiation (e.g. X-rays, gamma rays) (9)",
                "Propylene oxide (14)",
                "Sulfur mustard (15)",
                "Tetrachloroethylene (16)",
                "Ultraviolet (UV) light (17)",
                "Vinyl chloride (18)",
                "X-rays (19)",
                "\u03b3-Radiation (20)",
                "Asbestos (22)",
                "Cadmium (23)",
                "Cycasin (24)"
            ],
            "true_referents": [
                "(S)-colchicine",
                "(S,S)-diepoxybutane",
                "(Z)-tomaymycin",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1,2:7,8-diepoxyoctane",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "2-aminoacrylic acid",
                "3-acetyl-2,5-dimethylfuran",
                "BMH-21",
                "MeIQx",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acrylamide",
                "alkylating agent",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[b]fluoranthene",
                "benzo[e]pyrene",
                "bis(2-chloroethyl)amine",
                "bis(chloromethyl) ether",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "dimethyl sulfoxide",
                "ethyl methanesulfonate",
                "hesperidin",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard",
                "styrene"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet light (UV)",
            "Ionizing radiation",
            "X-rays",
            "Gamma rays",
            "Radon (Rn)",
            "Benzene (C6H6)",
            "Aflatoxin B1 (AFB1)",
            "Cisplatin (CDDP)",
            "Mitomycin C (MMC)",
            "Bleomycin (BLM)",
            "Actinomycin D (ACTD)",
            "Vinyl chloride (C2H3Cl)",
            "Arsenic (As)",
            "Cadmium (Cd)",
            "Chromium VI (CrVI)",
            "Nickel (Ni)",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Aristolochic acid (AA)",
            "Cyclophosphamide (CP)",
            "Busulfan (BU)",
            "Mustard gas (bis(2-chloroethyl) sulfide)",
            "Ethidium bromide (EtBr)",
            "Methyl methanesulfonate (MMS)",
            "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin",
                "Mitomycin C",
                "Actinomycin D",
                "Cyclophosphamide",
                "Busulfan",
                "bis(2-chloroethyl) sulfide",
                "Ethidium bromide",
                "Methyl methanesulfonate",
                "N-methyl-N'-nitro-N-nitrosoguanidine"
            ],
            "mismatches": [
                "Ultraviolet light (UV)",
                "Ionizing radiation",
                "X-rays",
                "Gamma rays",
                "Radon (Rn)",
                "Benzene (C6H6)",
                "Aflatoxin B1 (AFB1)",
                "Bleomycin (BLM)",
                "Vinyl chloride (C2H3Cl)",
                "Arsenic (As)",
                "Cadmium (Cd)",
                "Chromium VI (CrVI)",
                "Nickel (Ni)",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Aristolochic acid (AA)"
            ],
            "true_referents": [
                "(R*,R*)-diepoxybutane",
                "(S)-colchicine",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1-methylphenanthrene",
                "3,6-diamino-10-methylacridinium chloride.HCl",
                "3,6-diaminoacridine",
                "4-hydroxycyclophosphamide",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "acrylamide",
                "actinomycin D",
                "anthramycin",
                "aristolochic acid A",
                "aristolochic acid B",
                "aristolochic acid C",
                "benzo[e]pyrene",
                "bis(2-chloroethyl) sulfide",
                "bromoethane",
                "busulfan",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "iron(III) nitrilotriacetate",
                "lomustine",
                "methyl methanesulfonate",
                "mitomycin",
                "mitomycin A",
                "mitomycin B",
                "mitomycin C",
                "nitrogen mustard",
                "oxaliplatin",
                "ozone",
                "phleomycin",
                "phosphoramide mustard",
                "steffimycin D",
                "styrene"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Aflatoxin",
                "Canonical Name": "(AFL)"
            },
            {
                "Referent": "Alkylating agents",
                "Canonical Name": "(ALKYL)"
            },
            {
                "Referent": "Benzene",
                "Canonical Name": "(BEN)"
            },
            {
                "Referent": "Benzo[a]pyrene",
                "Canonical Name": "(BP)"
            },
            {
                "Referent": "Ethyl methanesulfonate",
                "Canonical Name": "(EMS)"
            },
            {
                "Referent": "Ethyl nitrosourea",
                "Canonical Name": "(ENU)"
            },
            {
                "Referent": "Methyl methanesulfonate",
                "Canonical Name": "(MMS)"
            },
            {
                "Referent": "N-Methyl-N-nitro-N-nitrosoguanidine",
                "Canonical Name": "(MNNG)"
            },
            {
                "Referent": "Nitrosamines",
                "Canonical Name": "(NITR)"
            },
            {
                "Referent": "Nitrosoureas",
                "Canonical Name": "(NITU)"
            },
            {
                "Referent": "Procarbazine",
                "Canonical Name": "(PRO)"
            },
            {
                "Referent": "Radiation",
                "Canonical Name": "(RAD)"
            },
            {
                "Referent": "Uracil",
                "Canonical Name": "(URA)"
            },
            {
                "Referent": "X-rays",
                "Canonical Name": "(XRAY)"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "alkylating agent",
                "ethyl methanesulfonate",
                "methyl methanesulfonate",
                "N-methyl-N'-nitro-N-nitrosoguanidine"
            ],
            "mismatches": [
                "Aflatoxin",
                "Benzene",
                "Benzo[a]pyrene",
                "Ethyl nitrosourea",
                "Nitrosamines",
                "Nitrosoureas",
                "Procarbazine",
                "Radiation",
                "Uracil",
                "X-rays"
            ],
            "true_referents": [
                "(E)-4-nitrostilbene",
                "(E)-dacarbazine",
                "(E)-tomaymycin",
                "(S)-colchicine",
                "1-methylphenanthrene",
                "5-[bis(2-chloroethyl)amino]uracil",
                "N(6)-hydroxyadenine",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodimethylamine",
                "acrylamide",
                "alkylating agent",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "dacarbazine",
                "dibenzo[a,l]pyrene",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethyl methanesulfonate",
                "hexane-2,5-diyl bis(methanesulfonate)",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "ozone"
            ],
            "TP": 4,
            "FP": 10,
            "FN": 0
        }
    }
]