[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "N-nitroso compounds (NOCs)",
            "Benzene",
            "Arsenic",
            "Formaldehyde",
            "Polycyclic Aromatic Hydrocarbons (PAHs)",
            "Vinyl chloride",
            "Ethylene oxide",
            "Bis(chloromethyl) ether",
            "Methyl methanesulfonate (MMS)",
            "Ethylnitrosourea (ENU)",
            "Acrylamide",
            "Ultraviolet radiation (UV)",
            "Ionizing radiation",
            "Cigarette smoke",
            "Aflatoxins",
            "Chlorambucil",
            "Busulfan",
            "Cyclophosphamide",
            "Mitomycin C",
            "Nitrous oxide",
            "Bromate",
            "Tamoxifen",
            "Chlorine",
            "Mercury compounds"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bis(chloromethyl) ether",
                "Methyl methanesulfonate (MMS)",
                "Acrylamide",
                "Chlorambucil",
                "Busulfan",
                "Cyclophosphamide",
                "Mitomycin C"
            ],
            "mismatches": [
                "N-nitroso compounds (NOCs)",
                "Benzene",
                "Arsenic",
                "Formaldehyde",
                "Polycyclic Aromatic Hydrocarbons (PAHs)",
                "Vinyl chloride",
                "Ethylene oxide",
                "Ethylnitrosourea (ENU)",
                "Ultraviolet radiation (UV)",
                "Ionizing radiation",
                "Cigarette smoke",
                "Aflatoxins",
                "Nitrous oxide",
                "Bromate",
                "Tamoxifen",
                "Chlorine",
                "Mercury compounds"
            ],
            "true_referents": [
                "1,2-dichloroethane",
                "1-methylphenanthrene",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "5-bromouridine",
                "BMH-21",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N-nitrosourea",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acrylamide",
                "anthramycin",
                "benzo[e]pyrene",
                "bis(2-chloroethyl)amine",
                "bis(chloromethyl) ether",
                "bromoethane",
                "busulfan",
                "chlorambucil",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "ethyl methanesulfonate",
                "lomustine",
                "methoxyacetic acid",
                "methyl methanesulfonate",
                "mitomycin B",
                "mitomycin C",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "sodium fluoride",
                "styrene",
                "temozolomide"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet radiation (UV)",
            "Ionizing radiation (IR)",
            "Benzene",
            "Aflatoxin B1",
            "N-nitrosodimethylamine (NDMA)",
            "Ethyl methanesulfonate (EMS)",
            "Methyl methanesulfonate (MMS)",
            "Diethyl sulfate",
            "Acridine orange",
            "Nitrous acid",
            "2-Acetylaminofluorene (2-AAF)",
            "Mustard gas (Sulfur mustard)",
            "Vinyl chloride",
            "Ethylene oxide",
            "Radon",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Formaldehyde",
            "Tobacco smoke",
            "Arsenic",
            "Alkylating agents",
            "X-rays",
            "Gamma rays",
            "Alpha particles",
            "Beta particles",
            "Base analogs"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "N-nitrosodimethylamine",
                "ethyl methanesulfonate",
                "methyl methanesulfonate",
                "diethyl sulfate",
                "alkylating agent"
            ],
            "mismatches": [
                "Ultraviolet radiation (UV)",
                "Ionizing radiation (IR)",
                "Benzene",
                "Aflatoxin B1",
                "Acridine orange",
                "Nitrous acid",
                "2-Acetylaminofluorene (2-AAF)",
                "Mustard gas (Sulfur mustard)",
                "Vinyl chloride",
                "Ethylene oxide",
                "Radon",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Formaldehyde",
                "Tobacco smoke",
                "Arsenic",
                "X-rays",
                "Gamma rays",
                "Alpha particles",
                "Beta particles",
                "Base analogs"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "2-acetamidofluorene",
                "2-nitrofluorene",
                "9-aminoacridine",
                "ICR-170",
                "N-acetoxy-2-acetamidofluorene",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "alkylating agent",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "chloromethane",
                "dibenzo[a,l]pyrene",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "dimethylmyleran",
                "ethidium",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methoxyacetic acid",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "ozone",
                "phosphoramide mustard",
                "sodium fluoride",
                "styrene"
            ],
            "TP": 5,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ethyl methanesulfonate (EMS)",
            "N-methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "N-ethyl-N-nitrosourea (ENU)",
            "2-Aminopurine",
            "Acridine Orange",
            "Benzo[a]pyrene",
            "Cyclophosphamide",
            "Diepoxybutane (DEB)",
            "Dimethyl sulfate (DMS)",
            "Ethyl nitrosourea (ENU)",
            "Hydroxylamine",
            "Ionizing radiation",
            "Methyl methanesulfonate (MMS)",
            "Mitomycin C",
            "Nitrosoguanidine",
            "Nitrosomethylurea",
            "Nitrosomethylurethane",
            "Procarbazine",
            "Tritium",
            "Urethane",
            "Ultraviolet light",
            "X-rays",
            "5-Bromodeoxyuridine (BrdU)",
            "Vinblastine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-ethyl-N-nitrosourea",
                "Cyclophosphamide",
                "Dimethyl sulfate",
                "Methyl methanesulfonate",
                "Mitomycin C",
                "Urethane"
            ],
            "mismatches": [
                "Ethyl methanesulfonate (EMS)",
                "2-Aminopurine",
                "Acridine Orange",
                "Benzo[a]pyrene",
                "Diepoxybutane (DEB)",
                "Ethyl nitrosourea (ENU)",
                "Hydroxylamine",
                "Ionizing radiation",
                "Nitrosoguanidine",
                "Nitrosomethylurea",
                "Nitrosomethylurethane",
                "Procarbazine",
                "Tritium",
                "Ultraviolet light",
                "X-rays",
                "5-Bromodeoxyuridine (BrdU)",
                "Vinblastine"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "(R,R)-diepoxybutane",
                "(S)-colchicine",
                "1H-imidazo[2,1-i]purine",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "3-bromopyruvic acid",
                "4-hydroxycyclophosphamide",
                "5-bromouracil",
                "5-bromouridine",
                "9-aminoacridine",
                "Methyl violet 2B",
                "N(6)-hydroxyadenine",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodimethylamine",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bromoethane",
                "carboplatin",
                "chlorambucil",
                "cisplatin",
                "cyclophosphamide",
                "dacarbazine",
                "dibenzo[a,l]pyrene",
                "diepoxybutane",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "meso-diepoxybutane",
                "methyl methanesulfonate",
                "mitomycin B",
                "mitomycin C",
                "oxaliplatin",
                "ozone",
                "styrene",
                "urethane"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet Radiation (UV) ",
            "Ionizing Radiation (IR)",
            "Ethyl Methanesulfonate (EMS)",
            "Nitrosoguanidine (NTG)",
            "Benzyne (Bz)",
            "Acridine Orange (AO)",
            "Ethidium Bromide (EtBr)",
            "2-Amino-7-methylisoxanthopterin (AMIX)",
            "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "Methyl Methanesulfonate (MMS)",
            "Diepoxybutane (DEB)",
            "Azobenzene (AzB)",
            "Procarbazine (PCB)",
            "Cyclophosphamide (CTX)",
            "Busulfan (BSF)",
            "Temozolomide (TMZ)",
            "Melphalan (MLP)",
            "Chlorambucil (CLB)",
            "Mustard Gas (HN)",
            "Nitrogen Mustard (NM)",
            "Aphidicolin (APH)",
            "Colchicine (COL)",
            "Puromycin (PUR)",
            "Streptonigrin (STR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl Methanesulfonate",
                "Ethidium Bromide",
                "N-Methyl-N'-nitro-N-nitrosoguanidine",
                "Methyl Methanesulfonate",
                "Diepoxybutane",
                "Cyclophosphamide",
                "Busulfan",
                "Temozolomide",
                "Melphalan",
                "Chlorambucil",
                "Nitrogen Mustard"
            ],
            "mismatches": [
                "Ultraviolet Radiation (UV)",
                "Ionizing Radiation (IR)",
                "Nitrosoguanidine (NTG)",
                "Benzyne (Bz)",
                "Acridine Orange (AO)",
                "2-Amino-7-methylisoxanthopterin (AMIX)",
                "Azobenzene (AzB)",
                "Procarbazine (PCB)",
                "Mustard Gas (HN)",
                "Aphidicolin (APH)",
                "Colchicine (COL)",
                "Puromycin (PUR)",
                "Streptonigrin (STR)"
            ],
            "true_referents": [
                "(E)-4-nitrostilbene",
                "(E)-dacarbazine",
                "(R,R)-diepoxybutane",
                "(S)-colchicine",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "2,5-bis(aziridin-1-yl)-1,4-benzoquinone",
                "4-hydroxycyclophosphamide",
                "5-[bis(2-chloroethyl)amino]uracil",
                "9-aminoacridine",
                "ICR-170",
                "MeIQx",
                "N-hydroxy-MeIQx",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "Nitrogen mustard N-oxide",
                "Nitrogen mustard N-oxide hydrochloride",
                "Porfiromycin",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "anthramycin",
                "apholate",
                "aristolochic acid B",
                "benz[a]anthracene 5,6-oxide",
                "benzo[b]fluoranthene",
                "benzo[e]pyrene",
                "bromoethane",
                "busulfan",
                "carboplatin",
                "chlorambucil",
                "cisplatin",
                "cyclophosphamide",
                "dacarbazine",
                "diazomethane",
                "dibenzo[a,l]pyrene",
                "diepoxybutane",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "melphalan",
                "meso-diepoxybutane",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "perfosfamide",
                "saintopin",
                "sodium azide",
                "temozolomide",
                "tilimycin",
                "tretamine"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ethidium Bromide (EtBr)",
            "Acridine Orange",
            "Benzo[a]pyrene",
            "Aflatoxin B1",
            "Nitrosamines",
            "Ultraviolet Radiation (UV)",
            "X-rays",
            "Gamma Rays",
            "Mustard Gas",
            "Cisplatin",
            "Vinyl Chloride",
            "Formaldehyde",
            "Benzene",
            "Arsenic",
            "Chromium(VI)",
            "Polycyclic Aromatic Hydrocarbons (PAHs)",
            "Alkylating Agents",
            "Epoxides",
            "Hydrazine",
            "Proflavine",
            "Thalidomide",
            "5-Bromouracil",
            "2-Aminopurine",
            "Mitomycin C"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethidium Bromide (EtBr)",
                "Cisplatin",
                "5-Bromouracil",
                "Mitomycin C"
            ],
            "mismatches": [
                "Acridine Orange",
                "Benzo[a]pyrene",
                "Aflatoxin B1",
                "Nitrosamines",
                "Ultraviolet Radiation (UV)",
                "X-rays",
                "Gamma Rays",
                "Mustard Gas",
                "Vinyl Chloride",
                "Formaldehyde",
                "Benzene",
                "Arsenic",
                "Chromium(VI)",
                "Polycyclic Aromatic Hydrocarbons (PAHs)",
                "Alkylating Agents",
                "Epoxides",
                "Hydrazine",
                "Proflavine",
                "Thalidomide",
                "2-Aminopurine"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "(S)-colchicine",
                "1,2-dimethylhydrazine",
                "1-methylphenanthrene",
                "11,12-epoxy-3-methylcholanthrene",
                "1H-imidazo[2,1-i]purine",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "3,4-epoxy-1-cyclohexene",
                "4-nitrophenylhydrazine",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "9-aminoacridine",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "Porfiromycine",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "alkylating agent",
                "apholate",
                "aziridine",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "dimethylmyleran",
                "ethidium",
                "ethidium bromide",
                "iodoacetic acid",
                "melphalan",
                "mitomycin B",
                "mitomycin C",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard",
                "quinacrine mustard",
                "sodium fluoride",
                "styrene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Aflatoxin B1 (AFB1)",
            "Benzene (C6H6)",
            "Cyclophosphamide (CP)",
            "Ethyl methanesulfonate (EMS)",
            "Nitrosamines (NAs)",
            "Radiation (Ionizing radiation)",
            "Vinyl chloride (VC)",
            "Arsenic (As)",
            "Benzo[a]pyrene (BaP)",
            "Formaldehyde (CH2O)",
            "Hydrazine (N2H4)",
            "Dimethyl sulfate (DMS)",
            "Mercury (Hg)",
            "Tobacco smoke (TS)",
            "2-Amino-3-methylimidazo[4,5-f]quinoline (IQ)",
            "Acrylamide (AA)",
            "Methyl methanesulfonate (MMS)",
            "Phenol (C6H5OH)",
            "Cisplatin (CDDP)",
            "Doxorubicin (DOX)",
            "Sodium azide (NaN3)",
            "Alkylating agents (general)",
            "Hydroxyurea (HU)",
            "Dinitrosopiperazine (DNOP)",
            "Tetracycline (TC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cyclophosphamide (CP)",
                "Ethyl methanesulfonate (EMS)",
                "Dimethyl sulfate (DMS)",
                "Acrylamide (AA)",
                "Methyl methanesulfonate (MMS)",
                "Cisplatin (CDDP)",
                "Sodium azide (NaN3)"
            ],
            "mismatches": [
                "Aflatoxin B1 (AFB1)",
                "Benzene (C6H6)",
                "Nitrosamines (NAs)",
                "Radiation (Ionizing radiation)",
                "Vinyl chloride (VC)",
                "Arsenic (As)",
                "Benzo[a]pyrene (BaP)",
                "Formaldehyde (CH2O)",
                "Hydrazine (N2H4)",
                "Mercury (Hg)",
                "Tobacco smoke (TS)",
                "2-Amino-3-methylimidazo[4,5-f]quinoline (IQ)",
                "Phenol (C6H5OH)",
                "Doxorubicin (DOX)",
                "Alkylating agents (general)",
                "Hydroxyurea (HU)",
                "Dinitrosopiperazine (DNOP)",
                "Tetracycline (TC)"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "(S)-colchicine",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1,2-dimethylhydrazine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2,5-bis(2-hydroxyethylamino)-3,6-diaziridinylbenzoquinone",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "4-methylquinoline",
                "4-nitrophenylhydrazine",
                "N(6)-hydroxyadenine",
                "N-ethyl-N-nitrosourea",
                "N-hydroxy-MeIQx",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "N-nitrosopiperidine",
                "acetaldehyde",
                "acrylamide",
                "alkylating agent",
                "anthramycin",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "diazomethane",
                "dibenzo[a,l]pyrene",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "dimethylmyleran",
                "ethyl methanesulfonate",
                "methyl methanesulfonate",
                "mitomycin",
                "oxaliplatin",
                "ozone",
                "sodium azide"
            ],
            "TP": 7,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ultraviolet Radiation (UV Radiation)",
            "X-rays",
            "Gamma Rays",
            "Alpha Particles",
            "Ethyl Methanesulfonate (EMS)",
            "Benzo[a]pyrene",
            "Aflatoxin B1",
            "Tobacco Smoke",
            "Alkylating Antineoplastic Drugs",
            "Mustard Gas",
            "Vinyl Chloride",
            "Formaldehyde",
            "Radon",
            "Polychlorinated Biphenyls (PCBs)",
            "Dioxin",
            "Arsenic",
            "Diesel Exhaust",
            "Alcohol",
            "Processed Meats",
            "Acrylamide",
            "Polycyclic Aromatic Hydrocarbons (PAHs)",
            "Human Papillomavirus (HPV)",
            "Hepatitis B Virus (HBV)",
            "Hepatitis C Virus (HCV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl Methanesulfonate (EMS)",
                "Acrylamide"
            ],
            "mismatches": [
                "Ultraviolet Radiation (UV Radiation)",
                "X-rays",
                "Gamma Rays",
                "Alpha Particles",
                "Benzo[a]pyrene",
                "Aflatoxin B1",
                "Tobacco Smoke",
                "Alkylating Antineoplastic Drugs",
                "Mustard Gas",
                "Vinyl Chloride",
                "Formaldehyde",
                "Radon",
                "Polychlorinated Biphenyls (PCBs)",
                "Dioxin",
                "Arsenic",
                "Diesel Exhaust",
                "Alcohol",
                "Processed Meats",
                "Polycyclic Aromatic Hydrocarbons (PAHs)",
                "Human Papillomavirus (HPV)",
                "Hepatitis B Virus (HBV)",
                "Hepatitis C Virus (HCV)"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methylphenanthrene",
                "2-aminoacrylic acid",
                "BMH-21",
                "acetaldehyde",
                "acrylamide",
                "alkylating agent",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "caffeine",
                "carboplatin",
                "chlorambucil",
                "chloromethane",
                "cisplatin",
                "dibenzo[a,l]pyrene",
                "ethidium",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "ozone",
                "phosphoramide mustard",
                "quinacrine mustard",
                "sodium fluoride",
                "styrene"
            ],
            "TP": 2,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "mutagen",
        "domain": "medicine",
        "response": "",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ethyl methanesulfonate (EMS)",
            "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "Benzopyrene",
            "Aflatoxin B1",
            "Nitrosamines",
            "Formaldehyde",
            "Sodium azide",
            "Mitomycin C",
            "Cyclophosphamide",
            "Dimethyl sulfate",
            "Ultraviolet (UV) radiation",
            "X-rays",
            "Gamma rays",
            "Hydrazine",
            "Vinyl chloride",
            "Acrylamide",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Base analogs (e.g., 5-bromouracil)",
            "Colchicine",
            "DMSO (Dimethyl sulfoxide)",
            "Azathioprine",
            "Bleomycin",
            "Trichloroethylene",
            "Lead compounds"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl methanesulfonate (EMS)",
                "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
                "Sodium azide",
                "Mitomycin C",
                "Cyclophosphamide",
                "Dimethyl sulfate",
                "Acrylamide",
                "Base analogs (e.g., 5-bromouracil)",
                "DMSO (Dimethyl sulfoxide)"
            ],
            "mismatches": [
                "Benzopyrene",
                "Aflatoxin B1",
                "Nitrosamines",
                "Formaldehyde",
                "Ultraviolet (UV) radiation",
                "X-rays",
                "Gamma rays",
                "Hydrazine",
                "Vinyl chloride",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Colchicine",
                "Azathioprine",
                "Bleomycin",
                "Trichloroethylene",
                "Lead compounds"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "(S)-colchicine",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1,2-dimethylhydrazine",
                "1-methylphenanthrene",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "4-nitrophenylhydrazine",
                "5-bromouracil",
                "5-bromouridine",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acrylamide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(chloromethyl) ether",
                "bromoethane",
                "busulfan",
                "chlorambucil",
                "chloromethane",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "mitomycin",
                "mitomycin B",
                "mitomycin C",
                "nitrosocarbaryl",
                "ozone",
                "phleomycin",
                "sodium azide",
                "sodium fluoride",
                "sodium hydrogensulfite",
                "styrene"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ethyl methanesulfonate (EMS)",
            "N-ethyl-N-nitrosourea (ENU)",
            "Acridine orange",
            "Nitrous acid",
            "Hydroxylamine",
            "Sodium azide",
            "Methyl methanesulfonate (MMS)",
            "5-Bromouracil",
            "2-Aminopurine",
            "Ethidium bromide",
            "Proflavine",
            "Mitomycin C",
            "Bleomycin",
            "N-methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "4-Nitroquinoline 1-oxide (4NQO)",
            "Benzo[a]pyrene",
            "Aflatoxin B1",
            "Ultraviolet (UV) radiation",
            "X-rays",
            "Gamma rays",
            "Hydrogen peroxide",
            "Formaldehyde",
            "Cisplatin",
            "Cyclophosphamide"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl methanesulfonate",
                "N-ethyl-N-nitrosourea",
                "Sodium azide",
                "Methyl methanesulfonate",
                "5-Bromouracil",
                "Ethidium bromide",
                "Mitomycin C",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "Benzo[a]pyrene",
                "Cisplatin",
                "Cyclophosphamide"
            ],
            "mismatches": [
                "Acridine orange",
                "Nitrous acid",
                "Hydroxylamine",
                "2-Aminopurine",
                "Proflavine",
                "Bleomycin",
                "4-Nitroquinoline 1-oxide",
                "Aflatoxin B1",
                "Ultraviolet (UV) radiation",
                "X-rays",
                "Gamma rays",
                "Hydrogen peroxide",
                "Formaldehyde"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2,5-bis(aziridin-1-yl)-1,4-benzoquinone",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "4-methylquinoline",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "9-aminoacridine",
                "N(6)-hydroxyadenine",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodimethylamine",
                "Porfiromycine",
                "acetaldehyde",
                "acridine half-mustard",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "apholate",
                "aziridine",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bromoethane",
                "carboplatin",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "dimethyl sulfoxide",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "mitomycin",
                "mitomycin B",
                "mitomycin C",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phleomycin",
                "sodium azide",
                "sodium fluoride",
                "sodium hydrogensulfite"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ethidium bromide (EtBr)",
            "Methyl methanesulfonate (MMS)",
            "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
            "Ethyl methanesulfonate (EMS)",
            "Hydroxylamine (HA)",
            "2-Aminopurine (2-AP)",
            "5-Bromouracil (5-BU)",
            "Nitrous acid (HNO2)",
            "Sodium azide (NaN3)",
            "Acridine orange (AO)",
            "Proflavine",
            "Benzopyrene",
            "Aflatoxin B1",
            "2-Acetylaminofluorene (2-AAF)",
            "4-Nitroquinoline 1-oxide (4-NQO)",
            "Nitrogen mustard (HN2)",
            "Cyclophosphamide",
            "Mitomycin C (MMC)",
            "Cisplatin",
            "Ultraviolet (UV) radiation",
            "X-rays",
            "Gamma rays",
            "Alpha particles",
            "Beta particles"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethidium bromide (EtBr)",
                "Methyl methanesulfonate (MMS)",
                "N-Methyl-N'-nitro-N-nitrosoguanidine (MNNG)",
                "Ethyl methanesulfonate (EMS)",
                "5-Bromouracil (5-BU)",
                "Sodium azide (NaN3)",
                "Nitrogen mustard (HN2)",
                "Cyclophosphamide",
                "Mitomycin C (MMC)",
                "Cisplatin"
            ],
            "mismatches": [
                "Hydroxylamine (HA)",
                "2-Aminopurine (2-AP)",
                "Nitrous acid (HNO2)",
                "Acridine orange (AO)",
                "Proflavine",
                "Benzopyrene",
                "Aflatoxin B1",
                "2-Acetylaminofluorene (2-AAF)",
                "4-Nitroquinoline 1-oxide (4-NQO)",
                "Ultraviolet (UV) radiation",
                "X-rays",
                "Gamma rays",
                "Alpha particles",
                "Beta particles"
            ],
            "true_referents": [
                "(S)-colchicine",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "1H-imidazo[2,1-i]purine",
                "2,5-bis(aziridin-1-yl)-1,4-benzoquinone",
                "2-acetamidofluorene",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "2-nitrofluorene",
                "4-hydroxycyclophosphamide",
                "4-methylquinoline",
                "5-bromouracil",
                "5-bromouridine",
                "5-formyluracil",
                "9-aminoacridine",
                "N(6)-hydroxyadenine",
                "N-acetoxy-2-acetamidofluorene",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "N-nitrosodimethylamine",
                "Nitrogen mustard N-oxide",
                "Nitrogen mustard N-oxide hydrochloride",
                "Porfiromycine",
                "acridine half-mustard",
                "acrylamide",
                "apholate",
                "aziridine",
                "benz[a]anthracene 5,6-oxide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bromoethane",
                "carboplatin",
                "cisplatin",
                "cyclophosphamide",
                "diazomethane",
                "dibenzo[a,l]pyrene",
                "ethidium",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "mitomycin A",
                "mitomycin C",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "sodium azide"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ionizing radiation (e.g. X-rays, gamma rays) (IR)",
            "Ultraviolet radiation (UV)",
            "Mutagenic chemicals (e.g. ethidium bromide, nitrosamines) (MC)",
            "Reactive oxygen species (ROS)",
            "Alkylating agents (e.g. methyl methanesulfonate, ethyl methanesulfonate) (AA)",
            "Intercalating agents (e.g. acridine dyes, ethidium bromide) (IA)",
            "Base analogs (e.g. 5-bromouracil, 2-aminopurine) (BA)",
            "Deaminating agents (e.g. nitrous acid, hydroxylamine) (DA)",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Aflatoxins (AF)",
            "Benzopyrene (BP)",
            "Mustard gas (MG)",
            "Formaldehyde (FA)",
            "Vinyl chloride (VC)",
            "Asbestos fibers (AF)",
            "Tobacco smoke (TS)",
            "Radon gas (RG)",
            "Viruses (e.g. human papillomavirus) (V)",
            "Certain bacteria (e.g. Helicobacter pylori) (B)",
            "Certain fungi (e.g. Aspergillus flavus) (F)",
            "Certain plants (e.g. Aristolochia species) (P)",
            "Certain heavy metals (e.g. cadmium, chromium) (HM)",
            "Certain pesticides (e.g. captan, maneb) (PE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethidium bromide",
                "methyl methanesulfonate",
                "5-bromouracil"
            ],
            "mismatches": [
                "Ionizing radiation (e.g. X-rays, gamma rays) (IR)",
                "Ultraviolet radiation (UV)",
                "Reactive oxygen species (ROS)",
                "Intercalating agents (e.g. acridine dyes, ethidium bromide) (IA)",
                "Base analogs (e.g. 5-bromouracil, 2-aminopurine) (BA)",
                "Deaminating agents (e.g. nitrous acid, hydroxylamine) (DA)",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Aflatoxins (AF)",
                "Benzopyrene (BP)",
                "Mustard gas (MG)",
                "Formaldehyde (FA)",
                "Vinyl chloride (VC)",
                "Asbestos fibers (AF)",
                "Tobacco smoke (TS)",
                "Radon gas (RG)",
                "Viruses (e.g. human papillomavirus) (V)",
                "Certain bacteria (e.g. Helicobacter pylori) (B)",
                "Certain fungi (e.g. Aspergillus flavus) (F)",
                "Certain plants (e.g. Aristolochia species) (P)",
                "Certain heavy metals (e.g. cadmium, chromium) (HM)",
                "Certain pesticides (e.g. captan, maneb) (PE)"
            ],
            "true_referents": [
                "(R*,R*)-diepoxybutane",
                "(R,R)-diepoxybutane",
                "(S)-colchicine",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1-methylphenanthrene",
                "3,6-diamino-10-methylacridinium chloride.HCl",
                "5-bromouracil",
                "5-bromouridine",
                "5-fluorouridine",
                "8-oxo-dGTP",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "Nitrogen mustard N-oxide",
                "S-(chloromethyl)glutathione",
                "acetaldehyde",
                "acridine half-mustard dihydrochloride",
                "acrylamide",
                "acrylonitrile",
                "alkylating agent",
                "anthramycin",
                "aristolochic acid A",
                "aristolochic acid B",
                "aristolochic acid C",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bromoethane",
                "chloromethane",
                "dibenzo[a,l]pyrene",
                "ethidium bromide",
                "iodoacetic acid",
                "methyl methanesulfonate",
                "nitrogen mustard",
                "ozone",
                "phosphoramide mustard",
                "sodium fluoride",
                "styrene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Ethyl methanesulfonate (EMS)",
            "Nitrosoguanidine (NG)",
            "Ultraviolet radiation (UV)",
            "Benzo[a]pyrene (BaP)",
            "Aflatoxin B1 (AFB1)",
            "Methylnitrosourea (MNU)",
            "Dimethyl sulfate (DMS)",
            "Ethylnitrosourea (ENU)",
            "Hydroxylamine (HA)",
            "N-Ethyl-N-nitrosourea (ENU)",
            "Methyl methanesulfonate (MMS)",
            "Diethyl sulfate (DES)",
            "Ethyl nitrosourea (ENU)",
            "Procarbazine (PCB)",
            "Cisplatin (CDDP)",
            "Mitomycin C (MMC)",
            "Cyclophosphamide (CPA)",
            "Diepoxybutane (DEB)",
            "Methyl iodide (CH3I)",
            "Ethyl iodide (C2H5I)",
            "Propyl iodide (C3H7I)",
            "Butyl iodide (C4H9I)",
            "Benzidine (BZD)",
            "Hydrazine (N2H4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyl methanesulfonate (EMS)",
                "Dimethyl sulfate (DMS)",
                "N-Ethyl-N-nitrosourea (ENU)",
                "Methyl methanesulfonate (MMS)",
                "Diethyl sulfate (DES)",
                "Cisplatin (CDDP)",
                "Mitomycin C (MMC)",
                "Cyclophosphamide (CPA)",
                "Diepoxybutane (DEB)"
            ],
            "mismatches": [
                "Nitrosoguanidine (NG)",
                "Ultraviolet radiation (UV)",
                "Benzo[a]pyrene (BaP)",
                "Aflatoxin B1 (AFB1)",
                "Methylnitrosourea (MNU)",
                "Ethylnitrosourea (ENU)",
                "Hydroxylamine (HA)",
                "Ethyl nitrosourea (ENU)",
                "Procarbazine (PCB)",
                "Methyl iodide (CH3I)",
                "Ethyl iodide (C2H5I)",
                "Propyl iodide (C3H7I)",
                "Butyl iodide (C4H9I)",
                "Benzidine (BZD)",
                "Hydrazine (N2H4)"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "(R,R)-diepoxybutane",
                "1,2-dibromoethane",
                "1,2-dimethylhydrazine",
                "1,2:7,8-diepoxyoctane",
                "1-methylphenanthrene",
                "2-amino-6-hydroxyaminopurine",
                "2-aminoacrylic acid",
                "4-nitrophenylhydrazine",
                "N(6)-hydroxyadenine",
                "N-ethyl-N-nitrosourea",
                "N-methyl-N'-nitro-N-nitrosoguanidine",
                "N-methyl-N-nitrosourea",
                "acrylamide",
                "benzo[a]pyrene diol epoxide I",
                "benzo[b]fluoranthene",
                "benzo[e]pyrene",
                "bromoethane",
                "buta-1,3-diene",
                "carboplatin",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "cyclophosphamide hydrate",
                "dacarbazine",
                "dibenzo[a,l]pyrene",
                "diepoxybutane",
                "diethyl sulfate",
                "dimethyl sulfate",
                "dimethyl sulfoxide",
                "ethidium bromide",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "meso-diepoxybutane",
                "methyl methanesulfonate",
                "mitomycin A",
                "mitomycin C",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Alkylating agent (ALK)",
            "Benzene",
            "Bis(chloromethyl)ether",
            "Cyclophosphamide",
            "Ethylene oxide",
            "Ionizing radiation",
            "Methyl methanesulfonate",
            "Mitomycin C",
            "Nitrogen mustard",
            "Nitrosamines",
            "Plutonium",
            "Radon",
            "Sulfur mustard",
            "Tetrahydrofuran",
            "Thiotepa",
            "Uranium",
            "Vinblastine",
            "Vincristine",
            "X-rays",
            "Amsacrine",
            "Busulfan",
            "Cisplatin",
            "Dacarbazine",
            "Etoposide",
            "Melphalan",
            "Mitoxantrone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "alkylating agent",
                "cyclophosphamide",
                "methyl methanesulfonate",
                "mitomycin C",
                "nitrogen mustard",
                "busulfan",
                "cisplatin",
                "dacarbazine"
            ],
            "mismatches": [
                "alkylating agent (ALK)",
                "benzene",
                "bis(chloromethyl)ether",
                "ethylene oxide",
                "ionizing radiation",
                "nitrosamines",
                "plutonium",
                "radon",
                "sulfur mustard",
                "tetrahydrofuran",
                "thiotepa",
                "uranium",
                "vinblastine",
                "vincristine",
                "x-rays",
                "amsacrine",
                "etoposide",
                "melphalan",
                "mitoxantrone"
            ],
            "true_referents": [
                "(E)-dacarbazine",
                "(S)-colchicine",
                "1-methylphenanthrene",
                "3,6-diaminoacridine",
                "3,6-diaminoacridine dihydrochloride",
                "3,6-diaminoacridine monohydrochloride",
                "3-acetyl-2,5-dimethylfuran",
                "4-hydroxycyclophosphamide",
                "Alcophosphamide",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "Nitrogen mustard N-oxide",
                "Nitrogen mustard N-oxide hydrochloride",
                "PhIP",
                "alkylating agent",
                "anthramycin",
                "apholate",
                "benzo[b]fluoranthene",
                "benzo[e]pyrene",
                "bis(2-chloroethyl)amine",
                "bis(chloromethyl) ether",
                "bromoethane",
                "busulfan",
                "carboplatin",
                "chlorambucil",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dacarbazine",
                "dibenzo[a,l]pyrene",
                "dimethyl sulfoxide",
                "ethidium",
                "ethyl methanesulfonate",
                "iodoacetic acid",
                "lomustine",
                "methoxyacetic acid",
                "methyl methanesulfonate",
                "mitomycin",
                "mitomycin B",
                "mitomycin C",
                "nitrogen mustard",
                "nitrosocarbaryl",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard",
                "saintopin",
                "temozolomide"
            ],
            "TP": 8,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            "Acrylamide (ACR)",
            "Vinyl chloride (VCM)",
            "Ultraviolet radiation (UVR)",
            "X-rays",
            "Mitomycin C (MMC)",
            "Benzo[a]pyrene (BaP)",
            "1,3-Butadiene (BD)",
            "Ethylene oxide (ETO)",
            "Chloroprene (CPL)",
            "Acetaldehyde (AA)",
            "Acrylonitrile (ACN)",
            "2,3,7,8-Tetrachlorodibenzo-p-dioxin (TCDD)",
            "Tamoxifen (TAM)",
            "Cyclophosphamide (CP)",
            "Busulfan (BUS)",
            "Cisplatin (CDDP)",
            "Mitoxantrone (MIT)",
            "Chlorine gas (Cl2)",
            "Mustard gas (Bis(2-chloroethyl) sulfide)",
            "Polycyclic aromatic hydrocarbons (PAHs)",
            "Aflatoxin B1 (AFB1)",
            "Dimethylnitrosamine (DMN)",
            "Diethylnitrosamine (DEN)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acrylamide (ACR)",
                "Mitomycin C (MMC)",
                "Acetaldehyde (AA)",
                "Acrylonitrile (ACN)",
                "Cyclophosphamide (CP)",
                "Busulfan (BUS)",
                "Cisplatin (CDDP)",
                "Mustard gas (Bis(2-chloroethyl) sulfide)",
                "Dimethylnitrosamine (DMN)",
                "Diethylnitrosamine (DEN)"
            ],
            "mismatches": [
                "Vinyl chloride (VCM)",
                "Ultraviolet radiation (UVR)",
                "X-rays",
                "Benzo[a]pyrene (BaP)",
                "1,3-Butadiene (BD)",
                "Ethylene oxide (ETO)",
                "Chloroprene (CPL)",
                "2,3,7,8-Tetrachlorodibenzo-p-dioxin (TCDD)",
                "Tamoxifen (TAM)",
                "Mitoxantrone (MIT)",
                "Chlorine gas (Cl2)",
                "Polycyclic aromatic hydrocarbons (PAHs)",
                "Aflatoxin B1 (AFB1)"
            ],
            "true_referents": [
                "(R*,R*)-diepoxybutane",
                "(S)-colchicine",
                "(S,S)-diepoxybutane",
                "1,1-dichloroethene",
                "1,2-dichloroethane",
                "1,2:7,8-diepoxyoctane",
                "1-methoxy-N-nitrosodiethylamine",
                "1-methylphenanthrene",
                "2-aminoacrylic acid",
                "4-hydroxycyclophosphamide",
                "N-nitrosodiethylamine",
                "N-nitrosodimethylamine",
                "acetaldehyde",
                "acrylamide",
                "acrylonitrile",
                "benzo[a]pyrene diol epoxide I",
                "benzo[e]pyrene",
                "bis(2-chloroethyl) sulfide",
                "bis(2-chloroethyl)amine",
                "bis(chloromethyl) ether",
                "bromoethane",
                "busulfan",
                "buta-1,3-diene",
                "carboplatin",
                "chlorambucil",
                "chloromethane",
                "cisplatin",
                "cyclophosphamide",
                "dibenzo[a,l]pyrene",
                "dimethylmyleran",
                "ethidium bromide",
                "iodoacetic acid",
                "lomustine",
                "mitomycin",
                "mitomycin A",
                "mitomycin C",
                "nitrogen mustard",
                "oxaliplatin",
                "ozone",
                "phosphoramide mustard",
                "styrene",
                "temozolomide"
            ],
            "TP": 10,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "mutagen",
        "domain": "medicine",
        "response": [
            [
                "Canonical name",
                "Aflatoxin"
            ],
            [
                "Canonical name",
                "Benzene"
            ],
            [
                "Canonical name",
                "Formaldehyde"
            ],
            [
                "Canonical name",
                "Radon"
            ],
            [
                "Canonical name",
                "Cigarette smoke"
            ],
            [
                "Canonical name",
                "Uranium"
            ],
            [
                "Canonical name",
                "Lead"
            ],
            [
                "Canonical name",
                "Arsenic"
            ],
            [
                "Canonical name",
                "Mercury"
            ],
            [
                "Canonical name",
                "Polonium"
            ],
            [
                "Canonical name",
                "Cadmium"
            ],
            [
                "Canonical name",
                "Nickel"
            ],
            [
                "Canonical name",
                "Cobalt"
            ],
            [
                "Canonical name",
                "Nickel"
            ],
            [
                "Canonical name",
                "Thallium"
            ],
            [
                "Canonical name",
                "Beryllium"
            ],
            [
                "Canonical name",
                "Radium"
            ],
            [
                "Canonical name",
                "Aluminum"
            ],
            [
                "Canonical name",
                "Chromium"
            ],
            [
                "Canonical name",
                "Selenium"
            ],
            [
                "Canonical name",
                "Antimony"
            ],
            [
                "Canonical name",
                "Bismuth"
            ],
            [
                "Canonical name",
                "Tungsten"
            ],
            [
                "Canonical name",
                "Vanadium"
            ],
            [
                "Canonical name",
                "Zinc"
            ],
            [
                "Canonical name",
                "Beryllium"
            ],
            [
                "Canonical name",
                "Molybdenum"
            ],
            [
                "Canonical name",
                "Beryllium"
            ],
            [
                "Canonical name",
                "Beryllium"
            ]
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Aflatoxin",
                "Benzene",
                "Formaldehyde",
                "Radon",
                "Cigarette smoke",
                "Uranium",
                "Lead",
                "Arsenic",
                "Mercury",
                "Polonium",
                "Cadmium",
                "Nickel",
                "Cobalt",
                "Thallium",
                "Beryllium",
                "Radium",
                "Aluminum",
                "Chromium",
                "Selenium",
                "Antimony",
                "Bismuth",
                "Tungsten",
                "Vanadium",
                "Zinc",
                "Molybdenum"
            ],
            "true_referents": [
                "(2E)-dec-2-enal",
                "(E)-tomaymycin",
                "(S)-colchicine",
                "(S,S)-diepoxybutane",
                "1-methylphenanthrene",
                "6,12-dimethyldibenzo[d,d']benzo[1,2-b:4,5-b']bisthiophene",
                "Methyl violet 2B",
                "S-2-chloroethylcysteine",
                "acetaldehyde",
                "acrylamide",
                "benz[a]anthracene 5,6-oxide",
                "benzo[e]pyrene",
                "chloromethane",
                "cisplatin",
                "dibenzo[a,l]pyrene",
                "ethidium",
                "methoxyacetic acid",
                "ozone",
                "propidium"
            ],
            "TP": 0,
            "FP": 25,
            "FN": 0
        }
    }
]