[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Aristolochic acid",
            "Mercury (Hg)",
            "Lead (Pb)",
            "Tobacco-specific nitrosamines (TSNA)",
            "Cadmium (Cd)",
            "Amphotericin B",
            "Iodinated contrast agents",
            "Vancomycin",
            "Sulindac",
            "Cimetidine",
            "Nephrotoxic antibiotics (e.g., aminoglycosides)",
            "Amphotericin B lipid complex",
            "Tacrolimus",
            "Cyclosporine",
            "Doxorubicin",
            "Ifosfamide",
            "Mitomycin C",
            "Nephrotoxic NSAIDs (e.g., ketorolac)",
            "Acyclovir",
            "Tacrolimus",
            "Contrast-induced nephropathy agents",
            "Sunitinib",
            "Vincristine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Aristolochic acid",
                "Mercury (Hg)",
                "Lead (Pb)",
                "Tobacco-specific nitrosamines (TSNA)",
                "Cadmium (Cd)",
                "Amphotericin B",
                "Iodinated contrast agents",
                "Vancomycin",
                "Sulindac",
                "Cimetidine",
                "Nephrotoxic antibiotics (e.g., aminoglycosides)",
                "Amphotericin B lipid complex",
                "Tacrolimus",
                "Cyclosporine",
                "Doxorubicin",
                "Ifosfamide",
                "Mitomycin C",
                "Nephrotoxic NSAIDs (e.g., ketorolac)",
                "Acyclovir",
                "Contrast-induced nephropathy agents",
                "Sunitinib",
                "Vincristine"
            ],
            "mismatches": [],
            "true_referents": [
                "Acyclovir",
                "Amphotericin B",
                "Amphotericin B lipid complex",
                "Aristolochic acid",
                "Cadmium (Cd)",
                "Cimetidine",
                "Cisplatin (CDDP)",
                "Contrast-induced nephropathy agents",
                "Cyclosporine",
                "Doxorubicin",
                "Ifosfamide",
                "Iodinated contrast agents",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Mitomycin C",
                "Nephrotoxic NSAIDs (e.g., ketorolac)",
                "Nephrotoxic antibiotics (e.g., aminoglycosides)",
                "Sulindac",
                "Sunitinib",
                "Tacrolimus",
                "Tobacco-specific nitrosamines (TSNA)",
                "Vancomycin",
                "Vincristine"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen (Paracetamol)",
            "Cisplatin",
            "Aminoglycosides (e.g., Gentamicin)",
            "Nonsteroidal Anti-inflammatory Drugs (NSAIDs, e.g., Ibuprofen)",
            "Contrast Media (e.g., Iodinated Contrast)",
            "Cyclosporine",
            "Amphotericin B",
            "Lithium",
            "Heparin",
            "Erythropoiesis-Stimulating Agents (e.g., Epoetin Alfa)",
            "Radiographic Contrast Agents (e.g., Gadolinium-based agents)",
            "Antibiotics (e.g., Vancomycin)",
            "Heroin",
            "Methotrexate",
            "Cyclophosphamide",
            "Aldesleukin",
            "Ethylene Glycol",
            "Paraquat",
            "Rifampin",
            "Ciprofloxacin",
            "Methoxyflurane",
            "Foscarnet",
            "Furosemide",
            "Calcineurin Inhibitors (e.g., Tacrolimus)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (Paracetamol)",
                "Cisplatin",
                "Aminoglycosides (e.g., Gentamicin)",
                "Nonsteroidal Anti-inflammatory Drugs (NSAIDs, e.g., Ibuprofen)",
                "Contrast Media (e.g., Iodinated Contrast)",
                "Cyclosporine",
                "Amphotericin B",
                "Lithium",
                "Heparin",
                "Erythropoiesis-Stimulating Agents (e.g., Epoetin Alfa)",
                "Radiographic Contrast Agents (e.g., Gadolinium-based agents)",
                "Antibiotics (e.g., Vancomycin)",
                "Heroin",
                "Methotrexate",
                "Cyclophosphamide",
                "Aldesleukin",
                "Ethylene Glycol",
                "Paraquat",
                "Rifampin",
                "Ciprofloxacin",
                "Methoxyflurane",
                "Foscarnet",
                "Furosemide",
                "Calcineurin Inhibitors (e.g., Tacrolimus)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (Paracetamol)",
                "Aldesleukin",
                "Aminoglycosides (e.g., Gentamicin)",
                "Amphotericin B",
                "Antibiotics (e.g., Vancomycin)",
                "Calcineurin Inhibitors (e.g., Tacrolimus)",
                "Ciprofloxacin",
                "Cisplatin",
                "Contrast Media (e.g., Iodinated Contrast)",
                "Cyclophosphamide",
                "Cyclosporine",
                "Erythropoiesis-Stimulating Agents (e.g., Epoetin Alfa)",
                "Ethylene Glycol",
                "Foscarnet",
                "Furosemide",
                "Heparin",
                "Heroin",
                "Lithium",
                "Methotrexate",
                "Methoxyflurane",
                "Nonsteroidal Anti-inflammatory Drugs (NSAIDs, e.g., Ibuprofen)",
                "Paraquat",
                "Radiographic Contrast Agents (e.g., Gadolinium-based agents)",
                "Rifampin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Cyclosporine",
            "Gentamicin",
            "Vancomycin",
            "Amphotericin B",
            "Nitrosoureas",
            "Ifosfamide",
            "Cyclophosphamide",
            "Methotrexate",
            "Tacrolimus",
            "Lithium",
            "Mercury",
            "Lead",
            "Cadmium",
            "Aristolochic acid",
            "Bromobenzene",
            "Chloramphenicol",
            "Naphthalene",
            "Phenacetin",
            "Phenol",
            "Pyrimethamine",
            "Foscarnet",
            "Tenofovir",
            "Methylmalonic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Cyclosporine",
                "Gentamicin",
                "Vancomycin",
                "Amphotericin B",
                "Nitrosoureas",
                "Ifosfamide",
                "Cyclophosphamide",
                "Methotrexate",
                "Tacrolimus",
                "Lithium",
                "Mercury",
                "Lead",
                "Cadmium",
                "Aristolochic acid",
                "Bromobenzene",
                "Chloramphenicol",
                "Naphthalene",
                "Phenacetin",
                "Phenol",
                "Pyrimethamine",
                "Foscarnet",
                "Tenofovir",
                "Methylmalonic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "Amphotericin B",
                "Aristolochic acid",
                "Bromobenzene",
                "Cadmium",
                "Chloramphenicol",
                "Cisplatin (CDDP)",
                "Cyclophosphamide",
                "Cyclosporine",
                "Foscarnet",
                "Gentamicin",
                "Ifosfamide",
                "Lead",
                "Lithium",
                "Mercury",
                "Methotrexate",
                "Methylmalonic acid",
                "Naphthalene",
                "Nitrosoureas",
                "Phenacetin",
                "Phenol",
                "Pyrimethamine",
                "Tacrolimus",
                "Tenofovir",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Ethylene Glycol (EG)",
            "Cisplatin (CSP)",
            "Gentamicin (GM)",
            "Amphotericin B (AmB)",
            "Streptozotocin (STZ)",
            "Carbon Tetrachloride (CCl4)",
            "Cadmium (Cd)",
            "Lead (Pb)",
            "Mercury (Hg)",
            "Aristolochic Acid (AA)",
            "Puromycin (PM)",
            "Vancomycin (VAN)",
            "Tetracycline (TC)",
            "Nonsteroidal Anti-inflammatory Drugs (NSAIDs)",
            "Radiocontrast Agents (RCA)",
            "Lithium (Li)",
            "Cyclosporine (CsA)",
            "Tacrolimus (TAC)",
            "Mycophenolic Acid (MPA)",
            "Fumonisin B1 (FB1)",
            "Ochratoxin A (OTA)",
            "Deoxynivalenol (DON)",
            "Canagliflozin (CAN)",
            "Dapagliflozin (DAP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethylene Glycol (EG)",
                "Cisplatin (CSP)",
                "Gentamicin (GM)",
                "Amphotericin B (AmB)",
                "Streptozotocin (STZ)",
                "Carbon Tetrachloride (CCl4)",
                "Cadmium (Cd)",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Aristolochic Acid (AA)",
                "Puromycin (PM)",
                "Vancomycin (VAN)",
                "Tetracycline (TC)",
                "Nonsteroidal Anti-inflammatory Drugs (NSAIDs)",
                "Radiocontrast Agents (RCA)",
                "Lithium (Li)",
                "Cyclosporine (CsA)",
                "Tacrolimus (TAC)",
                "Mycophenolic Acid (MPA)",
                "Fumonisin B1 (FB1)",
                "Ochratoxin A (OTA)",
                "Deoxynivalenol (DON)",
                "Canagliflozin (CAN)",
                "Dapagliflozin (DAP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Amphotericin B (AmB)",
                "Aristolochic Acid (AA)",
                "Cadmium (Cd)",
                "Canagliflozin (CAN)",
                "Carbon Tetrachloride (CCl4)",
                "Cisplatin (CSP)",
                "Cyclosporine (CsA)",
                "Dapagliflozin (DAP)",
                "Deoxynivalenol (DON)",
                "Ethylene Glycol (EG)",
                "Fumonisin B1 (FB1)",
                "Gentamicin (GM)",
                "Lead (Pb)",
                "Lithium (Li)",
                "Mercury (Hg)",
                "Mycophenolic Acid (MPA)",
                "Nonsteroidal Anti-inflammatory Drugs (NSAIDs)",
                "Ochratoxin A (OTA)",
                "Puromycin (PM)",
                "Radiocontrast Agents (RCA)",
                "Streptozotocin (STZ)",
                "Tacrolimus (TAC)",
                "Tetracycline (TC)",
                "Vancomycin (VAN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Gentamicin",
            "Cisplatin",
            "Amphotericin B",
            "Ibuprofen",
            "Naproxen",
            "Vancomycin",
            "Cyclosporine",
            "Tacrolimus",
            "Aminoglycosides",
            "Contrast Media",
            "Nonsteroidal Anti-inflammatory Drugs (NSAIDs)",
            "Lithium",
            "Methotrexate",
            "Tenofovir",
            "Adefovir",
            "Foscarnet",
            "Cidofovir",
            "Ifosfamide",
            "Pamidronate",
            "Zoledronic Acid",
            "Indomethacin",
            "Sulfonamides",
            "Penicillins",
            "Cephalosporins"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gentamicin",
                "Cisplatin",
                "Amphotericin B",
                "Ibuprofen",
                "Naproxen",
                "Vancomycin",
                "Cyclosporine",
                "Tacrolimus",
                "Aminoglycosides",
                "Contrast Media",
                "Nonsteroidal Anti-inflammatory Drugs (NSAIDs)",
                "Lithium",
                "Methotrexate",
                "Tenofovir",
                "Adefovir",
                "Foscarnet",
                "Cidofovir",
                "Ifosfamide",
                "Pamidronate",
                "Zoledronic Acid",
                "Indomethacin",
                "Sulfonamides",
                "Penicillins",
                "Cephalosporins"
            ],
            "mismatches": [],
            "true_referents": [
                "Adefovir",
                "Aminoglycosides",
                "Amphotericin B",
                "Cephalosporins",
                "Cidofovir",
                "Cisplatin",
                "Contrast Media",
                "Cyclosporine",
                "Foscarnet",
                "Gentamicin",
                "Ibuprofen",
                "Ifosfamide",
                "Indomethacin",
                "Lithium",
                "Methotrexate",
                "Naproxen",
                "Nonsteroidal Anti-inflammatory Drugs (NSAIDs)",
                "Pamidronate",
                "Penicillins",
                "Sulfonamides",
                "Tacrolimus",
                "Tenofovir",
                "Vancomycin",
                "Zoledronic Acid"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Ibuprofen",
            "Aspirin",
            "Gentamicin",
            "Vancomycin",
            "Lithium",
            "Cyclosporine",
            "Amphotericin B",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Cisplatin",
            "Foscarnet",
            "Tenofovir",
            "Sodium valproate",
            "Diclofenac",
            "Indomethacin",
            "Probenecid",
            "Penicillin G",
            "Dapsone",
            "Quinapril",
            "Acetazolamide",
            "Rifampicin",
            "Gemcitabine",
            "Aminoglycosides",
            "Antiretroviral drugs"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Ibuprofen",
                "Aspirin",
                "Gentamicin",
                "Vancomycin",
                "Lithium",
                "Cyclosporine",
                "Amphotericin B",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Cisplatin",
                "Foscarnet",
                "Tenofovir",
                "Sodium valproate",
                "Diclofenac",
                "Indomethacin",
                "Probenecid",
                "Penicillin G",
                "Dapsone",
                "Quinapril",
                "Acetazolamide",
                "Rifampicin",
                "Gemcitabine",
                "Aminoglycosides",
                "Antiretroviral drugs"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Acetazolamide",
                "Aminoglycosides",
                "Amphotericin B",
                "Antiretroviral drugs",
                "Aspirin",
                "Cisplatin",
                "Cyclosporine",
                "Dapsone",
                "Diclofenac",
                "Foscarnet",
                "Gemcitabine",
                "Gentamicin",
                "Ibuprofen",
                "Indomethacin",
                "Lithium",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Penicillin G",
                "Probenecid",
                "Quinapril",
                "Rifampicin",
                "Sodium valproate",
                "Tenofovir",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Mercury (Hg)",
            "Lead (Pb)",
            "Cadmium (Cd)",
            "Chromium (Cr)",
            "Arsenic (As)",
            "Uranium (U)",
            "Platinum (Pt)",
            "Gentamicin (GM)",
            "Cisplatin (CDDP)",
            "Methotrexate (MTX)",
            "Vancomycin (VAN)",
            "Tetracycline (TC)",
            "Amphotericin B (AmB)",
            "Acetaminophen (APAP)",
            "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs)",
            "Ethylene Glycol (EG)",
            "Carbon Tetrachloride (CCl4)",
            "Paraquat (PQ)",
            "Colistin (COL)",
            "Polychlorinated Biphenyls (PCBs)",
            "Dioxin (TCDD)",
            "Phenacetin (PHN)",
            "Chloroform (CHCl3)",
            "Benzene (C6H6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Mercury (Hg)",
                "Lead (Pb)",
                "Cadmium (Cd)",
                "Chromium (Cr)",
                "Arsenic (As)",
                "Uranium (U)",
                "Platinum (Pt)",
                "Gentamicin (GM)",
                "Cisplatin (CDDP)",
                "Methotrexate (MTX)",
                "Vancomycin (VAN)",
                "Tetracycline (TC)",
                "Amphotericin B (AmB)",
                "Acetaminophen (APAP)",
                "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs)",
                "Ethylene Glycol (EG)",
                "Carbon Tetrachloride (CCl4)",
                "Paraquat (PQ)",
                "Colistin (COL)",
                "Polychlorinated Biphenyls (PCBs)",
                "Dioxin (TCDD)",
                "Phenacetin (PHN)",
                "Chloroform (CHCl3)",
                "Benzene (C6H6)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Amphotericin B (AmB)",
                "Arsenic (As)",
                "Benzene (C6H6)",
                "Cadmium (Cd)",
                "Carbon Tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Chromium (Cr)",
                "Cisplatin (CDDP)",
                "Colistin (COL)",
                "Dioxin (TCDD)",
                "Ethylene Glycol (EG)",
                "Gentamicin (GM)",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Methotrexate (MTX)",
                "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs)",
                "Paraquat (PQ)",
                "Phenacetin (PHN)",
                "Platinum (Pt)",
                "Polychlorinated Biphenyls (PCBs)",
                "Tetracycline (TC)",
                "Uranium (U)",
                "Vancomycin (VAN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Cisplatin",
            "Ibuprofen",
            "Gentamicin",
            "Vancomycin",
            "Cyclosporine",
            "Methotrexate",
            "Aminoglycosides",
            "Amphotericin B",
            "Tenofovir",
            "Contrast agents (e.g., iodinated contrast media)",
            "Cephalosporins",
            "Sulfonamides",
            "NSAIDs (Nonsteroidal anti-inflammatory drugs)",
            "Angiotensin-converting enzyme inhibitors (ACE inhibitors)",
            "Angiotensin receptor blockers (ARBs)",
            "Proton pump inhibitors (PPIs)",
            "Chemotherapy drugs",
            "Antiviral medications",
            "Antibiotics",
            "Antifungal medications",
            "Diuretics",
            "Cimetidine",
            "Rifampin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Cisplatin",
                "Ibuprofen",
                "Gentamicin",
                "Vancomycin",
                "Cyclosporine",
                "Methotrexate",
                "Aminoglycosides",
                "Amphotericin B",
                "Tenofovir",
                "Contrast agents (e.g., iodinated contrast media)",
                "Cephalosporins",
                "Sulfonamides",
                "NSAIDs (Nonsteroidal anti-inflammatory drugs)",
                "Angiotensin-converting enzyme inhibitors (ACE inhibitors)",
                "Angiotensin receptor blockers (ARBs)",
                "Proton pump inhibitors (PPIs)",
                "Chemotherapy drugs",
                "Antiviral medications",
                "Antibiotics",
                "Antifungal medications",
                "Diuretics",
                "Cimetidine",
                "Rifampin"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Aminoglycosides",
                "Amphotericin B",
                "Angiotensin receptor blockers (ARBs)",
                "Angiotensin-converting enzyme inhibitors (ACE inhibitors)",
                "Antibiotics",
                "Antifungal medications",
                "Antiviral medications",
                "Cephalosporins",
                "Chemotherapy drugs",
                "Cimetidine",
                "Cisplatin",
                "Contrast agents (e.g., iodinated contrast media)",
                "Cyclosporine",
                "Diuretics",
                "Gentamicin",
                "Ibuprofen",
                "Methotrexate",
                "NSAIDs (Nonsteroidal anti-inflammatory drugs)",
                "Proton pump inhibitors (PPIs)",
                "Rifampin",
                "Sulfonamides",
                "Tenofovir",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Gentamicin",
            "Amikacin",
            "Tobramycin",
            "Amphotericin B",
            "Iohexol",
            "Iopromide",
            "Naproxen",
            "Ibuprofen",
            "Indomethacin",
            "Ketoprofen",
            "Diclofenac",
            "Lithium carbonate",
            "Mercury chloride",
            "Lead acetate",
            "Cyclosporine",
            "Ifosfamide",
            "Methotrexate",
            "Tenofovir",
            "Colistin",
            "Vancomycin",
            "Tacrolimus",
            "Foscarnet",
            "Everolimus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Gentamicin",
                "Amikacin",
                "Tobramycin",
                "Amphotericin B",
                "Iohexol",
                "Iopromide",
                "Naproxen",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Diclofenac",
                "Lithium carbonate",
                "Mercury chloride",
                "Lead acetate",
                "Cyclosporine",
                "Ifosfamide",
                "Methotrexate",
                "Tenofovir",
                "Colistin",
                "Vancomycin",
                "Tacrolimus",
                "Foscarnet",
                "Everolimus"
            ],
            "mismatches": [],
            "true_referents": [
                "Amikacin",
                "Amphotericin B",
                "Cisplatin (CDDP)",
                "Colistin",
                "Cyclosporine",
                "Diclofenac",
                "Everolimus",
                "Foscarnet",
                "Gentamicin",
                "Ibuprofen",
                "Ifosfamide",
                "Indomethacin",
                "Iohexol",
                "Iopromide",
                "Ketoprofen",
                "Lead acetate",
                "Lithium carbonate",
                "Mercury chloride",
                "Methotrexate",
                "Naproxen",
                "Tacrolimus",
                "Tenofovir",
                "Tobramycin",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin",
            "Gentamicin",
            "Mercury (Hg)",
            "Cadmium (Cd)",
            "Lead (Pb)",
            "Aristolochic acid",
            "Ethylene glycol",
            "Melamine",
            "Acetaminophen (APAP)",
            "Cyclosporine A",
            "Ifosfamide",
            "Acyclovir",
            "Amphotericin B",
            "Vancomycin",
            "Contrast media (iodinated)",
            "Lithium",
            "Tenofovir",
            "Diethylene glycol",
            "Paraquat",
            "Ochratoxin A",
            "Fumaric acid",
            "Citrinin",
            "Uranium (U)",
            "Aristolochia fangchi"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin",
                "Gentamicin",
                "Mercury (Hg)",
                "Cadmium (Cd)",
                "Lead (Pb)",
                "Aristolochic acid",
                "Ethylene glycol",
                "Melamine",
                "Acetaminophen (APAP)",
                "Cyclosporine A",
                "Ifosfamide",
                "Acyclovir",
                "Amphotericin B",
                "Vancomycin",
                "Contrast media (iodinated)",
                "Lithium",
                "Tenofovir",
                "Diethylene glycol",
                "Paraquat",
                "Ochratoxin A",
                "Fumaric acid",
                "Citrinin",
                "Uranium (U)",
                "Aristolochia fangchi"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Acyclovir",
                "Amphotericin B",
                "Aristolochia fangchi",
                "Aristolochic acid",
                "Cadmium (Cd)",
                "Cisplatin",
                "Citrinin",
                "Contrast media (iodinated)",
                "Cyclosporine A",
                "Diethylene glycol",
                "Ethylene glycol",
                "Fumaric acid",
                "Gentamicin",
                "Ifosfamide",
                "Lead (Pb)",
                "Lithium",
                "Melamine",
                "Mercury (Hg)",
                "Ochratoxin A",
                "Paraquat",
                "Tenofovir",
                "Uranium (U)",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Aristolochic acid",
            "Cadmium",
            "Cisplatin",
            "Cyclosporine",
            "Ethylene glycol",
            "Gentamicin",
            "Ifosfamide",
            "Indomethacin",
            "Lead",
            "Lithium",
            "Mercury",
            "Methoxyflurane",
            "Naproxen",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Paraquat",
            "Phosphate",
            "Radiocontrast agents",
            "Rifampin",
            "Tacrolimus",
            "Tenofovir",
            "Tobramycin",
            "Vancomycin",
            "Warfarin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Aristolochic acid",
                "Cadmium",
                "Cisplatin",
                "Cyclosporine",
                "Ethylene glycol",
                "Gentamicin",
                "Ifosfamide",
                "Indomethacin",
                "Lead",
                "Lithium",
                "Mercury",
                "Methoxyflurane",
                "Naproxen",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Paraquat",
                "Phosphate",
                "Radiocontrast agents",
                "Rifampin",
                "Tacrolimus",
                "Tenofovir",
                "Tobramycin",
                "Vancomycin",
                "Warfarin"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Aristolochic acid",
                "Cadmium",
                "Cisplatin",
                "Cyclosporine",
                "Ethylene glycol",
                "Gentamicin",
                "Ifosfamide",
                "Indomethacin",
                "Lead",
                "Lithium",
                "Mercury",
                "Methoxyflurane",
                "Naproxen",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Paraquat",
                "Phosphate",
                "Radiocontrast agents",
                "Rifampin",
                "Tacrolimus",
                "Tenofovir",
                "Tobramycin",
                "Vancomycin",
                "Warfarin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Aristolochic acid (AA)",
            "Cadmium (Cd)",
            "Chromium (Cr)",
            "Cisplatin",
            "Cyclosporine",
            "Doxorubicin",
            "Ethylene glycol",
            "Gentamicin",
            "Ifosfamide",
            "Lead (Pb)",
            "Lithium (Li)",
            "Mercury (Hg)",
            "Methotrexate",
            "Nitrilotriacetic acid (NTA)",
            "Ochratoxin A (OTA)",
            "Paraquat",
            "Potassium bromate (KBrO3)",
            "Puromycin aminonucleoside",
            "Streptozotocin",
            "Teniposide",
            "Tenofovir",
            "Tetracycline",
            "Tobramycin",
            "Uranyl nitrate (UO2(NO3)2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aristolochic acid (AA)",
                "Cadmium (Cd)",
                "Chromium (Cr)",
                "Cisplatin",
                "Cyclosporine",
                "Doxorubicin",
                "Ethylene glycol",
                "Gentamicin",
                "Ifosfamide",
                "Lead (Pb)",
                "Lithium (Li)",
                "Mercury (Hg)",
                "Methotrexate",
                "Nitrilotriacetic acid (NTA)",
                "Ochratoxin A (OTA)",
                "Paraquat",
                "Potassium bromate (KBrO3)",
                "Puromycin aminonucleoside",
                "Streptozotocin",
                "Teniposide",
                "Tenofovir",
                "Tetracycline",
                "Tobramycin",
                "Uranyl nitrate (UO2(NO3)2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aristolochic acid (AA)",
                "Cadmium (Cd)",
                "Chromium (Cr)",
                "Cisplatin",
                "Cyclosporine",
                "Doxorubicin",
                "Ethylene glycol",
                "Gentamicin",
                "Ifosfamide",
                "Lead (Pb)",
                "Lithium (Li)",
                "Mercury (Hg)",
                "Methotrexate",
                "Nitrilotriacetic acid (NTA)",
                "Ochratoxin A (OTA)",
                "Paraquat",
                "Potassium bromate (KBrO3)",
                "Puromycin aminonucleoside",
                "Streptozotocin",
                "Teniposide",
                "Tenofovir",
                "Tetracycline",
                "Tobramycin",
                "Uranyl nitrate (UO2(NO3)2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Aristolochic Acid (AA)",
            "Cisplatin (CDDP)",
            "Cyclosporine (CsA)",
            "Gentamicin (GM)",
            "Ifosfamide (IFO)",
            "Melamine (MEL)",
            "Mercury (Hg)",
            "Methotrexate (MTX)",
            "Mycophenolate Mofetil (MMF)",
            "Oxaliplatin (OXA)",
            "Paracetamol (APAP)",
            "Paraquat (PQ)",
            "Polymyxin B (PMB)",
            "Tacrolimus (FK506)",
            "Tenofovir (TDF)",
            "Vancomycin (VAN)",
            "Acetaminophen (APAP)",
            "Amphotericin B (AmB)",
            "Cadmium (Cd)",
            "Carboplatin (CBDCA)",
            "Chromium (Cr)",
            "Doxorubicin (DOX)",
            "Ethylene Glycol (EG)",
            "Sodium Valproate (VPA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aristolochic Acid (AA)",
                "Cisplatin (CDDP)",
                "Cyclosporine (CsA)",
                "Gentamicin (GM)",
                "Ifosfamide (IFO)",
                "Melamine (MEL)",
                "Mercury (Hg)",
                "Methotrexate (MTX)",
                "Mycophenolate Mofetil (MMF)",
                "Oxaliplatin (OXA)",
                "Paracetamol (APAP)",
                "Paraquat (PQ)",
                "Polymyxin B (PMB)",
                "Tacrolimus (FK506)",
                "Tenofovir (TDF)",
                "Vancomycin (VAN)",
                "Acetaminophen (APAP)",
                "Amphotericin B (AmB)",
                "Cadmium (Cd)",
                "Carboplatin (CBDCA)",
                "Chromium (Cr)",
                "Doxorubicin (DOX)",
                "Ethylene Glycol (EG)",
                "Sodium Valproate (VPA)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Amphotericin B (AmB)",
                "Aristolochic Acid (AA)",
                "Cadmium (Cd)",
                "Carboplatin (CBDCA)",
                "Chromium (Cr)",
                "Cisplatin (CDDP)",
                "Cyclosporine (CsA)",
                "Doxorubicin (DOX)",
                "Ethylene Glycol (EG)",
                "Gentamicin (GM)",
                "Ifosfamide (IFO)",
                "Melamine (MEL)",
                "Mercury (Hg)",
                "Methotrexate (MTX)",
                "Mycophenolate Mofetil (MMF)",
                "Oxaliplatin (OXA)",
                "Paracetamol (APAP)",
                "Paraquat (PQ)",
                "Polymyxin B (PMB)",
                "Sodium Valproate (VPA)",
                "Tacrolimus (FK506)",
                "Tenofovir (TDF)",
                "Vancomycin (VAN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Aspirin",
            "Ibuprofen",
            "Ketoprofen",
            "Naproxen",
            "Celecoxib",
            "Diclofenac",
            "Indomethacin",
            "Meclofenamate",
            "Meloxicam",
            "Nimesulide",
            "Piroxicam",
            "Tenoxicam",
            "Tiaprofenic acid",
            "Valdecoxib",
            "Rofecoxib",
            "Lornoxicam",
            "Etoricoxib",
            "Coxib",
            "Propoxyphene",
            "Dextropropoxyphene",
            "Phenacetin",
            "Morphine",
            "Codeine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Aspirin",
                "Ibuprofen",
                "Ketoprofen",
                "Naproxen",
                "Celecoxib",
                "Diclofenac",
                "Indomethacin",
                "Meclofenamate",
                "Meloxicam",
                "Nimesulide",
                "Piroxicam",
                "Tenoxicam",
                "Tiaprofenic acid",
                "Valdecoxib",
                "Rofecoxib",
                "Lornoxicam",
                "Etoricoxib",
                "Coxib",
                "Propoxyphene",
                "Dextropropoxyphene",
                "Phenacetin",
                "Morphine",
                "Codeine"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Aspirin",
                "Celecoxib",
                "Codeine",
                "Coxib",
                "Dextropropoxyphene",
                "Diclofenac",
                "Etoricoxib",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Lornoxicam",
                "Meclofenamate",
                "Meloxicam",
                "Morphine",
                "Naproxen",
                "Nimesulide",
                "Phenacetin",
                "Piroxicam",
                "Propoxyphene",
                "Rofecoxib",
                "Tenoxicam",
                "Tiaprofenic acid",
                "Valdecoxib"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Gentamicin",
            "Streptomycin",
            "Cyclosporine",
            "Tetracycline",
            "Aminoglycosides",
            "Amphotericin B",
            "Cephaloridine",
            "Methoxyflurane",
            "Ifosfamide",
            "Oxaliplatin",
            "Carboplatin",
            "Cidofovir",
            "Foscarnet",
            "Acyclovir",
            "Indinavir",
            "Ritonavir",
            "Tacrolimus",
            "Sirolimus",
            "Everolimus",
            "Ibuprofen",
            "Diclofenac",
            "Mefenamic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Gentamicin",
                "Streptomycin",
                "Cyclosporine",
                "Tetracycline",
                "Aminoglycosides",
                "Amphotericin B",
                "Cephaloridine",
                "Methoxyflurane",
                "Ifosfamide",
                "Oxaliplatin",
                "Carboplatin",
                "Cidofovir",
                "Foscarnet",
                "Acyclovir",
                "Indinavir",
                "Ritonavir",
                "Tacrolimus",
                "Sirolimus",
                "Everolimus",
                "Ibuprofen",
                "Diclofenac",
                "Mefenamic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "Acyclovir",
                "Aminoglycosides",
                "Amphotericin B",
                "Carboplatin",
                "Cephaloridine",
                "Cidofovir",
                "Cisplatin (CDDP)",
                "Cyclosporine",
                "Diclofenac",
                "Everolimus",
                "Foscarnet",
                "Gentamicin",
                "Ibuprofen",
                "Ifosfamide",
                "Indinavir",
                "Mefenamic acid",
                "Methoxyflurane",
                "Oxaliplatin",
                "Ritonavir",
                "Sirolimus",
                "Streptomycin",
                "Tacrolimus",
                "Tetracycline"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Acrylamide",
                "Canonical Name": "Acrylamide"
            },
            {
                "Referent": "Arsenic",
                "Canonical Name": "Arsenic"
            },
            {
                "Referent": "Benzene",
                "Canonical Name": "Benzene"
            },
            {
                "Referent": "Cadmium",
                "Canonical Name": "Cadmium"
            },
            {
                "Referent": "Carbon tetrachloride",
                "Canonical Name": "Carbon tetrachloride"
            },
            {
                "Referent": "Cigarette smoke",
                "Canonical Name": "Cigarette smoke"
            },
            {
                "Referent": "Cyanide",
                "Canonical Name": "Cyanide"
            },
            {
                "Referent": "Ethylene oxide",
                "Canonical Name": "Ethylene oxide"
            },
            {
                "Referent": "Formaldehyde",
                "Canonical Name": "Formaldehyde"
            },
            {
                "Referent": "Mercury",
                "Canonical Name": "Mercury"
            },
            {
                "Referent": "Nickel",
                "Canonical Name": "Nickel"
            },
            {
                "Referent": "Polonium",
                "Canonical Name": "Polonium"
            },
            {
                "Referent": "Radium",
                "Canonical Name": "Radium"
            },
            {
                "Referent": "Vinyl chloride",
                "Canonical Name": "Vinyl chloride"
            },
            {
                "Referent": "Vinylidene chloride",
                "Canonical Name": "Vinylidene chloride"
            },
            {
                "Referent": "Xylenes",
                "Canonical Name": "Xylenes"
            },
            {
                "Referent": "Aflatoxin B1",
                "Canonical Name": "Aflatoxin B1"
            },
            {
                "Referent": "Aflatoxin B2",
                "Canonical Name": "Aflatoxin B2"
            },
            {
                "Referent": "Aflatoxin G1",
                "Canonical Name": "Aflatoxin G1"
            },
            {
                "Referent": "Aflatoxin G2",
                "Canonical Name": "Aflatoxin G2"
            },
            {
                "Referent": "Aflatoxin M1",
                "Canonical Name": "Aflatoxin M1"
            },
            {
                "Referent": "Aflatoxin M2",
                "Canonical Name": "Aflatoxin M2"
            },
            {
                "Referent": "Aflatoxin Q1",
                "Canonical Name": "Aflatoxin Q1"
            },
            {
                "Referent": "Aflatoxin Q2",
                "Canonical Name": "Aflatoxin Q2"
            },
            {
                "Referent": "Aflatoxin P1",
                "Canonical Name": "Aflatoxin P1"
            },
            {
                "Referent": "Aflatoxin P2",
                "Canonical Name": "Aflatoxin P2"
            },
            {
                "Referent": "Aflatoxin B3",
                "Canonical Name": "Aflatoxin B3"
            },
            {
                "Referent": "Aflatoxin B4",
                "Canonical Name": "Aflatoxin B4"
            },
            {
                "Referent": "Aflatoxin M3",
                "Canonical Name": "Aflatoxin M3"
            },
            {
                "Referent": "Aflatoxin M4",
                "Canonical Name": "Aflatoxin M4"
            },
            {
                "Referent": "Aflatoxin Q3",
                "Canonical Name": "Aflatoxin Q3"
            },
            {
                "Referent": "Aflatoxin Q4",
                "Canonical Name": "Aflatoxin Q4"
            },
            {
                "Referent": "Aflatoxin P3",
                "Canonical Name": "Aflatoxin P3"
            },
            {
                "Referent": "Aflatoxin P4",
                "Canonical Name": "Aflatoxin P4"
            },
            {
                "Referent": "Aflatoxin M5",
                "Canonical Name": "Aflatoxin M5"
            },
            {
                "Referent": "Aflatoxin M6",
                "Canonical Name": "Aflatoxin M6"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acrylamide",
                "Arsenic",
                "Benzene",
                "Cadmium",
                "Carbon tetrachloride",
                "Cigarette smoke",
                "Cyanide",
                "Ethylene oxide",
                "Formaldehyde",
                "Mercury",
                "Nickel",
                "Polonium",
                "Radium",
                "Vinyl chloride",
                "Vinylidene chloride",
                "Xylenes",
                "Aflatoxin B1",
                "Aflatoxin B2",
                "Aflatoxin G1",
                "Aflatoxin G2",
                "Aflatoxin M1",
                "Aflatoxin M2",
                "Aflatoxin Q1",
                "Aflatoxin Q2"
            ],
            "mismatches": [
                "Aflatoxin P1",
                "Aflatoxin P2",
                "Aflatoxin B3",
                "Aflatoxin B4",
                "Aflatoxin M3",
                "Aflatoxin M4",
                "Aflatoxin Q3",
                "Aflatoxin Q4",
                "Aflatoxin P3",
                "Aflatoxin P4",
                "Aflatoxin M5",
                "Aflatoxin M6"
            ],
            "true_referents": [
                "{\"Referent\": \"Acrylamide\", \"Canonical Name\": \"Acrylamide\"}",
                "{\"Referent\": \"Aflatoxin B1\", \"Canonical Name\": \"Aflatoxin B1\"}",
                "{\"Referent\": \"Aflatoxin B2\", \"Canonical Name\": \"Aflatoxin B2\"}",
                "{\"Referent\": \"Aflatoxin G1\", \"Canonical Name\": \"Aflatoxin G1\"}",
                "{\"Referent\": \"Aflatoxin G2\", \"Canonical Name\": \"Aflatoxin G2\"}",
                "{\"Referent\": \"Aflatoxin M1\", \"Canonical Name\": \"Aflatoxin M1\"}",
                "{\"Referent\": \"Aflatoxin M2\", \"Canonical Name\": \"Aflatoxin M2\"}",
                "{\"Referent\": \"Aflatoxin Q1\", \"Canonical Name\": \"Aflatoxin Q1\"}",
                "{\"Referent\": \"Aflatoxin Q2\", \"Canonical Name\": \"Aflatoxin Q2\"}",
                "{\"Referent\": \"Arsenic\", \"Canonical Name\": \"Arsenic\"}",
                "{\"Referent\": \"Benzene\", \"Canonical Name\": \"Benzene\"}",
                "{\"Referent\": \"Cadmium\", \"Canonical Name\": \"Cadmium\"}",
                "{\"Referent\": \"Carbon tetrachloride\", \"Canonical Name\": \"Carbon tetrachloride\"}",
                "{\"Referent\": \"Cigarette smoke\", \"Canonical Name\": \"Cigarette smoke\"}",
                "{\"Referent\": \"Cyanide\", \"Canonical Name\": \"Cyanide\"}",
                "{\"Referent\": \"Ethylene oxide\", \"Canonical Name\": \"Ethylene oxide\"}",
                "{\"Referent\": \"Formaldehyde\", \"Canonical Name\": \"Formaldehyde\"}",
                "{\"Referent\": \"Mercury\", \"Canonical Name\": \"Mercury\"}",
                "{\"Referent\": \"Nickel\", \"Canonical Name\": \"Nickel\"}",
                "{\"Referent\": \"Polonium\", \"Canonical Name\": \"Polonium\"}",
                "{\"Referent\": \"Radium\", \"Canonical Name\": \"Radium\"}",
                "{\"Referent\": \"Vinyl chloride\", \"Canonical Name\": \"Vinyl chloride\"}",
                "{\"Referent\": \"Vinylidene chloride\", \"Canonical Name\": \"Vinylidene chloride\"}",
                "{\"Referent\": \"Xylenes\", \"Canonical Name\": \"Xylenes\"}"
            ],
            "TP": 24,
            "FP": 12,
            "FN": 0
        }
    }
]