[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Amphotericin B (AmB)",
            "Cisplatin",
            "Contrast media",
            "Foscarnet",
            "Gentamicin",
            "HgCl2 (Mercury chloride)",
            "Iodinated contrast media",
            "Metformin",
            "Mitomycin C",
            "Naproxen",
            "Nitrofurantoin",
            "Paracetamol (Acetaminophen) overdose",
            "Pentamidine",
            "Polymyxin B",
            "Sulfadiazine",
            "Tacrolimus",
            "Tenofovir",
            "Tobramycin",
            "Vancomycin",
            "Vincristine",
            "Vitamin D analogues",
            "Zoledronic acid",
            "Aminoglycosides",
            "Trimethoprim-sulfamethoxazole"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amphotericin B (AmB)",
                "Cisplatin",
                "Contrast media",
                "Foscarnet",
                "Gentamicin",
                "HgCl2 (Mercury chloride)",
                "Iodinated contrast media",
                "Metformin",
                "Mitomycin C",
                "Naproxen",
                "Nitrofurantoin",
                "Paracetamol (Acetaminophen) overdose",
                "Pentamidine",
                "Polymyxin B",
                "Sulfadiazine",
                "Tacrolimus",
                "Tenofovir",
                "Tobramycin",
                "Vancomycin",
                "Vincristine",
                "Vitamin D analogues",
                "Zoledronic acid",
                "Aminoglycosides",
                "Trimethoprim-sulfamethoxazole"
            ],
            "mismatches": [],
            "true_referents": [
                "Aminoglycosides",
                "Amphotericin B (AmB)",
                "Cisplatin",
                "Contrast media",
                "Foscarnet",
                "Gentamicin",
                "HgCl2 (Mercury chloride)",
                "Iodinated contrast media",
                "Metformin",
                "Mitomycin C",
                "Naproxen",
                "Nitrofurantoin",
                "Paracetamol (Acetaminophen) overdose",
                "Pentamidine",
                "Polymyxin B",
                "Sulfadiazine",
                "Tacrolimus",
                "Tenofovir",
                "Tobramycin",
                "Trimethoprim-sulfamethoxazole",
                "Vancomycin",
                "Vincristine",
                "Vitamin D analogues",
                "Zoledronic acid"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Aminoglycosides (e.g., Gentamicin)",
            "Amphotericin B",
            "Acyclovir",
            "Tacrolimus (FK506)",
            "Cyclosporine",
            "Isoniazid",
            "Methotrexate",
            "Lithium",
            "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs, e.g., Ibuprofen)",
            "Contrast Media (e.g., Iodinated contrast agents)",
            "Cyclophosphamide",
            "Ifosfamide",
            "Rifampin",
            "Foscarnet",
            "Cidofovir",
            "Erythropoiesis-Stimulating Agents (e.g., Epoetin alfa)",
            "Vancomycin",
            "Methoxyflurane",
            "Halogenated Anesthetics (e.g., Sevoflurane)",
            "Cyclosporine A",
            "Cefoperazone",
            "Radiographic Contrast Dyes",
            "Cytarabine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Aminoglycosides (e.g., Gentamicin)",
                "Amphotericin B",
                "Acyclovir",
                "Tacrolimus (FK506)",
                "Cyclosporine",
                "Isoniazid",
                "Methotrexate",
                "Lithium",
                "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs, e.g., Ibuprofen)",
                "Contrast Media (e.g., Iodinated contrast agents)",
                "Cyclophosphamide",
                "Ifosfamide",
                "Rifampin",
                "Foscarnet",
                "Cidofovir",
                "Erythropoiesis-Stimulating Agents (e.g., Epoetin alfa)",
                "Vancomycin",
                "Methoxyflurane",
                "Halogenated Anesthetics (e.g., Sevoflurane)",
                "Cyclosporine A",
                "Cefoperazone",
                "Radiographic Contrast Dyes",
                "Cytarabine"
            ],
            "mismatches": [],
            "true_referents": [
                "Acyclovir",
                "Aminoglycosides (e.g., Gentamicin)",
                "Amphotericin B",
                "Cefoperazone",
                "Cidofovir",
                "Cisplatin (CDDP)",
                "Contrast Media (e.g., Iodinated contrast agents)",
                "Cyclophosphamide",
                "Cyclosporine",
                "Cyclosporine A",
                "Cytarabine",
                "Erythropoiesis-Stimulating Agents (e.g., Epoetin alfa)",
                "Foscarnet",
                "Halogenated Anesthetics (e.g., Sevoflurane)",
                "Ifosfamide",
                "Isoniazid",
                "Lithium",
                "Methotrexate",
                "Methoxyflurane",
                "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs, e.g., Ibuprofen)",
                "Radiographic Contrast Dyes",
                "Rifampin",
                "Tacrolimus (FK506)",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen",
            "Amphotericin B",
            "Cisplatin",
            "Cyclosporine",
            "Foscarnet",
            "Gentamicin",
            "Heroin",
            "Ibuprofen",
            "Indomethacin",
            "Ketamine",
            "Lithium",
            "Methotrexate",
            "Neomycin",
            "NSAIDs",
            "Penicillin G",
            "Phenacetin",
            "Phenytoin",
            "Rifampicin",
            "Sulfamethoxazole",
            "Tacrolimus",
            "Tobramycin",
            "Vancomycin",
            "Vincristine",
            "Zidovudine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen",
                "Amphotericin B",
                "Cisplatin",
                "Cyclosporine",
                "Foscarnet",
                "Gentamicin",
                "Heroin",
                "Ibuprofen",
                "Indomethacin",
                "Ketamine",
                "Lithium",
                "Methotrexate",
                "Neomycin",
                "NSAIDs",
                "Penicillin G",
                "Phenacetin",
                "Phenytoin",
                "Rifampicin",
                "Sulfamethoxazole",
                "Tacrolimus",
                "Tobramycin",
                "Vancomycin",
                "Vincristine",
                "Zidovudine"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen",
                "Amphotericin B",
                "Cisplatin",
                "Cyclosporine",
                "Foscarnet",
                "Gentamicin",
                "Heroin",
                "Ibuprofen",
                "Indomethacin",
                "Ketamine",
                "Lithium",
                "Methotrexate",
                "NSAIDs",
                "Neomycin",
                "Penicillin G",
                "Phenacetin",
                "Phenytoin",
                "Rifampicin",
                "Sulfamethoxazole",
                "Tacrolimus",
                "Tobramycin",
                "Vancomycin",
                "Vincristine",
                "Zidovudine"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Gentamicin (GEN)",
            "Cisplatin (CIS)",
            "Amphotericin B (AMB)",
            "Vancomycin (VAN)",
            "Streptozotocin (STR)",
            "Cadmium (Cd)",
            "Lead (Pb)",
            "Mercury (Hg)",
            "Aristolochic acid (AA)",
            "Ethylene glycol (EG)",
            "Methanol (MeOH)",
            "Carbon tetrachloride (CCl4)",
            "Puromycin (PUR)",
            "Cyclosporine (CsA)",
            "Tacrolimus (Tac)",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Radiocontrast agents (RCA)",
            "Lithium (Li)",
            "Foscarnet (FSC)",
            "Tenofovir (TDF)",
            "Acyclovir (ACV)",
            "Melamine (MEL)",
            "Dichloromethane (DCM)",
            "Bromobenzene (BB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gentamicin (GEN)",
                "Cisplatin (CIS)",
                "Amphotericin B (AMB)",
                "Vancomycin (VAN)",
                "Streptozotocin (STR)",
                "Cadmium (Cd)",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Aristolochic acid (AA)",
                "Ethylene glycol (EG)",
                "Methanol (MeOH)",
                "Carbon tetrachloride (CCl4)",
                "Puromycin (PUR)",
                "Cyclosporine (CsA)",
                "Tacrolimus (Tac)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Radiocontrast agents (RCA)",
                "Lithium (Li)",
                "Foscarnet (FSC)",
                "Tenofovir (TDF)",
                "Acyclovir (ACV)",
                "Melamine (MEL)",
                "Dichloromethane (DCM)",
                "Bromobenzene (BB)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acyclovir (ACV)",
                "Amphotericin B (AMB)",
                "Aristolochic acid (AA)",
                "Bromobenzene (BB)",
                "Cadmium (Cd)",
                "Carbon tetrachloride (CCl4)",
                "Cisplatin (CIS)",
                "Cyclosporine (CsA)",
                "Dichloromethane (DCM)",
                "Ethylene glycol (EG)",
                "Foscarnet (FSC)",
                "Gentamicin (GEN)",
                "Lead (Pb)",
                "Lithium (Li)",
                "Melamine (MEL)",
                "Mercury (Hg)",
                "Methanol (MeOH)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Puromycin (PUR)",
                "Radiocontrast agents (RCA)",
                "Streptozotocin (STR)",
                "Tacrolimus (Tac)",
                "Tenofovir (TDF)",
                "Vancomycin (VAN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Gentamicin (GM)",
            "Cisplatin (CDDP)",
            "Amphotericin B (AmB)",
            "Ibuprofen (IBU)",
            "Vancomycin (VAN)",
            "Acyclovir (ACV)",
            "Methotrexate (MTX)",
            "Furosemide (FUR)",
            "Cyclosporine (CsA)",
            "Tacrolimus (TAC)",
            "Contrast Media (CM)",
            "Paracetamol (PCM)",
            "Tenofovir (TDF)",
            "Lithium (Li)",
            "Ifosfamide (IFO)",
            "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs)",
            "Penicillamine (PCA)",
            "Aminoglycosides (AGs)",
            "Ranitidine (RAN)",
            "Captopril (CAP)",
            "Chloroquine (CQ)",
            "Sulfonamides (SAs)",
            "Methoxyflurane (MOF)",
            "Nitrosoureas (NIT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gentamicin (GM)",
                "Cisplatin (CDDP)",
                "Amphotericin B (AmB)",
                "Ibuprofen (IBU)",
                "Vancomycin (VAN)",
                "Acyclovir (ACV)",
                "Methotrexate (MTX)",
                "Furosemide (FUR)",
                "Cyclosporine (CsA)",
                "Tacrolimus (TAC)",
                "Contrast Media (CM)",
                "Paracetamol (PCM)",
                "Tenofovir (TDF)",
                "Lithium (Li)",
                "Ifosfamide (IFO)",
                "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs)",
                "Penicillamine (PCA)",
                "Aminoglycosides (AGs)",
                "Ranitidine (RAN)",
                "Captopril (CAP)",
                "Chloroquine (CQ)",
                "Sulfonamides (SAs)",
                "Methoxyflurane (MOF)",
                "Nitrosoureas (NIT)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acyclovir (ACV)",
                "Aminoglycosides (AGs)",
                "Amphotericin B (AmB)",
                "Captopril (CAP)",
                "Chloroquine (CQ)",
                "Cisplatin (CDDP)",
                "Contrast Media (CM)",
                "Cyclosporine (CsA)",
                "Furosemide (FUR)",
                "Gentamicin (GM)",
                "Ibuprofen (IBU)",
                "Ifosfamide (IFO)",
                "Lithium (Li)",
                "Methotrexate (MTX)",
                "Methoxyflurane (MOF)",
                "Nitrosoureas (NIT)",
                "Nonsteroidal Anti-Inflammatory Drugs (NSAIDs)",
                "Paracetamol (PCM)",
                "Penicillamine (PCA)",
                "Ranitidine (RAN)",
                "Sulfonamides (SAs)",
                "Tacrolimus (TAC)",
                "Tenofovir (TDF)",
                "Vancomycin (VAN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Aminoglycosides (AG)",
            "Cisplatin (CDDP)",
            "Gentamicin (GM)",
            "Vancomycin (VCM)",
            "Ibuprofen (IBU)",
            "Rifampicin (RIF)",
            "Lithium (Li)",
            "Foscarnet (PFA)",
            "Cyclosporine A (CsA)",
            "Tacrolimus (FK506)",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Doxycycline (DOX)",
            "Amphotericin B (AMB)",
            "Contrast agents (e.g., iodinated contrast)",
            "Methotrexate (MTX)",
            "Lead (Pb)",
            "Cadmium (Cd)",
            "Mercury (Hg)",
            "Carbon tetrachloride (CCl4)",
            "Acetylsalicylic acid (ASA)",
            "Sodium valproate (VPA)",
            "Colistin (Polymyxin E)",
            "Ethylene glycol (EG)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Aminoglycosides (AG)",
                "Cisplatin (CDDP)",
                "Gentamicin (GM)",
                "Vancomycin (VCM)",
                "Ibuprofen (IBU)",
                "Rifampicin (RIF)",
                "Lithium (Li)",
                "Foscarnet (PFA)",
                "Cyclosporine A (CsA)",
                "Tacrolimus (FK506)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Doxycycline (DOX)",
                "Amphotericin B (AMB)",
                "Contrast agents (e.g., iodinated contrast)",
                "Methotrexate (MTX)",
                "Lead (Pb)",
                "Cadmium (Cd)",
                "Mercury (Hg)",
                "Carbon tetrachloride (CCl4)",
                "Acetylsalicylic acid (ASA)",
                "Sodium valproate (VPA)",
                "Colistin (Polymyxin E)",
                "Ethylene glycol (EG)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Acetylsalicylic acid (ASA)",
                "Aminoglycosides (AG)",
                "Amphotericin B (AMB)",
                "Cadmium (Cd)",
                "Carbon tetrachloride (CCl4)",
                "Cisplatin (CDDP)",
                "Colistin (Polymyxin E)",
                "Contrast agents (e.g., iodinated contrast)",
                "Cyclosporine A (CsA)",
                "Doxycycline (DOX)",
                "Ethylene glycol (EG)",
                "Foscarnet (PFA)",
                "Gentamicin (GM)",
                "Ibuprofen (IBU)",
                "Lead (Pb)",
                "Lithium (Li)",
                "Mercury (Hg)",
                "Methotrexate (MTX)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Rifampicin (RIF)",
                "Sodium valproate (VPA)",
                "Tacrolimus (FK506)",
                "Vancomycin (VCM)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Aminoglycosides (AG)",
            "Amphotericin B (AmB)",
            "Cisplatin (CIS)",
            "Cyclosporine (CSA)",
            "Acetaminophen (APAP)",
            "Methotrexate (MTX)",
            "Vancomycin (VAN)",
            "Lead (Pb)",
            "Mercury (Hg)",
            "Arsenic (As)",
            "Cadmium (Cd)",
            "Bismuth (Bi)",
            "Gentamicin (GM)",
            "Tobramycin (TOB)",
            "Streptomycin (STR)",
            "Bacitracin (BAC)",
            "Polymyxin B (PMB)",
            "Colistin (COL)",
            "Indomethacin (IND)",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Radiocontrast agents (RCA)",
            "Lithium (Li)",
            "Ethylene glycol (EG)",
            "Iodinated contrast media (ICM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aminoglycosides (AG)",
                "Amphotericin B (AmB)",
                "Cisplatin (CIS)",
                "Cyclosporine (CSA)",
                "Acetaminophen (APAP)",
                "Methotrexate (MTX)",
                "Vancomycin (VAN)",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Arsenic (As)",
                "Cadmium (Cd)",
                "Bismuth (Bi)",
                "Gentamicin (GM)",
                "Tobramycin (TOB)",
                "Streptomycin (STR)",
                "Bacitracin (BAC)",
                "Polymyxin B (PMB)",
                "Colistin (COL)",
                "Indomethacin (IND)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Radiocontrast agents (RCA)",
                "Lithium (Li)",
                "Ethylene glycol (EG)",
                "Iodinated contrast media (ICM)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Aminoglycosides (AG)",
                "Amphotericin B (AmB)",
                "Arsenic (As)",
                "Bacitracin (BAC)",
                "Bismuth (Bi)",
                "Cadmium (Cd)",
                "Cisplatin (CIS)",
                "Colistin (COL)",
                "Cyclosporine (CSA)",
                "Ethylene glycol (EG)",
                "Gentamicin (GM)",
                "Indomethacin (IND)",
                "Iodinated contrast media (ICM)",
                "Lead (Pb)",
                "Lithium (Li)",
                "Mercury (Hg)",
                "Methotrexate (MTX)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Polymyxin B (PMB)",
                "Radiocontrast agents (RCA)",
                "Streptomycin (STR)",
                "Tobramycin (TOB)",
                "Vancomycin (VAN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Gentamicin (GT)",
            "Cisplatin (CP)",
            "Vancomycin (VN)",
            "Aminoglycosides",
            "Cyclosporine (CN)",
            "Acetaminophen (AP)",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Tenofovir (TFV)",
            "Amphotericin B (AMB)",
            "Methotrexate (MTX)",
            "Ibuprofen (IBU)",
            "Tacrolimus (TAC)",
            "Contrast agents",
            "Cyclophosphamide (CPA)",
            "Sulfonamides",
            "Colistin (COL)",
            "Quinolone antibiotics",
            "Ciprofloxacin (CIP)",
            "Indomethacin (IND)",
            "Foscarnet (FOS)",
            "Ifosfamide (IFO)",
            "Cephalosporins",
            "Mycophenolate mofetil (MMF)",
            "Bisphosphonates"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gentamicin (GT)",
                "Cisplatin (CP)",
                "Vancomycin (VN)",
                "Aminoglycosides",
                "Cyclosporine (CN)",
                "Acetaminophen (AP)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Tenofovir (TFV)",
                "Amphotericin B (AMB)",
                "Methotrexate (MTX)",
                "Ibuprofen (IBU)",
                "Tacrolimus (TAC)",
                "Contrast agents",
                "Cyclophosphamide (CPA)",
                "Sulfonamides",
                "Colistin (COL)",
                "Quinolone antibiotics",
                "Ciprofloxacin (CIP)",
                "Indomethacin (IND)",
                "Foscarnet (FOS)",
                "Ifosfamide (IFO)",
                "Cephalosporins",
                "Mycophenolate mofetil (MMF)",
                "Bisphosphonates"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (AP)",
                "Aminoglycosides",
                "Amphotericin B (AMB)",
                "Bisphosphonates",
                "Cephalosporins",
                "Ciprofloxacin (CIP)",
                "Cisplatin (CP)",
                "Colistin (COL)",
                "Contrast agents",
                "Cyclophosphamide (CPA)",
                "Cyclosporine (CN)",
                "Foscarnet (FOS)",
                "Gentamicin (GT)",
                "Ibuprofen (IBU)",
                "Ifosfamide (IFO)",
                "Indomethacin (IND)",
                "Methotrexate (MTX)",
                "Mycophenolate mofetil (MMF)",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Quinolone antibiotics",
                "Sulfonamides",
                "Tacrolimus (TAC)",
                "Tenofovir (TFV)",
                "Vancomycin (VN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Gentamicin",
            "Amikacin",
            "Vancomycin",
            "Amphotericin B",
            "Ibuprofen (NSAID)",
            "Indomethacin (NSAID)",
            "Cyclosporine",
            "Tacrolimus",
            "Lithium carbonate",
            "Iohexol (Radiocontrast agent)",
            "Ethylene glycol",
            "Methotrexate",
            "Colchicine",
            "Tenofovir",
            "Indinavir",
            "Ifosfamide",
            "Ketoconazole",
            "Cephalexin (Cephalosporin)",
            "Acyclovir",
            "Rifampin",
            "Isoniazid",
            "Paracetamol (Acetaminophen)",
            "Methanol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Gentamicin",
                "Amikacin",
                "Vancomycin",
                "Amphotericin B",
                "Ibuprofen (NSAID)",
                "Indomethacin (NSAID)",
                "Cyclosporine",
                "Tacrolimus",
                "Lithium carbonate",
                "Iohexol (Radiocontrast agent)",
                "Ethylene glycol",
                "Methotrexate",
                "Colchicine",
                "Tenofovir",
                "Indinavir",
                "Ifosfamide",
                "Ketoconazole",
                "Cephalexin (Cephalosporin)",
                "Acyclovir",
                "Rifampin",
                "Isoniazid",
                "Paracetamol (Acetaminophen)",
                "Methanol"
            ],
            "mismatches": [],
            "true_referents": [
                "Acyclovir",
                "Amikacin",
                "Amphotericin B",
                "Cephalexin (Cephalosporin)",
                "Cisplatin (CDDP)",
                "Colchicine",
                "Cyclosporine",
                "Ethylene glycol",
                "Gentamicin",
                "Ibuprofen (NSAID)",
                "Ifosfamide",
                "Indinavir",
                "Indomethacin (NSAID)",
                "Iohexol (Radiocontrast agent)",
                "Isoniazid",
                "Ketoconazole",
                "Lithium carbonate",
                "Methanol",
                "Methotrexate",
                "Paracetamol (Acetaminophen)",
                "Rifampin",
                "Tacrolimus",
                "Tenofovir",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin",
            "Gentamicin",
            "Amphotericin B",
            "Cyclosporine",
            "Ifosfamide",
            "Acyclovir",
            "Vancomycin",
            "Tacrolimus (FK506)",
            "Contrast media (iodinated)",
            "Lithium",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Aminoglycosides",
            "Cadmium",
            "Mercury",
            "Lead",
            "Aristolochic acid",
            "Tenofovir",
            "Cidofovir",
            "Polymyxin B",
            "Foscarnet",
            "Ethylene glycol",
            "Melamine",
            "Ochratoxin A",
            "Paraquat"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin",
                "Gentamicin",
                "Amphotericin B",
                "Cyclosporine",
                "Ifosfamide",
                "Acyclovir",
                "Vancomycin",
                "Tacrolimus (FK506)",
                "Contrast media (iodinated)",
                "Lithium",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Aminoglycosides",
                "Cadmium",
                "Mercury",
                "Lead",
                "Aristolochic acid",
                "Tenofovir",
                "Cidofovir",
                "Polymyxin B",
                "Foscarnet",
                "Ethylene glycol",
                "Melamine",
                "Ochratoxin A",
                "Paraquat"
            ],
            "mismatches": [],
            "true_referents": [
                "Acyclovir",
                "Aminoglycosides",
                "Amphotericin B",
                "Aristolochic acid",
                "Cadmium",
                "Cidofovir",
                "Cisplatin",
                "Contrast media (iodinated)",
                "Cyclosporine",
                "Ethylene glycol",
                "Foscarnet",
                "Gentamicin",
                "Ifosfamide",
                "Lead",
                "Lithium",
                "Melamine",
                "Mercury",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Ochratoxin A",
                "Paraquat",
                "Polymyxin B",
                "Tacrolimus (FK506)",
                "Tenofovir",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Amphotericin B",
            "Cadmium chloride (CdCl2)",
            "Cisplatin",
            "Colistin",
            "Cyclosporine A",
            "Gentamicin",
            "Ibuprofen",
            "Indomethacin",
            "Lead acetate",
            "Lithium chloride (LiCl)",
            "Mercury chloride (HgCl2)",
            "Methotrexate",
            "Neomycin",
            "Paraquat",
            "Polymyxin B",
            "Potassium dichromate (K2Cr2O7)",
            "Rifampin",
            "Tacrolimus (FK506)",
            "Tenofovir disoproxil fumarate",
            "Tobramycin",
            "Vancomycin",
            "Foscarnet",
            "Zoledronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Amphotericin B",
                "Cadmium chloride (CdCl2)",
                "Cisplatin",
                "Colistin",
                "Cyclosporine A",
                "Gentamicin",
                "Ibuprofen",
                "Indomethacin",
                "Lead acetate",
                "Lithium chloride (LiCl)",
                "Mercury chloride (HgCl2)",
                "Methotrexate",
                "Neomycin",
                "Paraquat",
                "Polymyxin B",
                "Potassium dichromate (K2Cr2O7)",
                "Rifampin",
                "Tacrolimus (FK506)",
                "Tenofovir disoproxil fumarate",
                "Tobramycin",
                "Vancomycin",
                "Foscarnet",
                "Zoledronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Amphotericin B",
                "Cadmium chloride (CdCl2)",
                "Cisplatin",
                "Colistin",
                "Cyclosporine A",
                "Foscarnet",
                "Gentamicin",
                "Ibuprofen",
                "Indomethacin",
                "Lead acetate",
                "Lithium chloride (LiCl)",
                "Mercury chloride (HgCl2)",
                "Methotrexate",
                "Neomycin",
                "Paraquat",
                "Polymyxin B",
                "Potassium dichromate (K2Cr2O7)",
                "Rifampin",
                "Tacrolimus (FK506)",
                "Tenofovir disoproxil fumarate",
                "Tobramycin",
                "Vancomycin",
                "Zoledronic acid"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Cyclosporine A (CsA)",
            "Gentamicin",
            "Amphotericin B",
            "Aristolochic acid",
            "Lead compounds",
            "Cadmium compounds",
            "Mercury compounds",
            "Ethylene glycol",
            "Contrast media agents",
            "Calcineurin inhibitors",
            "Aminoglycoside antibiotics",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Angiotensin-converting enzyme inhibitors (ACEIs)",
            "Angiotensin II receptor blockers (ARBs)",
            "Lithium",
            "Tenofovir",
            "Adefovir",
            "Cidofovir",
            "Foscarnet",
            "Valacyclovir",
            "Indinavir",
            "Zoledronic acid",
            "Ifosfamide"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Cyclosporine A (CsA)",
                "Gentamicin",
                "Amphotericin B",
                "Aristolochic acid",
                "Lead compounds",
                "Cadmium compounds",
                "Mercury compounds",
                "Ethylene glycol",
                "Contrast media agents",
                "Calcineurin inhibitors",
                "Aminoglycoside antibiotics",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Angiotensin-converting enzyme inhibitors (ACEIs)",
                "Angiotensin II receptor blockers (ARBs)",
                "Lithium",
                "Tenofovir",
                "Adefovir",
                "Cidofovir",
                "Foscarnet",
                "Valacyclovir",
                "Zoledronic acid",
                "Ifosfamide"
            ],
            "mismatches": [
                "Indinavir"
            ],
            "true_referents": [
                "Adefovir",
                "Aminoglycoside antibiotics",
                "Amphotericin B",
                "Angiotensin II receptor blockers (ARBs)",
                "Angiotensin-converting enzyme inhibitors (ACEIs)",
                "Aristolochic acid",
                "Cadmium compounds",
                "Calcineurin inhibitors",
                "Cidofovir",
                "Cisplatin (CDDP)",
                "Contrast media agents",
                "Cyclosporine A (CsA)",
                "Ethylene glycol",
                "Foscarnet",
                "Gentamicin",
                "Ifosfamide",
                "Indinavir",
                "Lead compounds",
                "Lithium",
                "Mercury compounds",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Tenofovir",
                "Valacyclovir",
                "Zoledronic acid"
            ],
            "TP": 23,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Gentamicin (GENT)",
            "Cisplatin (CDDP)",
            "Amphotericin B (AMPH-B)",
            "Vancomycin (VAN)",
            "Tenofovir (TDF)",
            "Cyclosporine (CYA)",
            "Ibuprofen (IBU)",
            "Tacrolimus (TAC)",
            "Cadmium (Cd)",
            "Mercury (Hg)",
            "Lead (Pb)",
            "Aristolochic acid (AA)",
            "Ethylene glycol (EG)",
            "Melamine (MEL)",
            "Oxalate (OXA)",
            "Radiocontrast agents (RCA)",
            "Aminoglycosides (AG)",
            "Sulfonamides (SUL)",
            "Methotrexate (MTX)",
            "Furan (FUR)",
            "Glycerol (GLY)",
            "Doxorubicin (DOX)",
            "Paraquat (PQ)",
            "Potassium dichromate (K2Cr2O7)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gentamicin (GENT)",
                "Cisplatin (CDDP)",
                "Amphotericin B (AMPH-B)",
                "Vancomycin (VAN)",
                "Tenofovir (TDF)",
                "Cyclosporine (CYA)",
                "Ibuprofen (IBU)",
                "Tacrolimus (TAC)",
                "Cadmium (Cd)",
                "Mercury (Hg)",
                "Lead (Pb)",
                "Aristolochic acid (AA)",
                "Ethylene glycol (EG)",
                "Melamine (MEL)",
                "Oxalate (OXA)",
                "Radiocontrast agents (RCA)",
                "Aminoglycosides (AG)",
                "Sulfonamides (SUL)",
                "Methotrexate (MTX)",
                "Furan (FUR)",
                "Glycerol (GLY)",
                "Doxorubicin (DOX)",
                "Paraquat (PQ)",
                "Potassium dichromate (K2Cr2O7)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aminoglycosides (AG)",
                "Amphotericin B (AMPH-B)",
                "Aristolochic acid (AA)",
                "Cadmium (Cd)",
                "Cisplatin (CDDP)",
                "Cyclosporine (CYA)",
                "Doxorubicin (DOX)",
                "Ethylene glycol (EG)",
                "Furan (FUR)",
                "Gentamicin (GENT)",
                "Glycerol (GLY)",
                "Ibuprofen (IBU)",
                "Lead (Pb)",
                "Melamine (MEL)",
                "Mercury (Hg)",
                "Methotrexate (MTX)",
                "Oxalate (OXA)",
                "Paraquat (PQ)",
                "Potassium dichromate (K2Cr2O7)",
                "Radiocontrast agents (RCA)",
                "Sulfonamides (SUL)",
                "Tacrolimus (TAC)",
                "Tenofovir (TDF)",
                "Vancomycin (VAN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Cyclosporine",
            "Aminoglycosides",
            "Vancomycin",
            "Amphotericin B",
            "Tobramycin",
            "Neomycin",
            "Gentamicin",
            "Streptomycin",
            "Sulfonamides",
            "Percyliquinone",
            "Ethylene glycol",
            "Acetaminophen",
            "Aspirin",
            "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
            "Lithium",
            "Cyclophosphamide",
            "Ifosfamide",
            "Busulfan",
            "Cisplatin analogues",
            "Oxaliplatin",
            "Carboplatin",
            "Etoposide",
            "Methotrexate",
            "Fosfomycin",
            "Sulfonamide antibiotics",
            "Phenacetin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Cyclosporine",
                "Aminoglycosides",
                "Vancomycin",
                "Amphotericin B",
                "Tobramycin",
                "Neomycin",
                "Gentamicin",
                "Streptomycin",
                "Sulfonamides",
                "Percyliquinone",
                "Ethylene glycol",
                "Acetaminophen",
                "Aspirin",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Lithium",
                "Cyclophosphamide",
                "Ifosfamide",
                "Busulfan",
                "Cisplatin analogues",
                "Oxaliplatin",
                "Carboplatin",
                "Etoposide",
                "Methotrexate"
            ],
            "mismatches": [
                "Fosfomycin",
                "Sulfonamide antibiotics",
                "Phenacetin"
            ],
            "true_referents": [
                "Acetaminophen",
                "Aminoglycosides",
                "Amphotericin B",
                "Aspirin",
                "Busulfan",
                "Carboplatin",
                "Cisplatin (CDDP)",
                "Cisplatin analogues",
                "Cyclophosphamide",
                "Cyclosporine",
                "Ethylene glycol",
                "Etoposide",
                "Gentamicin",
                "Ifosfamide",
                "Lithium",
                "Methotrexate",
                "Neomycin",
                "Nonsteroidal anti-inflammatory drugs (NSAIDs)",
                "Oxaliplatin",
                "Percyliquinone",
                "Streptomycin",
                "Sulfonamides",
                "Tobramycin",
                "Vancomycin"
            ],
            "TP": 24,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": [
            "Cisplatin (CDDP)",
            "Aminoglycosides",
            "Gentamicin",
            "Streptomycin",
            "Amphotericin B",
            "Cyclosporine",
            "Tacrolimus",
            "Sirolimus",
            "Rapamycin",
            "Oxaliplatin",
            "Carboplatin",
            "Cadmium",
            "Mercury (Hg)",
            "Lead (Pb)",
            "Cadmium chloride",
            "Chloroacetaldehyde",
            "Acrolein",
            "Pentachlorophenol",
            "Hexachlorophene",
            "Paraquat",
            "Diquat",
            "Cephaloridine",
            "Mitomycin C"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cisplatin (CDDP)",
                "Aminoglycosides",
                "Gentamicin",
                "Streptomycin",
                "Amphotericin B",
                "Cyclosporine",
                "Tacrolimus",
                "Sirolimus",
                "Rapamycin",
                "Oxaliplatin",
                "Carboplatin",
                "Cadmium",
                "Mercury (Hg)",
                "Lead (Pb)",
                "Cadmium chloride",
                "Chloroacetaldehyde",
                "Acrolein",
                "Pentachlorophenol",
                "Hexachlorophene",
                "Paraquat",
                "Diquat",
                "Cephaloridine",
                "Mitomycin C"
            ],
            "mismatches": [],
            "true_referents": [
                "Acrolein",
                "Aminoglycosides",
                "Amphotericin B",
                "Cadmium",
                "Cadmium chloride",
                "Carboplatin",
                "Cephaloridine",
                "Chloroacetaldehyde",
                "Cisplatin (CDDP)",
                "Cyclosporine",
                "Diquat",
                "Gentamicin",
                "Hexachlorophene",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Mitomycin C",
                "Oxaliplatin",
                "Paraquat",
                "Pentachlorophenol",
                "Rapamycin",
                "Sirolimus",
                "Streptomycin",
                "Tacrolimus"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "nephrotoxic compound",
        "domain": "medicine",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]