[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Alcohol (EtOH)",
            "Thalidomide",
            "Isotretinoin (Accutane)",
            "Warfarin",
            "Lithium",
            "Mercury",
            "Methotrexate",
            "Valproic acid",
            "Phenytoin",
            "Amitriptyline",
            "Hydroxyurea",
            "Aspirin",
            "Sulfonamides",
            "Nitrogen mustard",
            "Diethylstilbestrol (DES)",
            "Arsenic",
            "Benzene",
            "Cocaine",
            "Iodine",
            "Lead",
            "Ethylene glycol",
            "Carbon monoxide",
            "Rubella virus",
            "Cigarette smoke"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alcohol (EtOH)",
                "Thalidomide",
                "Isotretinoin (Accutane)",
                "Warfarin",
                "Lithium",
                "Mercury",
                "Methotrexate",
                "Valproic acid",
                "Phenytoin",
                "Amitriptyline",
                "Hydroxyurea",
                "Aspirin",
                "Sulfonamides",
                "Nitrogen mustard",
                "Diethylstilbestrol (DES)",
                "Arsenic",
                "Benzene",
                "Cocaine",
                "Iodine",
                "Lead",
                "Ethylene glycol",
                "Carbon monoxide",
                "Rubella virus",
                "Cigarette smoke"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (EtOH)",
                "Amitriptyline",
                "Arsenic",
                "Aspirin",
                "Benzene",
                "Carbon monoxide",
                "Cigarette smoke",
                "Cocaine",
                "Diethylstilbestrol (DES)",
                "Ethylene glycol",
                "Hydroxyurea",
                "Iodine",
                "Isotretinoin (Accutane)",
                "Lead",
                "Lithium",
                "Mercury",
                "Methotrexate",
                "Nitrogen mustard",
                "Phenytoin",
                "Rubella virus",
                "Sulfonamides",
                "Thalidomide",
                "Valproic acid",
                "Warfarin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Alcohol (Ethanol)",
            "Tobacco Smoke",
            "Thalidomide",
            "Isotretinoin (Accutane)",
            "Valproic Acid",
            "Cocaine",
            "Heroin",
            "Lead",
            "Mercury",
            "Radiation (Ionizing)",
            "Arsenic",
            "Cadmium",
            "Polychlorinated Biphenyls (PCBs)",
            "Rubella Virus",
            "Cytomegalovirus",
            "Zika Virus",
            "Toxoplasmosis",
            "Syphilis",
            "Herpes Simplex Virus",
            "Methotrexate",
            "Warfarin",
            "Phenytoin",
            "Methimazole",
            "Retinoic Acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alcohol (Ethanol)",
                "Tobacco Smoke",
                "Thalidomide",
                "Isotretinoin (Accutane)",
                "Valproic Acid",
                "Cocaine",
                "Heroin",
                "Lead",
                "Mercury",
                "Radiation (Ionizing)",
                "Arsenic",
                "Cadmium",
                "Polychlorinated Biphenyls (PCBs)",
                "Rubella Virus",
                "Cytomegalovirus",
                "Zika Virus",
                "Toxoplasmosis",
                "Syphilis",
                "Herpes Simplex Virus",
                "Methotrexate",
                "Warfarin",
                "Phenytoin",
                "Methimazole",
                "Retinoic Acid"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (Ethanol)",
                "Arsenic",
                "Cadmium",
                "Cocaine",
                "Cytomegalovirus",
                "Heroin",
                "Herpes Simplex Virus",
                "Isotretinoin (Accutane)",
                "Lead",
                "Mercury",
                "Methimazole",
                "Methotrexate",
                "Phenytoin",
                "Polychlorinated Biphenyls (PCBs)",
                "Radiation (Ionizing)",
                "Retinoic Acid",
                "Rubella Virus",
                "Syphilis",
                "Thalidomide",
                "Tobacco Smoke",
                "Toxoplasmosis",
                "Valproic Acid",
                "Warfarin",
                "Zika Virus"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide (THAL)",
            "Valproic Acid (VPA)",
            "Methotrexate (MTX)",
            "Folic Acid Antagonists",
            "Retinoic Acid",
            "Alcohol (Ethanol)",
            "Cocaine",
            "Heroin",
            "Diethylstilbestrol (DES)",
            "Isotretinoin",
            "Methylmercury",
            "Lead",
            "Polychlorinated Biphenyls (PCBs)",
            "Organophosphates",
            "Cigarette Smoke",
            "Ibuprofen",
            "Warfarin",
            "Cyclophosphamide",
            "Phenytoin",
            "Aminopterin",
            "Tetracycline",
            "Lithium",
            "Amiodarone",
            "Ritonavir"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide (THAL)",
                "Valproic Acid (VPA)",
                "Methotrexate (MTX)",
                "Folic Acid Antagonists",
                "Retinoic Acid",
                "Alcohol (Ethanol)",
                "Cocaine",
                "Heroin",
                "Diethylstilbestrol (DES)",
                "Isotretinoin",
                "Methylmercury",
                "Lead",
                "Polychlorinated Biphenyls (PCBs)",
                "Organophosphates",
                "Cigarette Smoke",
                "Ibuprofen",
                "Warfarin",
                "Cyclophosphamide",
                "Phenytoin",
                "Aminopterin",
                "Tetracycline",
                "Lithium",
                "Amiodarone",
                "Ritonavir"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (Ethanol)",
                "Aminopterin",
                "Amiodarone",
                "Cigarette Smoke",
                "Cocaine",
                "Cyclophosphamide",
                "Diethylstilbestrol (DES)",
                "Folic Acid Antagonists",
                "Heroin",
                "Ibuprofen",
                "Isotretinoin",
                "Lead",
                "Lithium",
                "Methotrexate (MTX)",
                "Methylmercury",
                "Organophosphates",
                "Phenytoin",
                "Polychlorinated Biphenyls (PCBs)",
                "Retinoic Acid",
                "Ritonavir",
                "Tetracycline",
                "Thalidomide (THAL)",
                "Valproic Acid (VPA)",
                "Warfarin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide (THAL)",
            "Ethanol (EtOH)",
            "Retinoic Acid (RA)",
            "Isotretinoin (ISO)",
            "Valproic Acid (VPA)",
            "Warfarin (WARF)",
            "Diethylstilbestrol (DES)",
            "Methylmercury (MeHg)",
            "Lead (Pb)",
            "Radiation (RAD)",
            "Rubella Virus (RUBV)",
            "Cytomegalovirus (CMV)",
            "Toxoplasma gondii (Toxo)",
            "Zika Virus (ZIKV)",
            "Listeria monocytogenes (Lm)",
            "ACE Inhibitors (ACEI)",
            "Tetracycline (TET)",
            "Methotrexate (MTX)",
            "Phtalates (PHT)",
            "Bisphenol A (BPA)",
            "Polychlorinated Biphenyls (PCBs)",
            "Arsenic (As)",
            "Cadmium (Cd)",
            "Carbon Monoxide (CO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide (THAL)",
                "Ethanol (EtOH)",
                "Retinoic Acid (RA)",
                "Isotretinoin (ISO)",
                "Valproic Acid (VPA)",
                "Warfarin (WARF)",
                "Diethylstilbestrol (DES)",
                "Methylmercury (MeHg)",
                "Lead (Pb)",
                "Radiation (RAD)",
                "Rubella Virus (RUBV)",
                "Cytomegalovirus (CMV)",
                "Toxoplasma gondii (Toxo)",
                "Zika Virus (ZIKV)",
                "Listeria monocytogenes (Lm)",
                "ACE Inhibitors (ACEI)",
                "Tetracycline (TET)",
                "Methotrexate (MTX)",
                "Phtalates (PHT)",
                "Bisphenol A (BPA)",
                "Polychlorinated Biphenyls (PCBs)",
                "Arsenic (As)",
                "Cadmium (Cd)",
                "Carbon Monoxide (CO)"
            ],
            "mismatches": [],
            "true_referents": [
                "ACE Inhibitors (ACEI)",
                "Arsenic (As)",
                "Bisphenol A (BPA)",
                "Cadmium (Cd)",
                "Carbon Monoxide (CO)",
                "Cytomegalovirus (CMV)",
                "Diethylstilbestrol (DES)",
                "Ethanol (EtOH)",
                "Isotretinoin (ISO)",
                "Lead (Pb)",
                "Listeria monocytogenes (Lm)",
                "Methotrexate (MTX)",
                "Methylmercury (MeHg)",
                "Phtalates (PHT)",
                "Polychlorinated Biphenyls (PCBs)",
                "Radiation (RAD)",
                "Retinoic Acid (RA)",
                "Rubella Virus (RUBV)",
                "Tetracycline (TET)",
                "Thalidomide (THAL)",
                "Toxoplasma gondii (Toxo)",
                "Valproic Acid (VPA)",
                "Warfarin (WARF)",
                "Zika Virus (ZIKV)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide (THD)",
            "Alcohol (EtOH)",
            "Isotretinoin (13-cis-RA)",
            "Valproic Acid (VPA)",
            "Warfarin",
            "Methotrexate (MTX)",
            "Phenytoin (PHT)",
            "Lithium",
            "Diethylstilbestrol (DES)",
            "Carbamazepine (CBZ)",
            "Misoprostol",
            "Tetracycline",
            "Cyclophosphamide (CTX)",
            "Mycophenolate Mofetil (MMF)",
            "Cytomegalovirus (CMV)",
            "Rubella Virus",
            "Toxoplasma gondii",
            "Herpes Simplex Virus (HSV)",
            "Varicella Zoster Virus (VZV)",
            "Lead",
            "Mercury",
            "Polychlorinated Biphenyls (PCBs)",
            "Androgens",
            "Smoking (Nicotine)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide (THD)",
                "Alcohol (EtOH)",
                "Isotretinoin (13-cis-RA)",
                "Valproic Acid (VPA)",
                "Warfarin",
                "Methotrexate (MTX)",
                "Phenytoin (PHT)",
                "Lithium",
                "Diethylstilbestrol (DES)",
                "Carbamazepine (CBZ)",
                "Misoprostol",
                "Tetracycline",
                "Cyclophosphamide (CTX)",
                "Mycophenolate Mofetil (MMF)",
                "Cytomegalovirus (CMV)",
                "Rubella Virus",
                "Toxoplasma gondii",
                "Herpes Simplex Virus (HSV)",
                "Varicella Zoster Virus (VZV)",
                "Lead",
                "Mercury",
                "Polychlorinated Biphenyls (PCBs)",
                "Androgens",
                "Smoking (Nicotine)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (EtOH)",
                "Androgens",
                "Carbamazepine (CBZ)",
                "Cyclophosphamide (CTX)",
                "Cytomegalovirus (CMV)",
                "Diethylstilbestrol (DES)",
                "Herpes Simplex Virus (HSV)",
                "Isotretinoin (13-cis-RA)",
                "Lead",
                "Lithium",
                "Mercury",
                "Methotrexate (MTX)",
                "Misoprostol",
                "Mycophenolate Mofetil (MMF)",
                "Phenytoin (PHT)",
                "Polychlorinated Biphenyls (PCBs)",
                "Rubella Virus",
                "Smoking (Nicotine)",
                "Tetracycline",
                "Thalidomide (THD)",
                "Toxoplasma gondii",
                "Valproic Acid (VPA)",
                "Varicella Zoster Virus (VZV)",
                "Warfarin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide (THAL)",
            "Alcohol (EtOH)",
            "Valproic Acid (VPA)",
            "Isotretinoin (Accutane)",
            "Methotrexate (MTX)",
            "Cytomegalovirus (CMV)",
            "Rubella Virus (Rubella)",
            "Tetracycline (TET)",
            "Diethylstilbestrol (DES)",
            "Lithium (Li)",
            "Phenytoin (Dilantin)",
            "Warfarin (Coumadin)",
            "Cocaine (Coke)",
            "Lead (Pb)",
            "Mercury (Hg)",
            "Aminopterin (AMT)",
            "Trimethoprim (TMP)",
            "Retinoic Acid (RA)",
            "Benzodiazepines (BDZ)",
            "Caffeine (Caf)",
            "Folic Acid Antagonists (FA)",
            "Zika Virus (ZIKV)",
            "Sodium Valproate (VPA)",
            "Chlamydia trachomatis (CT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide (THAL)",
                "Alcohol (EtOH)",
                "Valproic Acid (VPA)",
                "Isotretinoin (Accutane)",
                "Methotrexate (MTX)",
                "Cytomegalovirus (CMV)",
                "Rubella Virus (Rubella)",
                "Tetracycline (TET)",
                "Diethylstilbestrol (DES)",
                "Lithium (Li)",
                "Phenytoin (Dilantin)",
                "Warfarin (Coumadin)",
                "Cocaine (Coke)",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Aminopterin (AMT)",
                "Trimethoprim (TMP)",
                "Retinoic Acid (RA)",
                "Benzodiazepines (BDZ)",
                "Caffeine (Caf)",
                "Folic Acid Antagonists (FA)",
                "Zika Virus (ZIKV)",
                "Sodium Valproate (VPA)",
                "Chlamydia trachomatis (CT)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (EtOH)",
                "Aminopterin (AMT)",
                "Benzodiazepines (BDZ)",
                "Caffeine (Caf)",
                "Chlamydia trachomatis (CT)",
                "Cocaine (Coke)",
                "Cytomegalovirus (CMV)",
                "Diethylstilbestrol (DES)",
                "Folic Acid Antagonists (FA)",
                "Isotretinoin (Accutane)",
                "Lead (Pb)",
                "Lithium (Li)",
                "Mercury (Hg)",
                "Methotrexate (MTX)",
                "Phenytoin (Dilantin)",
                "Retinoic Acid (RA)",
                "Rubella Virus (Rubella)",
                "Sodium Valproate (VPA)",
                "Tetracycline (TET)",
                "Thalidomide (THAL)",
                "Trimethoprim (TMP)",
                "Valproic Acid (VPA)",
                "Warfarin (Coumadin)",
                "Zika Virus (ZIKV)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Alcohol (EtOH)",
            "Tobacco (Nicotine)",
            "Thalidomide",
            "Methotrexate (MTX)",
            "Isotretinoin (Accutane)",
            "Warfarin (Coumadin)",
            "Valproic Acid (Depakote)",
            "Diethylstilbestrol (DES)",
            "Tetracycline",
            "Phenytoin (Dilantin)",
            "Lithium",
            "Mercury",
            "Lead",
            "Cocaine",
            "Zidovudine (AZT)",
            "Cytomegalovirus (CMV)",
            "Rubella Virus",
            "Herpes Simplex Virus (HSV)",
            "Toxoplasma gondii",
            "X-Rays",
            "Polychlorinated Biphenyls (PCBs)",
            "Dioxin",
            "Radiation",
            "Ethylene Glycol (Antifreeze)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alcohol (EtOH)",
                "Tobacco (Nicotine)",
                "Thalidomide",
                "Methotrexate (MTX)",
                "Isotretinoin (Accutane)",
                "Warfarin (Coumadin)",
                "Valproic Acid (Depakote)",
                "Diethylstilbestrol (DES)",
                "Tetracycline",
                "Phenytoin (Dilantin)",
                "Lithium",
                "Mercury",
                "Lead",
                "Cocaine",
                "Zidovudine (AZT)",
                "Cytomegalovirus (CMV)",
                "Rubella Virus",
                "Herpes Simplex Virus (HSV)",
                "Toxoplasma gondii",
                "X-Rays",
                "Polychlorinated Biphenyls (PCBs)",
                "Dioxin",
                "Radiation",
                "Ethylene Glycol (Antifreeze)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (EtOH)",
                "Cocaine",
                "Cytomegalovirus (CMV)",
                "Diethylstilbestrol (DES)",
                "Dioxin",
                "Ethylene Glycol (Antifreeze)",
                "Herpes Simplex Virus (HSV)",
                "Isotretinoin (Accutane)",
                "Lead",
                "Lithium",
                "Mercury",
                "Methotrexate (MTX)",
                "Phenytoin (Dilantin)",
                "Polychlorinated Biphenyls (PCBs)",
                "Radiation",
                "Rubella Virus",
                "Tetracycline",
                "Thalidomide",
                "Tobacco (Nicotine)",
                "Toxoplasma gondii",
                "Valproic Acid (Depakote)",
                "Warfarin (Coumadin)",
                "X-Rays",
                "Zidovudine (AZT)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "teratogen",
        "domain": "medicine",
        "response": "",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Alcohol (Ethanol)",
            "Thalidomide",
            "Isotretinoin (13-cis-retinoic acid)",
            "Warfarin",
            "Rubella virus",
            "Methotrexate",
            "Zika virus",
            "Diethylstilbestrol (DES)",
            "Valproic acid (VPA)",
            "Lead",
            "Smoking (Nicotine)",
            "Aspirin",
            "Cocaine",
            "Tobacco smoke",
            "PCP (Phencyclidine)",
            "Arsenic",
            "Mercury",
            "Carbon monoxide",
            "Saxitoxin",
            "Bisphenol A (BPA)",
            "Lead acetate",
            "Fetal alcohol spectrum disorders (FASD)",
            "Dioxins",
            "Methylmercury"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alcohol (Ethanol)",
                "Thalidomide",
                "Isotretinoin (13-cis-retinoic acid)",
                "Warfarin",
                "Rubella virus",
                "Methotrexate",
                "Zika virus",
                "Diethylstilbestrol (DES)",
                "Valproic acid (VPA)",
                "Lead",
                "Smoking (Nicotine)",
                "Aspirin",
                "Cocaine",
                "Tobacco smoke",
                "PCP (Phencyclidine)",
                "Arsenic",
                "Mercury",
                "Carbon monoxide",
                "Saxitoxin",
                "Bisphenol A (BPA)",
                "Lead acetate",
                "Fetal alcohol spectrum disorders (FASD)",
                "Dioxins",
                "Methylmercury"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (Ethanol)",
                "Arsenic",
                "Aspirin",
                "Bisphenol A (BPA)",
                "Carbon monoxide",
                "Cocaine",
                "Diethylstilbestrol (DES)",
                "Dioxins",
                "Fetal alcohol spectrum disorders (FASD)",
                "Isotretinoin (13-cis-retinoic acid)",
                "Lead",
                "Lead acetate",
                "Mercury",
                "Methotrexate",
                "Methylmercury",
                "PCP (Phencyclidine)",
                "Rubella virus",
                "Saxitoxin",
                "Smoking (Nicotine)",
                "Thalidomide",
                "Tobacco smoke",
                "Valproic acid (VPA)",
                "Warfarin",
                "Zika virus"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide (THAL)",
            "Isotretinoin (13-cis-retinoic acid)",
            "Alcohol (ethanol)",
            "Tobacco smoke",
            "Valproic acid (VPA)",
            "Warfarin",
            "Diethylstilbestrol (DES)",
            "Methylmercury",
            "Rubella virus",
            "Cytomegalovirus (CMV)",
            "Zika virus",
            "Toxoplasma gondii",
            "X-rays",
            "Lead",
            "Polychlorinated biphenyls (PCBs)",
            "Cocaine",
            "Heroin",
            "Lithium",
            "Tetracycline antibiotics",
            "Angiotensin-converting enzyme (ACE) inhibitors",
            "Phenytoin",
            "Carbamazepine",
            "Organic mercury compounds",
            "Ionizing radiation"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide (THAL)",
                "Isotretinoin (13-cis-retinoic acid)",
                "Alcohol (ethanol)",
                "Tobacco smoke",
                "Valproic acid (VPA)",
                "Warfarin",
                "Diethylstilbestrol (DES)",
                "Methylmercury",
                "Rubella virus",
                "Cytomegalovirus (CMV)",
                "Zika virus",
                "Toxoplasma gondii",
                "X-rays",
                "Lead",
                "Polychlorinated biphenyls (PCBs)",
                "Cocaine",
                "Heroin",
                "Lithium",
                "Tetracycline antibiotics",
                "Angiotensin-converting enzyme (ACE) inhibitors",
                "Phenytoin",
                "Carbamazepine",
                "Organic mercury compounds",
                "Ionizing radiation"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (ethanol)",
                "Angiotensin-converting enzyme (ACE) inhibitors",
                "Carbamazepine",
                "Cocaine",
                "Cytomegalovirus (CMV)",
                "Diethylstilbestrol (DES)",
                "Heroin",
                "Ionizing radiation",
                "Isotretinoin (13-cis-retinoic acid)",
                "Lead",
                "Lithium",
                "Methylmercury",
                "Organic mercury compounds",
                "Phenytoin",
                "Polychlorinated biphenyls (PCBs)",
                "Rubella virus",
                "Tetracycline antibiotics",
                "Thalidomide (THAL)",
                "Tobacco smoke",
                "Toxoplasma gondii",
                "Valproic acid (VPA)",
                "Warfarin",
                "X-rays",
                "Zika virus"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide",
            "Alcohol (Ethanol)",
            "Cigarette smoke",
            "Isotretinoin (Accutane)",
            "Valproic acid",
            "Methotrexate",
            "Diethylstilbestrol (DES)",
            "Lithium",
            "Warfarin",
            "Tetracycline",
            "Phenytoin",
            "Carbamazepine",
            "Cocaine",
            "Lead",
            "Mercury",
            "Radiation",
            "Rubella virus",
            "Cytomegalovirus (CMV)",
            "Toxoplasma gondii",
            "Zika virus",
            "Parvovirus B19",
            "Herpes simplex virus (HSV)",
            "Varicella-zoster virus (VZV)",
            "Polychlorinated biphenyls (PCBs)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide",
                "Alcohol (Ethanol)",
                "Cigarette smoke",
                "Isotretinoin (Accutane)",
                "Valproic acid",
                "Methotrexate",
                "Diethylstilbestrol (DES)",
                "Lithium",
                "Warfarin",
                "Tetracycline",
                "Phenytoin",
                "Carbamazepine",
                "Cocaine",
                "Lead",
                "Mercury",
                "Radiation",
                "Rubella virus",
                "Cytomegalovirus (CMV)",
                "Toxoplasma gondii",
                "Zika virus",
                "Parvovirus B19",
                "Herpes simplex virus (HSV)",
                "Varicella-zoster virus (VZV)",
                "Polychlorinated biphenyls (PCBs)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (Ethanol)",
                "Carbamazepine",
                "Cigarette smoke",
                "Cocaine",
                "Cytomegalovirus (CMV)",
                "Diethylstilbestrol (DES)",
                "Herpes simplex virus (HSV)",
                "Isotretinoin (Accutane)",
                "Lead",
                "Lithium",
                "Mercury",
                "Methotrexate",
                "Parvovirus B19",
                "Phenytoin",
                "Polychlorinated biphenyls (PCBs)",
                "Radiation",
                "Rubella virus",
                "Tetracycline",
                "Thalidomide",
                "Toxoplasma gondii",
                "Valproic acid",
                "Varicella-zoster virus (VZV)",
                "Warfarin",
                "Zika virus"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Alcohol (ETOH)",
            "Thalidomide (THAL)",
            "Isotretinoin (13-cis-retinoic acid)",
            "Valproic acid (VPA)",
            "Phenytoin (PHT)",
            "Warfarin",
            "Lithium",
            "Angiotensin-converting enzyme (ACE) inhibitors",
            "Antiepileptic drugs (AEDs)",
            "Selective serotonin reuptake inhibitors (SSRIs)",
            "Cigarette smoking",
            "Ionizing radiation",
            "Hyperthermia",
            "Rubella virus",
            "Cytomegalovirus (CMV)",
            "Zika virus",
            "Lead",
            "Mercury",
            "Polychlorinated biphenyls (PCBs)",
            "Dioxins",
            "Pesticides",
            "Cocaine",
            "Methamphetamine",
            "Diethylstilbestrol (DES)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alcohol (ETOH)",
                "Thalidomide (THAL)",
                "Isotretinoin (13-cis-retinoic acid)",
                "Valproic acid (VPA)",
                "Phenytoin (PHT)",
                "Warfarin",
                "Lithium",
                "Angiotensin-converting enzyme (ACE) inhibitors",
                "Antiepileptic drugs (AEDs)",
                "Selective serotonin reuptake inhibitors (SSRIs)",
                "Cigarette smoking",
                "Ionizing radiation",
                "Hyperthermia",
                "Rubella virus",
                "Cytomegalovirus (CMV)",
                "Zika virus",
                "Lead",
                "Mercury",
                "Polychlorinated biphenyls (PCBs)",
                "Dioxins",
                "Pesticides",
                "Cocaine",
                "Methamphetamine",
                "Diethylstilbestrol (DES)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (ETOH)",
                "Angiotensin-converting enzyme (ACE) inhibitors",
                "Antiepileptic drugs (AEDs)",
                "Cigarette smoking",
                "Cocaine",
                "Cytomegalovirus (CMV)",
                "Diethylstilbestrol (DES)",
                "Dioxins",
                "Hyperthermia",
                "Ionizing radiation",
                "Isotretinoin (13-cis-retinoic acid)",
                "Lead",
                "Lithium",
                "Mercury",
                "Methamphetamine",
                "Pesticides",
                "Phenytoin (PHT)",
                "Polychlorinated biphenyls (PCBs)",
                "Rubella virus",
                "Selective serotonin reuptake inhibitors (SSRIs)",
                "Thalidomide (THAL)",
                "Valproic acid (VPA)",
                "Warfarin",
                "Zika virus"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Alcohol (EtOH)",
            "Thalidomide (Thal)",
            "Valproic Acid (VPA)",
            "Isotretinoin (13-cis-RA)",
            "Warfarin (Coumadin)",
            "Methylmercury (MeHg)",
            "Lead (Pb)",
            "Polychlorinated Biphenyls (PCBs)",
            "Dioxins",
            "Diethylstilbestrol (DES)",
            "Radiation",
            "Cigarette Smoke",
            "Cocaine",
            "Heroin",
            "Methamphetamine",
            "Antiepileptic Drugs",
            "Antidepressants",
            "Antipsychotics",
            "Chemotherapeutic Agents",
            "Immunosuppressants",
            "Pesticides",
            "Phthalates",
            "Bisphenol A (BPA)",
            "Perfluorinated Compounds (PFCs)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alcohol (EtOH)",
                "Thalidomide (Thal)",
                "Valproic Acid (VPA)",
                "Isotretinoin (13-cis-RA)",
                "Warfarin (Coumadin)",
                "Methylmercury (MeHg)",
                "Lead (Pb)",
                "Polychlorinated Biphenyls (PCBs)",
                "Dioxins",
                "Diethylstilbestrol (DES)",
                "Radiation",
                "Cigarette Smoke",
                "Cocaine",
                "Heroin",
                "Methamphetamine",
                "Antiepileptic Drugs",
                "Antidepressants",
                "Antipsychotics",
                "Chemotherapeutic Agents",
                "Immunosuppressants",
                "Pesticides",
                "Phthalates",
                "Bisphenol A (BPA)",
                "Perfluorinated Compounds (PFCs)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol (EtOH)",
                "Antidepressants",
                "Antiepileptic Drugs",
                "Antipsychotics",
                "Bisphenol A (BPA)",
                "Chemotherapeutic Agents",
                "Cigarette Smoke",
                "Cocaine",
                "Diethylstilbestrol (DES)",
                "Dioxins",
                "Heroin",
                "Immunosuppressants",
                "Isotretinoin (13-cis-RA)",
                "Lead (Pb)",
                "Methamphetamine",
                "Methylmercury (MeHg)",
                "Perfluorinated Compounds (PFCs)",
                "Pesticides",
                "Phthalates",
                "Polychlorinated Biphenyls (PCBs)",
                "Radiation",
                "Thalidomide (Thal)",
                "Valproic Acid (VPA)",
                "Warfarin (Coumadin)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide (Thalidomide)",
            "Methotrexate (Methotrexate)",
            "Warfarin (Warfarin)",
            "Valproic acid (Valproate)",
            "Ethanol (Ethanol)",
            "Cocaine (Cocaine)",
            "Methamphetamine (Methamphetamine)",
            "Heroin (Heroin)",
            "Cigarette smoke (Cigarette smoke)",
            "Diethylstilbestrol (Diethylstilbestrol)",
            "Aminopterin (Aminopterin)",
            "Phenytoin (Phenytoin)",
            "Tetracycline (Tetracycline)",
            "Retinoic acid (Retinoic acid)",
            "Vitamin A (Vitamin A)",
            "Folic acid (Folic acid)",
            "Bisphenol A (Bisphenol A)",
            "Phthalates (Phthalates)",
            "Polychlorinated biphenyls (PCBs)",
            "Pesticides (Pesticides)",
            "Lead (Lead)",
            "Mercury (Mercury)",
            "Arsenic (Arsenic)",
            "Benzene (Benzene)",
            "Acrylamide (Acrylamide)",
            "Methyl mercury (Methyl mercury)",
            "Radon (Radon)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide (Thalidomide)",
                "Methotrexate (Methotrexate)",
                "Warfarin (Warfarin)",
                "Valproic acid (Valproate)",
                "Ethanol (Ethanol)",
                "Cocaine (Cocaine)",
                "Methamphetamine (Methamphetamine)",
                "Heroin (Heroin)",
                "Cigarette smoke (Cigarette smoke)",
                "Diethylstilbestrol (Diethylstilbestrol)",
                "Aminopterin (Aminopterin)",
                "Phenytoin (Phenytoin)",
                "Tetracycline (Tetracycline)",
                "Retinoic acid (Retinoic acid)",
                "Vitamin A (Vitamin A)",
                "Folic acid (Folic acid)",
                "Bisphenol A (Bisphenol A)",
                "Phthalates (Phthalates)",
                "Polychlorinated biphenyls (PCBs)",
                "Pesticides (Pesticides)",
                "Lead (Lead)",
                "Mercury (Mercury)",
                "Arsenic (Arsenic)",
                "Benzene (Benzene)"
            ],
            "mismatches": [
                "Acrylamide (Acrylamide)",
                "Methyl mercury (Methyl mercury)",
                "Radon (Radon)"
            ],
            "true_referents": [
                "Aminopterin (Aminopterin)",
                "Arsenic (Arsenic)",
                "Benzene (Benzene)",
                "Bisphenol A (Bisphenol A)",
                "Cigarette smoke (Cigarette smoke)",
                "Cocaine (Cocaine)",
                "Diethylstilbestrol (Diethylstilbestrol)",
                "Ethanol (Ethanol)",
                "Folic acid (Folic acid)",
                "Heroin (Heroin)",
                "Lead (Lead)",
                "Mercury (Mercury)",
                "Methamphetamine (Methamphetamine)",
                "Methotrexate (Methotrexate)",
                "Pesticides (Pesticides)",
                "Phenytoin (Phenytoin)",
                "Phthalates (Phthalates)",
                "Polychlorinated biphenyls (PCBs)",
                "Retinoic acid (Retinoic acid)",
                "Tetracycline (Tetracycline)",
                "Thalidomide (Thalidomide)",
                "Valproic acid (Valproate)",
                "Vitamin A (Vitamin A)",
                "Warfarin (Warfarin)"
            ],
            "TP": 24,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            "Thalidomide (THD)",
            "Isotretinoin (13-cis retinoic acid)",
            "Valproic acid (VPA)",
            "Warfarin (Coumadin)",
            "Phenytoin (PHT)",
            "Trimethadione (TMO)",
            "Retinoic acid",
            "Diethylstilbestrol (DES)",
            "Methotrexate (MTX)",
            "Cyclophosphamide (CP)",
            "Busulfan (MYLERAN)",
            "Chlorambucil (LEUKERAN)",
            "Ethanol (alcohol)",
            "Cigarette smoke",
            "Polychlorinated biphenyls (PCBs)",
            "Tetrachloroethylene (PCE)",
            "Vinyl chloride",
            "Lead",
            "Methylmercury",
            "Thimerosal",
            "Deoxycholic acid",
            "Trypan blue",
            "Rubella virus",
            "Varicella-zoster virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thalidomide (THD)",
                "Isotretinoin (13-cis retinoic acid)",
                "Valproic acid (VPA)",
                "Warfarin (Coumadin)",
                "Phenytoin (PHT)",
                "Trimethadione (TMO)",
                "Retinoic acid",
                "Diethylstilbestrol (DES)",
                "Methotrexate (MTX)",
                "Cyclophosphamide (CP)",
                "Busulfan (MYLERAN)",
                "Chlorambucil (LEUKERAN)",
                "Ethanol (alcohol)",
                "Cigarette smoke",
                "Polychlorinated biphenyls (PCBs)",
                "Tetrachloroethylene (PCE)",
                "Vinyl chloride",
                "Lead",
                "Methylmercury",
                "Thimerosal",
                "Deoxycholic acid",
                "Trypan blue",
                "Rubella virus",
                "Varicella-zoster virus"
            ],
            "mismatches": [],
            "true_referents": [
                "Busulfan (MYLERAN)",
                "Chlorambucil (LEUKERAN)",
                "Cigarette smoke",
                "Cyclophosphamide (CP)",
                "Deoxycholic acid",
                "Diethylstilbestrol (DES)",
                "Ethanol (alcohol)",
                "Isotretinoin (13-cis retinoic acid)",
                "Lead",
                "Methotrexate (MTX)",
                "Methylmercury",
                "Phenytoin (PHT)",
                "Polychlorinated biphenyls (PCBs)",
                "Retinoic acid",
                "Rubella virus",
                "Tetrachloroethylene (PCE)",
                "Thalidomide (THD)",
                "Thimerosal",
                "Trimethadione (TMO)",
                "Trypan blue",
                "Valproic acid (VPA)",
                "Varicella-zoster virus",
                "Vinyl chloride",
                "Warfarin (Coumadin)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "teratogen",
        "domain": "medicine",
        "response": [
            {
                "Referent": "alcohol",
                "Canonical name": "ethanol"
            },
            {
                "Referent": "tobacco",
                "Canonical name": "nicotine"
            },
            {
                "Referent": "marijuana",
                "Canonical name": "tetrahydrocannabinol"
            },
            {
                "Referent": "opioids",
                "Canonical name": "morphine"
            },
            {
                "Referent": "caffeine",
                "Canonical name": "caffeine"
            },
            {
                "Referent": "anticonvulsants",
                "Canonical name": "valproic acid"
            },
            {
                "Referent": "antipsychotics",
                "Canonical name": "risperidone"
            },
            {
                "Referent": "antidepressants",
                "Canonical name": "fluoxetine"
            },
            {
                "Referent": "antihistamines",
                "Canonical name": "diphenhydramine"
            },
            {
                "Referent": "antihypertensives",
                "Canonical name": "methyldopa"
            },
            {
                "Referent": "anticoagulants",
                "Canonical name": "warfarin"
            },
            {
                "Referent": "antidiabetics",
                "Canonical name": "metformin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "penicillin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "cephalosporin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "amoxicillin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "tetracycline"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "vancomycin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "gentamicin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "clindamycin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "azithromycin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "levofloxacin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "ciprofloxacin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "ofloxacin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "amoxicillin-clavulanate"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "cefuroxime"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "cephalexin"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "trimethoprim-sulfamethoxazole"
            },
            {
                "Referent": "antibiotics",
                "Canonical name": "clindamycin-phosphate"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethanol",
                "nicotine",
                "tetrahydrocannabinol",
                "morphine",
                "caffeine",
                "valproic acid",
                "risperidone",
                "fluoxetine",
                "diphenhydramine",
                "methyldopa",
                "warfarin",
                "metformin",
                "penicillin",
                "cephalosporin",
                "amoxicillin",
                "tetracycline",
                "vancomycin",
                "gentamicin",
                "clindamycin",
                "azithromycin",
                "levofloxacin",
                "ciprofloxacin",
                "ofloxacin",
                "amoxicillin-clavulanate"
            ],
            "mismatches": [
                "cefuroxime",
                "cephalexin",
                "trimethoprim-sulfamethoxazole",
                "clindamycin-phosphate"
            ],
            "true_referents": [
                "{\"Referent\": \"alcohol\", \"Canonical name\": \"ethanol\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"amoxicillin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"amoxicillin-clavulanate\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"azithromycin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"cephalosporin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"ciprofloxacin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"clindamycin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"gentamicin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"levofloxacin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"ofloxacin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"penicillin\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"tetracycline\"}",
                "{\"Referent\": \"antibiotics\", \"Canonical name\": \"vancomycin\"}",
                "{\"Referent\": \"anticoagulants\", \"Canonical name\": \"warfarin\"}",
                "{\"Referent\": \"anticonvulsants\", \"Canonical name\": \"valproic acid\"}",
                "{\"Referent\": \"antidepressants\", \"Canonical name\": \"fluoxetine\"}",
                "{\"Referent\": \"antidiabetics\", \"Canonical name\": \"metformin\"}",
                "{\"Referent\": \"antihistamines\", \"Canonical name\": \"diphenhydramine\"}",
                "{\"Referent\": \"antihypertensives\", \"Canonical name\": \"methyldopa\"}",
                "{\"Referent\": \"antipsychotics\", \"Canonical name\": \"risperidone\"}",
                "{\"Referent\": \"caffeine\", \"Canonical name\": \"caffeine\"}",
                "{\"Referent\": \"marijuana\", \"Canonical name\": \"tetrahydrocannabinol\"}",
                "{\"Referent\": \"opioids\", \"Canonical name\": \"morphine\"}",
                "{\"Referent\": \"tobacco\", \"Canonical name\": \"nicotine\"}"
            ],
            "TP": 24,
            "FP": 4,
            "FN": 0
        }
    }
]