[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Methanol",
            "Ethanol",
            "Mercury (Hg)",
            "Lead (Pb)",
            "Arsenic (As)",
            "Manganese (Mn)",
            "Cyanide (CN-)",
            "Amyloid-beta (A\u03b2)",
            "Lithium (Li+)",
            "Thallium (Tl+)",
            "Cocaine",
            "Methamphetamine",
            "Lindane",
            "DDT",
            "Paraquat",
            "Rotenone",
            "MPTP",
            "Organophosphates (OPs)",
            "Carbon monoxide (CO)",
            "Methylmercury (MeHg)",
            "Alzheimer's disease-related tau protein"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Ethanol",
                "Methamphetamine",
                "Carbon monoxide"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Methanol",
                "Mercury (Hg)",
                "Lead (Pb)",
                "Arsenic (As)",
                "Manganese (Mn)",
                "Cyanide (CN-)",
                "Amyloid-beta (A\u03b2)",
                "Lithium (Li+)",
                "Thallium (Tl+)",
                "Cocaine",
                "Lindane",
                "DDT",
                "Paraquat",
                "Rotenone",
                "MPTP",
                "Organophosphates (OPs)",
                "Methylmercury (MeHg)",
                "Alzheimer's disease-related tau protein"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-dinotefuran",
                "(E)-nitenpyram",
                "(R)-sarin",
                "(S)-nicotine",
                "(S)-sarin",
                "3,4-methylenedioxymethamphetamine",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "L-dopa",
                "VX nerve agent",
                "acromelic acid A",
                "acrylamide",
                "allyl cyanide",
                "ammonia",
                "amphetamine",
                "carbon monoxide",
                "ethanol",
                "hexane",
                "hydrogen peroxide",
                "ketamine",
                "lanthanum trichloride",
                "lead(0)",
                "lead-207",
                "mercury(0)",
                "methamphetamine",
                "neosaxitoxin",
                "phencyclidine",
                "potassium cyanide",
                "salsolinol",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "thallium(I) acetate",
                "toluene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Alpha-latrotoxin",
            "Tetanospasmin",
            "Alpha-bungarotoxin",
            "Neurotoxic shellfish poisoning (NSP) toxins",
            "Curare",
            "Ciguatoxin",
            "Brevetoxin",
            "Cobra venom factor",
            "Conotoxin",
            "Ricin",
            "Strychnine",
            "Tubocurarine",
            "Batrachotoxin",
            "Vesamicol",
            "Bungarotoxin",
            "Tityustoxin",
            "\u03b2-Methylamino-L-alanine (BMAA)",
            "\u03b2-N-methylamino-L-alanine (BMAA)",
            "Conantokin G",
            "Palytoxin",
            "Mu-conotoxin",
            "Domoic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Batrachotoxin",
                "\u03b2-Methylamino-L-alanine (BMAA)",
                "\u03b2-N-methylamino-L-alanine (BMAA)"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Alpha-latrotoxin",
                "Tetanospasmin",
                "Alpha-bungarotoxin",
                "Neurotoxic shellfish poisoning (NSP) toxins",
                "Curare",
                "Ciguatoxin",
                "Brevetoxin",
                "Cobra venom factor",
                "Conotoxin",
                "Ricin",
                "Strychnine",
                "Tubocurarine",
                "Vesamicol",
                "Bungarotoxin",
                "Tityustoxin",
                "Conantokin G",
                "Palytoxin",
                "Mu-conotoxin",
                "Domoic acid"
            ],
            "true_referents": [
                "(E)-dinotefuran",
                "3,4-methylenedioxymethamphetamine",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Gonyautoxin 1",
                "Gonyautoxin 2",
                "Gonyautoxin 5",
                "L-BMAA",
                "L-dopa",
                "VX nerve agent",
                "acrylamide",
                "aetokthonotoxin",
                "bicuculline",
                "dextromethorphan",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "rac-gabaculine",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "thiamethoxam",
                "tinyatoxin"
            ],
            "TP": 5,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Botox (Botulinum Toxin)",
            "Conotoxin",
            "Ciguatoxin",
            "Batrachotoxin",
            "Anatoxin-a",
            "Domoic Acid",
            "Anatoxin-a(s)",
            "A\u03b242",
            "\u03b1-Latrotoxin",
            "\u03b2-N-methylamino-L-alanine (BMAA)",
            "\u03b2-Amyloid",
            "Aconitine",
            "Aminolevulinic Acid",
            "Arachidonic Acid",
            "Acetylcholine Esterase Inhibitors",
            "Bungarotoxin",
            "Curare",
            "Dioxin",
            "Doxorubicin",
            "Ethanol",
            "Lead",
            "Mercury"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Batrachotoxin",
                "Domoic Acid",
                "Ethanol"
            ],
            "mismatches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Botox (Botulinum Toxin)",
                "Conotoxin",
                "Ciguatoxin",
                "Anatoxin-a",
                "Anatoxin-a(s)",
                "A\u03b242",
                "\u03b1-Latrotoxin",
                "\u03b2-N-methylamino-L-alanine (BMAA)",
                "\u03b2-Amyloid",
                "Aconitine",
                "Aminolevulinic Acid",
                "Arachidonic Acid",
                "Acetylcholine Esterase Inhibitors",
                "Bungarotoxin",
                "Curare",
                "Dioxin",
                "Doxorubicin",
                "Lead",
                "Mercury"
            ],
            "true_referents": [
                "2,5-hexanedione",
                "3,4-methylenedioxymethamphetamine",
                "3-nitropropanoic acid",
                "4-hydroxybutyric acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-BMAA",
                "L-dopa",
                "acromelic acid A",
                "acrylamide",
                "aetokthonotoxin",
                "ammonia",
                "dexormaplatin",
                "diethyl phthalate",
                "domoic acid",
                "esketamine",
                "ethanol",
                "hexane",
                "ketamine",
                "lead(0)",
                "lead-207",
                "leptophos",
                "mercury(0)",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "ormaplatin",
                "paralytic shellfish toxin",
                "phencyclidine",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Ciguatoxin (CTX)",
            "Brevetoxin (BTX)",
            "Domoic acid (DA)",
            "Lead (Pb)",
            "Mercury (Hg)",
            "Arsenic (As)",
            "Acrylamide (ACR)",
            "Organophosphates (OPs)",
            "Pyrethroids (PYR)",
            "Cyanide (CN-)",
            "Methanol (MeOH)",
            "Ethanol (EtOH) - chronic exposure",
            "Glutamate - excess levels",
            "Gamma-aminobutyric acid (GABA) antagonists",
            "Diethylene glycol (DEG)",
            "Ethylene glycol (EG)",
            "Aluminum (Al)",
            "Manganese (Mn)",
            "Carbon monoxide (CO)",
            "Mycotoxins (e.g., Aflatoxin B1)",
            "\u03b2-N-methylamino-L-alanine (BMAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Domoic acid (DA)",
                "Acrylamide (ACR)",
                "Ethanol (EtOH) - chronic exposure",
                "Carbon monoxide (CO)",
                "\u03b2-N-methylamino-L-alanine (BMAA)"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Ciguatoxin (CTX)",
                "Brevetoxin (BTX)",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Arsenic (As)",
                "Organophosphates (OPs)",
                "Pyrethroids (PYR)",
                "Cyanide (CN-)",
                "Methanol (MeOH)",
                "Glutamate - excess levels",
                "Gamma-aminobutyric acid (GABA) antagonists",
                "Diethylene glycol (DEG)",
                "Ethylene glycol (EG)",
                "Aluminum (Al)",
                "Manganese (Mn)",
                "Mycotoxins (e.g., Aflatoxin B1)"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-imidacloprid",
                "(R)-sarin",
                "(S)-nicotine",
                "(S)-sarin",
                "3,4-methylenedioxymethamphetamine",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "L-BMAA",
                "L-dopa",
                "acrylamide",
                "allyl cyanide",
                "ammonia",
                "butane-1,4-diol",
                "carbon monoxide",
                "decabromodiphenyl ether",
                "diethyl phthalate",
                "domoic acid",
                "ethanol",
                "gamma-butyrolactone",
                "hexane",
                "hydrogen peroxide",
                "ketamine",
                "lead(0)",
                "lead-207",
                "mercury(0)",
                "neosaxitoxin",
                "phencyclidine",
                "potassium cyanide",
                "resiniferatoxin",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Conotoxin",
            "\u03b1-Bungarotoxin",
            "\u03b2-Bungarotoxin",
            "Domoic acid",
            "Batrachotoxin",
            "Anatoxin-a",
            "Anatoxin-a(s)",
            "Palytoxin",
            "Brevetoxin",
            "Ciguatoxin",
            "Aflatoxin",
            "Lead (Pb)",
            "Mercury (Hg)",
            "Arsenic (As)",
            "Methylmercury",
            "Dieldrin",
            "Chlorpyrifos",
            "Paraquat",
            "Strychnine",
            "Curare",
            "Aconitine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Domoic acid",
                "Batrachotoxin"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Conotoxin",
                "\u03b1-Bungarotoxin",
                "\u03b2-Bungarotoxin",
                "Anatoxin-a",
                "Anatoxin-a(s)",
                "Palytoxin",
                "Brevetoxin",
                "Ciguatoxin",
                "Aflatoxin",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Arsenic (As)",
                "Methylmercury",
                "Dieldrin",
                "Chlorpyrifos",
                "Paraquat",
                "Strychnine",
                "Curare",
                "Aconitine"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(R)-sarin",
                "(S)-nicotine",
                "(S)-sarin",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "acromelic acid A",
                "acrylamide",
                "ammonia",
                "diethyl phthalate",
                "domoic acid",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "lead(0)",
                "lead-207",
                "mercury(0)",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "resiniferatoxin",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Domoic acid (DA)",
            "Aconitine",
            "Curare (d-tubocurarine)",
            "Ricin (from Ricinus communis)",
            "Ciguatoxin",
            "Palytoxin",
            "Phalloidin",
            "Batrachotoxin",
            "Conotoxin",
            "Venom from black widow spider (Latrotoxin)",
            "Venom from king cobra (Neurotoxin)",
            "Maitotoxin",
            "Abrin (from Abrus precatorius)",
            "Gymnodimine",
            "Okadaic acid",
            "Vespa mandarinia venom",
            "Naja naja venom (cobra venom)",
            "Solenodonte toxin",
            "Krait venom (Bungarus spp.)",
            "Poison hemlock (Conium maculatum)",
            "Crotaline venom (pit viper venom)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Domoic acid",
                "Batrachotoxin"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Aconitine",
                "Curare (d-tubocurarine)",
                "Ricin (from Ricinus communis)",
                "Ciguatoxin",
                "Palytoxin",
                "Phalloidin",
                "Conotoxin",
                "Venom from black widow spider (Latrotoxin)",
                "Venom from king cobra (Neurotoxin)",
                "Maitotoxin",
                "Abrin (from Abrus precatorius)",
                "Gymnodimine",
                "Okadaic acid",
                "Vespa mandarinia venom",
                "Naja naja venom (cobra venom)",
                "Solenodonte toxin",
                "Krait venom (Bungarus spp.)",
                "Poison hemlock (Conium maculatum)",
                "Crotaline venom (pit viper venom)"
            ],
            "true_referents": [
                "(E)-imidacloprid",
                "(E)-nitenpyram",
                "(R)-ketamine",
                "(R)-sarin",
                "(Z)-thiacloprid",
                "2,5-hexanedione",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-dopa",
                "VX nerve agent",
                "acromelic acid A",
                "acrylamide",
                "anisatin",
                "decabromodiphenyl ether",
                "domoic acid",
                "endrin",
                "gamma-butyrolactone",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "oltipraz",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "potassium cyanide",
                "resiniferatoxin",
                "sarin",
                "saxitoxin",
                "tetrodotoxin"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Bungarotoxin",
            "Saxitoxin (STX)",
            "Tetanus toxin (TeNT)",
            "Ciguatoxin (CTX)",
            "Domoic acid (DA)",
            "Batrachotoxin (BTX)",
            "Palytoxin (PTX)",
            "Anatoxin-a (ATX)",
            "Conotoxin",
            "Dendrotoxin",
            "Alpha-Amanitin",
            "Beta-Bungarotoxin",
            "Kainic acid (KA)",
            "Latrotoxin",
            "Maitotoxin (MTX)",
            "Microcystin (MC)",
            "Ouabain",
            "Picrotoxin (PTC)",
            "Strychnine",
            "Veratridine",
            "Zetekitoxin AB",
            "Alpha-neurotoxin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Domoic acid",
                "Batrachotoxin"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Bungarotoxin",
                "Tetanus toxin (TeNT)",
                "Ciguatoxin (CTX)",
                "Palytoxin (PTX)",
                "Anatoxin-a (ATX)",
                "Conotoxin",
                "Dendrotoxin",
                "Alpha-Amanitin",
                "Beta-Bungarotoxin",
                "Kainic acid (KA)",
                "Latrotoxin",
                "Maitotoxin (MTX)",
                "Microcystin (MC)",
                "Ouabain",
                "Picrotoxin (PTC)",
                "Strychnine",
                "Veratridine",
                "Zetekitoxin AB",
                "Alpha-neurotoxin"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(R)-ketamine",
                "3-nitropropanoic acid",
                "4-hydroxybutyric acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "T-2 toxin",
                "VX nerve agent",
                "acrylamide",
                "aetokthonotoxin",
                "amphetamine",
                "anisatin",
                "decabromodiphenyl ether",
                "decarbamoylsaxitoxin",
                "domoic acid",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "phencyclidine",
                "potassium cyanide",
                "resiniferatoxin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin",
            "Botulinum toxin (BoNT)",
            "Cyanide",
            "Lead",
            "Mercury",
            "Venom of black widow spider",
            "Sarin",
            "Methanol",
            "Ethanol",
            "Paraquat",
            "Carbon monoxide",
            "Methylmercury",
            "Botox",
            "Ricin",
            "Aconitine",
            "Chlorpyrifos",
            "Ethylene glycol",
            "Anatoxin-a",
            "MPTP",
            "Domoic acid",
            "Ciguatoxin",
            "Saxitoxin",
            "Coniine",
            "Strychnine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Sarin",
                "Ethanol",
                "Carbon monoxide",
                "Domoic acid",
                "Saxitoxin"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Cyanide",
                "Lead",
                "Mercury",
                "Venom of black widow spider",
                "Methanol",
                "Paraquat",
                "Methylmercury",
                "Botox",
                "Ricin",
                "Aconitine",
                "Chlorpyrifos",
                "Ethylene glycol",
                "Anatoxin-a",
                "MPTP",
                "Ciguatoxin",
                "Coniine",
                "Strychnine"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(R)-sarin",
                "(S)-nicotine",
                "(S)-sarin",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-dopa",
                "VX nerve agent",
                "acromelic acid A",
                "acrylamide",
                "allyl cyanide",
                "ammonia",
                "carbon monoxide",
                "domoic acid",
                "ethanol",
                "hexane",
                "hydrogen peroxide",
                "ketamine",
                "lead(0)",
                "lead-207",
                "leptophos",
                "maneb",
                "mercury(0)",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "paralytic shellfish toxin",
                "phencyclidine",
                "potassium cyanide",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Botulinum toxin (BoNT)",
            "Domoic acid",
            "Batrachotoxin",
            "\u03b1-Bungarotoxin",
            "\u03b2-Bungarotoxin",
            "\u03c9-Conotoxin GVIA",
            "Palytoxin",
            "Strychnine",
            "Tetanus toxin",
            "Veratridine",
            "Methylmercury",
            "Lead acetate",
            "Dieldrin",
            "Apitoxin (Bee venom)",
            "BMAA",
            "Maitotoxin",
            "Brevetoxin",
            "Ciguatoxin",
            "Anatoxin-a",
            "Okadaic acid",
            "\u03b1-Latrotoxin",
            "Scarcity toxin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Domoic acid",
                "Batrachotoxin",
                "omega-conotoxin GVIA"
            ],
            "mismatches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Botulinum toxin (BoNT)",
                "\u03b1-Bungarotoxin",
                "\u03b2-Bungarotoxin",
                "Palytoxin",
                "Strychnine",
                "Tetanus toxin",
                "Veratridine",
                "Methylmercury",
                "Lead acetate",
                "Dieldrin",
                "Apitoxin (Bee venom)",
                "BMAA",
                "Maitotoxin",
                "Brevetoxin",
                "Ciguatoxin",
                "Anatoxin-a",
                "Okadaic acid",
                "\u03b1-Latrotoxin",
                "Scarcity toxin"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-BMAA",
                "L-dopa",
                "T-2 toxin",
                "VX nerve agent",
                "acromelic acid A",
                "acrylamide",
                "aetokthonotoxin",
                "ammonia",
                "amphetamine",
                "diethyl phthalate",
                "domoic acid",
                "gamma-butyrolactone",
                "hydrogen peroxide",
                "lead(0)",
                "lead-207",
                "mercury(0)",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "resiniferatoxin",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "thallium(I) acetate",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Botulinum toxin (BoNT)",
            "Saxitoxin (STX)",
            "\u03b1-Bungarotoxin",
            "Domoic acid",
            "Sarin (GB)",
            "VX nerve agent",
            "Lead (Pb)",
            "Mercury (Hg)",
            "Arsenic (As)",
            "Organophosphates",
            "Ethanol",
            "Methanol",
            "Acrylamide",
            "Manganese (Mn)",
            "Chlorpyrifos",
            "Brevetoxin",
            "Ciguatoxin",
            "Anatoxin-a",
            "6-Hydroxydopamine (6-OHDA)",
            "MPTP (1-methyl-4-phenyl-1,2,3,6-tetrahydropyridine)",
            "Kainic acid",
            "Rotenone",
            "Paraquat"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Domoic acid",
                "Sarin (GB)",
                "VX nerve agent",
                "Ethanol",
                "Acrylamide",
                "1-methyl-4-phenyl-1,2,3,6-tetrahydropyridine"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "\u03b1-Bungarotoxin",
                "Lead (Pb)",
                "Mercury (Hg)",
                "Arsenic (As)",
                "Organophosphates",
                "Methanol",
                "Manganese (Mn)",
                "Chlorpyrifos",
                "Brevetoxin",
                "Ciguatoxin",
                "Anatoxin-a",
                "6-Hydroxydopamine (6-OHDA)",
                "Kainic acid",
                "Rotenone",
                "Paraquat"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-dinotefuran",
                "(R)-sarin",
                "(S)-nicotine",
                "(S)-sarin",
                "1-methyl-4-phenyl-1,2,3,6-tetrahydropyridine",
                "3,4-methylenedioxymethamphetamine",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-dopa",
                "VX nerve agent",
                "acrylamide",
                "ammonia",
                "diethyl phthalate",
                "domoic acid",
                "esketamine",
                "ethanol",
                "hexane",
                "hydrogen peroxide",
                "ketamine",
                "lead(0)",
                "lead-207",
                "mercury(0)",
                "neosaxitoxin",
                "omega-conotoxin GVIA",
                "oxidopamine",
                "paralytic shellfish toxin",
                "phencyclidine",
                "salsolinol",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Anatoxin-a",
            "Homoanatoxin-a",
            "Anatoxin-a(S)",
            "Tetanus toxin (TeNT)",
            "\u03b2-N-methylamino-L-alanine (BMAA)",
            "Domoic acid",
            "Kainic acid",
            "Acromelic acid A",
            "\u03b1-Conotoxin GI",
            "\u03a9-Conotoxin MVIIA (Ziconotide)",
            "\u03b1-Bungarotoxin",
            "Ibotenic acid",
            "Muscimol",
            "Quinolinic acid",
            "Manganese",
            "Mercury (Hg)",
            "Lead (Pb)",
            "Methylmercury (MeHg)",
            "Trimethyltin (TMT)",
            "1-Methyl-4-phenyl-1,2,3,6-tetrahydropyridine (MPTP)",
            "6-Hydroxydopamine (6-OHDA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Ibotenic acid",
                "Domoic acid",
                "Acromelic acid A",
                "Trimethyltin (TMT)",
                "1-Methyl-4-phenyl-1,2,3,6-tetrahydropyridine (MPTP)"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Anatoxin-a",
                "Homoanatoxin-a",
                "Anatoxin-a(S)",
                "Tetanus toxin (TeNT)",
                "\u03b2-N-methylamino-L-alanine (BMAA)",
                "Kainic acid",
                "\u03b1-Conotoxin GI",
                "\u03a9-Conotoxin MVIIA (Ziconotide)",
                "\u03b1-Bungarotoxin",
                "Muscimol",
                "Quinolinic acid",
                "Manganese",
                "Mercury (Hg)",
                "Lead (Pb)",
                "Methylmercury (MeHg)",
                "6-Hydroxydopamine (6-OHDA)"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(S)-nicotine",
                "(S)-sarin",
                "(Z)-thiacloprid",
                "1,2,3-trimethylbenzene",
                "1,2,4-trimethylbenzene",
                "1-methyl-4-phenyl-1,2,3,6-tetrahydropyridine",
                "3,4-methylenedioxymethamphetamine",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-BMAA",
                "L-dopa",
                "T-2 toxin",
                "VX nerve agent",
                "acromelic acid A",
                "acrylamide",
                "ammonia",
                "carbon monoxide",
                "domoic acid",
                "esketamine",
                "gamma-butyrolactone",
                "ketamine",
                "lead(0)",
                "lead-207",
                "mercury(0)",
                "methamphetamine",
                "neosaxitoxin",
                "omega-conotoxin GVIA",
                "oxidopamine",
                "phencyclidine",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin",
                "trimethyltin"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Botulinum Toxin (BoNT)",
            "Saxitoxin (STX)",
            "Batrachotoxin (BTX)",
            "Tetanus Toxin (TeNT)",
            "Ricin (RCA60)",
            "Anatoxin-a (ATX)",
            "Ciguatoxin (CTX)",
            "Maitotoxin (MTX)",
            "Aflatoxin B1 (AFB1)",
            "Domoic Acid (DA)",
            "Brevetoxin (PbTx)",
            "Palytoxin (PlTX)",
            "Amatoxin (\u03b1-Amanitin)",
            "Ouabain (G-Strophanthin)",
            "Ibotenic Acid (IBO)",
            "Lyngbyatoxin-a (LTX-a)",
            "Cylindrospermopsin (CYN)",
            "Microcystin-LR (MC-LR)",
            "Anatoxin-a(S) (AnTX-a(S))",
            "Nodularin (NOD)",
            "Bryostatins (Bryo)",
            "Grayanotoxin (GTX)",
            "Cycad Neurotoxin (BMAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin",
                "Domoic Acid",
                "Ibotenic Acid"
            ],
            "mismatches": [
                "Botulinum Toxin",
                "Tetanus Toxin",
                "Ricin",
                "Anatoxin-a",
                "Ciguatoxin",
                "Maitotoxin",
                "Aflatoxin B1",
                "Brevetoxin",
                "Palytoxin",
                "Amatoxin",
                "Ouabain",
                "Lyngbyatoxin-a",
                "Cylindrospermopsin",
                "Microcystin-LR",
                "Anatoxin-a(S)",
                "Nodularin",
                "Bryostatins",
                "Grayanotoxin",
                "Cycad Neurotoxin"
            ],
            "true_referents": [
                "(E)-imidacloprid",
                "(E)-nitenpyram",
                "(E)-thiamethoxam",
                "(R)-ketamine",
                "(R)-sarin",
                "(Z)-thiacloprid",
                "1,3-dinitrobenzene",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Gonyautoxin 1",
                "Gonyautoxin 5",
                "Ibotenic acid",
                "L-BMAA",
                "T-2 toxin",
                "VX nerve agent",
                "acrylamide",
                "allyl cyanide",
                "anisatin",
                "decabromodiphenyl ether",
                "decarbamoylsaxitoxin",
                "diethyl phthalate",
                "domoic acid",
                "gamma-butyrolactone",
                "neosaxitoxin",
                "potassium cyanide",
                "resiniferatoxin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum Toxin (BoNT)",
            "Tetrodontoxin (TTX)",
            "Saxitoxin (STX)",
            "Domoic Acid (DA)",
            "Ciguatoxin (CTX)",
            "Microcystin (MC)",
            "Anatoxin-a (ANA-a)",
            "Brevetoxin (PbTx)",
            "Maitotoxin (MTX)",
            "Palytoxin (PLTX)",
            "Batrachotoxin (BTX)",
            "Aconitine",
            "Cicutoxin",
            "Soman (GD)",
            "Tabun (GA)",
            "Sarin (GB)",
            "VX",
            "Ricin",
            "Aflatoxin",
            "Dioxin (TCDD)",
            "Methyl Mercury (MeHg)",
            "Lead (Pb)",
            "Arsenic (As)",
            "Cadmium (Cd)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Saxitoxin",
                "Domoic Acid",
                "Batrachotoxin",
                "Sarin"
            ],
            "mismatches": [
                "Botulinum Toxin (BoNT)",
                "Tetrodontoxin (TTX)",
                "Ciguatoxin (CTX)",
                "Microcystin (MC)",
                "Anatoxin-a (ANA-a)",
                "Brevetoxin (PbTx)",
                "Maitotoxin (MTX)",
                "Palytoxin (PLTX)",
                "Aconitine",
                "Cicutoxin",
                "Soman (GD)",
                "Tabun (GA)",
                "VX",
                "Ricin",
                "Aflatoxin",
                "Dioxin (TCDD)",
                "Methyl Mercury (MeHg)",
                "Lead (Pb)",
                "Arsenic (As)",
                "Cadmium (Cd)"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-dinotefuran",
                "(E)-imidacloprid",
                "(R)-sarin",
                "(S)-nicotine",
                "(S)-sarin",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "NSTX-3",
                "VX nerve agent",
                "acromelic acid A",
                "acrylamide",
                "ammonia",
                "decabromodiphenyl ether",
                "diethyl phthalate",
                "dinotefuran",
                "domoic acid",
                "hexane",
                "ketamine",
                "lead(0)",
                "lead-207",
                "neosaxitoxin",
                "nicotine",
                "oltipraz",
                "potassium cyanide",
                "resiniferatoxin",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "thiamethoxam",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Batrachotoxin (BTX)",
            "Anatoxin-a",
            "Saxitoxin (STX)",
            "Tetrahydrocannabinol (THC)",
            "Bungarotoxin (BTX)",
            "Alpha-bungarotoxin",
            "Ciguatoxin",
            "Maitotoxin",
            "Domoic acid",
            "Okadaic acid",
            "Microcystin-LR",
            "Brevetoxin",
            "Palytoxin",
            "Azaspiracid",
            "Oleandrin",
            "Grayanotoxin",
            "Amanitin",
            "Phalloidin",
            "Ricin",
            "Shiga toxin",
            "Cholera toxin",
            "Botulinum toxin (BTX)",
            "Tetanus toxin",
            "Clostridium difficile toxin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Batrachotoxin",
                "Saxitoxin",
                "Domoic acid"
            ],
            "mismatches": [
                "Anatoxin-a",
                "Tetrahydrocannabinol",
                "Bungarotoxin",
                "Alpha-bungarotoxin",
                "Ciguatoxin",
                "Maitotoxin",
                "Okadaic acid",
                "Microcystin-LR",
                "Brevetoxin",
                "Palytoxin",
                "Azaspiracid",
                "Oleandrin",
                "Grayanotoxin",
                "Amanitin",
                "Phalloidin",
                "Ricin",
                "Shiga toxin",
                "Cholera toxin",
                "Botulinum toxin",
                "Tetanus toxin",
                "Clostridium difficile toxin"
            ],
            "true_referents": [
                "(E)-imidacloprid",
                "(Z)-acetamiprid",
                "(Z)-imidacloprid",
                "(Z)-thiacloprid",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-BMAA",
                "L-dopa",
                "T-2 toxin",
                "VX nerve agent",
                "acrylamide",
                "anisatin",
                "clothianidin",
                "decarbamoylsaxitoxin",
                "domoic acid",
                "endrin",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "oltipraz",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "resiniferatoxin",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin",
                "toluene"
            ],
            "TP": 4,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Botulinum toxin (BoNT)",
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Domoic acid",
            "Anatoxin-a",
            "Homobatrachotoxin",
            "Tetanospasmin",
            "Batrachotoxin",
            "Cholera toxin",
            "Veratridine",
            "Cycloheximide",
            "Rotenone",
            "1-Methyl-4-phenylpyridinium (MPP+)",
            "6-Hydroxydopamine",
            "Oxidopamine",
            "Colchicine",
            "Podophyllotoxin",
            "Taxol",
            "Vinblastine",
            "Vincristine",
            "Ricin",
            "Abrin",
            "Modeccin",
            "Bulgin",
            "Volensin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Domoic acid",
                "Batrachotoxin",
                "1-Methyl-4-phenylpyridinium",
                "Oxidopamine"
            ],
            "mismatches": [
                "Botulinum toxin (BoNT)",
                "Anatoxin-a",
                "Homobatrachotoxin",
                "Tetanospasmin",
                "Cholera toxin",
                "Veratridine",
                "Cycloheximide",
                "Rotenone",
                "6-Hydroxydopamine",
                "Colchicine",
                "Podophyllotoxin",
                "Taxol",
                "Vinblastine",
                "Vincristine",
                "Ricin",
                "Abrin",
                "Modeccin",
                "Bulgin",
                "Volensin"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-dinotefuran",
                "(E)-imidacloprid",
                "(E)-nitenpyram",
                "(Z)-thiacloprid",
                "1-methyl-4-phenyl-1,2,3,6-tetrahydropyridine",
                "3,4-methylenedioxymethamphetamine",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "L-dopa",
                "N-methyl-4-phenylpyridinium",
                "T-2 toxin",
                "VX nerve agent",
                "acrylamide",
                "amphetamine",
                "anisatin",
                "cycloxaprid",
                "dexormaplatin",
                "domoic acid",
                "endrin",
                "maneb",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "oltipraz",
                "ormaplatin",
                "oxidopamine",
                "phencyclidine",
                "salsolinol",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "thiamethoxam",
                "tinyatoxin"
            ],
            "TP": 6,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            },
            {
                "Referent": "Alpha-latrotoxin",
                "Canonical name": "Alpha-latrotoxin"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Alpha-latrotoxin"
            ],
            "true_referents": [
                "(S)-sarin",
                "neosaxitoxin"
            ],
            "TP": 0,
            "FP": 1,
            "FN": 0
        }
    }
]