[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "tetrodotoxin (TTX)",
            "saxitoxin (STX)",
            "conotoxins",
            "veratridine",
            "batrachotoxin",
            "bungarotoxin",
            "tacrine",
            "strychnine",
            "curare",
            "tetraethylammonium (TEA)",
            "lidocaine",
            "tetracaine",
            "procaine",
            "bupivacaine",
            "mepivacaine",
            "prilocaine",
            "tocainide",
            "quinine",
            "chloroquine",
            "pentobarbital",
            "gabapentin",
            "lidamycin",
            "chlorpromazine",
            "diltiazem"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "batrachotoxin"
            ],
            "mismatches": [
                "tetrodotoxin (TTX)",
                "saxitoxin (STX)",
                "conotoxins",
                "veratridine",
                "bungarotoxin",
                "tacrine",
                "strychnine",
                "curare",
                "tetraethylammonium (TEA)",
                "lidocaine",
                "tetracaine",
                "procaine",
                "bupivacaine",
                "mepivacaine",
                "prilocaine",
                "tocainide",
                "quinine",
                "chloroquine",
                "pentobarbital",
                "gabapentin",
                "lidamycin",
                "chlorpromazine",
                "diltiazem"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-dinotefuran",
                "(E)-thiamethoxam",
                "(R)-ketamine",
                "(R)-sarin",
                "1,2,3-trimethylbenzene",
                "1,2,4-trimethylbenzene",
                "Batrachotoxin",
                "acrylamide",
                "amphetamine",
                "bicuculline",
                "dextromethorphan",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "phencyclidine",
                "sarin",
                "saxitoxin",
                "tetrodotoxin"
            ],
            "TP": 1,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Batrachotoxin (BTX)",
            "Conotoxin",
            "Bungarotoxin",
            "Curare",
            "Alpha-bungarotoxin",
            "Dendrotoxin",
            "Alpha-latrotoxin",
            "Bungarotoxin (Alpha)",
            "Omega-conotoxin",
            "Conantokin-G",
            "Strychnine",
            "Tubocurarine",
            "Botulinum toxin",
            "Alpha-bungarotoxin (from Bungarus multicinctus)",
            "Vesamicol",
            "Mambalgin",
            "Lanthionine",
            "Chlorotoxin",
            "Agitoxin",
            "RgIA",
            "Omega-agatoxin",
            "Glycine receptor antagonist"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin"
            ],
            "mismatches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Batrachotoxin (BTX)",
                "Conotoxin",
                "Bungarotoxin",
                "Curare",
                "Alpha-bungarotoxin",
                "Dendrotoxin",
                "Alpha-latrotoxin",
                "Bungarotoxin (Alpha)",
                "Omega-conotoxin",
                "Conantokin-G",
                "Strychnine",
                "Tubocurarine",
                "Botulinum toxin",
                "Alpha-bungarotoxin (from Bungarus multicinctus)",
                "Vesamicol",
                "Mambalgin",
                "Lanthionine",
                "Chlorotoxin",
                "Agitoxin",
                "RgIA",
                "Omega-agatoxin",
                "Glycine receptor antagonist"
            ],
            "true_referents": [
                "(R)-ketamine",
                "(R)-sarin",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "Batrachotoxin",
                "Gonyautoxin 1",
                "Gonyautoxin 2",
                "Gonyautoxin 5",
                "L-BMAA",
                "L-dopa",
                "VX nerve agent",
                "aetokthonotoxin",
                "amphetamine",
                "bicuculline",
                "clothianidin",
                "decarbamoylsaxitoxin",
                "dextromethorphan",
                "endrin",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "maneb",
                "mercury(0)",
                "methamphetamine",
                "neosaxitoxin",
                "omega-conotoxin GVIA",
                "phencyclidine",
                "rac-gabaculine",
                "resiniferatoxin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin"
            ],
            "TP": 3,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Conotoxin",
            "Batrotoxin",
            "Ciguatoxin",
            "Brevotoxin",
            "Anatoxin-a",
            "Anatoxin-s",
            "Palytoxin",
            "Microcystin-LR",
            "Nodularin",
            "Abrin",
            "Ricin",
            "\u03b1-Latrotoxin",
            "\u03b2-Bungarotoxin",
            "Crotoxin",
            "Dendrotoxin",
            "Kallikrein",
            "Botox",
            "Tetrodotoxin",
            "Oxytocin",
            "Propranolol",
            "Phenytoin",
            "Lidocaine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin"
            ],
            "mismatches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Conotoxin",
                "Batrotoxin",
                "Ciguatoxin",
                "Brevotoxin",
                "Anatoxin-a",
                "Anatoxin-s",
                "Palytoxin",
                "Microcystin-LR",
                "Nodularin",
                "Abrin",
                "Ricin",
                "\u03b1-Latrotoxin",
                "\u03b2-Bungarotoxin",
                "Crotoxin",
                "Dendrotoxin",
                "Kallikrein",
                "Botox",
                "Oxytocin",
                "Propranolol",
                "Phenytoin",
                "Lidocaine"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-imidacloprid",
                "(E)-nitenpyram",
                "(Z)-thiacloprid",
                "1,3-dinitrobenzene",
                "Batrachotoxin",
                "L-BMAA",
                "L-dopa",
                "VX nerve agent",
                "acrylamide",
                "aetokthonotoxin",
                "amphetamine",
                "anisatin",
                "decarbamoylsaxitoxin",
                "dextromethorphan",
                "endrin",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "oltipraz",
                "omega-conotoxin GVIA",
                "oxidopamine",
                "paralytic shellfish toxin",
                "phencyclidine",
                "resiniferatoxin",
                "salsolinol",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin"
            ],
            "TP": 2,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Botulinum Toxin (BoNT)",
            "Curare (CUR)",
            "Cone Snail Venom (CV)",
            "Pufferfish Poison (PP)",
            "Batrachotoxin (BTX)",
            "Dendrotoxins (DTX)",
            "Alpha-Latrotoxin (\u03b1-LTX)",
            "Black Mamba Venom (BMV)",
            "Procaine (PRC)",
            "Lidocaine (LDC)",
            "Bupivacaine (BVC)",
            "Novocaine (NC)",
            "Tetracaine (TTC)",
            "Rocuronium (ROC)",
            "Vecuronium (VEC)",
            "Pancuronium (PNC)",
            "Atracurium (ATC)",
            "Cisatracurium (CAT)",
            "Mivacurium (MVC)",
            "Gallamine (GAL)",
            "Tubocurarine (TBC)",
            "Neostigmine (NST)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin"
            ],
            "mismatches": [
                "Botulinum Toxin",
                "Curare",
                "Cone Snail Venom",
                "Pufferfish Poison",
                "Dendrotoxins",
                "Alpha-Latrotoxin",
                "Black Mamba Venom",
                "Procaine",
                "Lidocaine",
                "Bupivacaine",
                "Novocaine",
                "Tetracaine",
                "Rocuronium",
                "Vecuronium",
                "Pancuronium",
                "Atracurium",
                "Cisatracurium",
                "Mivacurium",
                "Gallamine",
                "Tubocurarine",
                "Neostigmine"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-nitenpyram",
                "(R)-ketamine",
                "(R)-sarin",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "L-BMAA",
                "L-dopa",
                "N-butylbenzenesulfonamide",
                "NSTX-3",
                "VX nerve agent",
                "aetokthonotoxin",
                "bicuculline",
                "decarbamoylsaxitoxin",
                "ketamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "rac-gabaculine",
                "resiniferatoxin",
                "saxitoxin",
                "tetrodotoxin"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Batrachotoxin (BTX)",
            "Conotoxin",
            "Dendrotoxin",
            "Tetracaine",
            "Lidocaine",
            "Bupivacaine",
            "Cocaine",
            "Procaine",
            "Prilocaine",
            "Articaine",
            "Ropivacaine",
            "Mepivacaine",
            "Chloroprocaine",
            "Dibucaine",
            "Levobupivacaine",
            "Benzocaine",
            "QX-314",
            "QX-222",
            "Aconitine",
            "Grayanotoxin",
            "Brevetoxin",
            "Anatoxin-a"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin"
            ],
            "mismatches": [
                "Conotoxin",
                "Dendrotoxin",
                "Tetracaine",
                "Lidocaine",
                "Bupivacaine",
                "Cocaine",
                "Procaine",
                "Prilocaine",
                "Articaine",
                "Ropivacaine",
                "Mepivacaine",
                "Chloroprocaine",
                "Dibucaine",
                "Levobupivacaine",
                "Benzocaine",
                "QX-314",
                "QX-222",
                "Aconitine",
                "Grayanotoxin",
                "Brevetoxin",
                "Anatoxin-a"
            ],
            "true_referents": [
                "(R)-ketamine",
                "(S)-2-chloropropanoic acid",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "L-dopa",
                "NSTX-3",
                "acromelic acid A",
                "acrylamide",
                "amphetamine",
                "bicuculline",
                "decarbamoylsaxitoxin",
                "dextromethorphan",
                "esketamine",
                "hexane",
                "hydrogen peroxide",
                "ketamine",
                "lead-207",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "phencyclidine",
                "rac-gabaculine",
                "resiniferatoxin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Batrachotoxin (BTX)",
            "Lidocaine (LIDO)",
            "Procaine (PRO)",
            "Novocaine (NOV)",
            "Ropivacaine (ROP)",
            "Cocaine (COC)",
            "Dantrolene (DANT)",
            "Curare (CUR)",
            "Botulinum toxin (BTX)",
            "Pufferfish venom (PFV)",
            "Aconitine (ACON)",
            "Veratridine (VERA)",
            "Conotoxin (CONO)",
            "Alpha-latrotoxin (ALAT)",
            "Dioxins (DIOX)",
            "Sodium channel blockers (SCB)",
            "Anticonvulsants (ACV)",
            "Muscle relaxants (MR)",
            "Local anesthetics (LA)",
            "Nerve agents (NA)",
            "Neurotoxins (NT)",
            "Ziconotide (ZIC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin"
            ],
            "mismatches": [
                "Lidocaine",
                "Procaine",
                "Novocaine",
                "Ropivacaine",
                "Cocaine",
                "Dantrolene",
                "Curare",
                "Botulinum toxin",
                "Pufferfish venom",
                "Aconitine",
                "Veratridine",
                "Conotoxin",
                "Alpha-latrotoxin",
                "Dioxins",
                "Sodium channel blockers",
                "Anticonvulsants",
                "Muscle relaxants",
                "Local anesthetics",
                "Nerve agents",
                "Neurotoxins",
                "Ziconotide"
            ],
            "true_referents": [
                "(E)-dinotefuran",
                "(R)-ketamine",
                "(R)-sarin",
                "(S)-sarin",
                "(Z)-acetamiprid",
                "(Z)-imidacloprid",
                "(Z)-thiacloprid",
                "1,3-dinitrobenzene",
                "3,4-methylenedioxymethamphetamine",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "L-BMAA",
                "L-dopa",
                "VX nerve agent",
                "acromelic acid A",
                "aetokthonotoxin",
                "amphetamine",
                "decarbamoylsaxitoxin",
                "dextromethorphan",
                "diethyl phthalate",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "saxitoxin",
                "tetrodotoxin"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Potassium Chloride (KCl)",
            "Sodium Pentothal (Thiopental)",
            "Etidocaine (Duranest)",
            "Cocaine",
            "Lidocaine (Xylocaine)",
            "Bupivacaine (Marcaine)",
            "Mepivacaine (Carbocaine)",
            "Prilocaine (Citanest)",
            "Articaine (Septocaine)",
            "Chloroprocaine (Nesacaine)",
            "Hexobarbital (Evipan)",
            "Sodium Thiopental (Pentothal)",
            "Propofol (Diprivan)",
            "Etomidate (Amidate)",
            "Ketamine (Ketalar)",
            "Rocuronium (Zemuron)",
            "Vecuronium (Norcuron)",
            "Pancuronium (Pavulon)",
            "D-tubocurarine (Tubocurarine)",
            "Decamethonium",
            "Succinylcholine (Anectine)",
            "Atracurium (Tracrium)",
            "Cisatracurium (Nimbex)",
            "Mivacurium (Mivacron)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ketamine (Ketalar)"
            ],
            "mismatches": [
                "Potassium Chloride (KCl)",
                "Sodium Pentothal (Thiopental)",
                "Etidocaine (Duranest)",
                "Cocaine",
                "Lidocaine (Xylocaine)",
                "Bupivacaine (Marcaine)",
                "Mepivacaine (Carbocaine)",
                "Prilocaine (Citanest)",
                "Articaine (Septocaine)",
                "Chloroprocaine (Nesacaine)",
                "Hexobarbital (Evipan)",
                "Sodium Thiopental (Pentothal)",
                "Propofol (Diprivan)",
                "Etomidate (Amidate)",
                "Rocuronium (Zemuron)",
                "Vecuronium (Norcuron)",
                "Pancuronium (Pavulon)",
                "D-tubocurarine (Tubocurarine)",
                "Decamethonium",
                "Succinylcholine (Anectine)",
                "Atracurium (Tracrium)",
                "Cisatracurium (Nimbex)",
                "Mivacurium (Mivacron)"
            ],
            "true_referents": [
                "(E)-dinotefuran",
                "(E)-nitenpyram",
                "(E)-thiamethoxam",
                "(R)-ketamine",
                "(S)-2-chloropropanoic acid",
                "(Z)-acetamiprid",
                "(Z)-thiacloprid",
                "A-230 nerve agent",
                "NSTX-3",
                "acrylamide",
                "amphetamine",
                "bicuculline",
                "decabromodiphenyl ether",
                "dextromethorphan",
                "diethyl phthalate",
                "esketamine",
                "ketamine",
                "methamphetamine",
                "nicotine",
                "nitenpyram",
                "oltipraz",
                "phencyclidine",
                "potassium cyanide",
                "rac-gabaculine",
                "thiamethoxam",
                "toluene"
            ],
            "TP": 1,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": "",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Botulinum toxin (BoNT)",
            "Curare",
            "Strychnine",
            "Saxitoxin (STX)",
            "Batrachotoxin",
            "\u03b1-Bungarotoxin",
            "Lidocaine (Lid)",
            "Procaine (Novocain)",
            "Tetraethylammonium (TEA)",
            "Quinine",
            "Tetramethrin",
            "Amatoxin",
            "Malathion",
            "Hexamethonium",
            "Veratridine",
            "Phencyclidine (PCP)",
            "Domoic acid",
            "Ciguatoxin",
            "Mercury(II) chloride",
            "Lead acetate",
            "Tetrachlorodibenzodioxin (TCDD)",
            "Epibatidine",
            "Tetramethylammonium"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Batrachotoxin",
                "Phencyclidine",
                "Domoic acid"
            ],
            "mismatches": [
                "Tetrodotoxin (TTX)",
                "Botulinum toxin (BoNT)",
                "Curare",
                "Strychnine",
                "Saxitoxin (STX)",
                "\u03b1-Bungarotoxin",
                "Lidocaine (Lid)",
                "Procaine (Novocain)",
                "Tetraethylammonium (TEA)",
                "Quinine",
                "Tetramethrin",
                "Amatoxin",
                "Malathion",
                "Hexamethonium",
                "Veratridine",
                "Ciguatoxin",
                "Mercury(II) chloride",
                "Lead acetate",
                "Tetrachlorodibenzodioxin (TCDD)",
                "Epibatidine",
                "Tetramethylammonium"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-clothianidin",
                "(E)-imidacloprid",
                "(E)-nitenpyram",
                "(E)-thiamethoxam",
                "(R)-ketamine",
                "(R)-sarin",
                "(S)-2-chloropropanoic acid",
                "(S)-sarin",
                "1,2,3-trimethylbenzene",
                "1,2,4-trimethylbenzene",
                "2,5-hexanedione",
                "3-nitropropanoic acid",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "Ibotenic acid",
                "acetamiprid",
                "ammonia",
                "amphetamine",
                "anisatin",
                "diethyl phthalate",
                "domoic acid",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "lead(0)",
                "lead-207",
                "mercury(0)",
                "methamphetamine",
                "neonicotinoid insectide",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "thallium(I) acetate",
                "tinyatoxin"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Batrachotoxin",
            "Lidocaine",
            "Bupivacaine",
            "Ropivacaine",
            "Procaine",
            "Cocaine",
            "Mepivacaine",
            "Prilocaine",
            "Tetracaine",
            "Benzocaine",
            "Botulinum toxin",
            "Curare",
            "Succinylcholine",
            "Aconitine",
            "Veratridine",
            "Ciguatoxin",
            "Brevetoxin",
            "Grayanotoxin",
            "Neosaxitoxin",
            "Gonyautoxin",
            "Conotoxin",
            "Chloroform"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin",
                "Neosaxitoxin"
            ],
            "mismatches": [
                "Lidocaine",
                "Bupivacaine",
                "Ropivacaine",
                "Procaine",
                "Cocaine",
                "Mepivacaine",
                "Prilocaine",
                "Tetracaine",
                "Benzocaine",
                "Botulinum toxin",
                "Curare",
                "Succinylcholine",
                "Aconitine",
                "Veratridine",
                "Ciguatoxin",
                "Brevetoxin",
                "Grayanotoxin",
                "Gonyautoxin",
                "Conotoxin",
                "Chloroform"
            ],
            "true_referents": [
                "(R)-ketamine",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "Batrachotoxin",
                "Gonyautoxin 1",
                "Gonyautoxin 2",
                "Gonyautoxin 5",
                "VX nerve agent",
                "acromelic acid A",
                "acrylamide",
                "amphetamine",
                "decarbamoylsaxitoxin",
                "dextromethorphan",
                "esketamine",
                "hydrogen peroxide",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "paralytic shellfish toxin",
                "phencyclidine",
                "saxitoxin",
                "tetrodotoxin",
                "toluene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Batrachotoxin",
            "Tetanus toxin",
            "Botulinum toxin (BoNT)",
            "Anatoxin-a",
            "Maculotoxin",
            "Bungarotoxin",
            "Conotoxin",
            "Aconitine",
            "Pyrethroid insecticides",
            "Neonicotinoid insecticides",
            "Organophosphate insecticides",
            "Carbamate insecticides",
            "Lidocaine",
            "Procaine",
            "Bupivacaine",
            "Ropivacaine",
            "Mepivacaine",
            "Prilocaine",
            "Benzocaine",
            "Tetracaine",
            "Dibucaine",
            "Proparacaine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin (TTX)",
                "Saxitoxin (STX)",
                "Batrachotoxin",
                "Neonicotinoid insecticides"
            ],
            "mismatches": [
                "Tetanus toxin",
                "Botulinum toxin (BoNT)",
                "Anatoxin-a",
                "Maculotoxin",
                "Bungarotoxin",
                "Conotoxin",
                "Aconitine",
                "Pyrethroid insecticides",
                "Organophosphate insecticides",
                "Carbamate insecticides",
                "Lidocaine",
                "Procaine",
                "Bupivacaine",
                "Ropivacaine",
                "Mepivacaine",
                "Prilocaine",
                "Benzocaine",
                "Tetracaine",
                "Dibucaine",
                "Proparacaine"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(R)-ketamine",
                "A-230 nerve agent",
                "A-232 nerve agent",
                "A-234 nerve agent",
                "Batrachotoxin",
                "T-2 toxin",
                "acetamiprid",
                "acromelic acid A",
                "acrylamide",
                "bicuculline",
                "dextromethorphan",
                "esketamine",
                "hydrogen peroxide",
                "imidacloprid",
                "ketamine",
                "methamphetamine",
                "neonicotinoid insectide",
                "neosaxitoxin",
                "nicotine",
                "omega-conotoxin GVIA",
                "phencyclidine",
                "rac-gabaculine",
                "resiniferatoxin",
                "sarin",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Batrachotoxin (BTX)",
            "Aconitine",
            "Grayanotoxin",
            "Veratridine",
            "Ciguatoxin",
            "Maitotoxin",
            "Palytoxin",
            "Brevetoxin",
            "Yessotoxin",
            "Gambierol",
            "Gymnodimine",
            "Spirolide",
            "Pinnatoxin",
            "Prorocentrolide",
            "Neosaxitoxin",
            "Gonyautoxin",
            "C-toxin",
            "Decarbamoylsaxitoxin",
            "Tetracaine",
            "Lidocaine",
            "Procaine",
            "Bupivacaine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin",
                "Neosaxitoxin",
                "Decarbamoylsaxitoxin"
            ],
            "mismatches": [
                "Aconitine",
                "Grayanotoxin",
                "Veratridine",
                "Ciguatoxin",
                "Maitotoxin",
                "Palytoxin",
                "Brevetoxin",
                "Yessotoxin",
                "Gambierol",
                "Gymnodimine",
                "Spirolide",
                "Pinnatoxin",
                "Prorocentrolide",
                "Gonyautoxin",
                "C-toxin",
                "Tetracaine",
                "Lidocaine",
                "Procaine",
                "Bupivacaine"
            ],
            "true_referents": [
                "(E)-imidacloprid",
                "(E)-nitenpyram",
                "(R)-ketamine",
                "(Z)-imidacloprid",
                "(Z)-thiacloprid",
                "1-bromopropane",
                "2,5-hexanedione",
                "Batrachotoxin",
                "Decarbamoylgonyautoxin III",
                "Gonyautoxin 1",
                "Gonyautoxin 2",
                "Gonyautoxin 5",
                "N-methylnorsalsolinol",
                "T-2 toxin",
                "acromelic acid A",
                "acrylamide",
                "amphetamine",
                "butane-1,4-diol",
                "decarbamoylsaxitoxin",
                "dysiherbaine",
                "esketamine",
                "gamma-butyrolactone",
                "hydrogen peroxide",
                "ketamine",
                "methamphetamine",
                "neosaxitoxin",
                "nicotine",
                "paralytic shellfish toxin",
                "phencyclidine",
                "resiniferatoxin",
                "salsolinol",
                "saxitoxin",
                "tetrodotoxin",
                "tinyatoxin"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Lidocaine (Lido)",
            "Procaine (Proc)",
            "Bupivacaine (Bupi)",
            "Ropivacaine (Ropi)",
            "Tetracaine (Tetra)",
            "Cocaine (Coc)",
            "Novocaine (Novo)",
            "Benzocaine (Benzo)",
            "Articaine (Arti)",
            "Mepivacaine (Mepi)",
            "Prilocaine (Prilo)",
            "Etidocaine (Etido)",
            "Dibucaine (Dibu)",
            "Pramoxine (Pram)",
            "Dyclonine (Dyclo)",
            "Oxybuprocaine (Oxybu)",
            "Proparacaine (Propara)",
            "Levobupivacaine (Levobu)",
            "Eutectix (EMLA)",
            "Piperocaine (Pipero)",
            "Hexylcaine (Hexyl)",
            "Butacaine (Buta)",
            "Cyclomethycaine (Cyclo)",
            "Phenacaine (Phena)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Lidocaine (Lido)",
                "Procaine (Proc)",
                "Bupivacaine (Bupi)",
                "Ropivacaine (Ropi)",
                "Tetracaine (Tetra)",
                "Cocaine (Coc)",
                "Novocaine (Novo)",
                "Benzocaine (Benzo)",
                "Articaine (Arti)",
                "Mepivacaine (Mepi)",
                "Prilocaine (Prilo)",
                "Etidocaine (Etido)",
                "Dibucaine (Dibu)",
                "Pramoxine (Pram)",
                "Dyclonine (Dyclo)",
                "Oxybuprocaine (Oxybu)",
                "Proparacaine (Propara)",
                "Levobupivacaine (Levobu)",
                "Eutectix (EMLA)",
                "Piperocaine (Pipero)",
                "Hexylcaine (Hexyl)",
                "Butacaine (Buta)",
                "Cyclomethycaine (Cyclo)",
                "Phenacaine (Phena)"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(E)-dinotefuran",
                "(R)-ketamine",
                "(S)-amphetamine",
                "(S)-nicotine",
                "(Z)-thiacloprid",
                "2,5-hexanedione",
                "3,4-methylenedioxymethamphetamine",
                "L-BMAA",
                "L-dopa",
                "NSTX-3",
                "amphetamine",
                "bicuculline",
                "butane-1,4-diol",
                "gamma-butyrolactone",
                "hexane",
                "ketamine",
                "methamphetamine",
                "nicotine",
                "phencyclidine",
                "rac-gabaculine",
                "toluene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Curare (Curare)",
            "D-Tubocurarine",
            "Methocurarine",
            "Alcuronium",
            "Chandonium",
            "Gallamine",
            "Suxamethonium",
            "Atracurium",
            "Cisatracurium",
            "Vecuronium",
            "Pancuronium",
            "Pipecuronium",
            "Rocuronium",
            "Doxacurium",
            "Mivacurium",
            "Rapacuronium",
            "Gantacurium",
            "Laudanosine",
            "Doxacurium",
            "Mivacurium",
            "Rapacuronium",
            "Gantacurium",
            "Laudanosine",
            "Atracurium",
            "Cisatracurium",
            "Vecuronium",
            "Pancuronium",
            "Pipecuronium",
            "Rocuronium"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Curare (Curare)",
                "D-Tubocurarine",
                "Methocurarine",
                "Alcuronium",
                "Chandonium",
                "Gallamine",
                "Suxamethonium",
                "Atracurium",
                "Cisatracurium",
                "Vecuronium",
                "Pancuronium",
                "Pipecuronium",
                "Rocuronium",
                "Doxacurium",
                "Mivacurium",
                "Rapacuronium",
                "Gantacurium",
                "Laudanosine"
            ],
            "true_referents": [
                "(E)-dinotefuran",
                "(E)-nitenpyram",
                "(R)-ketamine",
                "3,4-methylenedioxymethamphetamine",
                "A-234 nerve agent",
                "L-dopa",
                "VX nerve agent",
                "amphetamine",
                "bicuculline",
                "decabromodiphenyl ether",
                "dextromethorphan",
                "domoic acid",
                "esketamine",
                "ketamine",
                "methamphetamine",
                "nitenpyram",
                "phencyclidine",
                "rac-gabaculine",
                "thiamethoxam"
            ],
            "TP": 0,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": [
            "Tetrodotoxin (TTX)",
            "Saxitoxin (STX)",
            "Batrachotoxin (BTX)",
            "Ciguatoxin (CTX)",
            "Neosaxitoxin (NSTX)",
            "Gonyautoxin (GTX)",
            "Lolitotoxin (LTX)",
            "Brevetoxin (PbTx)",
            "Cyclothiazide",
            "Procainamide",
            "Lidocaine",
            "Tetracaine",
            "Bupivacaine",
            "Ropivacaine",
            "Levobupivacaine",
            "Dibucaine",
            "Chloroprocaine",
            "Mepivacaine",
            "Prilocaine",
            "Etidocaine",
            "Articaine",
            "Trimecaine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tetrodotoxin",
                "Saxitoxin",
                "Batrachotoxin",
                "Neosaxitoxin"
            ],
            "mismatches": [
                "Ciguatoxin",
                "Gonyautoxin",
                "Lolitotoxin",
                "Brevetoxin",
                "Cyclothiazide",
                "Procainamide",
                "Lidocaine",
                "Tetracaine",
                "Bupivacaine",
                "Ropivacaine",
                "Levobupivacaine",
                "Dibucaine",
                "Chloroprocaine",
                "Mepivacaine",
                "Prilocaine",
                "Etidocaine",
                "Articaine",
                "Trimecaine"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(R)-ketamine",
                "(S)-2-chloropropanoic acid",
                "(Z)-thiacloprid",
                "1,2,3-trimethylbenzene",
                "1,2,4-trimethylbenzene",
                "Batrachotoxin",
                "Gonyautoxin 1",
                "Gonyautoxin 2",
                "Gonyautoxin 5",
                "L-dopa",
                "NSTX-3",
                "acrylamide",
                "bicuculline",
                "dextromethorphan",
                "esketamine",
                "ketamine",
                "neosaxitoxin",
                "nicotine",
                "phencyclidine",
                "potassium cyanide",
                "rac-gabaculine",
                "saxitoxin",
                "tetrodotoxin"
            ],
            "TP": 4,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "neurotoxin",
        "domain": "medicine",
        "response": {
            "1": "Local anesthetic",
            "2": "Procaine",
            "3": "Lidocaine",
            "4": "Bupivacaine",
            "5": "Ropivacaine",
            "6": "Tetracaine",
            "7": "Benzocaine",
            "8": "Articaine",
            "9": "Cocaine",
            "10": "Mepivacaine",
            "11": "Prilocaine",
            "12": "Etidocaine",
            "13": "Chloroprocaine",
            "14": "Levobupivacaine",
            "15": "Bupivacaine liposome",
            "16": "Ropivacaine liposome",
            "17": "Lidocaine liposome",
            "18": "Benzocaine liposome",
            "19": "Mepivacaine liposome",
            "20": "Etidocaine liposome",
            "21": "Articaine liposome",
            "22": "Cocaine liposome",
            "23": "Tetracaine liposome",
            "24": "Procaine liposome"
        },
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Local anesthetic",
                "Procaine",
                "Lidocaine",
                "Bupivacaine",
                "Ropivacaine",
                "Tetracaine",
                "Benzocaine",
                "Articaine",
                "Cocaine",
                "Mepivacaine",
                "Prilocaine",
                "Etidocaine",
                "Chloroprocaine",
                "Levobupivacaine",
                "Bupivacaine liposome",
                "Ropivacaine liposome",
                "Lidocaine liposome",
                "Benzocaine liposome",
                "Mepivacaine liposome",
                "Etidocaine liposome",
                "Articaine liposome",
                "Cocaine liposome",
                "Tetracaine liposome",
                "Procaine liposome"
            ],
            "true_referents": [
                "(E)-acetamiprid",
                "(R)-ketamine",
                "(R)-sarin",
                "(S)-2-chloropropanoic acid",
                "(S)-amphetamine",
                "(S)-nicotine",
                "3,4-methylenedioxymethamphetamine",
                "A-234 nerve agent",
                "L-BMAA",
                "L-dopa",
                "acrylamide",
                "amphetamine",
                "hexane",
                "hydrogen peroxide",
                "ketamine",
                "lead-207",
                "methamphetamine",
                "phencyclidine",
                "tetrodotoxin"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    }
]