[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Alprazolam (Xanax)",
            "Benzodiazepine",
            "Buspirone",
            "Chlordiazepoxide (Librium)",
            "Clonazepam (Klonopin)",
            "Clorazepate (Tranxene)",
            "Diazepam (Valium)",
            "Doxepin",
            "Flurazepam (Dalmane)",
            "Gabapentin",
            "Lorazepam (Ativan)",
            "Midazolam (Versed)",
            "Minoxidil",
            "Oxazepam (Serax)",
            "Pregabalin (Lyrica)",
            "Quazepam (Doral)",
            "Ramelteon",
            "Secobarbital (Seconal)",
            "Temazepam (Restoril)",
            "Trazodone",
            "Triazolam (Halcion)",
            "Zaleplon (Sonata)",
            "Zolpidem (Ambien)",
            "Zopiclone (Imovane)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alprazolam (Xanax)",
                "Buspirone",
                "Chlordiazepoxide (Librium)",
                "Diazepam (Valium)",
                "Flurazepam (Dalmane)",
                "Midazolam (Versed)",
                "Secobarbital (Seconal)",
                "Trazodone",
                "Triazolam (Halcion)",
                "Zaleplon (Sonata)",
                "Zolpidem (Ambien)",
                "Zopiclone (Imovane)"
            ],
            "mismatches": [
                "Benzodiazepine",
                "Clonazepam (Klonopin)",
                "Clorazepate (Tranxene)",
                "Doxepin",
                "Gabapentin",
                "Lorazepam (Ativan)",
                "Minoxidil",
                "Oxazepam (Serax)",
                "Pregabalin (Lyrica)",
                "Quazepam (Doral)",
                "Ramelteon",
                "Temazepam (Restoril)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(R)-aceprometazine",
                "(R)-thalidomide",
                "(Z)-chlorprothixene",
                "alprazolam",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chlordiazepoxide",
                "chlorzoxazone",
                "dexmedetomidine",
                "diazepam",
                "doxylamine",
                "eszopiclone",
                "flunitrazepam",
                "flurazepam",
                "lormetazepam",
                "mianserin",
                "midazolam",
                "nordazepam",
                "pacatal",
                "phenobarbital",
                "promethazine",
                "secobarbital",
                "secobarbital sodium",
                "thalidomide",
                "trazodone",
                "triazolam",
                "zaleplon",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Alprazolam (Xanax)",
            "Diazepam (Valium)",
            "Lorazepam (Ativan)",
            "Midazolam (Versed)",
            "Chlordiazepoxide (Librium)",
            "Oxazepam (Serax)",
            "Temazepam (Restoril)",
            "Lidocaine (Lignocaine) - topical",
            "Hydroxyzine (Atarax)",
            "Diphenhydramine (Benadryl)",
            "Melatonin",
            "Valerian Root Extract",
            "Passiflora incarnata (Passionflower)",
            "L-theanine",
            "Tryptophan",
            "Magnesium (as a supplement)",
            "GABA (Gamma-Aminobutyric Acid) supplements",
            "Lavender Oil",
            "Chamomile Tea",
            "Cannabidiol (CBD)",
            "Zolpidem (Ambien)",
            "Zaleplon (Sonata)",
            "Eszopiclone (Lunesta)",
            "Trazodone (Desyrel)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alprazolam (Xanax)",
                "Diazepam (Valium)",
                "Midazolam (Versed)",
                "Chlordiazepoxide (Librium)",
                "Diphenhydramine (Benadryl)",
                "Zolpidem (Ambien)",
                "Zaleplon (Sonata)",
                "Eszopiclone (Lunesta)",
                "Trazodone (Desyrel)"
            ],
            "mismatches": [
                "Lorazepam (Ativan)",
                "Oxazepam (Serax)",
                "Temazepam (Restoril)",
                "Lidocaine (Lignocaine) - topical",
                "Hydroxyzine (Atarax)",
                "Melatonin",
                "Valerian Root Extract",
                "Passiflora incarnata (Passionflower)",
                "L-theanine",
                "Tryptophan",
                "Magnesium (as a supplement)",
                "GABA (Gamma-Aminobutyric Acid) supplements",
                "Lavender Oil",
                "Chamomile Tea",
                "Cannabidiol (CBD)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(R)-thalidomide",
                "3alpha-hydroxy-5beta-pregnan-20-one",
                "alprazolam",
                "brexanolone",
                "butalbital",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "doxylamine",
                "eszopiclone",
                "flurazepam",
                "lactucin",
                "lormetazepam",
                "mianserin",
                "midazolam",
                "nordazepam",
                "promethazine",
                "remifentanil",
                "secobarbital",
                "thalidomide",
                "thiamylal",
                "thiopental",
                "trazodone",
                "trazodone hydrochloride",
                "triazolam",
                "valerenic acid",
                "zaleplon",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Diazepam (Valium)",
            "Lorazepam (Ativan)",
            "Alprazolam (Xanax)",
            "Clonazepam (Klonopin)",
            "Chlordiazepoxide (Librium)",
            "Oxazepam (Serax)",
            "Temazepam (Restoril)",
            "Midazolam (Versed)",
            "Zolpidem (Ambien)",
            "Zopiclone (Imovane)",
            "Eszopiclone (Lunesta)",
            "Ramelteon (Rozerem)",
            "Suvorexant (Belsomra)",
            "Doxylamine (Unisom)",
            "Diphenhydramine (Benadryl)",
            "Hydroxyzine (Vistaril)",
            "Chamomile",
            "Valerian",
            "Kava",
            "Lavender",
            "Melatonin",
            "Ashwagandha",
            "Passionflower",
            "GABA (Gamma-Aminobutyric Acid)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam (Valium)",
                "Alprazolam (Xanax)",
                "Chlordiazepoxide (Librium)",
                "Midazolam (Versed)",
                "Zolpidem (Ambien)",
                "Zopiclone (Imovane)",
                "Eszopiclone (Lunesta)",
                "Doxylamine (Unisom)",
                "Diphenhydramine (Benadryl)"
            ],
            "mismatches": [
                "Lorazepam (Ativan)",
                "Clonazepam (Klonopin)",
                "Oxazepam (Serax)",
                "Temazepam (Restoril)",
                "Ramelteon (Rozerem)",
                "Suvorexant (Belsomra)",
                "Hydroxyzine (Vistaril)",
                "Chamomile",
                "Valerian",
                "Kava",
                "Lavender",
                "Melatonin",
                "Ashwagandha",
                "Passionflower",
                "GABA (Gamma-Aminobutyric Acid)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(R)-aceprometazine",
                "alprazolam",
                "brexanolone",
                "capuride",
                "chlordiazepoxide",
                "chlorzoxazone",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "doxylamine",
                "eszopiclone",
                "etorphine",
                "flurazepam",
                "lormetazepam",
                "midazolam",
                "nordazepam",
                "phenobarbital",
                "promethazine",
                "promethazine hydrochloride",
                "thalidomide",
                "trazodone",
                "triazolam",
                "valerenic acid",
                "zaleplon",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Diazepam (DZP)",
            "Lorazepam (LRZ)",
            "Alprazolam (ALP)",
            "Melatonin (MEL)",
            "Chamomile Tea (CHT)",
            "Lavender Oil (LVO)",
            "Valerian Root (VRR)",
            "Passionflower (PAS)",
            "Kava (KAV)",
            "Lemon Balm (LBM)",
            "Magnesium (MG)",
            "L-Theanine (L-TH)",
            "GABA (GABA)",
            "Phenobarbital (PB)",
            "Chlorpromazine (CPZ)",
            "Risperidone (RIS)",
            "Quetiapine (QTP)",
            "Olanzapine (OLA)",
            "Amitriptyline (AMI)",
            "Buspirone (BUS)",
            "Propranolol (PRP)",
            "Hydroxyzine (HYD)",
            "Diphenhydramine (DPH)",
            "Cannabidiol (CBD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam (DZP)",
                "Alprazolam (ALP)",
                "Phenobarbital (PB)",
                "Buspirone (BUS)",
                "Diphenhydramine (DPH)"
            ],
            "mismatches": [
                "Lorazepam (LRZ)",
                "Melatonin (MEL)",
                "Chamomile Tea (CHT)",
                "Lavender Oil (LVO)",
                "Valerian Root (VRR)",
                "Passionflower (PAS)",
                "Kava (KAV)",
                "Lemon Balm (LBM)",
                "Magnesium (MG)",
                "L-Theanine (L-TH)",
                "GABA (GABA)",
                "Chlorpromazine (CPZ)",
                "Risperidone (RIS)",
                "Quetiapine (QTP)",
                "Olanzapine (OLA)",
                "Amitriptyline (AMI)",
                "Propranolol (PRP)",
                "Hydroxyzine (HYD)",
                "Cannabidiol (CBD)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(R)-aceprometazine",
                "(S)-aceprometazine",
                "3alpha-hydroxy-5beta-pregnan-20-one",
                "4-hydroxybutyric acid",
                "aceprometazine",
                "alprazolam",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "capuride",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "doxylamine",
                "etorphine",
                "flurazepam",
                "lormetazepam",
                "mianserin",
                "midazolam",
                "phenobarbital",
                "promethazine",
                "promethazine hydrochloride",
                "propiomazine",
                "propofol",
                "secobarbital",
                "thiamylal",
                "trazodone",
                "triazolam",
                "valerenic acid",
                "zolpidem"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Diazepam (Valium)",
            "Lorazepam (Ativan)",
            "Alprazolam (Xanax)",
            "Clonazepam (Klonopin)",
            "Chlordiazepoxide (Librium)",
            "Oxazepam (Serax)",
            "Temazepam (Restoril)",
            "Midazolam (Versed)",
            "Triazolam (Halcion)",
            "Phenobarbital",
            "Pentobarbital (Nembutal)",
            "Secobarbital (Seconal)",
            "Zolpidem (Ambien)",
            "Eszopiclone (Lunesta)",
            "Zaleplon (Sonata)",
            "Buspirone (Buspar)",
            "Doxepin (Silenor)",
            "Hydroxyzine (Vistaril)",
            "Meprobamate (Miltown)",
            "Chloral hydrate",
            "Ramelteon (Rozerem)",
            "Trazodone",
            "Amitriptyline",
            "Mirtazapine (Remeron)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam",
                "Alprazolam",
                "Chlordiazepoxide",
                "Midazolam",
                "Triazolam",
                "Phenobarbital",
                "Secobarbital",
                "Zolpidem",
                "Eszopiclone",
                "Zaleplon",
                "Buspirone",
                "Chloral hydrate",
                "Trazodone"
            ],
            "mismatches": [
                "Lorazepam",
                "Clonazepam",
                "Oxazepam",
                "Temazepam",
                "Pentobarbital",
                "Doxepin",
                "Hydroxyzine",
                "Meprobamate",
                "Ramelteon",
                "Amitriptyline",
                "Mirtazapine"
            ],
            "true_referents": [
                "(E)-chlorprothixene",
                "(R)-aceprometazine",
                "(R)-thalidomide",
                "(Z)-chlorprothixene",
                "aceprometazine",
                "alprazolam",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chloral hydrate",
                "chlordiazepoxide",
                "diazepam",
                "doxylamine",
                "eszopiclone",
                "flurazepam",
                "lormetazepam",
                "mianserin",
                "midazolam",
                "nordazepam",
                "phenobarbital",
                "promethazine",
                "promethazine hydrochloride",
                "propiomazine",
                "secobarbital",
                "secobarbital sodium",
                "thalidomide",
                "trazodone",
                "triazolam",
                "zaleplon",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Diazepam (Valium)",
            "Lorazepam (Ativan)",
            "Alprazolam (Xanax)",
            "Clonazepam (Klonopin)",
            "Buspirone (Buspar)",
            "Hydroxyzine (Atarax)",
            "Diphenhydramine (Benadryl)",
            "Melatonin (Melatonin)",
            "Valerian root (Valerian)",
            "Kava (Kava)",
            "Chamomile (Chamomile)",
            "L-theanine (L-theanine)",
            "Lavender oil (Lavender)",
            "Passionflower (Passionflower)",
            "5-HTP (5-Hydroxytryptophan)",
            "Ashwagandha (Withania somnifera)",
            "Glycine (Glycine)",
            "Magnesium (Magnesium)",
            "Trazodone (Desyrel)",
            "Quetiapine (Seroquel)",
            "Gabapentin (Neurontin)",
            "Pregabalin (Lyrica)",
            "Sertraline (Zoloft)",
            "Fluoxetine (Prozac)",
            "Citalopram (Celexa)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam",
                "Alprazolam",
                "Buspirone",
                "Diphenhydramine",
                "Trazodone"
            ],
            "mismatches": [
                "Lorazepam",
                "Clonazepam",
                "Hydroxyzine",
                "Melatonin",
                "Valerian root",
                "Kava",
                "Chamomile",
                "L-theanine",
                "Lavender oil",
                "Passionflower",
                "5-HTP",
                "Ashwagandha",
                "Glycine",
                "Magnesium",
                "Quetiapine",
                "Gabapentin",
                "Pregabalin",
                "Sertraline",
                "Fluoxetine",
                "Citalopram"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(E)-chlorprothixene",
                "(R)-aceprometazine",
                "(S)-aceprometazine",
                "3alpha-hydroxy-5beta-pregnan-20-one",
                "alprazolam",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chlordiazepoxide",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "doxylamine",
                "flurazepam",
                "mianserin",
                "midazolam",
                "promethazine",
                "remifentanil",
                "thiamylal",
                "trazodone",
                "trazodone hydrochloride",
                "valerenic acid",
                "zolpidem"
            ],
            "TP": 5,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Diazepam (DZP)",
            "Alprazolam (Xanax)",
            "Lorazepam (Ativan)",
            "Clonazepam (Klonopin)",
            "Chlordiazepoxide (Librium)",
            "Oxazepam (Serax)",
            "Temazepam (Restoril)",
            "Triazolam (Halcion)",
            "Flurazepam (Dalmane)",
            "Estazolam (ProSom)",
            "Quazepam (Doral)",
            "Zolpidem (Ambien)",
            "Zaleplon (Sonata)",
            "Eszopiclone (Lunesta)",
            "Ramelteon (Rozerem)",
            "Buspirone (Buspar)",
            "Meprobamate (Miltown)",
            "Barbiturates (Phenobarbital)",
            "Methaqualone (Quaaludes)",
            "Antihistamines (Diphenhydramine)",
            "Chloral Hydrate (Noctec)",
            "Paraldehyde (Paral)",
            "Ethchlorvynol (Placidyl)",
            "Methyprylon (Noludar)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam (DZP)",
                "Alprazolam (Xanax)",
                "Chlordiazepoxide (Librium)",
                "Triazolam (Halcion)",
                "Flurazepam (Dalmane)",
                "Zolpidem (Ambien)",
                "Zaleplon (Sonata)",
                "Eszopiclone (Lunesta)",
                "Buspirone (Buspar)",
                "Phenobarbital",
                "Methaqualone (Quaaludes)",
                "Diphenhydramine",
                "Chloral Hydrate (Noctec)",
                "Paraldehyde (Paral)",
                "Ethchlorvynol (Placidyl)"
            ],
            "mismatches": [
                "Lorazepam (Ativan)",
                "Clonazepam (Klonopin)",
                "Oxazepam (Serax)",
                "Temazepam (Restoril)",
                "Estazolam (ProSom)",
                "Quazepam (Doral)",
                "Ramelteon (Rozerem)",
                "Meprobamate (Miltown)",
                "Methyprylon (Noludar)"
            ],
            "true_referents": [
                "(2R,3S,11bS)-benzquinamide",
                "(E)-chlorprothixene",
                "(R)-aceprometazine",
                "(R)-thalidomide",
                "(Z)-chlorprothixene",
                "4-hydroxybutyric acid",
                "aceprometazine",
                "alprazolam",
                "benzquinamide",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chloral hydrate",
                "chlordiazepoxide",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "eszopiclone",
                "ethchlorvynol",
                "etorphine",
                "eucommiol",
                "flunitrazepam",
                "flurazepam",
                "lormetazepam",
                "methapyrilene",
                "methapyrilene hydrochloride",
                "methaqualone",
                "mianserin",
                "midazolam",
                "nordazepam",
                "paraldehyde",
                "phenobarbital",
                "promethazine",
                "propiomazine",
                "secobarbital",
                "thalidomide",
                "triazolam",
                "valerenic acid",
                "zaleplon",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Lavender essential oil",
            "Valerian root extract",
            "Chamomile tea",
            "Melatonin supplement",
            "Benzodiazepines (e.g. Diazepam)",
            "Antihistamines (e.g. Diphenhydramine)",
            "GABA (gamma-aminobutyric acid)",
            "Magnesium supplement",
            "Passionflower extract",
            "Kava root extract",
            "Lemon balm tea",
            "Ashwagandha supplement",
            "L-theanine supplement",
            "Hops extract",
            "Skullcap extract",
            "Holy basil tea",
            "Linden flower tea",
            "Rose essential oil",
            "Peppermint tea",
            "St. John's Wort extract",
            "Glycine supplement",
            "Catnip tea",
            "Lemon verbena tea",
            "L-theanine supplement"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam",
                "Diphenhydramine"
            ],
            "mismatches": [
                "Lavender essential oil",
                "Valerian root extract",
                "Chamomile tea",
                "Melatonin supplement",
                "Benzodiazepines (e.g. Diazepam)",
                "Antihistamines (e.g. Diphenhydramine)",
                "GABA (gamma-aminobutyric acid)",
                "Magnesium supplement",
                "Passionflower extract",
                "Kava root extract",
                "Lemon balm tea",
                "Ashwagandha supplement",
                "L-theanine supplement",
                "Hops extract",
                "Skullcap extract",
                "Holy basil tea",
                "Linden flower tea",
                "Rose essential oil",
                "Peppermint tea",
                "St. John's Wort extract",
                "Glycine supplement",
                "Catnip tea",
                "Lemon verbena tea"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "adinazolam",
                "alprazolam",
                "brexanolone",
                "butalbital",
                "chlordiazepoxide",
                "cinolazepam",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "doxylamine",
                "flunitrazepam",
                "hydroxyethylflurazepam",
                "lormetazepam",
                "mianserin",
                "midazolam",
                "phenobarbital",
                "promethazine",
                "propiomazine",
                "secobarbital",
                "terpinolene",
                "thiamylal",
                "trazodone",
                "triazolam",
                "valerenic acid",
                "zolpidem"
            ],
            "TP": 2,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Diazepam (Valium)",
            "Lorazepam (Ativan)",
            "Alprazolam (Xanax)",
            "Clonazepam (Klonopin)",
            "Temazepam (Restoril)",
            "Hydroxyzine (Atarax)",
            "Buspirone (Buspar)",
            "Valerian Root",
            "Gabapentin",
            "Trazodone",
            "Amitriptyline",
            "Melatonin",
            "Propranolol",
            "Quetiapine (Seroquel)",
            "Zolpidem (Ambien)",
            "Eszopiclone (Lunesta)",
            "Pregabalin (Lyrica)",
            "Sertraline (Zoloft)",
            "Chloral Hydrate",
            "Phenobarbital",
            "Bromazepam (Lexotan)",
            "Meprobamate",
            "Midazolam",
            "Clonidine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam (Valium)",
                "Alprazolam (Xanax)",
                "Buspirone (Buspar)",
                "Trazodone",
                "Zolpidem (Ambien)",
                "Eszopiclone (Lunesta)",
                "Chloral Hydrate",
                "Phenobarbital",
                "Midazolam"
            ],
            "mismatches": [
                "Lorazepam (Ativan)",
                "Clonazepam (Klonopin)",
                "Temazepam (Restoril)",
                "Hydroxyzine (Atarax)",
                "Valerian Root",
                "Gabapentin",
                "Amitriptyline",
                "Melatonin",
                "Propranolol",
                "Quetiapine (Seroquel)",
                "Pregabalin (Lyrica)",
                "Sertraline (Zoloft)",
                "Bromazepam (Lexotan)",
                "Meprobamate",
                "Clonidine"
            ],
            "true_referents": [
                "(E)-chlorprothixene",
                "(Z)-chlorprothixene",
                "alprazolam",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chloral hydrate",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "doxylamine",
                "eszopiclone",
                "flurazepam",
                "lormetazepam",
                "mianserin",
                "midazolam",
                "phenobarbital",
                "promethazine",
                "propofol",
                "secobarbital",
                "trazodone",
                "valerenic acid",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Diazepam (Valium)",
            "Alprazolam (Xanax)",
            "Lorazepam (Ativan)",
            "Clonazepam (Klonopin)",
            "Chamomile tea",
            "Lavender essential oil",
            "Melatonin",
            "Valerian root",
            "L-theanine",
            "Passionflower extract",
            "Gamma-aminobutyric acid (GABA)",
            "Kava kava",
            "Lemon balm",
            "Magnesium glycinate",
            "Ashwagandha",
            "5-Hydroxytryptophan (5-HTP)",
            "Phenobarbital",
            "Zolpidem (Ambien)",
            "Trazodone",
            "Buspirone (BuSpar)",
            "Hydroxyzine (Vistaril)",
            "Pregabalin (Lyrica)",
            "Gabapentin (Neurontin)",
            "Propranolol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam (Valium)",
                "Alprazolam (Xanax)",
                "Phenobarbital",
                "Zolpidem (Ambien)",
                "Trazodone",
                "Buspirone (BuSpar)"
            ],
            "mismatches": [
                "Lorazepam (Ativan)",
                "Clonazepam (Klonopin)",
                "Chamomile tea",
                "Lavender essential oil",
                "Melatonin",
                "Valerian root",
                "L-theanine",
                "Passionflower extract",
                "Gamma-aminobutyric acid (GABA)",
                "Kava kava",
                "Lemon balm",
                "Magnesium glycinate",
                "Ashwagandha",
                "5-Hydroxytryptophan (5-HTP)",
                "Hydroxyzine (Vistaril)",
                "Pregabalin (Lyrica)",
                "Gabapentin (Neurontin)",
                "Propranolol"
            ],
            "true_referents": [
                "(S)-aceprometazine",
                "4-hydroxybutyric acid",
                "alprazolam",
                "brexanolone",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine salicylate",
                "doxylamine",
                "flurazepam",
                "hydroxyethylflurazepam",
                "mianserin",
                "midazolam",
                "phenobarbital",
                "promethazine",
                "promethazine hydrochloride",
                "propofol",
                "remifentanil",
                "secobarbital",
                "thiamylal",
                "trazodone",
                "triazolam",
                "valerenic acid",
                "zolpidem"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Lavender (Lavandula)",
            "Chamomile (Matricaria chamomilla)",
            "Valerian root (Valeriana officinalis)",
            "Lemon balm (Melissa officinalis)",
            "Passionflower (Passiflora incarnata)",
            "Kava kava (Piper methysticum)",
            "St. John's Wort (Hypericum perforatum)",
            "Ashwagandha (Withania somnifera)",
            "L-Theanine",
            "GABA (Gamma-Aminobutyric Acid)",
            "Melatonin",
            "Magnesium",
            "Alprazolam (Xanax)",
            "Diazepam (Valium)",
            "Lorazepam (Ativan)",
            "Clonazepam (Klonopin)",
            "Buspirone (Buspar)",
            "Hydroxyzine (Atarax, Vistaril)",
            "Trazodone",
            "Gabapentin (Neurontin)",
            "Pregabalin (Lyrica)",
            "Phenibut",
            "Kratom (Mitragyna speciosa)",
            "CBD (Cannabidiol)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "alprazolam",
                "diazepam",
                "buspirone",
                "trazodone"
            ],
            "mismatches": [
                "Lavender (Lavandula)",
                "Chamomile (Matricaria chamomilla)",
                "Valerian root (Valeriana officinalis)",
                "Lemon balm (Melissa officinalis)",
                "Passionflower (Passiflora incarnata)",
                "Kava kava (Piper methysticum)",
                "St. John's Wort (Hypericum perforatum)",
                "Ashwagandha (Withania somnifera)",
                "L-Theanine",
                "GABA (Gamma-Aminobutyric Acid)",
                "Melatonin",
                "Magnesium",
                "Lorazepam (Ativan)",
                "Clonazepam (Klonopin)",
                "Hydroxyzine (Atarax, Vistaril)",
                "Gabapentin (Neurontin)",
                "Pregabalin (Lyrica)",
                "Phenibut",
                "Kratom (Mitragyna speciosa)",
                "CBD (Cannabidiol)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "3alpha-hydroxy-5beta-pregnan-20-one",
                "alprazolam",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine salicylate",
                "doxylamine",
                "flurazepam",
                "mianserin",
                "midazolam",
                "phenobarbital",
                "promethazine",
                "remifentanil",
                "thiamylal",
                "trazodone",
                "triazolam",
                "valerenic acid",
                "zolpidem"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Valerian (VL)",
            "Chamomile (CM)",
            "Lavender (LV)",
            "Melatonin (MT)",
            "Kava (KV)",
            "Passionflower (PF)",
            "Lemon balm (LB)",
            "St. John's wort (SJW)",
            "Glycine (GLY)",
            "Zolpidem (ZLD)",
            "Alprazolam (ALP)",
            "Diazepam (DZP)",
            "Lorazepam (LZP)",
            "Clonazepam (CZP)",
            "Temazepam (TZP)",
            "Ramelteon (RML)",
            "Doxepin (DXP)",
            "Trazodone (TRZ)",
            "Amitriptyline (AMT)",
            "Mirtazapine (MRZ)",
            "Diphenhydramine (DPH)",
            "Doxylamine (DXL)",
            "Cannabidiol (CBD)",
            "Gamma-Aminobutyric Acid (GABA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Zolpidem",
                "Alprazolam",
                "Diazepam",
                "Trazodone",
                "Diphenhydramine",
                "Doxylamine"
            ],
            "mismatches": [
                "Valerian (VL)",
                "Chamomile (CM)",
                "Lavender (LV)",
                "Melatonin (MT)",
                "Kava (KV)",
                "Passionflower (PF)",
                "Lemon balm (LB)",
                "St. John's wort (SJW)",
                "Glycine (GLY)",
                "Lorazepam (LZP)",
                "Clonazepam (CZP)",
                "Temazepam (TZP)",
                "Ramelteon (RML)",
                "Doxepin (DXP)",
                "Amitriptyline (AMT)",
                "Mirtazapine (MRZ)",
                "Cannabidiol (CBD)",
                "Gamma-Aminobutyric Acid (GABA)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(R)-aceprometazine",
                "(R)-thalidomide",
                "(S)-aceprometazine",
                "4-hydroxybutyric acid",
                "adinazolam",
                "alprazolam",
                "butalbital",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "doxylamine",
                "etorphine",
                "flurazepam",
                "mianserin",
                "midazolam",
                "nitrazepam",
                "phenobarbital",
                "promethazine",
                "thiamylal",
                "trazodone",
                "trazodone hydrochloride",
                "triazolam",
                "valerenic acid",
                "zaleplon",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Aspirin (ASA)",
            "Valium (DZP)",
            "Ibuprofen (IBU)",
            "Xanax (ALP)",
            "Chamomile (CHA)",
            "Lavender (LAV)",
            "Melatonin (MEL)",
            "Diazepam (DZP)",
            "Lorazepam (LOR)",
            "Clonazepam (CLO)",
            "Alprazolam (ALP)",
            "Zolpidem (ZOL)",
            "Trazodone (TRA)",
            "Gabapentin (GAB)",
            "Pregabalin (PRE)",
            "Kava (KAV)",
            "Passionflower (PAS)",
            "Lemon Balm (LEM)",
            "Hops (HOP)",
            "Chamomile Tea (CHA)",
            "Lavender Oil (LAV)",
            "Valerian (VAL)",
            "Magnesium (MAG)",
            "Glycine (GLY)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diazepam (DZP)",
                "Alprazolam (ALP)",
                "Zolpidem (ZOL)",
                "Trazodone (TRA)"
            ],
            "mismatches": [
                "Aspirin (ASA)",
                "Valium (DZP)",
                "Ibuprofen (IBU)",
                "Chamomile (CHA)",
                "Lavender (LAV)",
                "Melatonin (MEL)",
                "Lorazepam (LOR)",
                "Clonazepam (CLO)",
                "Gabapentin (GAB)",
                "Pregabalin (PRE)",
                "Kava (KAV)",
                "Passionflower (PAS)",
                "Lemon Balm (LEM)",
                "Hops (HOP)",
                "Chamomile Tea (CHA)",
                "Lavender Oil (LAV)",
                "Valerian (VAL)",
                "Magnesium (MAG)",
                "Glycine (GLY)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(R)-aceprometazine",
                "(S)-aceprometazine",
                "alprazolam",
                "butalbital",
                "capuride",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine salicylate",
                "doxylamine",
                "etorphine",
                "flurazepam",
                "lormetazepam",
                "midazolam",
                "phenobarbital",
                "secobarbital",
                "thalidomide",
                "thiamylal",
                "trazodone",
                "trazodone hydrochloride",
                "triazolam",
                "valerenic acid",
                "zolpidem"
            ],
            "TP": 4,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Benzodiazepine (BZD) (Alprazolam)",
            "Barbiturate (Barbital)",
            "Chlordiazepoxide",
            "Clonazepam",
            "Diazepam",
            "Flurazepam",
            "Lorazepam",
            "Midazolam",
            "Oxazepam",
            "Temazepam",
            "Triazolam",
            "Buspirone",
            "Gabapentin",
            "Pregabalin",
            "Valproate",
            "Clobazam",
            "Clonidine",
            "Hydroxyzine",
            "Meprobamate",
            "Methaqualone",
            "Nimetazepam",
            "Nitrazepam",
            "Phenobarbital",
            "Secobarbital",
            "Sodium oxybate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alprazolam",
                "Chlordiazepoxide",
                "Diazepam",
                "Flurazepam",
                "Midazolam",
                "Triazolam",
                "Buspirone",
                "Methaqualone",
                "Nimetazepam",
                "Nitrazepam",
                "Phenobarbital",
                "Secobarbital"
            ],
            "mismatches": [
                "Barbiturate (Barbital)",
                "Clonazepam",
                "Lorazepam",
                "Oxazepam",
                "Temazepam",
                "Gabapentin",
                "Pregabalin",
                "Valproate",
                "Clobazam",
                "Clonidine",
                "Hydroxyzine",
                "Meprobamate",
                "Sodium oxybate"
            ],
            "true_referents": [
                "alprazolam",
                "benzquinamide",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chlordiazepoxide",
                "dexmedetomidine",
                "diazepam",
                "doxylamine",
                "flunitrazepam",
                "flurazepam",
                "methaqualone",
                "mianserin",
                "midazolam",
                "nimetazepam",
                "nitrazepam",
                "nordazepam",
                "phenobarbital",
                "promethazine",
                "secobarbital",
                "secobarbital sodium",
                "trazodone",
                "triazolam",
                "zolpidem"
            ],
            "TP": 12,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            "Alprazolam (Xanax)",
            "Chlordiazepoxide (Librium)",
            "Diazepam (Valium)",
            "Clonazepam (Klonopin)",
            "Lorazepam (Ativan)",
            "Oxazepam (Serax)",
            "Flunitrazepam (Rohypnol)",
            "Estazolam (ProSom)",
            "Temazepam (Restoril)",
            "Triazolam (Halcion)",
            "Midazolam (Versed)",
            "Buspirone (Buspar)",
            "Hydroxyzine (Vistaril)",
            "Chlorpromazine (Thorazine)",
            "Promethazine (Phenergan)",
            "Diphenhydramine (Benadryl)",
            "Carisoprodol (Soma)",
            "Cyclobenzaprine (Flexeril)",
            "Methocarbamol (Robaxin)",
            "Metaxalone (Skelaxin)",
            "Chlorzoxazone (Parafon Forte)",
            "Baclofen (Kemstro)",
            "Tizanidine (Zanaflex)",
            "Gabapentin (Neurontin)",
            "Pregabalin (Lyrica)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alprazolam (Xanax)",
                "Chlordiazepoxide (Librium)",
                "Diazepam (Valium)",
                "Flunitrazepam (Rohypnol)",
                "Triazolam (Halcion)",
                "Midazolam (Versed)",
                "Buspirone (Buspar)",
                "Diphenhydramine (Benadryl)",
                "Chlorzoxazone (Parafon Forte)",
                "Promethazine (Phenergan)"
            ],
            "mismatches": [
                "Clonazepam (Klonopin)",
                "Lorazepam (Ativan)",
                "Oxazepam (Serax)",
                "Estazolam (ProSom)",
                "Temazepam (Restoril)",
                "Hydroxyzine (Vistaril)",
                "Chlorpromazine (Thorazine)",
                "Carisoprodol (Soma)",
                "Cyclobenzaprine (Flexeril)",
                "Methocarbamol (Robaxin)",
                "Metaxalone (Skelaxin)",
                "Baclofen (Kemstro)",
                "Tizanidine (Zanaflex)",
                "Gabapentin (Neurontin)",
                "Pregabalin (Lyrica)"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(R)-aceprometazine",
                "(R)-thalidomide",
                "(S)-aceprometazine",
                "(Z)-chlorprothixene",
                "aceprometazine",
                "alprazolam",
                "brexanolone",
                "buspirone",
                "buspirone hydrochloride",
                "butalbital",
                "chlordiazepoxide",
                "chlorprothixene",
                "chlorzoxazone",
                "dexmedetomidine",
                "diazepam",
                "diphenhydramine",
                "diphenhydramine hydrochloride",
                "diphenhydramine salicylate",
                "doxylamine",
                "flunitrazepam",
                "flurazepam",
                "lormetazepam",
                "mianserin",
                "midazolam",
                "nitrazepam",
                "nordazepam",
                "phenobarbital",
                "promethazine",
                "promethazine hydrochloride",
                "propiomazine",
                "remifentanil",
                "secobarbital",
                "triazolam",
                "zolpidem",
                "zopiclone"
            ],
            "TP": 10,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "sedative",
        "domain": "medicine",
        "response": [
            {
                "Referent": "valerian root",
                "Canonical Name": "Valeriana officinalis"
            },
            {
                "Referent": "passionflower",
                "Canonical Name": "Passiflora incarnata"
            },
            {
                "Referent": "lemon balm",
                "Canonical Name": "Melissa officinalis"
            },
            {
                "Referent": "lavender",
                "Canonical Name": "Lavandula angustifolia"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "kava",
                "Canonical Name": "Piper methysticum"
            },
            {
                "Referent": "valerian",
                "Canonical Name": "Valeriana officinalis"
            },
            {
                "Referent": "lemon verbena",
                "Canonical Name": "Aloysia triphylla"
            },
            {
                "Referent": "catnip",
                "Canonical Name": "Nepeta cataria"
            },
            {
                "Referent": "hops",
                "Canonical Name": "Humulus lupulus"
            },
            {
                "Referent": "passionflower",
                "Canonical Name": "Passiflora incarnata"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            },
            {
                "Referent": "chamomile",
                "Canonical Name": "Matricaria chamomilla"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "valerian root",
                "passionflower",
                "lemon balm",
                "lavender",
                "chamomile",
                "kava",
                "valerian",
                "lemon verbena",
                "catnip",
                "hops"
            ],
            "true_referents": [
                "(5R)-zopiclone",
                "(E)-chlorprothixene",
                "(R)-aceprometazine",
                "(S)-aceprometazine",
                "3alpha-hydroxy-5beta-pregnan-20-one",
                "butalbital",
                "diphenhydramine",
                "phenobarbital",
                "valerenic acid"
            ],
            "TP": 0,
            "FP": 10,
            "FN": 0
        }
    }
]