[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Lactulose (LAC)",
            "Docusate (DSS)",
            "Senna (SEN)",
            "Polyethylene glycol (PEG)",
            "Aloe vera (ALV)",
            "Psyllium (PSY)",
            "Methylcellulose (MC)",
            "Linaclotide (LIN)",
            "Ispaghula husk (ISPH)",
            "Loperamide (LOP)",
            "Hydroxypropyl methylcellulose (HPMC)",
            "Linseed (LINSE)",
            "Magnesium hydroxide (MAGH)",
            "Methyl salicylate (MS)",
            "Prunes (PRU)",
            "Polyoxyl 35 castor oil (POCO)",
            "Terminallia chebula (TERCHE)",
            "Triphala (TRI)",
            "Castor oil (CASO)",
            "Sorbitol (SOR)",
            "Magnesium citrate (MAGC)",
            "Rhubarb (RHUB)",
            "Cascara sagrada (CASCAS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Lactulose (LAC)",
                "Docusate (DSS)",
                "Senna (SEN)",
                "Polyethylene glycol (PEG)",
                "Aloe vera (ALV)",
                "Psyllium (PSY)",
                "Methylcellulose (MC)",
                "Linaclotide (LIN)",
                "Ispaghula husk (ISPH)",
                "Loperamide (LOP)",
                "Hydroxypropyl methylcellulose (HPMC)",
                "Linseed (LINSE)",
                "Magnesium hydroxide (MAGH)",
                "Methyl salicylate (MS)",
                "Prunes (PRU)",
                "Polyoxyl 35 castor oil (POCO)",
                "Terminallia chebula (TERCHE)",
                "Triphala (TRI)",
                "Castor oil (CASO)",
                "Sorbitol (SOR)",
                "Magnesium citrate (MAGC)",
                "Rhubarb (RHUB)",
                "Cascara sagrada (CASCAS)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aloe vera (ALV)",
                "Cascara sagrada (CASCAS)",
                "Castor oil (CASO)",
                "Docusate (DSS)",
                "Hydroxypropyl methylcellulose (HPMC)",
                "Ispaghula husk (ISPH)",
                "Lactulose (LAC)",
                "Linaclotide (LIN)",
                "Linseed (LINSE)",
                "Loperamide (LOP)",
                "Magnesium citrate (MAGC)",
                "Magnesium hydroxide (MAGH)",
                "Methyl salicylate (MS)",
                "Methylcellulose (MC)",
                "Polyethylene glycol (PEG)",
                "Polyoxyl 35 castor oil (POCO)",
                "Prunes (PRU)",
                "Psyllium (PSY)",
                "Rhubarb (RHUB)",
                "Senna (SEN)",
                "Sorbitol (SOR)",
                "Terminallia chebula (TERCHE)",
                "Triphala (TRI)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Docusate Sodium (DOC-S)",
            "Docusate Calcium (DOC-Ca)",
            "Docusate Potassium (DOC-K)",
            "Bisacodyl (BIS)",
            "Lubiprostone (Amitiza)",
            "Linaclotide (Linzess)",
            "Lactulose (LactuLose)",
            "Magnesium Hydroxide (Milk of Magnesia)",
            "Magnesium Citrate (Citrate of Magnesia)",
            "Polyethylene Glycol 3350 (Miralax)",
            "Senna (Senokot)",
            "Bisacodyl Suppositories (Dulcolax Suppositories)",
            "Glycerin Suppositories (GlycoLax)",
            "Mineral Oil (Mineral Oil Enema)",
            "Docusate with Sorbitol (Surfak with Sorbitol)",
            "Ispaghula Husk (Fybogel)",
            "Psyllium (Metamucil)",
            "Methylcellulose (Citrucel)",
            "Castor Oil (Castor Oil Suppositories)",
            "Senna Leaf Extract (Senokot S)",
            "Sodium Picosulfate (Prepopik)",
            "Magnesium Oxide (Mag-Ox 400)",
            "Docusate with Bisacodyl (Dulcolax)",
            "Sodium Phosphate (Fleet Phospho-soda)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Docusate Sodium (DOC-S)",
                "Docusate Calcium (DOC-Ca)",
                "Docusate Potassium (DOC-K)",
                "Bisacodyl (BIS)",
                "Lubiprostone (Amitiza)",
                "Linaclotide (Linzess)",
                "Lactulose (LactuLose)",
                "Magnesium Hydroxide (Milk of Magnesia)",
                "Magnesium Citrate (Citrate of Magnesia)",
                "Polyethylene Glycol 3350 (Miralax)",
                "Senna (Senokot)",
                "Bisacodyl Suppositories (Dulcolax Suppositories)",
                "Glycerin Suppositories (GlycoLax)",
                "Mineral Oil (Mineral Oil Enema)",
                "Docusate with Sorbitol (Surfak with Sorbitol)",
                "Ispaghula Husk (Fybogel)",
                "Psyllium (Metamucil)",
                "Methylcellulose (Citrucel)",
                "Castor Oil (Castor Oil Suppositories)",
                "Senna Leaf Extract (Senokot S)",
                "Sodium Picosulfate (Prepopik)",
                "Magnesium Oxide (Mag-Ox 400)",
                "Docusate with Bisacodyl (Dulcolax)",
                "Sodium Phosphate (Fleet Phospho-soda)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bisacodyl (BIS)",
                "Bisacodyl Suppositories (Dulcolax Suppositories)",
                "Castor Oil (Castor Oil Suppositories)",
                "Docusate Calcium (DOC-Ca)",
                "Docusate Potassium (DOC-K)",
                "Docusate Sodium (DOC-S)",
                "Docusate with Bisacodyl (Dulcolax)",
                "Docusate with Sorbitol (Surfak with Sorbitol)",
                "Glycerin Suppositories (GlycoLax)",
                "Ispaghula Husk (Fybogel)",
                "Lactulose (LactuLose)",
                "Linaclotide (Linzess)",
                "Lubiprostone (Amitiza)",
                "Magnesium Citrate (Citrate of Magnesia)",
                "Magnesium Hydroxide (Milk of Magnesia)",
                "Magnesium Oxide (Mag-Ox 400)",
                "Methylcellulose (Citrucel)",
                "Mineral Oil (Mineral Oil Enema)",
                "Polyethylene Glycol 3350 (Miralax)",
                "Psyllium (Metamucil)",
                "Senna (Senokot)",
                "Senna Leaf Extract (Senokot S)",
                "Sodium Phosphate (Fleet Phospho-soda)",
                "Sodium Picosulfate (Prepopik)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Bisacodyl (Dulcolax)",
            "Docusate Sodium (Colace)",
            "Senna (Senokot)",
            "Castor Oil",
            "Magnesium Citrate",
            "Magnesium Hydroxide (Milk of Magnesia)",
            "Polyethylene Glycol (MiraLAX)",
            "Lactulose",
            "Glycerin (Glycerol)",
            "Psyllium (Metamucil)",
            "Docusate Calcium",
            "Sodium Picosulfate",
            "Bisacodyl and Senna combination",
            "Lactitol",
            "Sorbitol",
            "Linaclotide (LINZESS)",
            "Prucalopride (Resolor)",
            "Lubiprostone (Amitiza)",
            "Naloxegol (Movantik)",
            "Methylnaltrexone (Relistor)",
            "Peginterferon alfa-2b (Sylatron)",
            "Polycarbophil (FiberCon)",
            "Polyethylene Glycol 3350 (PEG 3350)",
            "Sodium Bicarbonate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bisacodyl (Dulcolax)",
                "Docusate Sodium (Colace)",
                "Senna (Senokot)",
                "Castor Oil",
                "Magnesium Citrate",
                "Magnesium Hydroxide (Milk of Magnesia)",
                "Polyethylene Glycol (MiraLAX)",
                "Lactulose",
                "Glycerin (Glycerol)",
                "Psyllium (Metamucil)",
                "Docusate Calcium",
                "Sodium Picosulfate",
                "Bisacodyl and Senna combination",
                "Lactitol",
                "Sorbitol",
                "Linaclotide (LINZESS)",
                "Prucalopride (Resolor)",
                "Lubiprostone (Amitiza)",
                "Naloxegol (Movantik)",
                "Methylnaltrexone (Relistor)",
                "Peginterferon alfa-2b (Sylatron)",
                "Polycarbophil (FiberCon)",
                "Polyethylene Glycol 3350 (PEG 3350)",
                "Sodium Bicarbonate"
            ],
            "mismatches": [],
            "true_referents": [
                "Bisacodyl (Dulcolax)",
                "Bisacodyl and Senna combination",
                "Castor Oil",
                "Docusate Calcium",
                "Docusate Sodium (Colace)",
                "Glycerin (Glycerol)",
                "Lactitol",
                "Lactulose",
                "Linaclotide (LINZESS)",
                "Lubiprostone (Amitiza)",
                "Magnesium Citrate",
                "Magnesium Hydroxide (Milk of Magnesia)",
                "Methylnaltrexone (Relistor)",
                "Naloxegol (Movantik)",
                "Peginterferon alfa-2b (Sylatron)",
                "Polycarbophil (FiberCon)",
                "Polyethylene Glycol (MiraLAX)",
                "Polyethylene Glycol 3350 (PEG 3350)",
                "Prucalopride (Resolor)",
                "Psyllium (Metamucil)",
                "Senna (Senokot)",
                "Sodium Bicarbonate",
                "Sodium Picosulfate",
                "Sorbitol"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Bisacodyl (BIS)",
            "Senna (SEN)",
            "Polyethylene Glycol 3350 (PEG)",
            "Magnesium Hydroxide (MGOH)",
            "Lactulose (LAC)",
            "Docusate Sodium (DOC)",
            "Castor Oil (CAS)",
            "Mineral Oil (MIN)",
            "Glycerin (GLY)",
            "Sodium Picosulfate (SP)",
            "Linaclotide (LNC)",
            "Plecanatide (PLC)",
            "Lubiprostone (LBP)",
            "Methylcellulose (MEC)",
            "Psyllium Husk (PSY)",
            "Polycarbophil Calcium (PCC)",
            "Sodium Phosphate (SPH)",
            "Magnesium Citrate (MGC)",
            "Aloe Vera (ALV)",
            "Rhubarb (RHU)",
            "Prune Juice (PRJ)",
            "Chlordiazepoxide (CDZ)",
            "Hyoscyamine (HYO)",
            "Dicyclomine (DCY)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bisacodyl (BIS)",
                "Senna (SEN)",
                "Polyethylene Glycol 3350 (PEG)",
                "Magnesium Hydroxide (MGOH)",
                "Lactulose (LAC)",
                "Docusate Sodium (DOC)",
                "Castor Oil (CAS)",
                "Mineral Oil (MIN)",
                "Glycerin (GLY)",
                "Sodium Picosulfate (SP)",
                "Linaclotide (LNC)",
                "Plecanatide (PLC)",
                "Lubiprostone (LBP)",
                "Methylcellulose (MEC)",
                "Psyllium Husk (PSY)",
                "Polycarbophil Calcium (PCC)",
                "Sodium Phosphate (SPH)",
                "Magnesium Citrate (MGC)",
                "Aloe Vera (ALV)",
                "Rhubarb (RHU)",
                "Prune Juice (PRJ)"
            ],
            "mismatches": [
                "Chlordiazepoxide (CDZ)",
                "Hyoscyamine (HYO)",
                "Dicyclomine (DCY)"
            ],
            "true_referents": [
                "Aloe Vera (ALV)",
                "Bisacodyl (BIS)",
                "Castor Oil (CAS)",
                "Chlordiazepoxide (CDZ)",
                "Dicyclomine (DCY)",
                "Docusate Sodium (DOC)",
                "Glycerin (GLY)",
                "Hyoscyamine (HYO)",
                "Lactulose (LAC)",
                "Linaclotide (LNC)",
                "Lubiprostone (LBP)",
                "Magnesium Citrate (MGC)",
                "Magnesium Hydroxide (MGOH)",
                "Methylcellulose (MEC)",
                "Mineral Oil (MIN)",
                "Plecanatide (PLC)",
                "Polycarbophil Calcium (PCC)",
                "Polyethylene Glycol 3350 (PEG)",
                "Prune Juice (PRJ)",
                "Psyllium Husk (PSY)",
                "Rhubarb (RHU)",
                "Senna (SEN)",
                "Sodium Phosphate (SPH)",
                "Sodium Picosulfate (SP)"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Docusate (DSS)",
            "Bisacodyl",
            "Senna",
            "Polyethylene Glycol (PEG)",
            "Lactulose",
            "Magnesium Citrate",
            "Magnesium Hydroxide",
            "Sodium Picosulfate",
            "Cascara Sagrada",
            "Castor Oil",
            "Sodium Phosphate",
            "Sorbitol",
            "Methylcellulose",
            "Psyllium",
            "Polycarbophil",
            "Linaclotide",
            "Plecanatide",
            "Lubiprostone",
            "Prucalopride",
            "Rifaximin",
            "Methylnaltrexone",
            "Naloxegol",
            "Alvimopan",
            "Elobixibat"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Docusate (DSS)",
                "Bisacodyl",
                "Senna",
                "Polyethylene Glycol (PEG)",
                "Lactulose",
                "Magnesium Citrate",
                "Magnesium Hydroxide",
                "Sodium Picosulfate",
                "Cascara Sagrada",
                "Castor Oil",
                "Sodium Phosphate",
                "Sorbitol",
                "Methylcellulose",
                "Psyllium",
                "Polycarbophil",
                "Linaclotide",
                "Plecanatide",
                "Lubiprostone",
                "Prucalopride",
                "Rifaximin",
                "Methylnaltrexone",
                "Naloxegol",
                "Alvimopan",
                "Elobixibat"
            ],
            "mismatches": [],
            "true_referents": [
                "Alvimopan",
                "Bisacodyl",
                "Cascara Sagrada",
                "Castor Oil",
                "Docusate (DSS)",
                "Elobixibat",
                "Lactulose",
                "Linaclotide",
                "Lubiprostone",
                "Magnesium Citrate",
                "Magnesium Hydroxide",
                "Methylcellulose",
                "Methylnaltrexone",
                "Naloxegol",
                "Plecanatide",
                "Polycarbophil",
                "Polyethylene Glycol (PEG)",
                "Prucalopride",
                "Psyllium",
                "Rifaximin",
                "Senna",
                "Sodium Phosphate",
                "Sodium Picosulfate",
                "Sorbitol"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Polyethylene Glycol (PEG)",
            "Lactulose",
            "Sorbitol",
            "Magnesium Hydroxide",
            "Docusate Sodium",
            "Senna",
            "Bisacodyl",
            "Psyllium Husk",
            "Milk of Magnesia",
            "Castor Oil",
            "Glycerin Suppositories",
            "Rhubarb Extract",
            "Aloe Vera (oral)",
            "Chia Seeds",
            "Flaxseed Oil",
            "Coconut Oil",
            "Prune Juice",
            "Apple Juice",
            "Pectin",
            "Guar Gum",
            "Methylcellulose",
            "Wheat Bran",
            "Cascara Sagrada",
            "Sodium Phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Polyethylene Glycol (PEG)",
                "Lactulose",
                "Sorbitol",
                "Magnesium Hydroxide",
                "Docusate Sodium",
                "Senna",
                "Bisacodyl",
                "Psyllium Husk",
                "Milk of Magnesia",
                "Castor Oil",
                "Glycerin Suppositories",
                "Rhubarb Extract",
                "Aloe Vera (oral)",
                "Chia Seeds",
                "Flaxseed Oil",
                "Coconut Oil",
                "Prune Juice",
                "Apple Juice",
                "Pectin",
                "Guar Gum",
                "Methylcellulose",
                "Wheat Bran",
                "Cascara Sagrada",
                "Sodium Phosphate"
            ],
            "mismatches": [],
            "true_referents": [
                "Aloe Vera (oral)",
                "Apple Juice",
                "Bisacodyl",
                "Cascara Sagrada",
                "Castor Oil",
                "Chia Seeds",
                "Coconut Oil",
                "Docusate Sodium",
                "Flaxseed Oil",
                "Glycerin Suppositories",
                "Guar Gum",
                "Lactulose",
                "Magnesium Hydroxide",
                "Methylcellulose",
                "Milk of Magnesia",
                "Pectin",
                "Polyethylene Glycol (PEG)",
                "Prune Juice",
                "Psyllium Husk",
                "Rhubarb Extract",
                "Senna",
                "Sodium Phosphate",
                "Sorbitol",
                "Wheat Bran"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Docusate Sodium (Colace)",
            "Magnesium Hydroxide (Milk of Magnesia)",
            "Bisacodyl (Dulcolax)",
            "Senna (Senokot)",
            "Methylcellulose (Citrucel)",
            "Polyethylene Glycol 3350 (MiraLAX)",
            "Lactulose (Kristalose)",
            "Lubiprostone (Amitiza)",
            "Psyllium (Metamucil)",
            "Linaclotide (Linzess)",
            "Plecanatide (Trulance)",
            "Tegaserod (Zelnorm)",
            "Prucalopride (Resotran)",
            "Eluxadoline (Viberzi)",
            "Rifaximin (Xifaxan)",
            "Osmotic Laxatives (Golytely)",
            "Stimulant Laxatives (Ex-Lax)",
            "Stool Softeners (Surfak)",
            "Lubricant Laxatives (Mineral Oil)",
            "Prokinetics (Motegrity)",
            "Guanylate cyclase-C agonists (Linzess)",
            "Chloride channel activators (Amitiza)",
            "5-HT4 agonists (Prucalopride)",
            "Bile acid transporter inhibitors (Trulance)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Docusate Sodium (Colace)",
                "Magnesium Hydroxide (Milk of Magnesia)",
                "Bisacodyl (Dulcolax)",
                "Senna (Senokot)",
                "Methylcellulose (Citrucel)",
                "Polyethylene Glycol 3350 (MiraLAX)",
                "Lactulose (Kristalose)",
                "Lubiprostone (Amitiza)",
                "Psyllium (Metamucil)",
                "Linaclotide (Linzess)",
                "Plecanatide (Trulance)",
                "Tegaserod (Zelnorm)",
                "Prucalopride (Resotran)",
                "Eluxadoline (Viberzi)",
                "Rifaximin (Xifaxan)",
                "Osmotic Laxatives (Golytely)",
                "Stimulant Laxatives (Ex-Lax)",
                "Stool Softeners (Surfak)",
                "Lubricant Laxatives (Mineral Oil)",
                "Prokinetics (Motegrity)",
                "Guanylate cyclase-C agonists (Linzess)",
                "Chloride channel activators (Amitiza)",
                "5-HT4 agonists (Prucalopride)",
                "Bile acid transporter inhibitors (Trulance)"
            ],
            "mismatches": [],
            "true_referents": [
                "5-HT4 agonists (Prucalopride)",
                "Bile acid transporter inhibitors (Trulance)",
                "Bisacodyl (Dulcolax)",
                "Chloride channel activators (Amitiza)",
                "Docusate Sodium (Colace)",
                "Eluxadoline (Viberzi)",
                "Guanylate cyclase-C agonists (Linzess)",
                "Lactulose (Kristalose)",
                "Linaclotide (Linzess)",
                "Lubiprostone (Amitiza)",
                "Lubricant Laxatives (Mineral Oil)",
                "Magnesium Hydroxide (Milk of Magnesia)",
                "Methylcellulose (Citrucel)",
                "Osmotic Laxatives (Golytely)",
                "Plecanatide (Trulance)",
                "Polyethylene Glycol 3350 (MiraLAX)",
                "Prokinetics (Motegrity)",
                "Prucalopride (Resotran)",
                "Psyllium (Metamucil)",
                "Rifaximin (Xifaxan)",
                "Senna (Senokot)",
                "Stimulant Laxatives (Ex-Lax)",
                "Stool Softeners (Surfak)",
                "Tegaserod (Zelnorm)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Laxative (LAX)",
            "Stool softener (SS)",
            "Fiber supplement (FS)",
            "Osmotic laxative (OL)",
            "Emollient laxative (EL)",
            "Bulk-forming laxative (BL)",
            "Stimulant laxative (SL)",
            "Mineral oil (MO)",
            "Polyethylene glycol (PEG)",
            "Docusate sodium (DS)",
            "Psyllium (PS)",
            "Methylcellulose (MC)",
            "Senna (SE)",
            "Bisacodyl (BI)",
            "Castor oil (CO)",
            "Magnesium hydroxide (MH)",
            "Lactulose (LA)",
            "Glycerin suppository (GS)",
            "Sorbitol (SO)",
            "Dioctyl sodium sulfosuccinate (DSS)",
            "Lubiprostone (LU)",
            "Linaclotide (LI)",
            "Plecanatide (PL)",
            "Tenapanor (TE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Laxative (LAX)",
                "Stool softener (SS)",
                "Fiber supplement (FS)",
                "Osmotic laxative (OL)",
                "Emollient laxative (EL)",
                "Bulk-forming laxative (BL)",
                "Stimulant laxative (SL)",
                "Mineral oil (MO)",
                "Polyethylene glycol (PEG)",
                "Docusate sodium (DS)",
                "Psyllium (PS)",
                "Methylcellulose (MC)",
                "Senna (SE)",
                "Bisacodyl (BI)",
                "Castor oil (CO)",
                "Magnesium hydroxide (MH)",
                "Lactulose (LA)",
                "Glycerin suppository (GS)",
                "Sorbitol (SO)",
                "Dioctyl sodium sulfosuccinate (DSS)",
                "Lubiprostone (LU)",
                "Linaclotide (LI)",
                "Plecanatide (PL)",
                "Tenapanor (TE)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bisacodyl (BI)",
                "Bulk-forming laxative (BL)",
                "Castor oil (CO)",
                "Dioctyl sodium sulfosuccinate (DSS)",
                "Docusate sodium (DS)",
                "Emollient laxative (EL)",
                "Fiber supplement (FS)",
                "Glycerin suppository (GS)",
                "Lactulose (LA)",
                "Laxative (LAX)",
                "Linaclotide (LI)",
                "Lubiprostone (LU)",
                "Magnesium hydroxide (MH)",
                "Methylcellulose (MC)",
                "Mineral oil (MO)",
                "Osmotic laxative (OL)",
                "Plecanatide (PL)",
                "Polyethylene glycol (PEG)",
                "Psyllium (PS)",
                "Senna (SE)",
                "Sorbitol (SO)",
                "Stimulant laxative (SL)",
                "Stool softener (SS)",
                "Tenapanor (TE)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Docusate sodium (Colace)",
            "Docusate calcium (Surfak)",
            "Psyllium husk (Metamucil)",
            "Methylcellulose (Citrucel)",
            "Lactulose (Chronulac)",
            "Bisacodyl (Dulcolax)",
            "Senna (Senokot)",
            "Magnesium hydroxide (Milk of Magnesia)",
            "Magnesium citrate",
            "Polyethylene glycol 3350 (Miralax)",
            "Lubiprostone (Amitiza)",
            "Linaclotide (Linzess)",
            "Prucalopride (Resolor)",
            "Sodium picosulfate",
            "Castor oil",
            "Cascara sagrada",
            "Calcium polycarbophil (FiberCon)",
            "Sorbitol",
            "Polycarbophil",
            "Sodium phosphate",
            "Glycerin suppositories",
            "Sennosides",
            "Hydrolyzed lignin",
            "Isosmotic polyethylene glycol-electrolyte solution"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Docusate sodium (Colace)",
                "Docusate calcium (Surfak)",
                "Psyllium husk (Metamucil)",
                "Methylcellulose (Citrucel)",
                "Lactulose (Chronulac)",
                "Bisacodyl (Dulcolax)",
                "Senna (Senokot)",
                "Magnesium hydroxide (Milk of Magnesia)",
                "Magnesium citrate",
                "Polyethylene glycol 3350 (Miralax)",
                "Lubiprostone (Amitiza)",
                "Linaclotide (Linzess)",
                "Prucalopride (Resolor)",
                "Sodium picosulfate",
                "Castor oil",
                "Cascara sagrada",
                "Calcium polycarbophil (FiberCon)",
                "Sorbitol",
                "Polycarbophil",
                "Sodium phosphate",
                "Glycerin suppositories",
                "Sennosides",
                "Hydrolyzed lignin",
                "Isosmotic polyethylene glycol-electrolyte solution"
            ],
            "mismatches": [],
            "true_referents": [
                "Bisacodyl (Dulcolax)",
                "Calcium polycarbophil (FiberCon)",
                "Cascara sagrada",
                "Castor oil",
                "Docusate calcium (Surfak)",
                "Docusate sodium (Colace)",
                "Glycerin suppositories",
                "Hydrolyzed lignin",
                "Isosmotic polyethylene glycol-electrolyte solution",
                "Lactulose (Chronulac)",
                "Linaclotide (Linzess)",
                "Lubiprostone (Amitiza)",
                "Magnesium citrate",
                "Magnesium hydroxide (Milk of Magnesia)",
                "Methylcellulose (Citrucel)",
                "Polycarbophil",
                "Polyethylene glycol 3350 (Miralax)",
                "Prucalopride (Resolor)",
                "Psyllium husk (Metamucil)",
                "Senna (Senokot)",
                "Sennosides",
                "Sodium phosphate",
                "Sodium picosulfate",
                "Sorbitol"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Docusate sodium (DSS)",
            "Methylcellulose",
            "Psyllium husk",
            "Lactulose",
            "Polyethylene glycol (PEG)",
            "Magnesium hydroxide",
            "Bisacodyl",
            "Senna",
            "Castor oil",
            "Mineral oil",
            "Sorbitol",
            "Glycerin suppositories",
            "Lubiprostone",
            "Linaclotide",
            "Plecanatide",
            "Prucalopride",
            "Tegaserod",
            "Sodium picosulfate",
            "Macrogol",
            "Ispaghula husk",
            "Sterculia",
            "Frangula bark",
            "Cascara sagrada",
            "Aloe vera latex"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Docusate sodium (DSS)",
                "Methylcellulose",
                "Psyllium husk",
                "Lactulose",
                "Polyethylene glycol (PEG)",
                "Magnesium hydroxide",
                "Bisacodyl",
                "Senna",
                "Castor oil",
                "Mineral oil",
                "Sorbitol",
                "Glycerin suppositories",
                "Lubiprostone",
                "Linaclotide",
                "Plecanatide",
                "Prucalopride",
                "Tegaserod",
                "Sodium picosulfate",
                "Macrogol",
                "Ispaghula husk",
                "Sterculia",
                "Frangula bark",
                "Cascara sagrada",
                "Aloe vera latex"
            ],
            "mismatches": [],
            "true_referents": [
                "Aloe vera latex",
                "Bisacodyl",
                "Cascara sagrada",
                "Castor oil",
                "Docusate sodium (DSS)",
                "Frangula bark",
                "Glycerin suppositories",
                "Ispaghula husk",
                "Lactulose",
                "Linaclotide",
                "Lubiprostone",
                "Macrogol",
                "Magnesium hydroxide",
                "Methylcellulose",
                "Mineral oil",
                "Plecanatide",
                "Polyethylene glycol (PEG)",
                "Prucalopride",
                "Psyllium husk",
                "Senna",
                "Sodium picosulfate",
                "Sorbitol",
                "Sterculia",
                "Tegaserod"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Psyllium husk",
            "Methylcellulose",
            "Docusate sodium",
            "Magnesium hydroxide",
            "Magnesium citrate",
            "Sorbitol",
            "Lactulose",
            "Polyethylene glycol (PEG)",
            "Senna",
            "Bisacodyl",
            "Castor oil",
            "Mineral oil",
            "Glycerin suppositories",
            "Flaxseed",
            "Chia seeds",
            "Prunes",
            "Figs",
            "Apples",
            "Pears",
            "Kiwifruit",
            "Aloe vera juice",
            "Triphala",
            "Slippery elm bark",
            "Marshmallow root"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Psyllium husk",
                "Methylcellulose",
                "Docusate sodium",
                "Magnesium hydroxide",
                "Magnesium citrate",
                "Sorbitol",
                "Lactulose",
                "Polyethylene glycol (PEG)",
                "Senna",
                "Bisacodyl",
                "Castor oil",
                "Mineral oil",
                "Glycerin suppositories",
                "Flaxseed",
                "Chia seeds",
                "Prunes",
                "Figs",
                "Apples",
                "Pears",
                "Kiwifruit",
                "Aloe vera juice",
                "Triphala",
                "Slippery elm bark",
                "Marshmallow root"
            ],
            "mismatches": [],
            "true_referents": [
                "Aloe vera juice",
                "Apples",
                "Bisacodyl",
                "Castor oil",
                "Chia seeds",
                "Docusate sodium",
                "Figs",
                "Flaxseed",
                "Glycerin suppositories",
                "Kiwifruit",
                "Lactulose",
                "Magnesium citrate",
                "Magnesium hydroxide",
                "Marshmallow root",
                "Methylcellulose",
                "Mineral oil",
                "Pears",
                "Polyethylene glycol (PEG)",
                "Prunes",
                "Psyllium husk",
                "Senna",
                "Slippery elm bark",
                "Sorbitol",
                "Triphala"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Lactulose (C12H22O11)",
            "Polyethylene Glycol (PEG)",
            "Senna (Cassia senna)",
            "Bisacodyl",
            "Docusate Sodium",
            "Magnesium Hydroxide",
            "Magnesium Citrate",
            "Sodium Picosulfate",
            "Lubiprostone",
            "Linaclotide",
            "Plecanatide",
            "Prucalopride",
            "Methylnaltrexone Bromide",
            "Alvimopan",
            "Naloxegol",
            "Psyllium (Plantago ovata)",
            "Cascara Sagrada (Rhamnus purshiana)",
            "Castor Oil (Ricinus communis)",
            "Milk of Magnesia (Magnesium Hydroxide)",
            "Dulcolax (Bisacodyl)",
            "Senokot (Senna)",
            "Miralax (Polyethylene Glycol)",
            "Amitiza (Lubiprostone)",
            "Linzess (Linaclotide)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Lactulose (C12H22O11)",
                "Polyethylene Glycol (PEG)",
                "Senna (Cassia senna)",
                "Bisacodyl",
                "Docusate Sodium",
                "Magnesium Hydroxide",
                "Magnesium Citrate",
                "Sodium Picosulfate",
                "Lubiprostone",
                "Linaclotide",
                "Plecanatide",
                "Prucalopride",
                "Methylnaltrexone Bromide",
                "Alvimopan",
                "Naloxegol",
                "Psyllium (Plantago ovata)",
                "Cascara Sagrada (Rhamnus purshiana)",
                "Castor Oil (Ricinus communis)",
                "Milk of Magnesia (Magnesium Hydroxide)",
                "Dulcolax (Bisacodyl)",
                "Senokot (Senna)",
                "Miralax (Polyethylene Glycol)",
                "Amitiza (Lubiprostone)",
                "Linzess (Linaclotide)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alvimopan",
                "Amitiza (Lubiprostone)",
                "Bisacodyl",
                "Cascara Sagrada (Rhamnus purshiana)",
                "Castor Oil (Ricinus communis)",
                "Docusate Sodium",
                "Dulcolax (Bisacodyl)",
                "Lactulose (C12H22O11)",
                "Linaclotide",
                "Linzess (Linaclotide)",
                "Lubiprostone",
                "Magnesium Citrate",
                "Magnesium Hydroxide",
                "Methylnaltrexone Bromide",
                "Milk of Magnesia (Magnesium Hydroxide)",
                "Miralax (Polyethylene Glycol)",
                "Naloxegol",
                "Plecanatide",
                "Polyethylene Glycol (PEG)",
                "Prucalopride",
                "Psyllium (Plantago ovata)",
                "Senna (Cassia senna)",
                "Senokot (Senna)",
                "Sodium Picosulfate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Docusate Sodium (DOC)",
            "Magnesium Hydroxide (MAG)",
            "Polyethylene Glycol (PEG)",
            "Lactulose (LAC)",
            "Senna (SEN)",
            "Bisacodyl (BIS)",
            "Castor Oil (CAS)",
            "Mineral Oil (MIN)",
            "Glycerin Suppository (GLY)",
            "Methylcellulose (MET)",
            "Psyllium (PSY)",
            "Lubiprostone (LUB)",
            "Linaclotide (LIN)",
            "Plecanatide (PLE)",
            "Prucalopride (PRU)",
            "Tegaserod (TEG)",
            "Loperamide (LOP)",
            "Simethicone (SIM)",
            "Polyvinylpyrrolidone (PVP)",
            "Danthron (DAN)",
            "Sodium Picosulfate (SOD)",
            "Cascara Sagrada (CAS)",
            "Aloe Vera (ALO)",
            "Rhubarb (RHU)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Docusate Sodium (DOC)",
                "Magnesium Hydroxide (MAG)",
                "Polyethylene Glycol (PEG)",
                "Lactulose (LAC)",
                "Senna (SEN)",
                "Bisacodyl (BIS)",
                "Castor Oil (CAS)",
                "Mineral Oil (MIN)",
                "Glycerin Suppository (GLY)",
                "Methylcellulose (MET)",
                "Psyllium (PSY)",
                "Lubiprostone (LUB)",
                "Linaclotide (LIN)",
                "Plecanatide (PLE)",
                "Prucalopride (PRU)",
                "Tegaserod (TEG)",
                "Loperamide (LOP)",
                "Simethicone (SIM)",
                "Polyvinylpyrrolidone (PVP)",
                "Danthron (DAN)",
                "Sodium Picosulfate (SOD)",
                "Cascara Sagrada (CAS)",
                "Aloe Vera (ALO)",
                "Rhubarb (RHU)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aloe Vera (ALO)",
                "Bisacodyl (BIS)",
                "Cascara Sagrada (CAS)",
                "Castor Oil (CAS)",
                "Danthron (DAN)",
                "Docusate Sodium (DOC)",
                "Glycerin Suppository (GLY)",
                "Lactulose (LAC)",
                "Linaclotide (LIN)",
                "Loperamide (LOP)",
                "Lubiprostone (LUB)",
                "Magnesium Hydroxide (MAG)",
                "Methylcellulose (MET)",
                "Mineral Oil (MIN)",
                "Plecanatide (PLE)",
                "Polyethylene Glycol (PEG)",
                "Polyvinylpyrrolidone (PVP)",
                "Prucalopride (PRU)",
                "Psyllium (PSY)",
                "Rhubarb (RHU)",
                "Senna (SEN)",
                "Simethicone (SIM)",
                "Sodium Picosulfate (SOD)",
                "Tegaserod (TEG)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Laxative (Senna)",
            "Senna (Senna alexandrina)",
            "Cascara (Rhamnus purshiana)",
            "Dandelion root (Taraxacum officinale)",
            "Psyllium (Plantago ovata)",
            "Glycerin",
            "Docusate",
            "Polyethylene glycol (PEG)",
            "Bisacodyl (Dulcolax)",
            "Sodium picosulfate (Picolax)",
            "Lactulose (Chlor-Lactulose)",
            "Magnesium citrate",
            "MPEG (Methoxy polyethylene glycol)",
            "Polyethylene oxide (PEO)",
            "Bisacodyl (Bisalax)",
            "Sodium docusate (Colace)",
            "Glycerin suppository",
            "Colace (Docusate)",
            "MiraLax (Polyethylene glycol)",
            "Citrulline",
            "Lactulose (Lactulose syrup)",
            "Colyte (Polyethylene glycol)",
            "Moviprep (Polyethylene glycol)",
            "Golytely (Polyethylene glycol)",
            "PEG-3350 (Polyethylene glycol)",
            "Colofac (Docusate)",
            "Gastrografin (Diatrizoate)",
            "Visicol (Sodium docusate)",
            "Dulcolax (Bisacodyl)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Laxative (Senna)",
                "Senna (Senna alexandrina)",
                "Cascara (Rhamnus purshiana)",
                "Dandelion root (Taraxacum officinale)",
                "Psyllium (Plantago ovata)",
                "Glycerin",
                "Docusate",
                "Polyethylene glycol (PEG)",
                "Bisacodyl (Dulcolax)",
                "Sodium picosulfate (Picolax)",
                "Lactulose (Chlor-Lactulose)",
                "Magnesium citrate",
                "MPEG (Methoxy polyethylene glycol)",
                "Polyethylene oxide (PEO)",
                "Bisacodyl (Bisalax)",
                "Sodium docusate (Colace)",
                "Glycerin suppository",
                "Colace (Docusate)",
                "MiraLax (Polyethylene glycol)",
                "Citrulline",
                "Lactulose (Lactulose syrup)",
                "Colyte (Polyethylene glycol)",
                "Moviprep (Polyethylene glycol)",
                "Golytely (Polyethylene glycol)"
            ],
            "mismatches": [
                "PEG-3350 (Polyethylene glycol)",
                "Colofac (Docusate)",
                "Gastrografin (Diatrizoate)",
                "Visicol (Sodium docusate)",
                "Dulcolax (Bisacodyl)"
            ],
            "true_referents": [
                "Bisacodyl (Bisalax)",
                "Bisacodyl (Dulcolax)",
                "Cascara (Rhamnus purshiana)",
                "Citrulline",
                "Colace (Docusate)",
                "Colyte (Polyethylene glycol)",
                "Dandelion root (Taraxacum officinale)",
                "Docusate",
                "Glycerin",
                "Glycerin suppository",
                "Golytely (Polyethylene glycol)",
                "Lactulose (Chlor-Lactulose)",
                "Lactulose (Lactulose syrup)",
                "Laxative (Senna)",
                "MPEG (Methoxy polyethylene glycol)",
                "Magnesium citrate",
                "MiraLax (Polyethylene glycol)",
                "Moviprep (Polyethylene glycol)",
                "Polyethylene glycol (PEG)",
                "Polyethylene oxide (PEO)",
                "Psyllium (Plantago ovata)",
                "Senna (Senna alexandrina)",
                "Sodium docusate (Colace)",
                "Sodium picosulfate (Picolax)"
            ],
            "TP": 24,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Docusate (Colace)",
            "Dioctyl sodium sulfosuccinate (DSS)",
            "Glycerin",
            "Lactulose",
            "Polyethylene glycol (PEG)",
            "Sorbitol",
            "Magnesium hydroxide",
            "Magnesium oxide",
            "Magnesium citrate",
            "Bisacodyl (Dulcolax)",
            "Senna",
            "Castor oil",
            "Mineral oil",
            "Phenolphthalein",
            "Psyllium",
            "Methylcellulose",
            "Polycarbophil",
            "Lubiprostone (Amitiza)",
            "Linaclotide (Linzess)",
            "Plecanatide (Trulance)",
            "Prucalopride (Resotran)",
            "Naloxegol (Movantik)",
            "Naproxen (Aleve)",
            "Oxyphenisatin (Oxypyr)",
            "Bisoxatrol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Docusate (Colace)",
                "Dioctyl sodium sulfosuccinate (DSS)",
                "Glycerin",
                "Lactulose",
                "Polyethylene glycol (PEG)",
                "Sorbitol",
                "Magnesium hydroxide",
                "Magnesium oxide",
                "Magnesium citrate",
                "Bisacodyl (Dulcolax)",
                "Senna",
                "Castor oil",
                "Mineral oil",
                "Phenolphthalein",
                "Psyllium",
                "Methylcellulose",
                "Polycarbophil",
                "Lubiprostone (Amitiza)",
                "Linaclotide (Linzess)",
                "Plecanatide (Trulance)",
                "Prucalopride (Resotran)",
                "Naloxegol (Movantik)",
                "Naproxen (Aleve)",
                "Oxyphenisatin (Oxypyr)"
            ],
            "mismatches": [
                "Bisoxatrol"
            ],
            "true_referents": [
                "Bisacodyl (Dulcolax)",
                "Castor oil",
                "Dioctyl sodium sulfosuccinate (DSS)",
                "Docusate (Colace)",
                "Glycerin",
                "Lactulose",
                "Linaclotide (Linzess)",
                "Lubiprostone (Amitiza)",
                "Magnesium citrate",
                "Magnesium hydroxide",
                "Magnesium oxide",
                "Methylcellulose",
                "Mineral oil",
                "Naloxegol (Movantik)",
                "Naproxen (Aleve)",
                "Oxyphenisatin (Oxypyr)",
                "Phenolphthalein",
                "Plecanatide (Trulance)",
                "Polycarbophil",
                "Polyethylene glycol (PEG)",
                "Prucalopride (Resotran)",
                "Psyllium",
                "Senna",
                "Sorbitol"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "laxative",
        "domain": "medicine",
        "response": [
            "Laxative",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)",
            "Laxative (drug)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Laxative",
                "Laxative (drug)"
            ],
            "mismatches": [],
            "true_referents": [
                "Laxative",
                "Laxative (drug)"
            ],
            "TP": 2,
            "FP": 0,
            "FN": 0
        }
    }
]