[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Adenosine 5'-triphosphate (ATP)",
            "Digitoxin",
            "Glycyrrhizin",
            "Isoflavone",
            "Lactose",
            "Maltose",
            "Raffinose",
            "Resveratrol",
            "Salicin",
            "Sennoside",
            "Stevioside",
            "Theobromine",
            "Vincristine",
            "Vinblastine",
            "Amygdalin",
            "Bitter apricot kernel glycoside",
            "Cyanogenic glycoside",
            "Erythritol",
            "Fructooligosaccharide",
            "Gossypol",
            "Ibogaine",
            "Kukoamine",
            "Lycium barbarum polysaccharide",
            "Phlorizin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Digitoxin",
                "Amygdalin",
                "Sennoside",
                "Phlorizin"
            ],
            "mismatches": [
                "Adenosine 5'-triphosphate (ATP)",
                "Glycyrrhizin",
                "Isoflavone",
                "Lactose",
                "Maltose",
                "Raffinose",
                "Resveratrol",
                "Salicin",
                "Stevioside",
                "Theobromine",
                "Vincristine",
                "Vinblastine",
                "Bitter apricot kernel glycoside",
                "Cyanogenic glycoside",
                "Erythritol",
                "Fructooligosaccharide",
                "Gossypol",
                "Ibogaine",
                "Kukoamine",
                "Lycium barbarum polysaccharide"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Adenosine Diphosphate Glucose",
                "Adenosine Diphosphate Ribose",
                "Amygdalin",
                "Anthracyclines",
                "Arbutin",
                "Canagliflozin",
                "Cardiac Glycosides",
                "Carubicin",
                "Chloralose",
                "Daunorubicin",
                "Deslanoside",
                "Digitalis Glycosides",
                "Digitonin",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Etoposide",
                "Galactosides",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Holothurin",
                "Iridoid Glucosides",
                "Iridoid Glycosides",
                "Isopropyl Thiogalactoside",
                "Menogaril",
                "Ouabain",
                "Phlorhizin",
                "Saponins",
                "Sennosides",
                "Solanine",
                "Streptomycin",
                "Streptozocin",
                "Uridine Diphosphate Glucose"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Digoxin (Digitalis glycoside)",
            "Digitoxin (Digitalis glycoside)",
            "Strophanthin (Strophanthidin glycoside)",
            "Conessine (Conessine glycoside)",
            "Sennoside A",
            "Sennoside B",
            "Aescin (Hederagenin trilactone)",
            "Amygdalin",
            "Vincristine (Vinca alkaloid glycoside)",
            "Vinblastine (Vinca alkaloid glycoside)",
            "Cyanidin 3-glucoside",
            "Quercetin 3-rutinoside (Rutin)",
            "Glycyrrhizin (Glycyrrhiza glabra glycoside)",
            "Salicin (Salix glycoside)",
            "Catechin 3-gallate",
            "Cyanidin 3-arabinoside",
            "Taxol (Paclitaxel, Taxus brevifolia glycoside)",
            "Ginsenoside Rb1",
            "Ginsenoside Rg1",
            "Ginsenoside Re",
            "Cephalosporin C (Cephalosporin glycoside)",
            "Vancomycin (Vancomycin glycoside)",
            "Aralen (Chloroquine, 4-aminoquinoline glycoside)",
            "Prunasin (Amygdalin precursor)",
            "Cyanogenic glycoside"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Digoxin",
                "Digitoxin",
                "Amygdalin",
                "Sennosides",
                "Ginsenosides"
            ],
            "mismatches": [
                "Strophanthin (Strophanthidin glycoside)",
                "Conessine (Conessine glycoside)",
                "Aescin (Hederagenin trilactone)",
                "Vincristine (Vinca alkaloid glycoside)",
                "Vinblastine (Vinca alkaloid glycoside)",
                "Cyanidin 3-glucoside",
                "Quercetin 3-rutinoside (Rutin)",
                "Glycyrrhizin (Glycyrrhiza glabra glycoside)",
                "Salicin (Salix glycoside)",
                "Catechin 3-gallate",
                "Cyanidin 3-arabinoside",
                "Taxol (Paclitaxel, Taxus brevifolia glycoside)",
                "Cephalosporin C (Cephalosporin glycoside)",
                "Vancomycin (Vancomycin glycoside)",
                "Aralen (Chloroquine, 4-aminoquinoline glycoside)",
                "Prunasin (Amygdalin precursor)",
                "Cyanogenic glycoside"
            ],
            "true_referents": [
                "Aminoglycosides",
                "Amygdalin",
                "Anthocyanins",
                "Anthracyclines",
                "Arbutin",
                "Atractyloside",
                "Chromomycin A3",
                "Daunorubicin",
                "Deslanoside",
                "Digitalis Glycosides",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Esculin",
                "Etoposide",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Kanamycin",
                "Mannosides",
                "Phlorhizin",
                "Pradimicins and Benanomicins",
                "Proscillaridin",
                "Saponins",
                "Sennosides",
                "Solanine",
                "Streptomycin",
                "Strophanthins",
                "Thioglycosides",
                "Tobramycin"
            ],
            "TP": 5,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Aloe Vera (Aloe barbadensis)",
            "Amygdalin (Prunasin)",
            "Araloside A",
            "Arbutin",
            "Asperuloside",
            "Cyanidin-3-O-glucoside",
            "Digitalis glycosides",
            "Diosgenin",
            "Emodin-8-O-glucoside",
            "Escin",
            "Flavonoid glycosides",
            "Gentiobiose",
            "Glycyrrhizin",
            "Hesperidin",
            "Luteolin-7-O-glucoside",
            "Naringin",
            "Oleuropein",
            "Phloridzin",
            "Quercetin-3-O-glucoside",
            "Rutin",
            "Sennoside",
            "Saponin",
            "Solanine",
            "Stevioside"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin",
                "Arbutin",
                "Digitalis glycosides",
                "Escin",
                "Hesperidin",
                "Solanine"
            ],
            "mismatches": [
                "Aloe Vera (Aloe barbadensis)",
                "Araloside A",
                "Asperuloside",
                "Cyanidin-3-O-glucoside",
                "Diosgenin",
                "Emodin-8-O-glucoside",
                "Flavonoid glycosides",
                "Gentiobiose",
                "Glycyrrhizin",
                "Luteolin-7-O-glucoside",
                "Naringin",
                "Oleuropein",
                "Phloridzin",
                "Quercetin-3-O-glucoside",
                "Rutin",
                "Sennoside",
                "Saponin",
                "Stevioside"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Atractyloside",
                "Cardiac Glycosides",
                "Chloralose",
                "Daunorubicin",
                "Deslanoside",
                "Digitalis Glycosides",
                "Digitonin",
                "Digitoxin",
                "Escin",
                "Esculin",
                "Framycetin",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Holothurin",
                "Iridoid Glucosides",
                "Iridoid Glycosides",
                "Isopropyl Thiogalactoside",
                "Lanatosides",
                "Novobiocin",
                "Phlorhizin",
                "Proscillaridin",
                "Quillaja Saponins",
                "Saponins",
                "Sennosides",
                "Solanine",
                "Tomatine"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Salicin (SAL)",
            "Amigdalin (AMG)",
            "Digoxin (DIG)",
            "Digitoxin (DIGT)",
            "Hesperidin (HES)",
            "Rutin (RUT)",
            "Quercetin-3-O-glucoside (Q3G)",
            "Sinigrin (SIN)",
            "Glucobrassicin (GBR)",
            "Steviol glycoside (STV)",
            "Rebaudioside A (Reb A)",
            "Stevioside (STE)",
            "Aesculin (AES)",
            "Arbutin (ARB)",
            "Proscillaridin A (PSA)",
            "Linamarin (LIN)",
            "Dhurrin (DHU)",
            "Paeoniflorin (PAE)",
            "Chlorogenic acid (CGA)",
            "Isoquercitrin (ISO)",
            "Verbascoside (VER)",
            "Aucubin (AUC)",
            "Saponin glycosides (SAP)",
            "Anthraquinone glycosides (ANQ)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Digoxin (DIG)",
                "Digitoxin (DIGT)",
                "Hesperidin (HES)",
                "Arbutin (ARB)",
                "Proscillaridin A (PSA)"
            ],
            "mismatches": [
                "Salicin (SAL)",
                "Amigdalin (AMG)",
                "Rutin (RUT)",
                "Quercetin-3-O-glucoside (Q3G)",
                "Sinigrin (SIN)",
                "Glucobrassicin (GBR)",
                "Steviol glycoside (STV)",
                "Rebaudioside A (Reb A)",
                "Stevioside (STE)",
                "Aesculin (AES)",
                "Linamarin (LIN)",
                "Dhurrin (DHU)",
                "Paeoniflorin (PAE)",
                "Chlorogenic acid (CGA)",
                "Isoquercitrin (ISO)",
                "Verbascoside (VER)",
                "Aucubin (AUC)",
                "Saponin glycosides (SAP)",
                "Anthraquinone glycosides (ANQ)"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amikacin",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Atractyloside",
                "Cardiac Glycosides",
                "Carubicin",
                "Chloralose",
                "Daunorubicin",
                "Dibekacin",
                "Digitalis Glycosides",
                "Digitonin",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Escin",
                "Esculin",
                "Framycetin",
                "Ginsenosides",
                "Glucosides",
                "Glucosinolates",
                "Hesperidin",
                "Holothurin",
                "Lincomycin",
                "Lincosamides",
                "Medigoxin",
                "Nitrophenylgalactosides",
                "Ouabain",
                "Phlorhizin",
                "Proscillaridin",
                "Quillaja Saponins",
                "Saponins",
                "Sennosides",
                "Sisomicin",
                "Solanine"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin",
            "Digitoxin",
            "Digoxin",
            "Saponin",
            "Stevioside",
            "Salicin",
            "Arbutin",
            "Rutin",
            "Quercitrin",
            "Naringin",
            "Hesperidin",
            "Vanillin glycoside",
            "Genistin",
            "Daidzin",
            "Glycyrrhizin",
            "Linamarin",
            "Taxifolin",
            "Silibinin",
            "Prunasin",
            "Phlorizin",
            "Aloin",
            "Anthraquinone glycosides",
            "Strophanthin",
            "Cyanidin-3-glucoside"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin",
                "Digitoxin",
                "Digoxin",
                "Arbutin",
                "Hesperidin"
            ],
            "mismatches": [
                "Saponin",
                "Stevioside",
                "Salicin",
                "Rutin",
                "Quercitrin",
                "Naringin",
                "Vanillin glycoside",
                "Genistin",
                "Daidzin",
                "Glycyrrhizin",
                "Linamarin",
                "Taxifolin",
                "Silibinin",
                "Prunasin",
                "Phlorizin",
                "Aloin",
                "Anthraquinone glycosides",
                "Strophanthin",
                "Cyanidin-3-glucoside"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Atractyloside",
                "Cardiac Glycosides",
                "Carubicin",
                "Chloralose",
                "Daunorubicin",
                "Digitalis Glycosides",
                "Digitonin",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Escin",
                "Esculin",
                "Framycetin",
                "Glucosides",
                "Hesperidin",
                "Idarubicin",
                "Iridoid Glycosides",
                "Lincomycin",
                "Methylglycosides",
                "Netilmicin",
                "Ouabain",
                "Phlorhizin",
                "Proscillaridin",
                "Quillaja Saponins",
                "Saponins",
                "Sisomicin",
                "Solanine",
                "Streptomycin",
                "Streptothricins",
                "Streptozocin",
                "Strophanthins"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin (C20H27NO11)",
            "Saponin (various types)",
            "Rutin (C27H30O16)",
            "Quercitrin (C21H20O11)",
            "Starch (C6H10O5)n",
            "Cellulose (C6H10O5)n",
            "Lactose (C12H22O11)",
            "Maltose (C12H22O11)",
            "Sucrose (C12H22O11)",
            "Arbutin (C12H16O7)",
            "Naringin (C27H32O14)",
            "Salicin (C13H18O7)",
            "Vanillin glycoside (C9H10O4)",
            "Cyanogenic glycosides (various types)",
            "Oligosaccharides (various types)",
            "Glycyrrhizin (C42H62O16)",
            "Hesperidin (C28H34O15)",
            "Genistin (C21H20O10)",
            "Diosgenin glycosides (various types)",
            "Chicoric acid (C22H18O12)",
            "Bitter almond glycoside (C20H27NO11)",
            "Curcumin glycosides (various types)",
            "Isoflavonoid glycosides (various types)",
            "Flavonoid glycosides (various types)",
            "Triterpenoid glycosides (various types)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin",
                "Arbutin",
                "Hesperidin"
            ],
            "mismatches": [
                "Saponin (various types)",
                "Rutin (C27H30O16)",
                "Quercitrin (C21H20O11)",
                "Starch (C6H10O5)n",
                "Cellulose (C6H10O5)n",
                "Lactose (C12H22O11)",
                "Maltose (C12H22O11)",
                "Sucrose (C12H22O11)",
                "Naringin (C27H32O14)",
                "Salicin (C13H18O7)",
                "Vanillin glycoside (C9H10O4)",
                "Cyanogenic glycosides (various types)",
                "Oligosaccharides (various types)",
                "Glycyrrhizin (C42H62O16)",
                "Genistin (C21H20O10)",
                "Diosgenin glycosides (various types)",
                "Chicoric acid (C22H18O12)",
                "Bitter almond glycoside (C20H27NO11)",
                "Curcumin glycosides (various types)",
                "Isoflavonoid glycosides (various types)",
                "Flavonoid glycosides (various types)",
                "Triterpenoid glycosides (various types)"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Chloralose",
                "Daunorubicin",
                "Digitalis Glycosides",
                "Digitonin",
                "Doxorubicin",
                "Framycetin",
                "Galactosides",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Iridoid Glycosides",
                "Mannosides",
                "Methylglucosides",
                "Methylglycosides",
                "Phlorhizin",
                "Quillaja Saponins",
                "Saponins",
                "Sisomicin",
                "Solanine",
                "Teichoic Acids",
                "Thioglycosides",
                "Uridine Diphosphate Glucose",
                "Uridine Diphosphate N-Acetylglucosamine",
                "Uridine Diphosphate Xylose"
            ],
            "TP": 3,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin (AMY)",
            "Arbutin (ARB)",
            "Aesculin (AESC)",
            "Salicin (SAL)",
            "Digitoxin (DIG)",
            "Digoxin (DOX)",
            "Oleandrin (OLE)",
            "Strophanthin (STRO)",
            "Convolvulin (CON)",
            "Saponin (SAP)",
            "Glycyrrhizin (GLY)",
            "Prunasin (PRU)",
            "Naringin (NAR)",
            "Hesperidin (HESP)",
            "Rutin (RUT)",
            "Quercitrin (QUR)",
            "Apigenin-7-glucoside (API)",
            "Gentiobiose (GEN)",
            "Lactose (LAC)",
            "Maltose (MAL)",
            "Cellobiose (CEL)",
            "Sucrose (SUC)",
            "Isomaltose (ISO)",
            "Trehalose (TRE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin (AMY)",
                "Arbutin (ARB)",
                "Digitoxin (DIG)",
                "Digoxin (DOX)",
                "Hesperidin (HESP)"
            ],
            "mismatches": [
                "Aesculin (AESC)",
                "Salicin (SAL)",
                "Oleandrin (OLE)",
                "Strophanthin (STRO)",
                "Convolvulin (CON)",
                "Saponin (SAP)",
                "Glycyrrhizin (GLY)",
                "Prunasin (PRU)",
                "Naringin (NAR)",
                "Rutin (RUT)",
                "Quercitrin (QUR)",
                "Apigenin-7-glucoside (API)",
                "Gentiobiose (GEN)",
                "Lactose (LAC)",
                "Maltose (MAL)",
                "Cellobiose (CEL)",
                "Sucrose (SUC)",
                "Isomaltose (ISO)",
                "Trehalose (TRE)"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Carubicin",
                "Chloralose",
                "Daunorubicin",
                "Digitonin",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Escin",
                "Esculin",
                "Framycetin",
                "Galactosides",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Isopropyl Thiogalactoside",
                "Novobiocin",
                "Nucleotides",
                "Ouabain",
                "Phlorhizin",
                "Proscillaridin",
                "Quillaja Saponins",
                "Saponins",
                "Sennosides",
                "Solanine",
                "Streptomycin",
                "Streptothricins",
                "Strophanthins",
                "Uridine Diphosphate Galactose",
                "Uridine Diphosphate Glucose"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin",
            "Salicin",
            "Stevioside",
            "Digitoxin",
            "Erythromycin",
            "Gentamicin",
            "Streptomycin",
            "Doxorubicin",
            "Vancomycin",
            "Cephalosporin",
            "Penicillin",
            "Tetracycline",
            "Neomycin",
            "Erythromycin A",
            "Erythromycin B",
            "Erythromycin C",
            "Erythromycin D",
            "Erythromycin E",
            "Erythromycin F",
            "Erythromycin G",
            "Erythromycin H",
            "Erythromycin I",
            "Erythromycin J",
            "Erythromycin K"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin",
                "Digitoxin",
                "Gentamicin",
                "Streptomycin",
                "Doxorubicin",
                "Neomycin"
            ],
            "mismatches": [
                "Salicin",
                "Stevioside",
                "Erythromycin",
                "Vancomycin",
                "Cephalosporin",
                "Penicillin",
                "Tetracycline",
                "Erythromycin A",
                "Erythromycin B",
                "Erythromycin C",
                "Erythromycin D",
                "Erythromycin E",
                "Erythromycin F",
                "Erythromycin G",
                "Erythromycin H",
                "Erythromycin I",
                "Erythromycin J",
                "Erythromycin K"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amikacin",
                "Aminoglycosides",
                "Amygdalin",
                "Anthracyclines",
                "Carubicin",
                "Chloralose",
                "Clindamycin",
                "Daunorubicin",
                "Digitoxin",
                "Digoxin",
                "Dihydrostreptomycin Sulfate",
                "Doxorubicin",
                "Framycetin",
                "Gentamicins",
                "Hygromycin B",
                "Kanamycin",
                "Nebramycin",
                "Neomycin",
                "Ouabain",
                "Plicamycin",
                "Saponins",
                "Solanine",
                "Spectinomycin",
                "Streptomycin",
                "Tobramycin"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Digoxin (DIG)",
            "Amygdalin",
            "Salicin",
            "Rutin",
            "Quercitrin",
            "Genistin",
            "Glycyrrhizin",
            "Astragalin",
            "Amarogentin",
            "Arbutin",
            "Hesperidin",
            "Naringin",
            "Oleuropein",
            "Sennoside",
            "Stevioside",
            "Isoflavone Glycosides",
            "Anthocyanin Glycosides",
            "Saponin Glycosides",
            "Ouabain",
            "Luteolin Glycosides",
            "Cyanidin",
            "Eschenmoside",
            "Hepegenin Glycosides",
            "Neoastilbin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Digoxin",
                "Amygdalin",
                "Arbutin",
                "Hesperidin",
                "Ouabain"
            ],
            "mismatches": [
                "Salicin",
                "Rutin",
                "Quercitrin",
                "Genistin",
                "Glycyrrhizin",
                "Astragalin",
                "Amarogentin",
                "Naringin",
                "Oleuropein",
                "Sennoside",
                "Stevioside",
                "Isoflavone Glycosides",
                "Anthocyanin Glycosides",
                "Saponin Glycosides",
                "Luteolin Glycosides",
                "Cyanidin",
                "Eschenmoside",
                "Hepegenin Glycosides",
                "Neoastilbin"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Atractyloside",
                "Carubicin",
                "Chloralose",
                "Daunorubicin",
                "Deslanoside",
                "Digitalis Glycosides",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Escin",
                "Framycetin",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Iridoid Glucosides",
                "Iridoid Glycosides",
                "Menogaril",
                "Neomycin",
                "Netilmicin",
                "Novobiocin",
                "Ouabain",
                "Phlorhizin",
                "Quillaja Saponins",
                "Saponins",
                "Sennosides",
                "Sisomicin",
                "Solanine"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin (Laetrile)",
            "Digitoxin",
            "Ouabain",
            "Salicin",
            "Stevioside",
            "Phlorizin",
            "Rutin",
            "Quercetin-3-O-glucoside (Isoquercitrin)",
            "Sinigrin",
            "Arbutin",
            "Convallatoxin",
            "Digoxin",
            "Hesperidin",
            "Naringin",
            "Saponin",
            "Anthocyanin",
            "Oleuropein",
            "Ginsenoside",
            "Glycyrrhizin",
            "Sennidin A",
            "Diosgenin",
            "Hecogenin",
            "Strophanthin-K",
            "Scillaren A"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin",
                "Digitoxin",
                "Ouabain",
                "Arbutin",
                "Digoxin",
                "Hesperidin",
                "Saponins"
            ],
            "mismatches": [
                "Amygdalin (Laetrile)",
                "Salicin",
                "Stevioside",
                "Phlorizin",
                "Rutin",
                "Quercetin-3-O-glucoside (Isoquercitrin)",
                "Sinigrin",
                "Convallatoxin",
                "Naringin",
                "Anthocyanin",
                "Oleuropein",
                "Ginsenoside",
                "Glycyrrhizin",
                "Sennidin A",
                "Diosgenin",
                "Hecogenin",
                "Strophanthin-K",
                "Scillaren A"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Aclarubicin",
                "Amikacin",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Canagliflozin",
                "Carubicin",
                "Chloralose",
                "Daunorubicin",
                "Deslanoside",
                "Digitonin",
                "Digitoxin",
                "Digoxin",
                "Esculin",
                "Framycetin",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Iridoid Glucosides",
                "Kanamycin",
                "Medigoxin",
                "Netilmicin",
                "Ouabain",
                "Phlorhizin",
                "Quillaja Saponins",
                "Saponins",
                "Sennosides",
                "Sisomicin",
                "Solanine",
                "Spectinomycin",
                "Streptomycin",
                "Streptozocin",
                "Strophanthins"
            ],
            "TP": 7,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin",
            "Arbutin",
            "Castanospermine",
            "Convallatoxin",
            "Crocin",
            "Curcumin glucoside",
            "Cycasin",
            "Cyanidin-3-glucoside",
            "Daidzin",
            "Dhurrin",
            "Digitoxin",
            "Fraxin",
            "Gentiopicroside",
            "Glycyrrhizin",
            "Hesperidin",
            "Indican",
            "Linamarin",
            "Naringin",
            "Ouabain",
            "Phloridzin",
            "Quercitrin",
            "Salicin",
            "Sinigrin",
            "Stevioside"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin",
                "Arbutin",
                "Digitoxin",
                "Hesperidin",
                "Ouabain"
            ],
            "mismatches": [
                "Castanospermine",
                "Convallatoxin",
                "Crocin",
                "Curcumin glucoside",
                "Cycasin",
                "Cyanidin-3-glucoside",
                "Daidzin",
                "Dhurrin",
                "Fraxin",
                "Gentiopicroside",
                "Glycyrrhizin",
                "Indican",
                "Linamarin",
                "Naringin",
                "Phloridzin",
                "Quercitrin",
                "Salicin",
                "Sinigrin",
                "Stevioside"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Atractyloside",
                "Canagliflozin",
                "Carubicin",
                "Chloralose",
                "Clindamycin",
                "Daunorubicin",
                "Dibekacin",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Esculin",
                "Etoposide",
                "Framycetin",
                "Glucosides",
                "Hesperidin",
                "Idarubicin",
                "Iridoid Glucosides",
                "Lincomycin",
                "Medigoxin",
                "Netilmicin",
                "Ouabain",
                "Phlorhizin",
                "Proscillaridin",
                "Saponins",
                "Sennosides",
                "Sisomicin",
                "Solanine"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin (D-mandelonitrile-\u03b2-D-gentiobioside)",
            "Arbutin (4-hydroxyphenyl-\u03b2-D-glucopyranoside)",
            "Cardiac Glycosides (e.g., Digoxin, Digitoxin)",
            "Cyanogenic Glycosides (e.g., Linamarin, Lotaustralin)",
            "Diosgenin (25R-spirost-5-en-3\u03b2-ol)",
            "Flavonoid Glycosides (e.g., Rutin, Quercitrin)",
            "Genistein (4',5,7-Trihydroxyisoflavone)",
            "Ginsenosides (e.g., Ginsenoside Rb1, Ginsenoside Rg1)",
            "Glycyrrhizin (20\u03b2-Carboxy-11-deoxy-16\u03b1-glycyrrhetinic acid)",
            "Hesperidin (5,7,3'-Trihydroxy-4'-methoxyflavanone 7-rutinoside)",
            "Indican (Indoxyl-\u03b2-D-glucoside)",
            "Isoflavone Glycosides (e.g., Genistin, Daidzin)",
            "Limonin (1,7-Dioxadispiro[5.1.5.2]hexadecane)",
            "Naringin (4',5,7-Trihydroxyflavanone 7-rhamnoglucoside)",
            "Oleanolic Acid Glycosides (e.g., Oleanolic Acid 3-O-monodesmosides)",
            "Phlorizin (Phloretin 2'-O-glucose)",
            "Saponins (e.g., Avenacosides, Ginsenosides)",
            "Sennosides (e.g., Sennoside A, Sennoside B)",
            "Solanine (\u03b1-Solanine, \u03b1-Tomatine)",
            "Steviol Glycosides (e.g., Stevioside, Rebaudioside A)",
            "Strophanthidin Glycosides (e.g., Ouabain, Cymarin)",
            "Triterpene Glycosides (e.g., Ginsenosides, Quillaic Acid Glycosides)",
            "Xanthone Glycosides (e.g., Mangiferin, Isomangiferin)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin",
                "Arbutin",
                "Cardiac Glycosides",
                "Ginsenosides",
                "Hesperidin",
                "Phlorhizin",
                "Saponins",
                "Sennosides",
                "Solanine"
            ],
            "mismatches": [
                "Cyanogenic Glycosides (e.g., Linamarin, Lotaustralin)",
                "Diosgenin (25R-spirost-5-en-3\u03b2-ol)",
                "Flavonoid Glycosides (e.g., Rutin, Quercitrin)",
                "Genistein (4',5,7-Trihydroxyisoflavone)",
                "Glycyrrhizin (20\u03b2-Carboxy-11-deoxy-16\u03b1-glycyrrhetinic acid)",
                "Indican (Indoxyl-\u03b2-D-glucoside)",
                "Isoflavone Glycosides (e.g., Genistin, Daidzin)",
                "Limonin (1,7-Dioxadispiro[5.1.5.2]hexadecane)",
                "Naringin (4',5,7-Trihydroxyflavanone 7-rhamnoglucoside)",
                "Oleanolic Acid Glycosides (e.g., Oleanolic Acid 3-O-monodesmosides)",
                "Steviol Glycosides (e.g., Stevioside, Rebaudioside A)",
                "Strophanthidin Glycosides (e.g., Ouabain, Cymarin)",
                "Triterpene Glycosides (e.g., Ginsenosides, Quillaic Acid Glycosides)",
                "Xanthone Glycosides (e.g., Mangiferin, Isomangiferin)"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Atractyloside",
                "Cardiac Glycosides",
                "Carubicin",
                "Chloralose",
                "Digitalis Glycosides",
                "Digitoxin",
                "Digoxin",
                "Galactosides",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Holothurin",
                "Iridoid Glucosides",
                "Iridoid Glycosides",
                "Lanatosides",
                "Mannosides",
                "Ouabain",
                "Phlorhizin",
                "Saponins",
                "Sennosides",
                "Solanine",
                "Streptozocin",
                "Thioglycosides"
            ],
            "TP": 9,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Amygdalin (AMG)",
            "Arbutin (ARB)",
            "Cardiac Glycosides (CG)",
            "Convallatoxin (CVT)",
            "Digitoxin (DTX)",
            "Emodin (EMD)",
            "Esculin (ESC)",
            "Flavonoid Glycosides (FG)",
            "Glucoraphanin (GRP)",
            "Glycyrrhizin (GLY)",
            "Hesperidin (HES)",
            "Linamarin (LIN)",
            "Naringin (NAR)",
            "Oleanolic Acid Glycosides (OAG)",
            "Phloridzin (PHZ)",
            "Quercitrin (QRC)",
            "Rutin (RUT)",
            "Saponins (SAP)",
            "Scrophularioside (SCR)",
            "Sennosides (SEN)",
            "Solanine (SOL)",
            "Stevioside (STV)",
            "Theaflavins (TFL)",
            "Vincristine (VCR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Amygdalin (AMG)",
                "Arbutin (ARB)",
                "Cardiac Glycosides (CG)",
                "Digitoxin (DTX)",
                "Esculin (ESC)",
                "Hesperidin (HES)",
                "Saponins (SAP)",
                "Sennosides (SEN)",
                "Solanine (SOL)"
            ],
            "mismatches": [
                "Convallatoxin (CVT)",
                "Emodin (EMD)",
                "Flavonoid Glycosides (FG)",
                "Glucoraphanin (GRP)",
                "Glycyrrhizin (GLY)",
                "Linamarin (LIN)",
                "Naringin (NAR)",
                "Oleanolic Acid Glycosides (OAG)",
                "Phloridzin (PHZ)",
                "Quercitrin (QRC)",
                "Rutin (RUT)",
                "Scrophularioside (SCR)",
                "Stevioside (STV)",
                "Theaflavins (TFL)",
                "Vincristine (VCR)"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Adenosine Diphosphate Glucose",
                "Amygdalin",
                "Anthocyanins",
                "Arbutin",
                "Atractyloside",
                "Canagliflozin",
                "Cardiac Glycosides",
                "Chloralose",
                "Daunorubicin",
                "Digitalis Glycosides",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Epirubicin",
                "Escin",
                "Esculin",
                "Framycetin",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Holothurin",
                "Inotuzumab Ozogamicin",
                "Iridoid Glycosides",
                "Lincomycin",
                "Lincosamides",
                "Methylglycosides",
                "Ouabain",
                "Phlorhizin",
                "Proscillaridin",
                "Quillaja Saponins",
                "Saponins",
                "Sennosides",
                "Sisomicin",
                "Solanine",
                "Streptozocin",
                "Thioglycosides",
                "Uridine Diphosphate Glucuronic Acid"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Rutin (Quercetin-3-O-rutinoside)",
            "Digoxin (Digitoxigenin-3-O-digitoxoside)",
            "Saponin",
            "Gentiopicroside",
            "Glycyrrhizin",
            "Aesculin",
            "Sambunigrin",
            "Papaverine",
            "Cymarin",
            "Astragaloside IV",
            "Ginsenoside Rb1",
            "Ginsenoside Rg1",
            "Triterpenoid saponin",
            "Quillajaponin",
            "Oleanolic acid 28-O-\u03b2-D-glucopyranoside",
            "Lupeol 3-O-\u03b2-D-glucopyranoside",
            "\u03b2-Sitosterol 3-O-\u03b2-D-glucopyranoside",
            "Ursolic acid 28-O-\u03b2-D-glucopyranoside",
            "Triterpenoid glycoside",
            "Saponin glycoside",
            "Glycoside of phenolic acids",
            "Glycoside of anthraquinones",
            "Glycoside of flavonoids",
            "Glycoside of terpenoids",
            "Glycoside of alkaloids",
            "Glycoside of amino acids"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Digoxin",
                "Saponins",
                "Ginsenosides",
                "Quillaja Saponins"
            ],
            "mismatches": [
                "Rutin (Quercetin-3-O-rutinoside)",
                "Gentiopicroside",
                "Glycyrrhizin",
                "Aesculin",
                "Sambunigrin",
                "Papaverine",
                "Cymarin",
                "Astragaloside IV",
                "Ginsenoside Rb1",
                "Ginsenoside Rg1",
                "Triterpenoid saponin",
                "Quillajaponin",
                "Oleanolic acid 28-O-\u03b2-D-glucopyranoside",
                "Lupeol 3-O-\u03b2-D-glucopyranoside",
                "\u03b2-Sitosterol 3-O-\u03b2-D-glucopyranoside",
                "Ursolic acid 28-O-\u03b2-D-glucopyranoside",
                "Triterpenoid glycoside",
                "Saponin glycoside",
                "Glycoside of phenolic acids",
                "Glycoside of anthraquinones",
                "Glycoside of flavonoids",
                "Glycoside of terpenoids",
                "Glycoside of alkaloids",
                "Glycoside of amino acids"
            ],
            "true_referents": [
                "3-O-Methylglucose",
                "Amygdalin",
                "Arbutin",
                "Atractyloside",
                "Cardiac Glycosides",
                "Daunorubicin",
                "Deslanoside",
                "Digitalis Glycosides",
                "Digitonin",
                "Digitoxin",
                "Digoxin",
                "Escin",
                "Esculin",
                "Ginsenosides",
                "Glucosides",
                "Hesperidin",
                "Iridoid Glucosides",
                "Iridoid Glycosides",
                "Menogaril",
                "Methylgalactosides",
                "Phlorhizin",
                "Poly Adenosine Diphosphate Ribose",
                "Proscillaridin",
                "Quillaja Saponins",
                "Saponins",
                "Sennosides",
                "Solanine",
                "Thiogalactosides",
                "Tomatine"
            ],
            "TP": 4,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            "Digoxin (CD)",
            "Ouabain (GSt)",
            "Strophanthin (KSt)",
            "Convalatoside",
            "Digitoxin",
            "Gitoxin",
            "Gitalin",
            "Acetyldigitoxin",
            "Deslanoside",
            "Digitoxigenin",
            "Gitogenin",
            "Strophanthidin",
            "Strophanthidol",
            "Apiin",
            "Vitexin",
            "Prunasin",
            "Phlorizin",
            "Arbutin",
            "Salicin",
            "Helicin",
            "Populin",
            "Amygdalin",
            "Vicianin",
            "Primulacetal A"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Digoxin (CD)",
                "Ouabain (GSt)",
                "Digitoxin",
                "Deslanoside",
                "Arbutin",
                "Amygdalin"
            ],
            "mismatches": [
                "Strophanthin (KSt)",
                "Convalatoside",
                "Gitoxin",
                "Gitalin",
                "Acetyldigitoxin",
                "Digitoxigenin",
                "Gitogenin",
                "Strophanthidin",
                "Strophanthidol",
                "Apiin",
                "Vitexin",
                "Prunasin",
                "Phlorizin",
                "Salicin",
                "Helicin",
                "Populin",
                "Vicianin",
                "Primulacetal A"
            ],
            "true_referents": [
                "Acetyldigitoxins",
                "Acetyldigoxins",
                "Amygdalin",
                "Arbutin",
                "Atractyloside",
                "Carubicin",
                "Daunorubicin",
                "Deslanoside",
                "Digitalis Glycosides",
                "Digitonin",
                "Digitoxin",
                "Digoxin",
                "Doxorubicin",
                "Escin",
                "Esculin",
                "Etoposide",
                "Framycetin",
                "Ginsenosides",
                "Hesperidin",
                "Lanatosides",
                "Mannosides",
                "Medigoxin",
                "Nitrophenylgalactosides",
                "Ouabain",
                "Phlorhizin",
                "Poly Adenosine Diphosphate Ribose",
                "Saponins",
                "Sennosides",
                "Sisomicin",
                "Solanine",
                "Streptomycin",
                "Streptothricins",
                "Streptozocin",
                "Strophanthins",
                "Tomatine",
                "Uridine Diphosphate N-Acetylmuramic Acid"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "glycoside",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            },
            {
                "Referent": "Acetylcholine",
                "Canonical Name": "acetylcholine"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Acetylcholine"
            ],
            "true_referents": [
                "Adenosine Diphosphate Glucose",
                "Uridine Diphosphate Glucose",
                "Uridine Diphosphate Glucuronic Acid"
            ],
            "TP": 0,
            "FP": 1,
            "FN": 0
        }
    }
]