[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Phytol ((E)-phyt-1-en-1-ol)",
            "Geranyl acetate (GerA)",
            "Farnesol (Far)",
            "Farnesyl acetate",
            "Geranyl formate",
            "Nerolidol (Nerd)",
            "Farnesyl propionate",
            "Farnesyl butyrate",
            "Farnesyl hexanoate",
            "Farnesyl octanoate",
            "Farnesyl decanoate",
            "Farnesyl benzoate",
            "Farnesyl salicylate",
            "Farnesyl cinnamate",
            "Farnesyl lactate",
            "Farnesyl maleate",
            "Farnesyl fumarate",
            "Farnesyl citrate",
            "Farnesyl tartrate",
            "Farnesyl succinate",
            "Farnesyl malate",
            "Farnesyl glutarate",
            "Farnesyl adipate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Phytol ((E)-phyt-1-en-1-ol)",
                "Geranyl acetate (GerA)",
                "Farnesol (Far)",
                "Farnesyl acetate",
                "Geranyl formate",
                "Nerolidol (Nerd)",
                "Farnesyl propionate",
                "Farnesyl butyrate",
                "Farnesyl hexanoate",
                "Farnesyl octanoate",
                "Farnesyl decanoate",
                "Farnesyl benzoate",
                "Farnesyl salicylate",
                "Farnesyl cinnamate",
                "Farnesyl lactate",
                "Farnesyl maleate",
                "Farnesyl fumarate",
                "Farnesyl citrate",
                "Farnesyl tartrate",
                "Farnesyl succinate",
                "Farnesyl malate",
                "Farnesyl glutarate",
                "Farnesyl adipate"
            ],
            "true_referents": [
                "(R)-nephthenol",
                "(Z)-biformene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4-hydroxycasbene",
                "8-hydroxycasbene",
                "9beta-pimara-7,15-diene",
                "abieta-7,13-diene",
                "beta-araneosene",
                "delta-araneosene",
                "dolabella-3,7,12-triene",
                "ent-trachyloban-3beta-ol",
                "erinacol",
                "fusicocca-2,10(14)-diene",
                "grayanotoxane",
                "isopimara-8(14),15-diene",
                "isopimarane",
                "labda-7,13(16),14-triene",
                "multidienyl diphosphate(3-)",
                "phyllocladan-16alpha-ol",
                "phytane",
                "pseudolaratriene",
                "serratol",
                "spiroalbatene",
                "stemar-13-ene",
                "stemod-13(17)-ene",
                "syn-isopimara-7,15-diene",
                "syn-labda-8(17),12E,14-triene",
                "taxa-4,11-diene",
                "terpentetriene"
            ],
            "TP": 0,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Sesquiterpene (C15H24)",
            "Farnesene (C15H24)",
            "Zingiberene (C15H24)",
            "Caryophyllene (C15H24)",
            "Bisabolene (C15H24)",
            "Guaiol (C15H26O)",
            "Cadinene (C15H24)",
            "Humulene (C15H24)",
            "Spathulenol (C15H26O)",
            "Alpha-copaene (C15H24)",
            "Beta-cubebene (C15H24)",
            "Alpha-farnesene (C15H24)",
            "Gamma-muurolene (C15H24)",
            "Delta-cadinene (C15H24)",
            "Longifolene (C15H24)",
            "Valencene (C15H24)",
            "Bicyclogermacrene (C15H24)",
            "Nerolidol (C15H26O)",
            "Caryophyllene oxide (C15H24O)",
            "Alpha-gurjunene (C15H24)",
            "Beta-selinene (C15H24)",
            "Bisabolol (C15H28O)",
            "Delta-cadinol (C15H26O)",
            "Eudesmol (C15H26O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Sesquiterpene (C15H24)",
                "Farnesene (C15H24)",
                "Zingiberene (C15H24)",
                "Caryophyllene (C15H24)",
                "Bisabolene (C15H24)",
                "Guaiol (C15H26O)",
                "Cadinene (C15H24)",
                "Humulene (C15H24)",
                "Spathulenol (C15H26O)",
                "Alpha-copaene (C15H24)",
                "Beta-cubebene (C15H24)",
                "Alpha-farnesene (C15H24)",
                "Gamma-muurolene (C15H24)",
                "Delta-cadinene (C15H24)",
                "Longifolene (C15H24)",
                "Valencene (C15H24)",
                "Bicyclogermacrene (C15H24)",
                "Nerolidol (C15H26O)",
                "Caryophyllene oxide (C15H24O)",
                "Alpha-gurjunene (C15H24)",
                "Beta-selinene (C15H24)",
                "Bisabolol (C15H28O)",
                "Delta-cadinol (C15H26O)",
                "Eudesmol (C15H26O)"
            ],
            "true_referents": [
                "(-)-spiroviolene",
                "(12E)-labda-8(17),12,14-triene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "(Z)-biformene",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4,8-dihydroxycasbene",
                "4-hydroxycasbene",
                "5,8-dihydroxy-4-oxocasbene",
                "alpha-camphorene",
                "beta-pinacene",
                "delta-araneosene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "gamma-camphorene",
                "isocatenula-2(6),14-diene",
                "terpentetriene",
                "verticilla-3,7,11(12)-triene",
                "verticilla-3,7,12(13)-triene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Squalene (SQ)",
            "Geranylgeraniol",
            "Phytol",
            "Farnesyl pyrophosphate",
            "Geranylgeranyl pyrophosphate",
            "Geranylgeraniol",
            "Squalene-2,3-epoxide",
            "2,3-Oxidosqualene",
            "Tetrahymanol",
            "Lanosterol",
            "Cycloartenol",
            "Dammarenediol",
            "\u03b2-Amyrin",
            "\u03b1-Amyrin",
            "Lupeol",
            "Betulin",
            "Betulinic acid",
            "Oleanolic acid",
            "Ursolic acid",
            "Ginsenoside",
            "Saponin",
            "Triterpenoid",
            "Sterol",
            "Triterpene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "phytane"
            ],
            "mismatches": [
                "Squalene (SQ)",
                "Geranylgeraniol",
                "Phytol",
                "Farnesyl pyrophosphate",
                "Geranylgeranyl pyrophosphate",
                "Squalene-2,3-epoxide",
                "2,3-Oxidosqualene",
                "Tetrahymanol",
                "Lanosterol",
                "Cycloartenol",
                "Dammarenediol",
                "\u03b2-Amyrin",
                "\u03b1-Amyrin",
                "Lupeol",
                "Betulin",
                "Betulinic acid",
                "Oleanolic acid",
                "Ursolic acid",
                "Ginsenoside",
                "Saponin",
                "Triterpenoid",
                "Sterol",
                "Triterpene"
            ],
            "true_referents": [
                "(R)-nephthenol",
                "11-O-acetylcyathin A3",
                "15,16-epoxydolabrene",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4-hydroxy-8-oxocasbene",
                "5,8-dihydroxy-4-oxocasbene",
                "8-hydroxycasbene",
                "8-oxocasbene",
                "Rhodojaponin III",
                "abieta-7,13-diene",
                "abieta-8(14),12-diene",
                "abietatriene",
                "alpha-camphorene",
                "beta-araneosene",
                "beta-pinacene",
                "cycloaraneosene",
                "delta-araneosene",
                "dolabella-3,7,12-triene",
                "ent-cassa-12,15-diene",
                "ent-gibberellane",
                "grayanotoxane",
                "isocatenula-2(6),14-diene",
                "isocatenula-2,14-diene",
                "isopimara-7,15-diene",
                "isopimara-8(14),15-diene",
                "labdane",
                "multidienyl diphosphate(3-)",
                "phomopsene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "pseudolaratriene",
                "sclarene",
                "serratol",
                "sobralene",
                "stemod-13(17)-ene",
                "terpentetriene"
            ],
            "TP": 1,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Squalene (SQ)",
            "Phytol (PT)",
            "Geranylgeraniol (GGOH)",
            "Farnesol (FS)",
            "Nerolidol (ND)",
            "Bisabolol (BS)",
            "Amorphadiene (AM)",
            "\u03b2-Farnesene (BFN)",
            "\u03b1-Farnesene (AFN)",
            "Camphene (CP)",
            "Caryophyllene (CR)",
            "Humulene (HM)",
            "Limonene oxide (LO)",
            "Pinene oxide (PO)",
            "Thujopsene (TJ)",
            "Isophytol (IP)",
            "Geranyl Farnesol (GF)",
            "Farnesyl pyrophosphate (FPP)",
            "Geranyl pyrophosphate (GPP)",
            "Taxadiene (TD)",
            "Abietic acid (AA)",
            "Dehydroabietic acid (DAA)",
            "Levopimaric acid (LPA)",
            "Pimaric acid (PA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Squalene (SQ)",
                "Phytol (PT)",
                "Geranylgeraniol (GGOH)",
                "Farnesol (FS)",
                "Nerolidol (ND)",
                "Bisabolol (BS)",
                "Amorphadiene (AM)",
                "\u03b2-Farnesene (BFN)",
                "\u03b1-Farnesene (AFN)",
                "Camphene (CP)",
                "Caryophyllene (CR)",
                "Humulene (HM)",
                "Limonene oxide (LO)",
                "Pinene oxide (PO)",
                "Thujopsene (TJ)",
                "Isophytol (IP)",
                "Geranyl Farnesol (GF)",
                "Farnesyl pyrophosphate (FPP)",
                "Geranyl pyrophosphate (GPP)",
                "Taxadiene (TD)",
                "Abietic acid (AA)",
                "Dehydroabietic acid (DAA)",
                "Levopimaric acid (LPA)",
                "Pimaric acid (PA)"
            ],
            "true_referents": [
                "(-)-spiroviolene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "(Z)-biformene",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4-hydroxy-8-oxocasbene",
                "9beta-pimara-7,15-diene",
                "abieta-7,13-diene",
                "abieta-8(14),12-diene",
                "abietatriene",
                "alpha-camphorene",
                "beta-araneosene",
                "beta-pinacene",
                "cembrene C",
                "delta-araneosene",
                "dolabella-3,7,12-triene",
                "ent-trachyloban-3beta-ol",
                "fusicocca-2,10(14)-diene",
                "gamma-camphorene",
                "grayanotoxane",
                "isocatenula-2,14-diene",
                "isopimara-8(14),15-diene",
                "isopimarane",
                "multidienyl diphosphate(3-)",
                "neoabietadiene",
                "neophytadiene",
                "phomopsene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "pimara-8(14),15-diene",
                "pimarane",
                "rosane",
                "sclarene",
                "serratol",
                "sobralene",
                "spiroalbatene",
                "taxa-4,11-diene",
                "taxa-420,1112-diene",
                "taxane",
                "terpentetriene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Squalene (C30H50)",
            "Lycopene (C40H56)",
            "Beta-Carotene (C40H56)",
            "Limonene (C10H16)",
            "Geranylgeraniol (C20H34O)",
            "Phytol (C20H40O)",
            "Farnesene (C15H24)",
            "Nerolidol (C15H26O)",
            "Bisabolene (C15H24)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Farnesol (C15H26O)",
            "Retinol (C20H30O)",
            "Retinal (C20H28O)",
            "Retinoic Acid (C20H28O2)",
            "Astaxanthin (C40H52O4)",
            "Zeaxanthin (C40H56O2)",
            "Lutein (C40H56O2)",
            "Cryptoxanthin (C40H56O)",
            "Capsanthin (C40H56O3)",
            "Violaxanthin (C40H56O4)",
            "Neoxanthin (C40H56O4)",
            "Tocopherol (C29H50O2)",
            "Tocotrienol (C29H44O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Squalene (C30H50)",
                "Lycopene (C40H56)",
                "Beta-Carotene (C40H56)",
                "Limonene (C10H16)",
                "Geranylgeraniol (C20H34O)",
                "Phytol (C20H40O)",
                "Farnesene (C15H24)",
                "Nerolidol (C15H26O)",
                "Bisabolene (C15H24)",
                "Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Farnesol (C15H26O)",
                "Retinol (C20H30O)",
                "Retinal (C20H28O)",
                "Retinoic Acid (C20H28O2)",
                "Astaxanthin (C40H52O4)",
                "Zeaxanthin (C40H56O2)",
                "Lutein (C40H56O2)",
                "Cryptoxanthin (C40H56O)",
                "Capsanthin (C40H56O3)",
                "Violaxanthin (C40H56O4)",
                "Neoxanthin (C40H56O4)",
                "Tocopherol (C29H50O2)",
                "Tocotrienol (C29H44O2)"
            ],
            "true_referents": [
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-axinyssene",
                "(R)-nephthenol",
                "(Z)-biformene",
                "15,16-epoxydolabrene",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4-hydroxy-8-oxocasbene",
                "4-hydroxycasbene",
                "5,8-dihydroxy-4-oxocasbene",
                "8-hydroxycasbene",
                "abietatriene",
                "beta-araneosene",
                "beta-pinacene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "isocatenula-2(6),14-diene",
                "labda-7,13(16),14-triene",
                "neophytadiene",
                "phomacta-1(15),3,7-triene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "rhizathalene A",
                "taxa-420,1112-diene",
                "terpentetriene",
                "verticilla-3,7,11(12)-triene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Camphor (C10H16O)",
            "Carvone (C10H14O)",
            "Borneol (C10H18O)",
            "Fenchone (C10H14O)",
            "Thujone (C10H14O)",
            "Myrcene (C10H16)",
            "Terpinolene (C10H16)",
            "Geraniol (C10H18O)",
            "Nerol (C10H18O)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Farnesene (C15H24)",
            "Squalene (C30H50)",
            "Squalane (C30H62)",
            "Linalool (C10H18O)",
            "Bicyclogermacrene (C15H24)",
            "Cadinene (C15H24)",
            "Germacrene D (C15H24)",
            "Caryophyllene oxide (C15H24O)",
            "Valerenic acid (C15H22O2)",
            "Dihydrocarvone (C10H18O)",
            "\u03b1-Terpinene (C10H16)",
            "\u03b2-Pinene (C10H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Camphor (C10H16O)",
                "Carvone (C10H14O)",
                "Borneol (C10H18O)",
                "Fenchone (C10H14O)",
                "Thujone (C10H14O)",
                "Myrcene (C10H16)",
                "Terpinolene (C10H16)",
                "Geraniol (C10H18O)",
                "Nerol (C10H18O)",
                "Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Farnesene (C15H24)",
                "Squalene (C30H50)",
                "Squalane (C30H62)",
                "Linalool (C10H18O)",
                "Bicyclogermacrene (C15H24)",
                "Cadinene (C15H24)",
                "Germacrene D (C15H24)",
                "Caryophyllene oxide (C15H24O)",
                "Valerenic acid (C15H22O2)",
                "Dihydrocarvone (C10H18O)",
                "\u03b1-Terpinene (C10H16)",
                "\u03b2-Pinene (C10H16)"
            ],
            "true_referents": [
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4,8-dihydroxycasbene",
                "4-hydroxycasbene",
                "5,8-dihydroxy-4-oxocasbene",
                "8-hydroxycasbene",
                "abieta-7,13-diene",
                "abieta-8(14),12-diene",
                "alpha-camphorene",
                "beta-pinacene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "fusicocca-2,10(14)-diene",
                "gamma-camphorene",
                "isocatenula-2(6),14-diene",
                "terpentetriene",
                "verticilla-3,7,11(12)-triene",
                "verticilla-3,7,12(13)-triene"
            ],
            "TP": 0,
            "FP": 25,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Terpinene (C10H16)",
            "Cymene (C10H14)",
            "Camphene (C10H16)",
            "Sabinene (C10H16)",
            "Carene (C10H16)",
            "Bornyl acetate (C12H20O2)",
            "Terpinolene (C10H16)",
            "Phellandrene (C10H16)",
            "Fenchone (C10H16O)",
            "Thujone (C10H16O)",
            "Anethole (C10H12O)",
            "Estragole (C10H12O)",
            "Thymol (C10H14O)",
            "Carvacrol (C10H14O)",
            "1,8-Cineole (C10H18O)",
            "Linalool (C10H18O)",
            "Camphor (C10H16O)",
            "Borneol (C10H18O)",
            "Menthol (C10H20O)",
            "Eucalyptol (C10H18O)",
            "Geraniol (C10H18O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Myrcene (C10H16)",
                "Terpinene (C10H16)",
                "Cymene (C10H14)",
                "Camphene (C10H16)",
                "Sabinene (C10H16)",
                "Carene (C10H16)",
                "Bornyl acetate (C12H20O2)",
                "Terpinolene (C10H16)",
                "Phellandrene (C10H16)",
                "Fenchone (C10H16O)",
                "Thujone (C10H16O)",
                "Anethole (C10H12O)",
                "Estragole (C10H12O)",
                "Thymol (C10H14O)",
                "Carvacrol (C10H14O)",
                "1,8-Cineole (C10H18O)",
                "Linalool (C10H18O)",
                "Camphor (C10H16O)",
                "Borneol (C10H18O)",
                "Menthol (C10H20O)",
                "Eucalyptol (C10H18O)",
                "Geraniol (C10H18O)"
            ],
            "true_referents": [
                "(12E)-labda-8(17),12,14-triene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4,8-dihydroxycasbene",
                "8-hydroxycasbene",
                "abieta-7,13-diene",
                "abieta-8(14),12-diene",
                "alpha-camphorene",
                "beta-pinacene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "ent-cassa-12,15-diene",
                "gamma-camphorene",
                "isocatenula-2(6),14-diene",
                "isocatenula-2,14-diene",
                "syn-labda-8(17),12E,14-triene",
                "terpentetriene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Sesquiterpene",
            "Diterpene",
            "Triterpene",
            "Tetraterpene",
            "Polyterpene",
            "Squalene",
            "Limonene",
            "Pinene",
            "Myrcene",
            "Caryophyllene",
            "Humulene",
            "Germacrene",
            "Farnesene",
            "Bisabolene",
            "Cedrene",
            "Taxadiene",
            "Gibberellin",
            "Carotenoid",
            "Steroid",
            "Saponin",
            "Ginsenoside",
            "Triterpene glycoside",
            "Triterpenoid saponin",
            "Triterpenoid alkaloid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Taxadiene"
            ],
            "mismatches": [
                "Sesquiterpene",
                "Diterpene",
                "Triterpene",
                "Tetraterpene",
                "Polyterpene",
                "Squalene",
                "Limonene",
                "Pinene",
                "Myrcene",
                "Caryophyllene",
                "Humulene",
                "Germacrene",
                "Farnesene",
                "Bisabolene",
                "Cedrene",
                "Gibberellin",
                "Carotenoid",
                "Steroid",
                "Saponin",
                "Ginsenoside",
                "Triterpene glycoside",
                "Triterpenoid saponin",
                "Triterpenoid alkaloid"
            ],
            "true_referents": [
                "(-)-casbene",
                "(-)-spiroviolene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4-hydroxycasbene",
                "8-hydroxycasbene",
                "Rhodojaponin III",
                "abieta-7,13-diene",
                "abieta-8(14),12-diene",
                "alpha-camphorene",
                "beta-araneosene",
                "beta-pinacene",
                "casbene",
                "delta-araneosene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "ent-gibberellane",
                "ent-kaurene",
                "gamma-camphorene",
                "gibberellane",
                "kaurane",
                "luchudiene",
                "lydicene",
                "multidienyl diphosphate(3-)",
                "neoabietadiene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "pimaradiene",
                "rosane",
                "sclarene",
                "sobralene",
                "taxa-4,11-diene",
                "taxa-420,1112-diene",
                "taxane",
                "terpentetriene"
            ],
            "TP": 1,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Abietic acid",
            "Isopimaric acid",
            "Geranylgeraniol (GGol)",
            "Phytol",
            "Taxadiene",
            "Retinol (Vitamin A1)",
            "Steviol",
            "Forskolin",
            "Manool",
            "Pimaric acid",
            "Sclareol",
            "Ginkgolide",
            "Bilobalide",
            "Triptolide",
            "Kaurene",
            "Ent-kaurene",
            "Lycopodine",
            "Casbene",
            "Gibberellin",
            "Whitfordol",
            "Labdanes",
            "Resin acid",
            "Cafestol",
            "Cafmeylan"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ent-kaurene",
                "casbene"
            ],
            "mismatches": [
                "Abietic acid",
                "Isopimaric acid",
                "Geranylgeraniol (GGol)",
                "Phytol",
                "Taxadiene",
                "Retinol (Vitamin A1)",
                "Steviol",
                "Forskolin",
                "Manool",
                "Pimaric acid",
                "Sclareol",
                "Ginkgolide",
                "Bilobalide",
                "Triptolide",
                "Kaurene",
                "Lycopodine",
                "Gibberellin",
                "Whitfordol",
                "Labdanes",
                "Resin acid",
                "Cafestol",
                "Cafmeylan"
            ],
            "true_referents": [
                "(-)-casbene",
                "(-)-spiroviolene",
                "(R)-nephthenol",
                "(Z)-biformene",
                "4-hydroxycasbene",
                "8-hydroxycasbene",
                "abieta-7,13-diene",
                "abieta-8(14),12-diene",
                "abietane",
                "abietatriene",
                "casbane",
                "casbene",
                "catenul-14-en-6-ol",
                "dolabella-3,7,12-triene",
                "ent-gibberellane",
                "ent-isokaurene",
                "ent-kaurane",
                "ent-kaurene",
                "ent-trachyloban-3beta-ol",
                "erinacol",
                "gibberellane",
                "grayanotoxane",
                "halimane",
                "isopimara-7,15-diene",
                "isopimara-8(14),15-diene",
                "isopimarane",
                "kanakugiol",
                "kaurane",
                "labda-7,13(16),14-triene",
                "labdane",
                "lycosantalene",
                "lydicene",
                "neophytadiene",
                "phomopsene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "pimara-8(14),15-diene",
                "pimaradiene",
                "pimarane",
                "rhizathalene A",
                "sclarene",
                "serratol",
                "sobralene",
                "spiroalbatene",
                "spiroluchuene A",
                "spiroluchuene B",
                "taxa-4,11-diene",
                "taxa-420,1112-diene",
                "taxane"
            ],
            "TP": 2,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Sesquiterpene",
            "Farnesol",
            "Nerolidol",
            "Caryophyllene",
            "Humulene",
            "Bisabolol",
            "Zingiberene",
            "Valencene",
            "Germacrene",
            "Guaiene",
            "Eudesmol",
            "Cadinene",
            "Patchoulol",
            "Cedrol",
            "Vetivone",
            "Khusimol",
            "Longifolene",
            "Selinene",
            "Curcumene",
            "Santalene",
            "Bergamotene",
            "Copaene",
            "Elemene",
            "Cedrene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Sesquiterpene",
                "Farnesol",
                "Nerolidol",
                "Caryophyllene",
                "Humulene",
                "Bisabolol",
                "Zingiberene",
                "Valencene",
                "Germacrene",
                "Guaiene",
                "Eudesmol",
                "Cadinene",
                "Patchoulol",
                "Cedrol",
                "Vetivone",
                "Khusimol",
                "Longifolene",
                "Selinene",
                "Curcumene",
                "Santalene",
                "Bergamotene",
                "Copaene",
                "Elemene",
                "Cedrene"
            ],
            "true_referents": [
                "(-)-spiroviolene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "8-hydroxycasbene",
                "Rhodojaponin III",
                "abieta-7,13-diene",
                "alpha-camphorene",
                "atisane",
                "beta-araneosene",
                "beta-pinacene",
                "casbene",
                "cembrane",
                "cephalot-3(20)-ene",
                "collinodiene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "ent-atiserene",
                "ent-gibberellane",
                "ent-kaurene",
                "erinacol",
                "gamma-camphorene",
                "halimane",
                "kanakugiol",
                "kaurane",
                "lydicene",
                "phytane",
                "pimaradiene",
                "rosane",
                "serratol",
                "sobralene",
                "spiroalbatene",
                "taxane",
                "terpentetriene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Retinol (Vitamin A)",
            "Phytol",
            "Squalene",
            "Geranylgeraniol",
            "Abietic acid",
            "Lycopene",
            "beta-Carotene",
            "Lutein",
            "Zeaxanthin",
            "Astaxanthin",
            "Fucoxanthin",
            "Coenzyme Q10 (CoQ10)",
            "Vitamin K1 (Phylloquinone)",
            "Vitamin K2 (Menaquinone)",
            "Vitamin E (Tocopherol)",
            "Farnesol",
            "Geranylgeranyl pyrophosphate (GGPP)",
            "Phytoene",
            "Phytofluene",
            "Taxadiene",
            "Cembrene",
            "Kaurene",
            "Ferruginol",
            "Totarol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Kaurene"
            ],
            "mismatches": [
                "Retinol (Vitamin A)",
                "Phytol",
                "Squalene",
                "Geranylgeraniol",
                "Abietic acid",
                "Lycopene",
                "beta-Carotene",
                "Lutein",
                "Zeaxanthin",
                "Astaxanthin",
                "Fucoxanthin",
                "Coenzyme Q10 (CoQ10)",
                "Vitamin K1 (Phylloquinone)",
                "Vitamin K2 (Menaquinone)",
                "Vitamin E (Tocopherol)",
                "Farnesol",
                "Geranylgeranyl pyrophosphate (GGPP)",
                "Phytoene",
                "Phytofluene",
                "Taxadiene",
                "Cembrene",
                "Ferruginol",
                "Totarol"
            ],
            "true_referents": [
                "(-)-spiroviolene",
                "(R)-cembrene A",
                "(R)-nephthenol",
                "(Z)-biformene",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "8-hydroxycasbene",
                "9beta-pimara-7,15-diene",
                "Rhodojaponin III",
                "abieta-7,13-diene",
                "abieta-8(14),12-diene",
                "abietatriene",
                "beta-araneosene",
                "beta-pinacene",
                "cembrene C",
                "dolabella-3,7,12-triene",
                "ent-gibberellane",
                "ent-kaurane",
                "ent-kaurene",
                "erinacol",
                "fusicocca-2,10(14)-diene",
                "gibberellane",
                "grayanotoxane",
                "kaurane",
                "multidienyl diphosphate(3-)",
                "neophytadiene",
                "phomopsene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "rosane",
                "sclarene",
                "serratol",
                "sobralene",
                "spiroluchuene A",
                "taxa-4,11-diene",
                "taxa-420,1112-diene",
                "taxane",
                "terpentetriene"
            ],
            "TP": 1,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Sesquiterpene (e.g. Humulene)",
            "Sesquiterpene (e.g. Caryophyllene)",
            "Sesquiterpene (e.g. Farnesene)",
            "Sesquiterpene (e.g. Nerolidol)",
            "Sesquiterpene (e.g. Valencene)",
            "Sesquiterpene (e.g. Guaiene)",
            "Sesquiterpene (e.g. Cadinene)",
            "Sesquiterpene (e.g. Selinene)",
            "Sesquiterpene (e.g. Zingiberene)",
            "Sesquiterpene (e.g. Bisabolene)",
            "Sesquiterpene (e.g. Curcumene)",
            "Sesquiterpene (e.g. Germacrene D)",
            "Sesquiterpene (e.g. Cedrol)",
            "Sesquiterpene (e.g. Patchoulol)",
            "Sesquiterpene (e.g. Santalene)",
            "Sesquiterpene (e.g. Bergamotene)",
            "Sesquiterpene (e.g. Cedrene)",
            "Sesquiterpene (e.g. Longifolene)",
            "Sesquiterpene (e.g. Aristolochene)",
            "Sesquiterpene (e.g. Caryophyllene oxide)",
            "Sesquiterpene (e.g. Humulene epoxide)",
            "Sesquiterpene (e.g. Chamigrene)",
            "Sesquiterpene (e.g. Ishwarane)",
            "Sesquiterpene (e.g. Silphiperfol-6-ene)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Sesquiterpene (e.g. Humulene)",
                "Sesquiterpene (e.g. Caryophyllene)",
                "Sesquiterpene (e.g. Farnesene)",
                "Sesquiterpene (e.g. Nerolidol)",
                "Sesquiterpene (e.g. Valencene)",
                "Sesquiterpene (e.g. Guaiene)",
                "Sesquiterpene (e.g. Cadinene)",
                "Sesquiterpene (e.g. Selinene)",
                "Sesquiterpene (e.g. Zingiberene)",
                "Sesquiterpene (e.g. Bisabolene)",
                "Sesquiterpene (e.g. Curcumene)",
                "Sesquiterpene (e.g. Germacrene D)",
                "Sesquiterpene (e.g. Cedrol)",
                "Sesquiterpene (e.g. Patchoulol)",
                "Sesquiterpene (e.g. Santalene)",
                "Sesquiterpene (e.g. Bergamotene)",
                "Sesquiterpene (e.g. Cedrene)",
                "Sesquiterpene (e.g. Longifolene)",
                "Sesquiterpene (e.g. Aristolochene)",
                "Sesquiterpene (e.g. Caryophyllene oxide)",
                "Sesquiterpene (e.g. Humulene epoxide)",
                "Sesquiterpene (e.g. Chamigrene)",
                "Sesquiterpene (e.g. Ishwarane)",
                "Sesquiterpene (e.g. Silphiperfol-6-ene)"
            ],
            "true_referents": [
                "(-)-spiroviolene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "16alpha-hydroxy-ent-kaurene",
                "3beta-hydroxy-15,16-epoxydolabrene",
                "4,5,8-trihydroxycasbene",
                "4-hydroxycasbene",
                "8-hydroxycasbene",
                "alpha-camphorene",
                "beta-araneosene",
                "beta-pinacene",
                "delta-araneosene",
                "dolabella-3,7,12-triene",
                "gamma-camphorene",
                "taxa-4,11-diene",
                "taxa-420,1112-diene",
                "terpentetriene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Squalene (Sq)",
            "Geranylgeraniol (GGol)",
            "Retinol (Ret)",
            "Ubiquinone (Ubi)",
            "Phytol (Phy)",
            "Menthol (Men)",
            "Camphor (Cam)",
            "Limonene (Lim)",
            "Farnesol (Far)",
            "Linalool (Lin)",
            "Citronellol (Cit)",
            "Nerol (Ner)",
            "Geraniol (Ger)",
            "Myrcene (Myr)",
            "Ocimene (Oci)",
            "Pinene (Pin)",
            "Sabinene (Sab)",
            "Caryophyllene (Car)",
            "Cadinene (Cad)",
            "Bisabolene (Bis)",
            "Valencene (Val)",
            "Zingiberene (Zing)",
            "Curcumene (Cur)",
            "Guaiene (Gua)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Squalene (Sq)",
                "Geranylgeraniol (GGol)",
                "Retinol (Ret)",
                "Ubiquinone (Ubi)",
                "Phytol (Phy)",
                "Menthol (Men)",
                "Camphor (Cam)",
                "Limonene (Lim)",
                "Farnesol (Far)",
                "Linalool (Lin)",
                "Citronellol (Cit)",
                "Nerol (Ner)",
                "Geraniol (Ger)",
                "Myrcene (Myr)",
                "Ocimene (Oci)",
                "Pinene (Pin)",
                "Sabinene (Sab)",
                "Caryophyllene (Car)",
                "Cadinene (Cad)",
                "Bisabolene (Bis)",
                "Valencene (Val)",
                "Zingiberene (Zing)",
                "Curcumene (Cur)",
                "Guaiene (Gua)"
            ],
            "true_referents": [
                "(-)-casbene",
                "(-)-spiroviolene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-axinyssene",
                "(R)-cembrene A",
                "(R)-nephthenol",
                "(S)-axinyssene",
                "(Z)-biformene",
                "11-oxomiltiradiene",
                "8-hydroxycasbene",
                "alpha-camphorene",
                "beta-araneosene",
                "beta-pinacene",
                "collinodiene",
                "delta-araneosene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "ent-cassa-12,15-diene",
                "ent-gibberellane",
                "ent-trachylobane",
                "gamma-camphorene",
                "gibberellane",
                "grayanotoxane",
                "isopimarane",
                "lydicene",
                "phomopsene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "rhizathalene A",
                "rosane",
                "sclarene",
                "serratol",
                "sobralene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Sesquiterpene (Sesqui)",
            "Caryophyllene",
            "Bisabolene",
            "Farnesene",
            "Santalene",
            "Bergamotene",
            "Humulene",
            "Nootkatone",
            "Valencene",
            "Eremophila",
            "Cadinene",
            "Spathulenol",
            "Copaene",
            "Aromadendrene",
            "Germacrene",
            "Selinene",
            "Caryophyllaldehyde",
            "Farnesal",
            "Santalal",
            "Bergamotol",
            "Humulol",
            "Nootkatol",
            "Valenol",
            "Eremophilal"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Sesquiterpene (Sesqui)",
                "Caryophyllene",
                "Bisabolene",
                "Farnesene",
                "Santalene",
                "Bergamotene",
                "Humulene",
                "Nootkatone",
                "Valencene",
                "Eremophila",
                "Cadinene",
                "Spathulenol",
                "Copaene",
                "Aromadendrene",
                "Germacrene",
                "Selinene",
                "Caryophyllaldehyde",
                "Farnesal",
                "Santalal",
                "Bergamotol",
                "Humulol",
                "Nootkatol",
                "Valenol",
                "Eremophilal"
            ],
            "true_referents": [
                "(-)-casbene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "16alpha-hydroxy-ent-kaurene",
                "4,5,8-trihydroxycasbene",
                "4,8-dihydroxycasbene",
                "4-hydroxycasbene",
                "8-hydroxycasbene",
                "alpha-camphorene",
                "atisane",
                "axerophthene",
                "beta-araneosene",
                "beta-pinacene",
                "casbene",
                "collinodiene",
                "delta-araneosene",
                "dolabella-3,7,18-triene",
                "ent-atiserene",
                "ent-cassa-12,15-diene",
                "ent-kaurane",
                "ent-kaurene",
                "ent-trachyloban-3beta-ol",
                "erinacol",
                "gamma-camphorene",
                "halimane",
                "luchudiene",
                "lydicene",
                "neoabietadiene",
                "phomopsene",
                "phyllocladan-16alpha-ol",
                "phytane",
                "pimaradiene",
                "rosane",
                "serratol",
                "serrulatane",
                "sobralene",
                "spiroluchuene A",
                "taxane",
                "terpentetriene",
                "variediene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            "Sesquiterpene (SQT)",
            "Farnesene (FAR)",
            "Bisabolene (BIS)",
            "Caryophyllene (CAR)",
            "Humulene (HUM)",
            "Cadinene (CAD)",
            "Gurjunene (GUR)",
            "Patchoulene (PAT)",
            "Sesquiterpinene (SQTP)",
            "Cadinol (CADL)",
            "Eudesmol (EUDS)",
            "Guaiazulene (GUAAZ)",
            "Ledene (LEDN)",
            "Bulgarial (BULG)",
            "Cyclosativene (CYSTV)",
            "Eremophilone (EREPH)",
            "Helioporin (HELI)",
            "Isoelemol (ISOEL)",
            "Nerolidol (NERL)",
            "Oplodene (OPLD)",
            "Pacifigorgone (PACIF)",
            "Sesquithujene (SQTUJ)",
            "Thujopsene (THUJP)",
            "Valencene (VALE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Sesquiterpene (SQT)",
                "Farnesene (FAR)",
                "Bisabolene (BIS)",
                "Caryophyllene (CAR)",
                "Humulene (HUM)",
                "Cadinene (CAD)",
                "Gurjunene (GUR)",
                "Patchoulene (PAT)",
                "Sesquiterpinene (SQTP)",
                "Cadinol (CADL)",
                "Eudesmol (EUDS)",
                "Guaiazulene (GUAAZ)",
                "Ledene (LEDN)",
                "Bulgarial (BULG)",
                "Cyclosativene (CYSTV)",
                "Eremophilone (EREPH)",
                "Helioporin (HELI)",
                "Isoelemol (ISOEL)",
                "Nerolidol (NERL)",
                "Oplodene (OPLD)",
                "Pacifigorgone (PACIF)",
                "Sesquithujene (SQTUJ)",
                "Thujopsene (THUJP)",
                "Valencene (VALE)"
            ],
            "true_referents": [
                "(-)-casbene",
                "(-)-spiroviolene",
                "(1Z,2Z,6E,10E)-pinacene",
                "(R)-nephthenol",
                "(S)-axinyssene",
                "16alpha-hydroxy-ent-kaurene",
                "abieta-8(14),12-diene",
                "alpha-camphorene",
                "axerophthene",
                "beta-araneosene",
                "beta-pinacene",
                "casbene",
                "cephalot-3(20)-ene",
                "cephalot-3(4)-ene",
                "collinodiene",
                "cycloaraneosene",
                "dolabella-3,7,12-triene",
                "dolabella-3,7,18-triene",
                "ent-(12E)-labda-8(17),12,14-triene",
                "ent-atiserene",
                "ent-cassa-12,15-diene",
                "ent-isokaurene",
                "ent-kaurane",
                "ent-kaurene",
                "ent-trachyloban-3beta-ol",
                "ent-trachylobane",
                "erinacol",
                "gamma-camphorene",
                "labdane",
                "lydicene",
                "phyllocladan-16alpha-ol",
                "rosane",
                "sclarene",
                "serratol",
                "sobralene",
                "stemod-13(17)-ene",
                "taxa-4,11-diene",
                "taxa-420,1112-diene",
                "taxane",
                "terpentetriene",
                "tigliane",
                "tsukubadiene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "diterpene",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "limonene",
                "Canonical name": "limonene"
            },
            {
                "Referent": "beta-pinene",
                "Canonical name": "beta-pinene"
            },
            {
                "Referent": "alpha-pinene",
                "Canonical name": "alpha-pinene"
            },
            {
                "Referent": "gamma-terpinene",
                "Canonical name": "gamma-terpinene"
            },
            {
                "Referent": "terpinolene",
                "Canonical name": "terpinolene"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "limonene",
                "beta-pinene",
                "alpha-pinene",
                "gamma-terpinene",
                "terpinolene"
            ],
            "true_referents": [
                "(1Z,2Z,6E,10E)-pinacene",
                "abieta-7,13-diene",
                "beta-pinacene",
                "taxa-4,11-diene",
                "taxa-420,1112-diene",
                "terpentetriene"
            ],
            "TP": 0,
            "FP": 5,
            "FN": 0
        }
    }
]