[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "alpha-pinene (\u03b1-pinene)",
            "beta-pinene (\u03b2-pinene)",
            "limonene",
            "myrcene",
            "geraniol",
            "linalool",
            "beta-caryophyllene (\u03b2-caryophyllene)",
            "alpha-humulene (\u03b1-humulene)",
            "borneol",
            "thujone",
            "camphor",
            "menthol",
            "patchouli alcohol",
            "eucalyptol (cineole)",
            "menthone",
            "citronellol",
            "citronellal",
            "nerolidol",
            "sabinene",
            "cis-ocimene",
            "trans-ocimene",
            "phellandrene",
            "alpha-terpineol (\u03b1-terpineol)",
            "beta-bisabolol (\u03b2-bisabolol)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "limonene",
                "camphor",
                "menthol",
                "eucalyptol (cineole)"
            ],
            "mismatches": [
                "alpha-pinene (\u03b1-pinene)",
                "beta-pinene (\u03b2-pinene)",
                "myrcene",
                "geraniol",
                "linalool",
                "beta-caryophyllene (\u03b2-caryophyllene)",
                "alpha-humulene (\u03b1-humulene)",
                "borneol",
                "thujone",
                "patchouli alcohol",
                "menthone",
                "citronellol",
                "citronellal",
                "nerolidol",
                "sabinene",
                "cis-ocimene",
                "trans-ocimene",
                "phellandrene",
                "alpha-terpineol (\u03b1-terpineol)",
                "beta-bisabolol (\u03b2-bisabolol)"
            ],
            "true_referents": [
                "Abietanes",
                "Acyclic Monoterpenes",
                "Artesunate",
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Camphor",
                "Cannabinol",
                "Cymenes",
                "Diterpenes, Clerodane",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Lupanes",
                "Menthol",
                "Monoterpenes",
                "Oleanolic Acid",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phorbols",
                "Sesquiterpenes, Eudesmane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "beta Carotene",
                "zeta Carotene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Alpha-pinene (C10H16)",
            "Beta-pinene (C10H16)",
            "Limonene (C10H16)",
            "Myrcene (C10H16)",
            "Linalool (C10H18O)",
            "Geraniol (C10H18O)",
            "Citronellol (C10H20O)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Bisabolol (C15H26O)",
            "Farnesene (C15H24)",
            "Eucalyptol (C10H18O)",
            "Cedrol (C15H26O)",
            "Sabinene (C10H16)",
            "Camphor (C10H16O)",
            "Zingiberene (C15H24)",
            "Thujone (C10H16O)",
            "Terpineol (C10H18O)",
            "Phellandrene (C10H16)",
            "Nerolidol (C15H26O)",
            "Sclareol (C20H32O2)",
            "Ocimene (C10H16)",
            "Abietic acid (C20H30O2)",
            "Borneol (C10H18O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Eucalyptol",
                "Camphor"
            ],
            "mismatches": [
                "Alpha-pinene (C10H16)",
                "Beta-pinene (C10H16)",
                "Myrcene (C10H16)",
                "Linalool (C10H18O)",
                "Geraniol (C10H18O)",
                "Citronellol (C10H20O)",
                "Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Bisabolol (C15H26O)",
                "Farnesene (C15H24)",
                "Cedrol (C15H26O)",
                "Sabinene (C10H16)",
                "Zingiberene (C15H24)",
                "Thujone (C10H16O)",
                "Terpineol (C10H18O)",
                "Phellandrene (C10H16)",
                "Nerolidol (C15H26O)",
                "Sclareol (C20H32O2)",
                "Ocimene (C10H16)",
                "Abietic acid (C20H30O2)",
                "Borneol (C10H18O)"
            ],
            "true_referents": [
                "Abietanes",
                "Acyclic Monoterpenes",
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Camphor",
                "Carbenoxolone",
                "Diterpenes, Clerodane",
                "Eucalyptol",
                "Farnesol",
                "Gossypol",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Oleanolic Acid",
                "Phorbol 12,13-Dibutyrate",
                "Phytol",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "beta Carotene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Menthol (CAS: 89-78-1)",
            "Limonene (CAS: 5989-27-5)",
            "Pinene (CAS: 80-56-8)",
            "Linalool (CAS: 78-70-6)",
            "Geraniol (CAS: 106-24-1)",
            "Caryophyllene (CAS: 87-44-5)",
            "Myrcene (CAS: 123-35-3)",
            "Camphor (CAS: 76-22-2)",
            "Eucalyptol (CAS: 470-82-6)",
            "Thujone (CAS: 546-80-5)",
            "Farnesol (CAS: 4602-84-0)",
            "Nerolidol (CAS: 7212-44-4)",
            "Bisabolol (CAS: 515-69-5)",
            "Guaiazulene (CAS: 489-84-9)",
            "Perillaldehyde (CAS: 2111-75-3)",
            "Carvone (CAS: 99-49-0)",
            "Pulegone (CAS: 89-81-6)",
            "Cedrol (CAS: 77-53-2)",
            "Sabinene (CAS: 3346-93-0)",
            "Terpinene (CAS: 99-85-4)",
            "Isopulegol (CAS: 89-79-2)",
            "Terpineol (CAS: 98-55-5)",
            "Alpha-Terpineol (CAS: 98-59-1)",
            "Valencene (CAS: 4630-07-3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Menthol (CAS: 89-78-1)",
                "Limonene (CAS: 5989-27-5)",
                "Camphor (CAS: 76-22-2)",
                "Eucalyptol (CAS: 470-82-6)",
                "Farnesol (CAS: 4602-84-0)"
            ],
            "mismatches": [
                "Pinene (CAS: 80-56-8)",
                "Linalool (CAS: 78-70-6)",
                "Geraniol (CAS: 106-24-1)",
                "Caryophyllene (CAS: 87-44-5)",
                "Myrcene (CAS: 123-35-3)",
                "Thujone (CAS: 546-80-5)",
                "Nerolidol (CAS: 7212-44-4)",
                "Bisabolol (CAS: 515-69-5)",
                "Guaiazulene (CAS: 489-84-9)",
                "Perillaldehyde (CAS: 2111-75-3)",
                "Carvone (CAS: 99-49-0)",
                "Pulegone (CAS: 89-81-6)",
                "Cedrol (CAS: 77-53-2)",
                "Sabinene (CAS: 3346-93-0)",
                "Terpinene (CAS: 99-85-4)",
                "Isopulegol (CAS: 89-79-2)",
                "Terpineol (CAS: 98-55-5)",
                "Alpha-Terpineol (CAS: 98-59-1)",
                "Valencene (CAS: 4630-07-3)"
            ],
            "true_referents": [
                "Artesunate",
                "Beta-Cryptoxanthin",
                "Camphor",
                "Carbenoxolone",
                "Diterpenes, Clerodane",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Paclitaxel",
                "Phytol",
                "Retinaldehyde",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Thymol",
                "Tretinoin",
                "Ursolic Acid",
                "beta Carotene",
                "zeta Carotene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Camphene (C10H16)",
            "\u03b2-Caryophyllene (C15H24)",
            "\u03b1-Humulene (C15H24)",
            "Geraniol (C10H18O)",
            "Nerol (C10H18O)",
            "Citronellol (C10H20O)",
            "Menthol (C10H20O)",
            "Camphor (C10H16O)",
            "Eucalyptol (1,8-Cineole) (C10H18O)",
            "Thujone (C10H16O)",
            "Farnesol (C15H26O)",
            "Nerolidol (C15H26O)",
            "Absinthol (C15H24O)",
            "Squalene (C30H50)",
            "Phytol (C20H30O)",
            "Vitamin A (Retinol) (C20H30O)",
            "Ubiquinone (Coenzyme Q10) (C59H90O13)",
            "Paclitaxel (Taxol) (C47H51NO14)",
            "Artemisinin (C15H18O5)",
            "Gingerol (C17H26O3)",
            "Capsaicin (C18H27NO3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (C10H16)",
                "Menthol (C10H20O)",
                "Camphor (C10H16O)",
                "Eucalyptol (1,8-Cineole) (C10H18O)",
                "Farnesol (C15H26O)",
                "Squalene (C30H50)",
                "Phytol (C20H30O)",
                "Vitamin A (Retinol) (C20H30O)",
                "Paclitaxel (Taxol) (C47H51NO14)"
            ],
            "mismatches": [
                "Pinene (C10H16)",
                "Myrcene (C10H16)",
                "Camphene (C10H16)",
                "\u03b2-Caryophyllene (C15H24)",
                "\u03b1-Humulene (C15H24)",
                "Geraniol (C10H18O)",
                "Nerol (C10H18O)",
                "Citronellol (C10H20O)",
                "Thujone (C10H16O)",
                "Nerolidol (C15H26O)",
                "Absinthol (C15H24O)",
                "Ubiquinone (Coenzyme Q10) (C59H90O13)",
                "Artemisinin (C15H18O5)",
                "Gingerol (C17H26O3)",
                "Capsaicin (C18H27NO3)"
            ],
            "true_referents": [
                "Albumin-Bound Paclitaxel",
                "Artemether, Lumefantrine Drug Combination",
                "Artemisinins",
                "Artesunate",
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Camphanes",
                "Camphor",
                "Diterpenes, Clerodane",
                "Docetaxel",
                "Eucalyptol",
                "Farnesol",
                "Ginsenosides",
                "Glycyrrhizic Acid",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Phorbols",
                "Phytanic Acid",
                "Phytol",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Thymol",
                "Vitamin A",
                "beta Carotene"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Myrcene (C10H16)",
            "Pinene (C10H16)",
            "Camphor (C10H16O)",
            "Menthol (C10H20O)",
            "Carvone (C10H14O)",
            "Thujone (C10H16O)",
            "Geraniol (C10H18O)",
            "Citronellol (C10H20O)",
            "Farnesol (C15H26O)",
            "Bisabolol (C15H26O)",
            "Squalene (C30H50)",
            "Humulene (C15H24)",
            "Nerolidol (C15H26O)",
            "Caryophyllene (C15H24)",
            "Linalool (C10H18O)",
            "Terpinolene (C10H16)",
            "Borneol (C10H18O)",
            "Eucalyptol (C10H18O)",
            "Alpha-Phellandrene (C10H16)",
            "Beta-Caryophyllene (C15H24)",
            "Beta-Pinene (C10H16)",
            "Sabinene (C10H16)",
            "Valencene (C15H24)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Camphor",
                "Menthol",
                "Farnesol",
                "Squalene",
                "Eucalyptol"
            ],
            "mismatches": [
                "Myrcene",
                "Pinene",
                "Carvone",
                "Thujone",
                "Geraniol",
                "Citronellol",
                "Bisabolol",
                "Humulene",
                "Nerolidol",
                "Caryophyllene",
                "Linalool",
                "Terpinolene",
                "Borneol",
                "Alpha-Phellandrene",
                "Beta-Caryophyllene",
                "Beta-Pinene",
                "Sabinene",
                "Valencene"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Camphor",
                "Carbenoxolone",
                "Diterpenes, Clerodane",
                "Eucalyptol",
                "Farnesol",
                "Gossypol",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Lutein",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Phorbol 12,13-Dibutyrate",
                "Phytol",
                "Retinaldehyde",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "beta Carotene"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Linalool (C10H18O)",
            "Geraniol (C10H18O)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Terpinene (C10H16)",
            "Farnesene (C15H24)",
            "Camphor (C10H16O)",
            "Menthol (C10H20O)",
            "Thujone (C10H14O)",
            "Borneol (C10H18O)",
            "Sabinene (C10H16)",
            "Valencene (C15H24)",
            "Nerolidol (C15H26O)",
            "Squalene (C30H50)",
            "Beta-Carophyllene (C15H24)",
            "D-limonene (C10H16)",
            "Alpha-Bisabolol (C15H26O)",
            "Bicyclogermacrene (C15H24)",
            "Cymene (C10H14)",
            "Phellandrene (C10H16)",
            "Isoprene (C5H8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (C10H16)",
                "Camphor (C10H16O)",
                "Menthol (C10H20O)",
                "Squalene (C30H50)",
                "Cymene (C10H14)"
            ],
            "mismatches": [
                "Pinene (C10H16)",
                "Myrcene (C10H16)",
                "Linalool (C10H18O)",
                "Geraniol (C10H18O)",
                "Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Terpinene (C10H16)",
                "Farnesene (C15H24)",
                "Thujone (C10H14O)",
                "Borneol (C10H18O)",
                "Sabinene (C10H16)",
                "Valencene (C15H24)",
                "Nerolidol (C15H26O)",
                "Beta-Carophyllene (C15H24)",
                "D-limonene (C10H16)",
                "Alpha-Bisabolol (C15H26O)",
                "Bicyclogermacrene (C15H24)",
                "Phellandrene (C10H16)",
                "Isoprene (C5H8)"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Camphor",
                "Cymenes",
                "Diterpenes, Clerodane",
                "Eucalyptol",
                "Farnesol",
                "Glycyrrhizic Acid",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Lutein",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Phorbol 12,13-Dibutyrate",
                "Phytol",
                "Retinaldehyde",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "beta Carotene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Myrcene (C10H16)",
            "Pinene (C10H16)",
            "Linalool (C10H18O)",
            "Geraniol (C10H18O)",
            "Menthol (C10H20O)",
            "Terpinen-4-ol (C10H18O)",
            "Camphene (C10H16)",
            "Carene (C10H16)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Farnesene (C15H24)",
            "Bisabolene (C15H24)",
            "Sesquiphellandrene (C15H24)",
            "Sabinene (C10H16)",
            "Terpinolene (C10H16)",
            "Phellandrene (C10H16)",
            "Citronellol (C10H20O)",
            "Nerol (C10H18O)",
            "Fenchol (C10H18O)",
            "Borneol (C10H18O)",
            "Eucalyptol (C10H18O)",
            "Thujone (C10H16O)",
            "Pulegone (C10H16O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Menthol",
                "Eucalyptol"
            ],
            "mismatches": [
                "Myrcene",
                "Pinene",
                "Linalool",
                "Geraniol",
                "Terpinen-4-ol",
                "Camphene",
                "Carene",
                "Caryophyllene",
                "Humulene",
                "Farnesene",
                "Bisabolene",
                "Sesquiphellandrene",
                "Sabinene",
                "Terpinolene",
                "Phellandrene",
                "Citronellol",
                "Nerol",
                "Fenchol",
                "Borneol",
                "Thujone",
                "Pulegone"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Camphanes",
                "Camphor",
                "Diterpenes, Clerodane",
                "Dolichols",
                "Eucalyptol",
                "Farnesol",
                "Gossypol",
                "Hemiterpenes",
                "Limonene",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Phorbol 12,13-Dibutyrate",
                "Phytol",
                "Polycyclic Sesquiterpenes",
                "Sesquiterpenes",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "beta Carotene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Limonene",
            "Myrcene",
            "Pinene",
            "Linalool",
            "Geraniol",
            "Caryophyllene",
            "Humulene",
            "Terpinene",
            "Terpinolene",
            "Borneol",
            "Camphor",
            "Menthol",
            "Thymol",
            "Eucalyptol",
            "Citronellal",
            "Citral",
            "Nerolidol",
            "Valencene",
            "Farnesene",
            "Sabinene",
            "Bisabolol",
            "Cedrene",
            "Phellandrene",
            "Caryophyllene oxide"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Camphor",
                "Menthol",
                "Thymol",
                "Eucalyptol"
            ],
            "mismatches": [
                "Myrcene",
                "Pinene",
                "Linalool",
                "Geraniol",
                "Caryophyllene",
                "Humulene",
                "Terpinene",
                "Terpinolene",
                "Borneol",
                "Citronellal",
                "Citral",
                "Nerolidol",
                "Valencene",
                "Farnesene",
                "Sabinene",
                "Bisabolol",
                "Cedrene",
                "Phellandrene",
                "Caryophyllene oxide"
            ],
            "true_referents": [
                "Artesunate",
                "Camphanes",
                "Camphor",
                "Cannabidiol",
                "Cannabinol",
                "Carbenoxolone",
                "Diterpenes, Clerodane",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Gossypol",
                "Hemiterpenes",
                "Limonene",
                "Limonins",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Phorbols",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "Tretinoin",
                "Triterpenes",
                "beta Carotene",
                "zeta Carotene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Myrcene",
            "Limonene",
            "Alpha-Pinene (\u03b1-Pinene)",
            "Beta-Pinene (\u03b2-Pinene)",
            "Caryophyllene",
            "Linalool",
            "Humulene",
            "Ocimene",
            "Terpineol",
            "Geraniol",
            "Farnesene",
            "Bisabolol",
            "Camphene",
            "Sabinene",
            "Nerol",
            "Terpinolene",
            "Valencene",
            "Citral",
            "Eucalyptol (1,8-Cineole)",
            "Menthol",
            "Thymol",
            "Carene",
            "Isoprene",
            "Nerolidol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Menthol",
                "Thymol",
                "Eucalyptol (1,8-Cineole)"
            ],
            "mismatches": [
                "Myrcene",
                "Alpha-Pinene (\u03b1-Pinene)",
                "Beta-Pinene (\u03b2-Pinene)",
                "Caryophyllene",
                "Linalool",
                "Humulene",
                "Ocimene",
                "Terpineol",
                "Geraniol",
                "Farnesene",
                "Bisabolol",
                "Camphene",
                "Sabinene",
                "Nerol",
                "Terpinolene",
                "Valencene",
                "Citral",
                "Carene",
                "Isoprene",
                "Nerolidol"
            ],
            "true_referents": [
                "Acyclic Monoterpenes",
                "Artesunate",
                "Camphanes",
                "Camphor",
                "Cannabidiol",
                "Carbenoxolone",
                "Cymenes",
                "Eucalyptol",
                "Farnesol",
                "Hemiterpenes",
                "Limonene",
                "Limonins",
                "Lupanes",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Polyprenols",
                "Sesquiterpenes, Eudesmane",
                "Squalene",
                "Thymol",
                "Tretinoin",
                "Triterpenes",
                "beta Carotene",
                "zeta Carotene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "\u03b1-Pinene (C10H16)",
            "\u03b2-Caryophyllene (C15H24)",
            "Limonene (C10H16)",
            "Myrcene (C10H16)",
            "Linalool (C10H18O)",
            "Geraniol (C10H18O)",
            "Menthol (C10H20O)",
            "Camphor (C10H16O)",
            "Eucalyptol (1,8-Cineole) (C10H18O)",
            "\u03b2-Pinene (C10H16)",
            "Terpinolene (C10H16)",
            "Camphene (C10H16)",
            "\u03b1-Humulene (C15H24)",
            "\u03b2-Ocimene (C10H16)",
            "Citral (C10H16O)",
            "Nerolidol (C15H26O)",
            "Farnesene (C15H24)",
            "Bisabolol (C15H26O)",
            "Carvone (C10H14O)",
            "Borneol (C10H18O)",
            "Valencene (C15H24)",
            "Sabinene (C10H16)",
            "\u03b3-Terpinene (C10H16)",
            "\u03b1-Terpineol (C10H18O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Menthol",
                "Camphor",
                "Eucalyptol"
            ],
            "mismatches": [
                "\u03b1-Pinene (C10H16)",
                "\u03b2-Caryophyllene (C15H24)",
                "Myrcene (C10H16)",
                "Linalool (C10H18O)",
                "Geraniol (C10H18O)",
                "\u03b2-Pinene (C10H16)",
                "Terpinolene (C10H16)",
                "Camphene (C10H16)",
                "\u03b1-Humulene (C15H24)",
                "\u03b2-Ocimene (C10H16)",
                "Citral (C10H16O)",
                "Nerolidol (C15H26O)",
                "Farnesene (C15H24)",
                "Bisabolol (C15H26O)",
                "Carvone (C10H14O)",
                "Borneol (C10H18O)",
                "Valencene (C15H24)",
                "Sabinene (C10H16)",
                "\u03b3-Terpinene (C10H16)",
                "\u03b1-Terpineol (C10H18O)"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Camphanes",
                "Camphor",
                "Carbenoxolone",
                "Eucalyptol",
                "Farnesol",
                "Hemiterpenes",
                "Limonene",
                "Menthol",
                "Monoterpenes",
                "Phorbol 12,13-Dibutyrate",
                "Retinaldehyde",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Thymol",
                "beta Carotene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Myrcene",
            "Limonene",
            "Linalool",
            "Pinene (\u03b1-Pinene, \u03b2-Pinene)",
            "Caryophyllene (\u03b2-Caryophyllene)",
            "Humulene (\u03b1-Humulene)",
            "Ocimene (\u03b1-Ocimene, \u03b2-Ocimene)",
            "Terpinene (\u03b1-Terpinene, \u03b3-Terpinene)",
            "Terpineol (\u03b1-Terpineol)",
            "Geraniol",
            "Camphene",
            "Sabinene",
            "Phellandrene (\u03b1-Phellandrene, \u03b2-Phellandrene)",
            "Terpinolene",
            "Valencene",
            "Nerolidol",
            "Bisabolol (\u03b1-Bisabolol)",
            "Farnesene (\u03b1-Farnesene, \u03b2-Farnesene)",
            "Cedrene (\u03b1-Cedrene)",
            "Gurjunene",
            "Thujene (\u03b1-Thujene)",
            "Camphor",
            "Borneol",
            "Menthol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Camphor",
                "Menthol"
            ],
            "mismatches": [
                "Myrcene",
                "Linalool",
                "Pinene (\u03b1-Pinene, \u03b2-Pinene)",
                "Caryophyllene (\u03b2-Caryophyllene)",
                "Humulene (\u03b1-Humulene)",
                "Ocimene (\u03b1-Ocimene, \u03b2-Ocimene)",
                "Terpinene (\u03b1-Terpinene, \u03b3-Terpinene)",
                "Terpineol (\u03b1-Terpineol)",
                "Geraniol",
                "Camphene",
                "Sabinene",
                "Phellandrene (\u03b1-Phellandrene, \u03b2-Phellandrene)",
                "Terpinolene",
                "Valencene",
                "Nerolidol",
                "Bisabolol (\u03b1-Bisabolol)",
                "Farnesene (\u03b1-Farnesene, \u03b2-Farnesene)",
                "Cedrene (\u03b1-Cedrene)",
                "Gurjunene",
                "Thujene (\u03b1-Thujene)",
                "Borneol"
            ],
            "true_referents": [
                "Artesunate",
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Camphanes",
                "Camphor",
                "Carbenoxolone",
                "Cymenes",
                "Diterpenes, Clerodane",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Glaucarubin",
                "Hemiterpenes",
                "Limonene",
                "Limonins",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Thymol",
                "Tretinoin",
                "Triterpenes",
                "beta Carotene",
                "zeta Carotene"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Menthol (C10H20O)",
            "Camphor (C10H16O)",
            "Borneol (C10H18O)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Geraniol (C10H18O)",
            "Linalool (C10H18O)",
            "Citronellol (C10H20O)",
            "Farnesol (C15H26O)",
            "Squalene (C30H50)",
            "Phytol (C20H40O)",
            "Retinol (C20H30O)",
            "Carotene (C40H56)",
            "Lycopene (C40H56)",
            "Cannabinoids (C21H30O2)",
            "Gingerol (C17H26O4)",
            "Zingiberene (C15H24)",
            "Cucurbitacin (C30H44O8)",
            "Artemisinin (C15H22O5)",
            "Taxol (C47H51NO14)",
            "Ginkgolide (C20H24O9)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Menthol",
                "Camphor",
                "Farnesol",
                "Squalene",
                "Phytol",
                "Lycopene",
                "Cannabinoids"
            ],
            "mismatches": [
                "Pinene",
                "Myrcene",
                "Borneol",
                "Caryophyllene",
                "Humulene",
                "Geraniol",
                "Linalool",
                "Citronellol",
                "Retinol",
                "Carotene",
                "Gingerol",
                "Zingiberene",
                "Cucurbitacin",
                "Artemisinin",
                "Taxol",
                "Ginkgolide"
            ],
            "true_referents": [
                "Albumin-Bound Paclitaxel",
                "Artemether, Lumefantrine Drug Combination",
                "Artemisinins",
                "Artesunate",
                "Beta-Cryptoxanthin",
                "Camphor",
                "Cannabinoids",
                "Cannabinol",
                "Carotenoids",
                "Cucurbitacins",
                "Diterpenes, Clerodane",
                "Docetaxel",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Ginkgolides",
                "Ginsenosides",
                "Glaucarubin",
                "Hemiterpenes",
                "Iridoid Glycosides",
                "Lanosterol",
                "Limonene",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phytanic Acid",
                "Phytol",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Thymol",
                "Vitamin A",
                "beta Carotene"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Limonene (LIM)",
            "Myrcene (MYR)",
            "Pinene (PIN)",
            "Linalool (LIN)",
            "Camphene (CAM)",
            "Geraniol (GER)",
            "Citronellol (CIT)",
            "Carvone (CAR)",
            "Menthol (MEN)",
            "Borneol (BOR)",
            "Thymol (THY)",
            "Caryophyllene (CAY)",
            "Farnesene (FAR)",
            "Squalene (SQU)",
            "Phytol (PHY)",
            "Retinol (RET)",
            "Tocopherol (TOC)",
            "Carotenoids (CAR)",
            "Gibberellins (GIB)",
            "Abscisic acid (ABA)",
            "Artemisinin (ART)",
            "Paclitaxel (PAC)",
            "Menthone (MEN)",
            "Camphor (CAM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Menthol",
                "Thymol",
                "Squalene",
                "Phytol",
                "Carotenoids",
                "Gibberellins",
                "Abscisic acid",
                "Paclitaxel",
                "Camphor"
            ],
            "mismatches": [
                "Myrcene",
                "Pinene",
                "Linalool",
                "Camphene",
                "Geraniol",
                "Citronellol",
                "Carvone",
                "Borneol",
                "Caryophyllene",
                "Farnesene",
                "Retinol",
                "Tocopherol",
                "Menthone"
            ],
            "true_referents": [
                "Abscisic Acid",
                "Albumin-Bound Paclitaxel",
                "Artemether",
                "Artemisinins",
                "Artesunate",
                "Betulinic Acid",
                "Camphanes",
                "Camphor",
                "Carotenoids",
                "Cymenes",
                "Docetaxel",
                "Eucalyptol",
                "Farnesol",
                "Gefarnate",
                "Gibberellins",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Lutein",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Phorbols",
                "Phytanic Acid",
                "Phytol",
                "Sesquiterpenes, Germacrane",
                "Squalene",
                "Thymol",
                "Vitamin A",
                "beta Carotene",
                "zeta Carotene"
            ],
            "TP": 10,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": "[\n\"Pinene (\u03b1-Pinene)\" ,\n\"Caryophyllene (\u03b2-Caryophyllene)\" ,\n\"Limonene\" ,\n\"\u03b2-Myrcene\" ,\n\"\u03b1-Terpineol\" ,\n\"Linalool\" ,\n\"Geraniol\" ,\n\"\u03b2-Pinene\" ,\n\"Sabinene\" ,\n\"\u03b1-Humulene\" ,\n\"\u03b2-Humulene\" ,\n\"\u03943-Carene\" ,\n\"\u03b2-Copaene\" ,\n\"\u03b1-Cedrene\" ,\n\"\u03b2-Cedrene\" ,\n\"\u03b2-Elemene\" ,\n\"\u03b2-Selinene\" ,\n\"\u03b2-Caryophyllene oxide\" ,\n\"\u03b1-Terpineol oxide\" ,\n\"Perillyl alcohol\" ,\n\"Bornyl acetate\" ,\n\"Geranyl acetate\" ,\n\"Linalyl acetate\" ,\n\"\u03b2-Myrcenol\" ,\n\"\u03b2-Caryophyllen-9-ol\"",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Geraniol (GER)",
            "Limonene (LIM)",
            "Pinene (PIN)",
            "Linalool (LNL)",
            "Caryophyllene (CYP)",
            "Myrcene (MYR)",
            "Humulene (HUM)",
            "Bisabolol (BIS)",
            "Farnesol (FAR)",
            "Sesquiterpene (SES)",
            "Camphor (CAM)",
            "Borneol (BOR)",
            "Terpineol (TER)",
            "Citronellol (CIT)",
            "R-limonene (RLIM)",
            "Sabinene (SAB)",
            "Delta-3-carene (D3C)",
            "Alpha-pinene (APIN)",
            "Beta-pinene (BPIN)",
            "Gamma-terpinene (GTER)",
            "Alpha-terpinene (ATER)",
            "Beta-terpinene (BTER)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Farnesol",
                "Camphor"
            ],
            "mismatches": [
                "Geraniol (GER)",
                "Pinene (PIN)",
                "Linalool (LNL)",
                "Caryophyllene (CYP)",
                "Myrcene (MYR)",
                "Humulene (HUM)",
                "Bisabolol (BIS)",
                "Sesquiterpene (SES)",
                "Borneol (BOR)",
                "Terpineol (TER)",
                "Citronellol (CIT)",
                "R-limonene (RLIM)",
                "Sabinene (SAB)",
                "Delta-3-carene (D3C)",
                "Alpha-pinene (APIN)",
                "Beta-pinene (BPIN)",
                "Gamma-terpinene (GTER)",
                "Alpha-terpinene (ATER)",
                "Beta-terpinene (BTER)"
            ],
            "true_referents": [
                "Acyclic Monoterpenes",
                "Artesunate",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Camphanes",
                "Camphor",
                "Cannabinol",
                "Carbenoxolone",
                "Diterpenes, Kaurane",
                "Eucalyptol",
                "Farnesol",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Sesquiterpenes",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Sesquiterpenes, Guaiane",
                "Sesterterpenes",
                "Squalene",
                "Thymol",
                "Triterpenes",
                "beta Carotene",
                "zeta Carotene"
            ],
            "TP": 3,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Limonene",
                "Canonical Name": "Limonene"
            },
            {
                "Referent": "Pinene",
                "Canonical Name": "Pinene"
            },
            {
                "Referent": "Myrcene",
                "Canonical Name": "Myrcene"
            },
            {
                "Referent": "Ocimene",
                "Canonical Name": "Ocimene"
            },
            {
                "Referent": "Terpinolene",
                "Canonical Name": "Terpinolene"
            },
            {
                "Referent": "Borneol",
                "Canonical Name": "Borneol"
            },
            {
                "Referent": "Camphene",
                "Canonical Name": "Camphene"
            },
            {
                "Referent": "Sabinene",
                "Canonical Name": "Sabinene"
            },
            {
                "Referent": "Delta-3-carene",
                "Canonical Name": "Delta-3-carene"
            },
            {
                "Referent": "Phellandrene",
                "Canonical Name": "Phellandrene"
            },
            {
                "Referent": "Terpinene",
                "Canonical Name": "Terpinene"
            },
            {
                "Referent": "Linalool",
                "Canonical Name": "Linalool"
            },
            {
                "Referent": "Geraniol",
                "Canonical Name": "Geraniol"
            },
            {
                "Referent": "Nerolidol",
                "Canonical Name": "Nerolidol"
            },
            {
                "Referent": "Citronellol",
                "Canonical Name": "Citronellol"
            },
            {
                "Referent": "Geranyl acetate",
                "Canonical Name": "Geranyl acetate"
            },
            {
                "Referent": "Neryl acetate",
                "Canonical Name": "Neryl acetate"
            },
            {
                "Referent": "Geranyl formate",
                "Canonical Name": "Geranyl formate"
            },
            {
                "Referent": "Neryl formate",
                "Canonical Name": "Neryl formate"
            },
            {
                "Referent": "Geranyl propionate",
                "Canonical Name": "Geranyl propionate"
            },
            {
                "Referent": "Neryl propionate",
                "Canonical Name": "Neryl propionate"
            },
            {
                "Referent": "Geranyl butyrate",
                "Canonical Name": "Geranyl butyrate"
            },
            {
                "Referent": "Neryl butyrate",
                "Canonical Name": "Neryl butyrate"
            },
            {
                "Referent": "Geranyl isovalerate",
                "Canonical Name": "Geranyl isovalerate"
            },
            {
                "Referent": "Neryl isovalerate",
                "Canonical Name": "Neryl isovalerate"
            },
            {
                "Referent": "Geranyl hexanoate",
                "Canonical Name": "Geranyl hexanoate"
            },
            {
                "Referent": "Neryl hexanoate",
                "Canonical Name": "Neryl hexanoate"
            },
            {
                "Referent": "Geranyl octanoate",
                "Canonical Name": "Geranyl octanoate"
            },
            {
                "Referent": "Neryl octanoate",
                "Canonical Name": "Neryl octanoate"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene"
            ],
            "mismatches": [
                "Pinene",
                "Myrcene",
                "Ocimene",
                "Terpinolene",
                "Borneol",
                "Camphene",
                "Sabinene",
                "Delta-3-carene",
                "Phellandrene",
                "Terpinene",
                "Linalool",
                "Geraniol",
                "Nerolidol",
                "Citronellol",
                "Geranyl acetate",
                "Neryl acetate",
                "Geranyl formate",
                "Neryl formate",
                "Geranyl propionate",
                "Neryl propionate",
                "Geranyl butyrate",
                "Neryl butyrate",
                "Geranyl isovalerate",
                "Neryl isovalerate",
                "Geranyl hexanoate",
                "Neryl hexanoate",
                "Geranyl octanoate",
                "Neryl octanoate"
            ],
            "true_referents": [
                "Abietanes",
                "Betulinic Acid",
                "Camphanes",
                "Camphor",
                "Cymenes",
                "Diterpenes, Clerodane",
                "Diterpenes, Kaurane",
                "Eucalyptol",
                "Glycyrrhizic Acid",
                "Iridoids",
                "Limonene",
                "Lupanes",
                "Monoterpenes",
                "Oleanolic Acid",
                "Phorbol 12,13-Dibutyrate",
                "Phytanic Acid",
                "Sesquiterpenes",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Sesquiterpenes, Guaiane",
                "Trichothecenes, Type B",
                "Trichothecenes, Type C",
                "Triterpenes",
                "beta Carotene"
            ],
            "TP": 1,
            "FP": 28,
            "FN": 0
        }
    }
]