[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (Glyceraldehyde)",
            "Ribose (Rbo)",
            "Deoxyribose (dRbo)",
            "Rhamnose (Rha)",
            "Ribitol (Ribitol)",
            "Xylitol (Xylitol)",
            "Pentose Phosphate (Pentose-P)",
            "Pentose Sugar (Pentose)",
            "Riboflavin (Riboflavin)",
            "Ribonucleotide (RiboNucleotide)",
            "Deoxyribonucleotide (dRiboNucleotide)",
            "Ribose-5-phosphate (R5P)",
            "D-Ribulose (Ribu)",
            "D-Xylose (Xyl)",
            "D-Arabinose (Ara)",
            "D-Ribose-5-phosphate (R5P)",
            "D-Xylulose-5-phosphate (X5P)",
            "D-Ribulose-5-phosphate (Ru5P)",
            "D-Erythrose-4-phosphate (E4P)",
            "D-Xylulose (Xylu)",
            "D-Ribulose (Ribu)",
            "D-Erythrose (Ery)",
            "D-Xylitol (Xylitol)",
            "D-Arabitol (Araitol)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Glyceraldehyde (Glyceraldehyde)",
                "Ribose (Rbo)",
                "Deoxyribose (dRbo)",
                "Rhamnose (Rha)",
                "Ribitol (Ribitol)",
                "Xylitol (Xylitol)",
                "Pentose Phosphate (Pentose-P)",
                "Pentose Sugar (Pentose)",
                "Riboflavin (Riboflavin)",
                "Ribonucleotide (RiboNucleotide)",
                "Deoxyribonucleotide (dRiboNucleotide)",
                "Ribose-5-phosphate (R5P)",
                "D-Ribulose (Ribu)",
                "D-Xylose (Xyl)",
                "D-Arabinose (Ara)",
                "D-Ribose-5-phosphate (R5P)",
                "D-Xylulose-5-phosphate (X5P)",
                "D-Ribulose-5-phosphate (Ru5P)",
                "D-Erythrose-4-phosphate (E4P)",
                "D-Xylulose (Xylu)",
                "D-Ribulose (Ribu)",
                "D-Erythrose (Ery)",
                "D-Xylitol (Xylitol)",
                "D-Arabitol (Araitol)"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Docetaxel",
                "Dolichol Monophosphate Mannose",
                "Dolichol Phosphates",
                "Dolichols",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Glycyrrhizic Acid",
                "Lycopene",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phytol",
                "Polyisoprenyl Phosphate Monosaccharides",
                "Polyisoprenyl Phosphate Oligosaccharides",
                "Polyisoprenyl Phosphate Sugars",
                "Polyisoprenyl Phosphates",
                "Polyprenols",
                "Retinaldehyde",
                "Vitamin A",
                "beta Carotene"
            ],
            "TP": 0,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Dimethyl fumarate (C6H8O4)",
            "Geranyl pyrophosphate (C10H20O7P2)",
            "Farnesyl pyrophosphate (C15H28O7P2)",
            "Squalene (C30H50)",
            "Branched isoprenoids",
            "Phytol (C20H40O)",
            "Limonene (C10H16)",
            "Phytane (C20H42)",
            "Prenol (C5H12O)",
            "Geraniol (C10H18O)",
            "Myrcene (C10H16)",
            "Alpha-pinene (C10H16)",
            "Beta-pinene (C10H16)",
            "Eugenol (C10H12O2)",
            "Linalool (C10H18O)",
            "Citronellol (C10H20O)",
            "Camphene (C10H16)",
            "Gamma-terpinene (C10H16)",
            "Neryl pyrophosphate (C10H20O7P2)",
            "Germacrene D (C15H24)",
            "Zingiberene (C15H24)",
            "Beta-sitosterol (C29H50O)",
            "Farnesol (C15H26O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Squalene",
                "Phytol",
                "Limonene",
                "Farnesol"
            ],
            "mismatches": [
                "Isoprene (C5H8)",
                "Dimethyl fumarate (C6H8O4)",
                "Geranyl pyrophosphate (C10H20O7P2)",
                "Farnesyl pyrophosphate (C15H28O7P2)",
                "Branched isoprenoids",
                "Phytane (C20H42)",
                "Prenol (C5H12O)",
                "Geraniol (C10H18O)",
                "Myrcene (C10H16)",
                "Alpha-pinene (C10H16)",
                "Beta-pinene (C10H16)",
                "Eugenol (C10H12O2)",
                "Linalool (C10H18O)",
                "Citronellol (C10H20O)",
                "Camphene (C10H16)",
                "Gamma-terpinene (C10H16)",
                "Neryl pyrophosphate (C10H20O7P2)",
                "Germacrene D (C15H24)",
                "Zingiberene (C15H24)",
                "Beta-sitosterol (C29H50O)"
            ],
            "true_referents": [
                "Acyclic Monoterpenes",
                "Beta-Cryptoxanthin",
                "Camphanes",
                "Camphor",
                "Diterpenes",
                "Diterpenes, Clerodane",
                "Dolichol Monophosphate Mannose",
                "Farnesol",
                "Gefarnate",
                "Lanosterol",
                "Limonene",
                "Menthol",
                "Monoterpenes",
                "Norisoprenoids",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Oleanolic Acid",
                "Phorbol 12,13-Dibutyrate",
                "Phytanic Acid",
                "Phytol",
                "Polyisoprenyl Phosphates",
                "Polyprenols",
                "Sesquiterpenes, Germacrane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "beta Carotene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (2-methyl-1,3-butadiene)",
            "Geraniol",
            "Linalool",
            "Limonene",
            "Myrcene",
            "Ocimene",
            "Beta-Caryophyllene",
            "Farnesol",
            "Nerolidol",
            "Squalene",
            "Phytol",
            "Retinol (Vitamin A)",
            "Beta-Carotene",
            "Lycopene",
            "Vitamin K1 (Phylloquinone)",
            "Vitamin E (Tocopherol)",
            "Abscisic Acid",
            "Gibberellin",
            "Brassinosteroid",
            "Jasmonic Acid",
            "Salicylic Acid",
            "Strigolactone",
            "Zeatin",
            "Indole-3-Acetic Acid (IAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Farnesol",
                "Squalene",
                "Phytol",
                "Beta-Carotene",
                "Lycopene",
                "Abscisic Acid"
            ],
            "mismatches": [
                "Isoprene (2-methyl-1,3-butadiene)",
                "Geraniol",
                "Linalool",
                "Myrcene",
                "Ocimene",
                "Beta-Caryophyllene",
                "Nerolidol",
                "Retinol (Vitamin A)",
                "Vitamin K1 (Phylloquinone)",
                "Vitamin E (Tocopherol)",
                "Gibberellin",
                "Brassinosteroid",
                "Jasmonic Acid",
                "Salicylic Acid",
                "Strigolactone",
                "Zeatin",
                "Indole-3-Acetic Acid (IAA)"
            ],
            "true_referents": [
                "Abscisic Acid",
                "Beta-Cryptoxanthin",
                "Camphor",
                "Carotenoids",
                "Cymenes",
                "Escin",
                "Farnesol",
                "Gibberellins",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Lupanes",
                "Lutein",
                "Lycopene",
                "Menthol",
                "Oleanolic Acid",
                "Phorbols",
                "Phytanic Acid",
                "Phytol",
                "Santonin",
                "Sapogenins",
                "Sesquiterpenes, Eudesmane",
                "Squalene",
                "Thymol",
                "Tretinoin",
                "Ursolic Acid",
                "Vitamin A",
                "Vitamin K",
                "Vitamin K 1",
                "Vitamin K 3",
                "beta Carotene"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Phytol (Phyt)",
            "Geranylgeraniol (GGOH)",
            "Farnesol (FOH)",
            "Nerolidol (Ner)",
            "Linalool (Lin)",
            "Citronellol (Cit)",
            "Geraniol (Ger)",
            "Farnesyl pyrophosphate (FPP)",
            "Geranyl pyrophosphate (GPP)",
            "Isopentenyl pyrophosphate (IPP)",
            "Dimethylallyl pyrophosphate (DMAPP)",
            "Squalene (Sq)",
            "Dolichol (Dol)",
            "Ubiquinone (CoQ10)",
            "Plastoquinone (PQ)",
            "Vitamin K1 (Phylloquinone)",
            "Vitamin E (Tocopherol)",
            "Carotene (Car)",
            "Lycopene (Lyc)",
            "Beta-carotene (\u03b2-Car)",
            "Lutein (Lut)",
            "Zeaxanthin (Zea)",
            "Astaxanthin (Ast)",
            "Canthaxanthin (Can)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phytol",
                "Farnesol",
                "Squalene",
                "Dolichol",
                "Vitamin K 1",
                "Lycopene",
                "beta Carotene",
                "Lutein",
                "Zeaxanthins",
                "Canthaxanthin"
            ],
            "mismatches": [
                "Geranylgeraniol (GGOH)",
                "Nerolidol (Ner)",
                "Linalool (Lin)",
                "Citronellol (Cit)",
                "Geraniol (Ger)",
                "Farnesyl pyrophosphate (FPP)",
                "Geranyl pyrophosphate (GPP)",
                "Isopentenyl pyrophosphate (IPP)",
                "Dimethylallyl pyrophosphate (DMAPP)",
                "Ubiquinone (CoQ10)",
                "Plastoquinone (PQ)",
                "Vitamin E (Tocopherol)",
                "Carotene (Car)",
                "Astaxanthin (Ast)"
            ],
            "true_referents": [
                "Aconitine",
                "Beta-Cryptoxanthin",
                "Canthaxanthin",
                "Carotenoids",
                "Dolichol Monophosphate Mannose",
                "Dolichol Phosphates",
                "Dolichols",
                "Eucalyptol",
                "Farnesol",
                "Glycyrrhizic Acid",
                "Limonene",
                "Lutein",
                "Lycopene",
                "Menthol",
                "Phorbol 12,13-Dibutyrate",
                "Phorbols",
                "Phytanic Acid",
                "Phytol",
                "Polyisoprenyl Phosphate Monosaccharides",
                "Polyisoprenyl Phosphate Oligosaccharides",
                "Polyisoprenyl Phosphate Sugars",
                "Polyisoprenyl Phosphates",
                "Sesquiterpenes, Germacrane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "Tretinoin",
                "Vitamin A",
                "Vitamin K",
                "Vitamin K 1",
                "Vitamin K 3",
                "Zeaxanthins",
                "beta Carotene"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Geraniol (C10H18O)",
            "Limonene (C10H16)",
            "Menthol (C10H20O)",
            "Myrcene (C10H16)",
            "Farnesene (C15H24)",
            "Farnesol (C15H26O)",
            "Squalene (C30H50)",
            "Beta-Caryophyllene (C15H24)",
            "Linalool (C10H18O)",
            "Nerolidol (C15H26O)",
            "Bisabolene (C15H24)",
            "Humulene (C15H24)",
            "Phytol (C20H40O)",
            "Carotene (C40H56)",
            "Retinol (C20H30O)",
            "Abietic Acid (C20H30O2)",
            "Taxadiene (C20H32)",
            "Gibberellin (C19H22O6)",
            "Atractylone (C15H24O)",
            "Cedrene (C15H24)",
            "Santolina triene (C15H24)",
            "Artemisia ketone (C10H16O)",
            "Citral (C10H16O)",
            "Camphene (C10H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Menthol",
                "Farnesol",
                "Squalene",
                "Phytol",
                "Gibberellins"
            ],
            "mismatches": [
                "Geraniol (C10H18O)",
                "Myrcene (C10H16)",
                "Farnesene (C15H24)",
                "Beta-Caryophyllene (C15H24)",
                "Linalool (C10H18O)",
                "Nerolidol (C15H26O)",
                "Bisabolene (C15H24)",
                "Humulene (C15H24)",
                "Carotene (C40H56)",
                "Retinol (C20H30O)",
                "Abietic Acid (C20H30O2)",
                "Taxadiene (C20H32)",
                "Atractylone (C15H24O)",
                "Cedrene (C15H24)",
                "Santolina triene (C15H24)",
                "Artemisia ketone (C10H16O)",
                "Citral (C10H16O)",
                "Camphene (C10H16)"
            ],
            "true_referents": [
                "Abietanes",
                "Abscisic Acid",
                "Artemisinins",
                "Artesunate",
                "Atractyloside",
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Camphanes",
                "Camphor",
                "Carotenoids",
                "Docetaxel",
                "Farnesol",
                "Gibberellins",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Lutein",
                "Lycopene",
                "Menthol",
                "Monoterpene Aldehydes and Ketones",
                "Monoterpenes",
                "Oleanolic Acid",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phytanic Acid",
                "Phytol",
                "Santonin",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "Vitamin A",
                "beta Carotene"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "C50H82 (squalene)",
            "C15H24 (farnesene)",
            "C30H50 (hopene)",
            "C10H16 (myrcene)",
            "C15H24 (geranylgeraniol)",
            "C20H32 (caryophyllene)",
            "C25H40 (phytol)",
            "C15H24 (nerolidol)",
            "C30H50 (taraxasterol)",
            "C40H64 (beta-carotene)",
            "C25H40 (lupeol)",
            "C30H50 (cycloartenol)",
            "C15H24 (bisabolene)",
            "C20H32 (squalene epoxide)",
            "C30H50 (stigmasta-5,22-dien-3-ol)",
            "C25H40 (beta-amyrin)",
            "C35H60 (lupeol acetate)",
            "C20H32 (germacrene D)",
            "C25H40 (oleanolic acid)",
            "C15H24 (\u03b1-farnesene)",
            "C20H32 (delta-cadinene)",
            "C30H50 (daucosterol)",
            "C15H24 (sesquithujene)",
            "C25H40 (betulin)",
            "C30H50 (solasodine)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Squalene",
                "Phytol",
                "beta Carotene",
                "Oleanolic Acid"
            ],
            "mismatches": [
                "C15H24 (farnesene)",
                "C30H50 (hopene)",
                "C10H16 (myrcene)",
                "C15H24 (geranylgeraniol)",
                "C20H32 (caryophyllene)",
                "C15H24 (nerolidol)",
                "C30H50 (taraxasterol)",
                "C25H40 (lupeol)",
                "C30H50 (cycloartenol)",
                "C15H24 (bisabolene)",
                "C20H32 (squalene epoxide)",
                "C30H50 (stigmasta-5,22-dien-3-ol)",
                "C25H40 (lupeol acetate)",
                "C20H32 (germacrene D)",
                "C15H24 (\u03b1-farnesene)",
                "C20H32 (delta-cadinene)",
                "C30H50 (daucosterol)",
                "C15H24 (sesquithujene)",
                "C25H40 (betulin)",
                "C30H50 (solasodine)"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Carotenoids",
                "Diterpenes, Clerodane",
                "Diterpenes, Kaurane",
                "Dronabinol",
                "Farnesol",
                "Lanosterol",
                "Limonene",
                "Monoterpenes",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Oleanolic Acid",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phytanic Acid",
                "Phytol",
                "Retinaldehyde",
                "Sesquiterpenes",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Ursolic Acid",
                "beta Carotene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Polyisoprene (C5H8)n",
            "Natural Rubber (C5H8)n",
            "Gutta-Percha (C5H8)n",
            "Chloroprene (C5H8)",
            "Neoprene (polychloroprene)",
            "Cis-1,4-polyisoprene",
            "Trans-1,4-polyisoprene",
            "3-Methyl-1,3-butadiene",
            "2-Methyl-1,3-butadiene",
            "1,3-Pentadiene",
            "Piperylene (1,3-pentadiene)",
            "2-Methyl-2-butene",
            "2-Methyl-1-butene",
            "3-Methyl-1-butene",
            "Isopentane (C5H12)",
            "Neopentane (C5H12)",
            "Isopentene (C5H10)",
            "2-Methyl-2-butene (C5H10)",
            "3-Methyl-1-butene (C5H10)",
            "2,2-Dimethylpropene (C5H10)",
            "2,3-Dimethyl-2-butene (C5H10)",
            "1,3-Di-tert-butylbenzene",
            "1,2-Di-tert-butylbenzene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Isoprene (C5H8)",
                "Polyisoprene (C5H8)n",
                "Natural Rubber (C5H8)n",
                "Gutta-Percha (C5H8)n",
                "Chloroprene (C5H8)",
                "Neoprene (polychloroprene)",
                "Cis-1,4-polyisoprene",
                "Trans-1,4-polyisoprene",
                "3-Methyl-1,3-butadiene",
                "2-Methyl-1,3-butadiene",
                "1,3-Pentadiene",
                "Piperylene (1,3-pentadiene)",
                "2-Methyl-2-butene",
                "2-Methyl-1-butene",
                "3-Methyl-1-butene",
                "Isopentane (C5H12)",
                "Neopentane (C5H12)",
                "Isopentene (C5H10)",
                "2-Methyl-2-butene (C5H10)",
                "3-Methyl-1-butene (C5H10)",
                "2,2-Dimethylpropene (C5H10)",
                "2,3-Dimethyl-2-butene (C5H10)",
                "1,3-Di-tert-butylbenzene",
                "1,2-Di-tert-butylbenzene"
            ],
            "true_referents": [
                "Betulinic Acid",
                "Cannabinol",
                "Cyclopentane Monoterpenes",
                "Diterpenes, Clerodane",
                "Farnesol",
                "Glycyrrhizic Acid",
                "Limonene",
                "Monoterpenes",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Paclitaxel",
                "Pentacyclic Triterpenes",
                "Phorbol 12,13-Dibutyrate",
                "Polyisoprenyl Phosphate Monosaccharides",
                "Polyisoprenyl Phosphate Sugars",
                "Polyisoprenyl Phosphates",
                "Polyprenols",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "beta Carotene"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Geraniol (C10H18O)",
            "Limonene (C10H16)",
            "Farnesene (C15H24)",
            "Myrcene (C10H16)",
            "Nerolidol (C15H26O)",
            "Squalene (C30H50)",
            "Linalool (C10H18O)",
            "Caryophyllene (C15H24)",
            "Bisabolol (C15H26O)",
            "Cedrene (C15H24)",
            "Sabinene (C10H16)",
            "Terpinene (C10H16)",
            "Cineole (C10H18O)",
            "Phellandrene (C10H16)",
            "Camphene (C10H16)",
            "Borneol (C10H18O)",
            "Caryophyllene oxide (C15H24O)",
            "Thujene (C10H16)",
            "Limonene oxide (C10H16O)",
            "Terpinolene (C10H16)",
            "Cadinene (C15H24)",
            "Valencene (C15H24)",
            "Citronellol (C10H20O)",
            "Sesquiterpene (C15H24)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Squalene"
            ],
            "mismatches": [
                "Isoprene (C5H8)",
                "Geraniol (C10H18O)",
                "Farnesene (C15H24)",
                "Myrcene (C10H16)",
                "Nerolidol (C15H26O)",
                "Linalool (C10H18O)",
                "Caryophyllene (C15H24)",
                "Bisabolol (C15H26O)",
                "Cedrene (C15H24)",
                "Sabinene (C10H16)",
                "Terpinene (C10H16)",
                "Cineole (C10H18O)",
                "Phellandrene (C10H16)",
                "Camphene (C10H16)",
                "Borneol (C10H18O)",
                "Caryophyllene oxide (C15H24O)",
                "Thujene (C10H16)",
                "Limonene oxide (C10H16O)",
                "Terpinolene (C10H16)",
                "Cadinene (C15H24)",
                "Valencene (C15H24)",
                "Citronellol (C10H20O)",
                "Sesquiterpene (C15H24)"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Bicyclic Monoterpenes",
                "Camphanes",
                "Camphor",
                "Carbenoxolone",
                "Diterpenes, Clerodane",
                "Diterpenes, Kaurane",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Hemiterpenes",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Lycopene",
                "Menthol",
                "Monoterpenes",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Retinaldehyde",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "beta Carotene"
            ],
            "TP": 2,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Dolichol",
            "All-trans-Polyprenol",
            "Polyprenol-5",
            "Polyprenol-10",
            "Polyprenol-15",
            "Polyprenol-20",
            "Polyprenol-25",
            "Polyprenol-30",
            "Polyprenol-35",
            "Polyprenol-40",
            "Polyprenol-45",
            "Polyprenol-50",
            "Polyprenol-55",
            "Polyprenol-60",
            "Polyprenol-65",
            "Polyprenol-70",
            "Polyprenol-75",
            "Polyprenol-80",
            "Polyprenol-85",
            "Polyprenol-90",
            "Polyprenol-95",
            "Polyprenol-100",
            "Synthetic Polyprenol A",
            "Synthetic Polyprenol B"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Polyprenols"
            ],
            "mismatches": [
                "Dolichol",
                "All-trans-Polyprenol",
                "Polyprenol-5",
                "Polyprenol-10",
                "Polyprenol-15",
                "Polyprenol-20",
                "Polyprenol-25",
                "Polyprenol-30",
                "Polyprenol-35",
                "Polyprenol-40",
                "Polyprenol-45",
                "Polyprenol-50",
                "Polyprenol-55",
                "Polyprenol-60",
                "Polyprenol-65",
                "Polyprenol-70",
                "Polyprenol-75",
                "Polyprenol-80",
                "Polyprenol-85",
                "Polyprenol-90",
                "Polyprenol-95",
                "Polyprenol-100",
                "Synthetic Polyprenol A",
                "Synthetic Polyprenol B"
            ],
            "true_referents": [
                "Dolichol Phosphates",
                "Dolichols",
                "Farnesol",
                "Polyisoprenyl Phosphate Monosaccharides",
                "Polyisoprenyl Phosphate Oligosaccharides",
                "Polyisoprenyl Phosphate Sugars",
                "Polyisoprenyl Phosphates",
                "Polyprenols"
            ],
            "TP": 1,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Prenol (C5H10O)",
            "Isoprenol",
            "Dimethylallyl pyrophosphate (DMAPP)",
            "Isopentenyl pyrophosphate (IPP)",
            "Geraniol",
            "Linalool",
            "Myrcene",
            "Ocimene",
            "Limonene",
            "Pinene",
            "Carene",
            "Phellandrene",
            "Terpinolene",
            "Citral",
            "Geranial",
            "Neral",
            "Citronellal",
            "Menthol",
            "Carvone",
            "Camphor",
            "Thujone",
            "Borneol",
            "Eucalyptol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Menthol",
                "Camphor",
                "Eucalyptol"
            ],
            "mismatches": [
                "Isoprene (C5H8)",
                "Prenol (C5H10O)",
                "Isoprenol",
                "Dimethylallyl pyrophosphate (DMAPP)",
                "Isopentenyl pyrophosphate (IPP)",
                "Geraniol",
                "Linalool",
                "Myrcene",
                "Ocimene",
                "Pinene",
                "Carene",
                "Phellandrene",
                "Terpinolene",
                "Citral",
                "Geranial",
                "Neral",
                "Citronellal",
                "Carvone",
                "Thujone",
                "Borneol"
            ],
            "true_referents": [
                "Abietanes",
                "Camphor",
                "Cymenes",
                "Diterpenes, Clerodane",
                "Dolichol Monophosphate Mannose",
                "Dronabinol",
                "Eucalyptol",
                "Farnesol",
                "Gossypol",
                "Hemiterpenes",
                "Limonene",
                "Limonins",
                "Lupanes",
                "Menthol",
                "Monoterpenes",
                "Norisoprenoids",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phorbols",
                "Polyisoprenyl Phosphate Monosaccharides",
                "Polyisoprenyl Phosphate Sugars",
                "Polyisoprenyl Phosphates",
                "Polyprenols",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Guaiane",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "Triterpenes",
                "beta Carotene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Myrcene",
            "Ocimene",
            "Farnesene",
            "Geraniol",
            "Linalool",
            "Citronellol",
            "Nerolidol",
            "Farnesol",
            "Geranylgeraniol",
            "Squalene",
            "Lycopene",
            "\u03b2-Carotene",
            "Retinol (Vitamin A)",
            "Phytol",
            "Vitamin E",
            "Vitamin K1 (Phylloquinone)",
            "Coenzyme Q10 (Ubiquinone)",
            "Plastoquinone",
            "Menaquinone (Vitamin K2)",
            "Dolichol",
            "Rubber",
            "Gutta-percha",
            "Chicle"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Farnesol",
                "Squalene",
                "Lycopene",
                "Phytol",
                "Dolichol"
            ],
            "mismatches": [
                "Isoprene (C5H8)",
                "Myrcene",
                "Ocimene",
                "Farnesene",
                "Geraniol",
                "Linalool",
                "Citronellol",
                "Nerolidol",
                "Geranylgeraniol",
                "\u03b2-Carotene",
                "Retinol (Vitamin A)",
                "Vitamin E",
                "Vitamin K1 (Phylloquinone)",
                "Coenzyme Q10 (Ubiquinone)",
                "Plastoquinone",
                "Menaquinone (Vitamin K2)",
                "Rubber",
                "Gutta-percha",
                "Chicle"
            ],
            "true_referents": [
                "Aconitine",
                "Beta-Cryptoxanthin",
                "Camphanes",
                "Camphor",
                "Cannabinol",
                "Canthaxanthin",
                "Carotenoids",
                "Cymenes",
                "Dolichol Phosphates",
                "Dolichols",
                "Escin",
                "Eucalyptol",
                "Farnesol",
                "Glycyrrhizic Acid",
                "Gossypol",
                "Lanosterol",
                "Limonene",
                "Lupanes",
                "Lutein",
                "Lycopene",
                "Menthol",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phorbols",
                "Phytanic Acid",
                "Phytol",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Squalene",
                "Taxoids",
                "Thymol",
                "Tretinoin",
                "Vitamin A",
                "Vitamin K",
                "Vitamin K 1",
                "Vitamin K 2",
                "Vitamin K 3",
                "beta Carotene"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Cholesterol (C27H46O)",
            "Squalene (C30H50)",
            "Lanosterol (C30H50O)",
            "Ergosterol (C28H44O)",
            "Stigmasterol (C29H48O)",
            "Brassicasterol (C28H48O)",
            "Campesterol (C28H48O)",
            "Sitosterol (C29H50O)",
            "Fucosterol (C29H48O)",
            "Vitamin D3 (C27H44O)",
            "Vitamin K1 (C31H46O2)",
            "Vitamin K2 (C31H46O2)",
            "Dolichol (C80-C105)",
            "Ubiquinone (C59H90O4)",
            "Plastoquinone (C45H58O4)",
            "Menaquinone (C31H40O2)",
            "Solanesol (C45H74O)",
            "Presqualene diphosphate (C30H52O7P2)",
            "Farnesyl diphosphate (C15H28O7P2)",
            "Geranylgeranyl diphosphate (C20H36O7P2)",
            "Undecaprenyl diphosphate (C55H89O7P2)",
            "Heptaprenyl diphosphate (C35H59O7P2)",
            "Octaprenyl diphosphate (C40H67O7P2)",
            "Decaprenyl diphosphate (C50H81O7P2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Squalene (C30H50)",
                "Lanosterol (C30H50O)",
                "Vitamin K1 (C31H46O2)",
                "Vitamin K2 (C31H46O2)"
            ],
            "mismatches": [
                "Cholesterol (C27H46O)",
                "Ergosterol (C28H44O)",
                "Stigmasterol (C29H48O)",
                "Brassicasterol (C28H48O)",
                "Campesterol (C28H48O)",
                "Sitosterol (C29H50O)",
                "Fucosterol (C29H48O)",
                "Vitamin D3 (C27H44O)",
                "Dolichol (C80-C105)",
                "Ubiquinone (C59H90O4)",
                "Plastoquinone (C45H58O4)",
                "Menaquinone (C31H40O2)",
                "Solanesol (C45H74O)",
                "Presqualene diphosphate (C30H52O7P2)",
                "Farnesyl diphosphate (C15H28O7P2)",
                "Geranylgeranyl diphosphate (C20H36O7P2)",
                "Undecaprenyl diphosphate (C55H89O7P2)",
                "Heptaprenyl diphosphate (C35H59O7P2)",
                "Octaprenyl diphosphate (C40H67O7P2)",
                "Decaprenyl diphosphate (C50H81O7P2)"
            ],
            "true_referents": [
                "Beta-Cryptoxanthin",
                "Canthaxanthin",
                "Carbenoxolone",
                "Dolichol Phosphates",
                "Dolichols",
                "Farnesol",
                "Glycyrrhizic Acid",
                "Lanosterol",
                "Limonene",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Phorbol 12,13-Dibutyrate",
                "Phytol",
                "Polyisoprenyl Phosphate Monosaccharides",
                "Polyisoprenyl Phosphate Oligosaccharides",
                "Polyisoprenyl Phosphate Sugars",
                "Polyisoprenyl Phosphates",
                "Squalene",
                "Tetradecanoylphorbol Acetate",
                "Vitamin A",
                "Vitamin K",
                "Vitamin K 1",
                "Vitamin K 2",
                "Vitamin K 3",
                "beta Carotene"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Farnesol (Far)",
            "Geranylgeraniol (GGol)",
            "Dolichol (Dol)",
            "Ubiquinone (Q)",
            "Squalene (Sq)",
            "Phytoene (Phy)",
            "Retinol (Ret)",
            "Tocopherol (Toc)",
            "Menaquinone (MK)",
            "Phylloquinone (K1)",
            "Ergosterol (Erg)",
            "Lanosterol (Lan)",
            "Cholesterol (Chol)",
            "Stigmasterol (Sti)",
            "Campesterol (Cam)",
            "Cycloartenol (Cyc)",
            "Dammarenediol (Dam)",
            "Lupeol (Lup)",
            "Amyrin (Amy)",
            "Taraxerol (Tar)",
            "Friedelin (Fri)",
            "Betulin (Bet)",
            "Oleanolic acid (Ola)",
            "Ursolic acid (Urs)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Farnesol",
                "Dolichol",
                "Squalene",
                "Lanosterol",
                "Oleanolic Acid",
                "Ursolic Acid"
            ],
            "mismatches": [
                "Geranylgeraniol",
                "Ubiquinone",
                "Phytoene",
                "Retinol",
                "Tocopherol",
                "Menaquinone",
                "Phylloquinone",
                "Ergosterol",
                "Cholesterol",
                "Stigmasterol",
                "Campesterol",
                "Cycloartenol",
                "Dammarenediol",
                "Lupeol",
                "Amyrin",
                "Taraxerol",
                "Friedelin",
                "Betulin"
            ],
            "true_referents": [
                "Allethrins",
                "Artemether",
                "Beta-Cryptoxanthin",
                "Betulinic Acid",
                "Camphanes",
                "Cannabinol",
                "Carotenoids",
                "Colforsin",
                "Dammaranes",
                "Docetaxel",
                "Dolichol Phosphates",
                "Dolichols",
                "Etretinate",
                "Farnesol",
                "Fenretinide",
                "Ginkgolides",
                "Lanosterol",
                "Limonene",
                "Limonins",
                "Lupanes",
                "Norisoprenoids",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Oleanolic Acid",
                "Paclitaxel",
                "Phytanic Acid",
                "Phytol",
                "Retinaldehyde",
                "Squalene",
                "Tretinoin",
                "Ursolic Acid",
                "Vitamin A",
                "Vitamin K",
                "Vitamin K 1",
                "Vitamin K 2",
                "beta Carotene"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Rubber",
            "Natural Rubber",
            "Synthetic Rubber",
            "Polyisoprene",
            "Polyisoprenes",
            "Chloroprene",
            "Neoprene",
            "Buna-N",
            "Buna-S",
            "Polybutadiene",
            "Polyisobutylene",
            "Polyisoprene rubber",
            "Polyisoprene elastomer",
            "Polyisoprene plastic",
            "Polyisoprene resin",
            "Polyisoprene foam",
            "Polyisoprene latex",
            "Polyisoprene emulsion",
            "Polyisoprene dispersion",
            "Polyisoprene solution",
            "Polyisoprene gel",
            "Polyisoprene hydrogel",
            "Polyisoprene nanogel",
            "Polyisoprene microgel",
            "Polyisoprene nanofiber",
            "Polyisoprene microfiber"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Isoprene (C5H8)",
                "Rubber",
                "Natural Rubber",
                "Synthetic Rubber",
                "Polyisoprene",
                "Polyisoprenes",
                "Chloroprene",
                "Neoprene",
                "Buna-N",
                "Buna-S",
                "Polybutadiene",
                "Polyisobutylene",
                "Polyisoprene rubber",
                "Polyisoprene elastomer",
                "Polyisoprene plastic",
                "Polyisoprene resin",
                "Polyisoprene foam",
                "Polyisoprene latex",
                "Polyisoprene emulsion",
                "Polyisoprene dispersion",
                "Polyisoprene solution",
                "Polyisoprene gel",
                "Polyisoprene hydrogel",
                "Polyisoprene nanogel",
                "Polyisoprene microgel",
                "Polyisoprene nanofiber",
                "Polyisoprene microfiber"
            ],
            "true_referents": [
                "Abscisic Acid",
                "Betulinic Acid",
                "Cannabinol",
                "Escin",
                "Eucalyptol",
                "Farnesol",
                "Gossypol",
                "Limonene",
                "Paclitaxel",
                "Phorbol 12,13-Dibutyrate",
                "Phorbols",
                "Polyisoprenyl Phosphate Monosaccharides",
                "Polyisoprenyl Phosphate Oligosaccharides",
                "Polyisoprenyl Phosphate Sugars",
                "Polyisoprenyl Phosphates",
                "Polyprenols",
                "Squalene",
                "beta Carotene"
            ],
            "TP": 0,
            "FP": 27,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            "Geranyl (G)",
            "Neryl (N)",
            "Geranyllinalool",
            "Farnesol (F)",
            "Farnesal",
            "Nerolidol",
            "Geranylgeraniol",
            "Sesquiphellandrene",
            "Bisabolol",
            "Farnesyl acetate",
            "Geranyl acetate",
            "Neryl acetate",
            "Geranyl isovalerate",
            "Geranyl phenylacetate",
            "Geranyl geranyl acetate",
            "Farnesyl cinnamate",
            "Geranyl cinnamate",
            "Neryl cinnamate",
            "Geranyllinalyl acetate",
            "Geranyloxyhexan-6-ol",
            "Geranyl hexanoate",
            "Geranyl octanoate",
            "Geranyl decanoate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Farnesol"
            ],
            "mismatches": [
                "Geranyl (G)",
                "Neryl (N)",
                "Geranyllinalool",
                "Farnesal",
                "Nerolidol",
                "Geranylgeraniol",
                "Sesquiphellandrene",
                "Bisabolol",
                "Farnesyl acetate",
                "Geranyl acetate",
                "Neryl acetate",
                "Geranyl isovalerate",
                "Geranyl phenylacetate",
                "Geranyl geranyl acetate",
                "Farnesyl cinnamate",
                "Geranyl cinnamate",
                "Neryl cinnamate",
                "Geranyllinalyl acetate",
                "Geranyloxyhexan-6-ol",
                "Geranyl hexanoate",
                "Geranyl octanoate",
                "Geranyl decanoate"
            ],
            "true_referents": [
                "Camphor",
                "Cannabidiol",
                "Cannabinol",
                "Carbenoxolone",
                "Dolichols",
                "Eucalyptol",
                "Farnesol",
                "Gefarnate",
                "Glycyrrhizic Acid",
                "Lanosterol",
                "Limonene",
                "Lutein",
                "Menthol",
                "O-(Chloroacetylcarbamoyl)fumagillol",
                "Oleanolic Acid",
                "Phorbol 12,13-Dibutyrate",
                "Phorbol Esters",
                "Phytanic Acid",
                "Phytol",
                "Polycyclic Sesquiterpenes",
                "Polyprenols",
                "Retinaldehyde",
                "Retinyl Esters",
                "Sesquiterpenes",
                "Sesquiterpenes, Eudesmane",
                "Sesquiterpenes, Germacrane",
                "Tetradecanoylphorbol Acetate",
                "Thymol",
                "Vitamin A"
            ],
            "TP": 1,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "terpene",
        "domain": "chemistry",
        "response": [
            {
                "referent": "isoprene",
                "canonical name": "isoprene"
            },
            {
                "referent": "limonene",
                "canonical name": "limonene"
            },
            {
                "referent": "pinene",
                "canonical name": "pinene"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "limonene"
            ],
            "mismatches": [
                "isoprene",
                "pinene"
            ],
            "true_referents": [
                "Abietanes",
                "Limonene",
                "Monoterpenes",
                "Sesquiterpenes",
                "Sesquiterpenes, Guaiane",
                "Triterpenes",
                "beta Carotene"
            ],
            "TP": 1,
            "FP": 2,
            "FN": 0
        }
    }
]