[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "isoprene (IP)",
            "geranyl diphosphate (GPP)",
            "farnesyl diphosphate (FPP)",
            "geraniol",
            "farnesol",
            "lycopene",
            "beta-carotene",
            "retinol",
            "limonene",
            "pinene",
            "camphor",
            "menthol",
            "germacrene D",
            "patchoulol",
            "caryophyllene",
            "beta-elemene",
            "alpha-bisabolol",
            "alpha-humulene",
            "alpha-farnesene",
            "beta-farnesene",
            "ginkgolide A",
            "taxol",
            "vitamin A",
            "coenzyme Q10"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "isoprene (IP)",
                "geranyl diphosphate (GPP)",
                "farnesyl diphosphate (FPP)",
                "geraniol",
                "farnesol",
                "lycopene",
                "beta-carotene",
                "retinol",
                "limonene",
                "pinene",
                "camphor",
                "menthol",
                "germacrene D",
                "patchoulol",
                "caryophyllene",
                "beta-elemene",
                "alpha-bisabolol",
                "alpha-humulene",
                "alpha-farnesene",
                "beta-farnesene",
                "ginkgolide A",
                "taxol",
                "vitamin A",
                "coenzyme Q10"
            ],
            "mismatches": [],
            "true_referents": [
                "alpha-bisabolol",
                "alpha-farnesene",
                "alpha-humulene",
                "beta-carotene",
                "beta-elemene",
                "beta-farnesene",
                "camphor",
                "caryophyllene",
                "coenzyme Q10",
                "farnesol",
                "farnesyl diphosphate (FPP)",
                "geraniol",
                "geranyl diphosphate (GPP)",
                "germacrene D",
                "ginkgolide A",
                "isoprene (IP)",
                "limonene",
                "lycopene",
                "menthol",
                "patchoulol",
                "pinene",
                "retinol",
                "taxol",
                "vitamin A"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "3-Methyl-1,3-butadiene (C5H8)",
            "\u03b1-Pinene",
            "\u03b2-Pinene",
            "Myrcene",
            "Limonene",
            "Sabinene",
            "Caryophyllene",
            "Geraniol",
            "Nerol",
            "Citronellol",
            "Geranyl acetate",
            "Neryl acetate",
            "Eugenol",
            "Cinnamyl acetate",
            "Linalool",
            "Geranyl pyrophosphate",
            "Neryl pyrophosphate",
            "Farnesyl pyrophosphate",
            "Squalene",
            "Phytol",
            "Phytadiene",
            "Farnesol",
            "Bisabolene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (C5H8)",
                "3-Methyl-1,3-butadiene (C5H8)",
                "\u03b1-Pinene",
                "\u03b2-Pinene",
                "Myrcene",
                "Limonene",
                "Sabinene",
                "Caryophyllene",
                "Geraniol",
                "Nerol",
                "Citronellol",
                "Geranyl acetate",
                "Neryl acetate",
                "Eugenol",
                "Cinnamyl acetate",
                "Linalool",
                "Geranyl pyrophosphate",
                "Neryl pyrophosphate",
                "Farnesyl pyrophosphate",
                "Squalene",
                "Phytol",
                "Phytadiene",
                "Farnesol",
                "Bisabolene"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methyl-1,3-butadiene (C5H8)",
                "Bisabolene",
                "Caryophyllene",
                "Cinnamyl acetate",
                "Citronellol",
                "Eugenol",
                "Farnesol",
                "Farnesyl pyrophosphate",
                "Geraniol",
                "Geranyl acetate",
                "Geranyl pyrophosphate",
                "Isoprene (C5H8)",
                "Limonene",
                "Linalool",
                "Myrcene",
                "Nerol",
                "Neryl acetate",
                "Neryl pyrophosphate",
                "Phytadiene",
                "Phytol",
                "Sabinene",
                "Squalene",
                "\u03b1-Pinene",
                "\u03b2-Pinene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (2-methyl-1,3-butadiene)",
            "Myrcene",
            "Ocimene",
            "Linalool",
            "Geraniol",
            "Nerol",
            "Citronellol",
            "Geranylgeraniol",
            "Farnesol",
            "Nerolidol",
            "Squalene",
            "Phytol",
            "Carene",
            "Limonene",
            "Phellandrene",
            "Terpinene",
            "Camphene",
            "Pinene",
            "Caryophyllene",
            "Thujone",
            "Menthol",
            "Carvone",
            "Perillaldehyde",
            "Pulegone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (2-methyl-1,3-butadiene)",
                "Myrcene",
                "Ocimene",
                "Linalool",
                "Geraniol",
                "Nerol",
                "Citronellol",
                "Geranylgeraniol",
                "Farnesol",
                "Nerolidol",
                "Squalene",
                "Phytol",
                "Carene",
                "Limonene",
                "Phellandrene",
                "Terpinene",
                "Camphene",
                "Pinene",
                "Caryophyllene",
                "Thujone",
                "Menthol",
                "Carvone",
                "Perillaldehyde",
                "Pulegone"
            ],
            "mismatches": [],
            "true_referents": [
                "Camphene",
                "Carene",
                "Carvone",
                "Caryophyllene",
                "Citronellol",
                "Farnesol",
                "Geraniol",
                "Geranylgeraniol",
                "Isoprene (2-methyl-1,3-butadiene)",
                "Limonene",
                "Linalool",
                "Menthol",
                "Myrcene",
                "Nerol",
                "Nerolidol",
                "Ocimene",
                "Perillaldehyde",
                "Phellandrene",
                "Phytol",
                "Pinene",
                "Pulegone",
                "Squalene",
                "Terpinene",
                "Thujone"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (ISP)",
            "3-Methyl-1-butene (3MB)",
            "2-Methyl-2-butene (2M2B)",
            "2-Methyl-1-butene (2M1B)",
            "Piperylene (PIP)",
            "Dimethylbutadiene (DMB)",
            "Limonene (LIM)",
            "Pinene (PIN)",
            "Camphene (CMP)",
            "Myrcene (MYR)",
            "Ocimene (OCI)",
            "Farnesene (FAR)",
            "Geraniol (GER)",
            "Nerol (NER)",
            "Citral (CIT)",
            "Menthol (MEN)",
            "Camphor (CAM)",
            "Eucalyptol (EUC)",
            "Thujone (THU)",
            "Abietic Acid (ABI)",
            "Squalene (SQN)",
            "Phytol (PHY)",
            "Vitamin A (VIT)",
            "Beta-Carotene (BET)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (ISP)",
                "3-Methyl-1-butene (3MB)",
                "2-Methyl-2-butene (2M2B)",
                "2-Methyl-1-butene (2M1B)",
                "Piperylene (PIP)",
                "Dimethylbutadiene (DMB)",
                "Limonene (LIM)",
                "Pinene (PIN)",
                "Camphene (CMP)",
                "Myrcene (MYR)",
                "Ocimene (OCI)",
                "Farnesene (FAR)",
                "Geraniol (GER)",
                "Nerol (NER)",
                "Citral (CIT)",
                "Menthol (MEN)",
                "Camphor (CAM)",
                "Eucalyptol (EUC)",
                "Thujone (THU)",
                "Abietic Acid (ABI)",
                "Squalene (SQN)",
                "Phytol (PHY)",
                "Vitamin A (VIT)",
                "Beta-Carotene (BET)"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Methyl-1-butene (2M1B)",
                "2-Methyl-2-butene (2M2B)",
                "3-Methyl-1-butene (3MB)",
                "Abietic Acid (ABI)",
                "Beta-Carotene (BET)",
                "Camphene (CMP)",
                "Camphor (CAM)",
                "Citral (CIT)",
                "Dimethylbutadiene (DMB)",
                "Eucalyptol (EUC)",
                "Farnesene (FAR)",
                "Geraniol (GER)",
                "Isoprene (ISP)",
                "Limonene (LIM)",
                "Menthol (MEN)",
                "Myrcene (MYR)",
                "Nerol (NER)",
                "Ocimene (OCI)",
                "Phytol (PHY)",
                "Pinene (PIN)",
                "Piperylene (PIP)",
                "Squalene (SQN)",
                "Thujone (THU)",
                "Vitamin A (VIT)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Myrcene (C10H16)",
            "Ocimene (C10H16)",
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Camphene (C10H16)",
            "Terpinene (C10H16)",
            "Cymene (C10H14)",
            "Carene (C10H16)",
            "Sabinene (C10H16)",
            "Phellandrene (C10H16)",
            "Farnesene (C15H24)",
            "Bisabolene (C15H24)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Valencene (C15H24)",
            "Selinene (C15H24)",
            "Cedrene (C15H24)",
            "Germacrene (C15H24)",
            "Elemene (C15H24)",
            "Santalene (C15H24)",
            "Bergamotene (C15H24)",
            "Cadinene (C15H24)",
            "Guaiol (C15H24O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (C5H8)",
                "Myrcene (C10H16)",
                "Ocimene (C10H16)",
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Camphene (C10H16)",
                "Terpinene (C10H16)",
                "Cymene (C10H14)",
                "Carene (C10H16)",
                "Sabinene (C10H16)",
                "Phellandrene (C10H16)",
                "Farnesene (C15H24)",
                "Bisabolene (C15H24)",
                "Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Valencene (C15H24)",
                "Selinene (C15H24)",
                "Cedrene (C15H24)",
                "Germacrene (C15H24)",
                "Elemene (C15H24)",
                "Santalene (C15H24)",
                "Bergamotene (C15H24)",
                "Cadinene (C15H24)",
                "Guaiol (C15H24O)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bergamotene (C15H24)",
                "Bisabolene (C15H24)",
                "Cadinene (C15H24)",
                "Camphene (C10H16)",
                "Carene (C10H16)",
                "Caryophyllene (C15H24)",
                "Cedrene (C15H24)",
                "Cymene (C10H14)",
                "Elemene (C15H24)",
                "Farnesene (C15H24)",
                "Germacrene (C15H24)",
                "Guaiol (C15H24O)",
                "Humulene (C15H24)",
                "Isoprene (C5H8)",
                "Limonene (C10H16)",
                "Myrcene (C10H16)",
                "Ocimene (C10H16)",
                "Phellandrene (C10H16)",
                "Pinene (C10H16)",
                "Sabinene (C10H16)",
                "Santalene (C15H24)",
                "Selinene (C15H24)",
                "Terpinene (C10H16)",
                "Valencene (C15H24)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Myrcene (C10H16)",
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Camphene (C10H16)",
            "Borneol (C10H18O)",
            "Geraniol (C10H18O)",
            "Farnesene (C15H24)",
            "Squalene (C30H50)",
            "Beta-Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Caryophyllene oxide (C15H24O)",
            "Linalool (C10H18O)",
            "Terpinene (C10H16)",
            "Alpha-Pinene (C10H16)",
            "Beta-Pinene (C10H16)",
            "Terpineol (C10H18O)",
            "Nerolidol (C15H26O)",
            "Bisabolene (C15H24)",
            "Sesquiterpenes (various)",
            "Diterpenes (various)",
            "Triterpenes (various)",
            "Carotenoids (various)",
            "Rubber (natural rubber, polyisoprene)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (C5H8)",
                "Myrcene (C10H16)",
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Camphene (C10H16)",
                "Borneol (C10H18O)",
                "Geraniol (C10H18O)",
                "Farnesene (C15H24)",
                "Squalene (C30H50)",
                "Beta-Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Caryophyllene oxide (C15H24O)",
                "Linalool (C10H18O)",
                "Terpinene (C10H16)",
                "Alpha-Pinene (C10H16)",
                "Beta-Pinene (C10H16)",
                "Terpineol (C10H18O)",
                "Nerolidol (C15H26O)",
                "Bisabolene (C15H24)",
                "Sesquiterpenes (various)",
                "Diterpenes (various)",
                "Triterpenes (various)",
                "Carotenoids (various)",
                "Rubber (natural rubber, polyisoprene)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-Pinene (C10H16)",
                "Beta-Caryophyllene (C15H24)",
                "Beta-Pinene (C10H16)",
                "Bisabolene (C15H24)",
                "Borneol (C10H18O)",
                "Camphene (C10H16)",
                "Carotenoids (various)",
                "Caryophyllene oxide (C15H24O)",
                "Diterpenes (various)",
                "Farnesene (C15H24)",
                "Geraniol (C10H18O)",
                "Humulene (C15H24)",
                "Isoprene (C5H8)",
                "Limonene (C10H16)",
                "Linalool (C10H18O)",
                "Myrcene (C10H16)",
                "Nerolidol (C15H26O)",
                "Pinene (C10H16)",
                "Rubber (natural rubber, polyisoprene)",
                "Sesquiterpenes (various)",
                "Squalene (C30H50)",
                "Terpinene (C10H16)",
                "Terpineol (C10H18O)",
                "Triterpenes (various)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Dimethylallyl pyrophosphate (DMAPP)",
            "Isopentenyl pyrophosphate (IPP)",
            "Geranyl pyrophosphate (GPP)",
            "Farnesyl pyrophosphate (FPP)",
            "Geranylgeranyl pyrophosphate (GGPP)",
            "Dolichyl pyrophosphate (DolPP)",
            "Monoterpenes",
            "Sesquiterpenes",
            "Diterpenes",
            "Triterpenes",
            "Tetraterpenes",
            "Polyterpenes",
            "Hemiterpenes",
            "Squalene",
            "Limonene",
            "Myrcene",
            "Pinene",
            "Linalool",
            "Bisabolene",
            "Humulene",
            "Caryophyllene",
            "Farnesene",
            "Geraniol",
            "Nerol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dimethylallyl pyrophosphate (DMAPP)",
                "Isopentenyl pyrophosphate (IPP)",
                "Geranyl pyrophosphate (GPP)",
                "Farnesyl pyrophosphate (FPP)",
                "Geranylgeranyl pyrophosphate (GGPP)",
                "Dolichyl pyrophosphate (DolPP)",
                "Monoterpenes",
                "Sesquiterpenes",
                "Diterpenes",
                "Triterpenes",
                "Tetraterpenes",
                "Polyterpenes",
                "Hemiterpenes",
                "Squalene",
                "Limonene",
                "Myrcene",
                "Pinene",
                "Linalool",
                "Bisabolene",
                "Humulene",
                "Caryophyllene",
                "Farnesene",
                "Geraniol",
                "Nerol"
            ],
            "mismatches": [],
            "true_referents": [
                "Bisabolene",
                "Caryophyllene",
                "Dimethylallyl pyrophosphate (DMAPP)",
                "Diterpenes",
                "Dolichyl pyrophosphate (DolPP)",
                "Farnesene",
                "Farnesyl pyrophosphate (FPP)",
                "Geraniol",
                "Geranyl pyrophosphate (GPP)",
                "Geranylgeranyl pyrophosphate (GGPP)",
                "Hemiterpenes",
                "Humulene",
                "Isopentenyl pyrophosphate (IPP)",
                "Limonene",
                "Linalool",
                "Monoterpenes",
                "Myrcene",
                "Nerol",
                "Pinene",
                "Polyterpenes",
                "Sesquiterpenes",
                "Squalene",
                "Tetraterpenes",
                "Triterpenes"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "Myrcene (C10H16)",
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Carene (C10H16)",
            "Terpinolene (C10H16)",
            "Terpinene (C10H16)",
            "Phellandrene (C10H16)",
            "Sabinene (C10H16)",
            "Camphene (C10H16)",
            "Farnesene (C15H24)",
            "Nerolidol (C15H26)",
            "Geraniol (C10H18O)",
            "Linalool (C10H18O)",
            "Citronellol (C10H20O)",
            "Menthol (C10H20O)",
            "Borneol (C10H18O)",
            "Thujone (C10H16O)",
            "Carvone (C10H14O)",
            "Cedrene (C15H24)",
            "Caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Valencene (C15H24)",
            "Sesquiterpene",
            "Diterpene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (C5H8)",
                "Myrcene (C10H16)",
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Carene (C10H16)",
                "Terpinolene (C10H16)",
                "Terpinene (C10H16)",
                "Phellandrene (C10H16)",
                "Sabinene (C10H16)",
                "Camphene (C10H16)",
                "Farnesene (C15H24)",
                "Nerolidol (C15H26)",
                "Geraniol (C10H18O)",
                "Linalool (C10H18O)",
                "Citronellol (C10H20O)",
                "Menthol (C10H20O)",
                "Borneol (C10H18O)",
                "Thujone (C10H16O)",
                "Carvone (C10H14O)",
                "Cedrene (C15H24)",
                "Caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Valencene (C15H24)",
                "Sesquiterpene"
            ],
            "mismatches": [
                "Diterpene"
            ],
            "true_referents": [
                "Borneol (C10H18O)",
                "Camphene (C10H16)",
                "Carene (C10H16)",
                "Carvone (C10H14O)",
                "Caryophyllene (C15H24)",
                "Cedrene (C15H24)",
                "Citronellol (C10H20O)",
                "Farnesene (C15H24)",
                "Geraniol (C10H18O)",
                "Humulene (C15H24)",
                "Isoprene (C5H8)",
                "Limonene (C10H16)",
                "Linalool (C10H18O)",
                "Menthol (C10H20O)",
                "Myrcene (C10H16)",
                "Nerolidol (C15H26)",
                "Phellandrene (C10H16)",
                "Pinene (C10H16)",
                "Sabinene (C10H16)",
                "Sesquiterpene",
                "Terpinene (C10H16)",
                "Terpinolene (C10H16)",
                "Thujone (C10H16O)",
                "Valencene (C15H24)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "2-Methyl-1,3-butadiene (Isoprene)",
            "1,3-Pentadiene",
            "1,4-Pentadiene",
            "cis-1,3-Pentadiene",
            "trans-1,3-Pentadiene",
            "cis-1,4-Pentadiene",
            "trans-1,4-Pentadiene",
            "Cyclopent-1-ene",
            "Cyclopent-2-ene",
            "1-Methylcyclobut-1-ene",
            "1-Methylcyclobut-2-ene",
            "1,2-Dimethylcyclopropene",
            "1,3-Dimethylcyclopropene",
            "2-Methylcyclopropene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "2-Methyl-1,3-butadiene (Isoprene)",
                "1,3-Pentadiene",
                "1,4-Pentadiene",
                "cis-1,3-Pentadiene",
                "trans-1,3-Pentadiene",
                "cis-1,4-Pentadiene",
                "trans-1,4-Pentadiene",
                "Cyclopent-1-ene",
                "Cyclopent-2-ene",
                "1-Methylcyclobut-1-ene",
                "1-Methylcyclobut-2-ene",
                "1,2-Dimethylcyclopropene",
                "1,3-Dimethylcyclopropene",
                "2-Methylcyclopropene"
            ],
            "mismatches": [],
            "true_referents": [
                "1,2-Dimethylcyclopropene",
                "1,3-Dimethylcyclopropene",
                "1,3-Pentadiene",
                "1,4-Pentadiene",
                "1-Methylcyclobut-1-ene",
                "1-Methylcyclobut-2-ene",
                "2-Methyl-1,3-butadiene (Isoprene)",
                "2-Methylcyclopropene",
                "Cyclopent-1-ene",
                "Cyclopent-2-ene",
                "cis-1,3-Pentadiene",
                "cis-1,4-Pentadiene",
                "trans-1,3-Pentadiene",
                "trans-1,4-Pentadiene"
            ],
            "TP": 14,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isopentenyl pyrophosphate (IPP)",
            "Dimethylallyl pyrophosphate (DMAPP)",
            "2-Methyl-1,3-butadiene",
            "3-Methyl-1,3-butadiene",
            "2-Methyl-2-butene",
            "Prenyl group",
            "Hemiterpene",
            "Isoprenoid building block",
            "Isoprene monomer",
            "Terpene precursor",
            "Isopentyl unit",
            "Branched pentene",
            "C5 terpenoid unit",
            "Mevalonate pathway intermediate",
            "Non-mevalonate pathway intermediate",
            "Isoprenyl diphosphate",
            "Isopentenyl diphosphate",
            "Prenyl diphosphate",
            "Isoprenoid subunit",
            "Terpenoid building block",
            "Isoprene skeleton",
            "C5 isoprenoid",
            "Prenyl pyrophosphate",
            "Isoprene-derived unit"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isopentenyl pyrophosphate (IPP)",
                "Dimethylallyl pyrophosphate (DMAPP)",
                "2-Methyl-1,3-butadiene",
                "3-Methyl-1,3-butadiene",
                "2-Methyl-2-butene",
                "Prenyl group",
                "Hemiterpene",
                "Isoprenoid building block",
                "Isoprene monomer",
                "Terpene precursor",
                "Isopentyl unit",
                "Branched pentene",
                "C5 terpenoid unit",
                "Mevalonate pathway intermediate",
                "Non-mevalonate pathway intermediate",
                "Isoprenyl diphosphate",
                "Isopentenyl diphosphate",
                "Prenyl diphosphate",
                "Isoprenoid subunit",
                "Terpenoid building block",
                "Isoprene skeleton",
                "C5 isoprenoid",
                "Prenyl pyrophosphate",
                "Isoprene-derived unit"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Methyl-1,3-butadiene",
                "2-Methyl-2-butene",
                "3-Methyl-1,3-butadiene",
                "Branched pentene",
                "C5 isoprenoid",
                "C5 terpenoid unit",
                "Dimethylallyl pyrophosphate (DMAPP)",
                "Hemiterpene",
                "Isopentenyl diphosphate",
                "Isopentenyl pyrophosphate (IPP)",
                "Isopentyl unit",
                "Isoprene monomer",
                "Isoprene skeleton",
                "Isoprene-derived unit",
                "Isoprenoid building block",
                "Isoprenoid subunit",
                "Isoprenyl diphosphate",
                "Mevalonate pathway intermediate",
                "Non-mevalonate pathway intermediate",
                "Prenyl diphosphate",
                "Prenyl group",
                "Prenyl pyrophosphate",
                "Terpene precursor",
                "Terpenoid building block"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "2-Methylbuta-1,3-diene (Isoprene)",
            "2-Methyl-1,3-butadiene",
            "(2E)-2-Methyl-1,3-butadiene",
            "(2Z)-2-Methyl-1,3-butadiene",
            "1,3-Pentadiene, 2-methyl-",
            "1-Methylbutadiene",
            "3-Methylbutadiene",
            "Isopentadiene",
            "Isoprene, natural",
            "Isoprene, synthetic",
            "2-Methyl-1,3-butadiene, (2E)-",
            "2-Methyl-1,3-butadiene, (2Z)-",
            "1,3-Butadiene, 2-methyl-",
            "Butadiene, 2-methyl-",
            "2-Methylbutadiene",
            "Hemiterpene",
            "Prenylene",
            "\u03b1-Methylbutadiene",
            "\u03b2-Methylbutadiene",
            "2-Methylbuta-1,3-diene, (E)-",
            "2-Methylbuta-1,3-diene, (Z)-",
            "Isoprene unit",
            "C5H8",
            "CH2=C(CH3)CH=CH2"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "2-Methylbuta-1,3-diene (Isoprene)",
                "2-Methyl-1,3-butadiene",
                "(2E)-2-Methyl-1,3-butadiene",
                "(2Z)-2-Methyl-1,3-butadiene",
                "1,3-Pentadiene, 2-methyl-",
                "1-Methylbutadiene",
                "3-Methylbutadiene",
                "Isopentadiene",
                "Isoprene, natural",
                "Isoprene, synthetic",
                "2-Methyl-1,3-butadiene, (2E)-",
                "2-Methyl-1,3-butadiene, (2Z)-",
                "1,3-Butadiene, 2-methyl-",
                "Butadiene, 2-methyl-",
                "2-Methylbutadiene",
                "Hemiterpene",
                "Prenylene",
                "\u03b1-Methylbutadiene",
                "\u03b2-Methylbutadiene",
                "2-Methylbuta-1,3-diene, (E)-",
                "2-Methylbuta-1,3-diene, (Z)-",
                "Isoprene unit",
                "C5H8",
                "CH2=C(CH3)CH=CH2"
            ],
            "mismatches": [],
            "true_referents": [
                "(2E)-2-Methyl-1,3-butadiene",
                "(2Z)-2-Methyl-1,3-butadiene",
                "1,3-Butadiene, 2-methyl-",
                "1,3-Pentadiene, 2-methyl-",
                "1-Methylbutadiene",
                "2-Methyl-1,3-butadiene",
                "2-Methyl-1,3-butadiene, (2E)-",
                "2-Methyl-1,3-butadiene, (2Z)-",
                "2-Methylbuta-1,3-diene (Isoprene)",
                "2-Methylbuta-1,3-diene, (E)-",
                "2-Methylbuta-1,3-diene, (Z)-",
                "2-Methylbutadiene",
                "3-Methylbutadiene",
                "Butadiene, 2-methyl-",
                "C5H8",
                "CH2=C(CH3)CH=CH2",
                "Hemiterpene",
                "Isopentadiene",
                "Isoprene unit",
                "Isoprene, natural",
                "Isoprene, synthetic",
                "Prenylene",
                "\u03b1-Methylbutadiene",
                "\u03b2-Methylbutadiene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (C5H8)",
            "\u03b2-Myrcene (C10H16)",
            "Limonene (C10H16)",
            "\u03b1-Pinene (C10H16)",
            "\u03b2-Pinene (C10H16)",
            "Camphene (C10H16)",
            "Sabinene (C10H16)",
            "\u03b3-Terpinene (C10H16)",
            "\u03b1-Terpinene (C10H16)",
            "Terpinolene (C10H16)",
            "\u03b1-Phellandrene (C10H16)",
            "\u03b2-Phellandrene (C10H16)",
            "\u03b1-Terpineol (C10H18O)",
            "Linalool (C10H18O)",
            "Nerol (C10H18O)",
            "Geraniol (C10H18O)",
            "Citronellol (C10H20O)",
            "Menthol (C10H20O)",
            "Borneol (C10H18O)",
            "Fenchol (C10H18O)",
            "Nerolidol (C15H26O)",
            "Farnesol (C15H26O)",
            "Geranylgeraniol (C20H34O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (C5H8)",
                "\u03b2-Myrcene (C10H16)",
                "Limonene (C10H16)",
                "\u03b1-Pinene (C10H16)",
                "\u03b2-Pinene (C10H16)",
                "Camphene (C10H16)",
                "Sabinene (C10H16)",
                "\u03b3-Terpinene (C10H16)",
                "\u03b1-Terpinene (C10H16)",
                "Terpinolene (C10H16)",
                "\u03b1-Phellandrene (C10H16)",
                "\u03b2-Phellandrene (C10H16)",
                "\u03b1-Terpineol (C10H18O)",
                "Linalool (C10H18O)",
                "Nerol (C10H18O)",
                "Geraniol (C10H18O)",
                "Citronellol (C10H20O)",
                "Menthol (C10H20O)",
                "Borneol (C10H18O)",
                "Fenchol (C10H18O)",
                "Nerolidol (C15H26O)",
                "Farnesol (C15H26O)",
                "Geranylgeraniol (C20H34O)"
            ],
            "mismatches": [],
            "true_referents": [
                "Borneol (C10H18O)",
                "Camphene (C10H16)",
                "Citronellol (C10H20O)",
                "Farnesol (C15H26O)",
                "Fenchol (C10H18O)",
                "Geraniol (C10H18O)",
                "Geranylgeraniol (C20H34O)",
                "Isoprene (C5H8)",
                "Limonene (C10H16)",
                "Linalool (C10H18O)",
                "Menthol (C10H20O)",
                "Nerol (C10H18O)",
                "Nerolidol (C15H26O)",
                "Sabinene (C10H16)",
                "Terpinolene (C10H16)",
                "\u03b1-Phellandrene (C10H16)",
                "\u03b1-Pinene (C10H16)",
                "\u03b1-Terpinene (C10H16)",
                "\u03b1-Terpineol (C10H18O)",
                "\u03b2-Myrcene (C10H16)",
                "\u03b2-Phellandrene (C10H16)",
                "\u03b2-Pinene (C10H16)",
                "\u03b3-Terpinene (C10H16)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (ISO)",
            "2-Methyl-1,3-butadiene (2M13BD)",
            "Isoprenoid (ISNP)",
            "Monoterpene (MOTP)",
            "Sesquiterpene (SETP)",
            "Diterpene (DITP)",
            "Triterpene (TRITP)",
            "Carotenoid (CAR)",
            "Squalene (SQU)",
            "Cholesterol (CHOL)",
            "Vitamin A (VIT_A)",
            "Vitamin E (VIT_E)",
            "Menthol (MEOL)",
            "Limonene (LIM)",
            "Pinene (PIN)",
            "Camphor (CAMP)",
            "Geraniol (GER)",
            "Linalool (LIN)",
            "Citronellol (CIT)",
            "Farnesol (FAR)",
            "Phytol (PHY)",
            "Retinol (RET)",
            "Ubiquinone (UBQ)",
            "Dolichol (DOL)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (ISO)",
                "2-Methyl-1,3-butadiene (2M13BD)",
                "Isoprenoid (ISNP)",
                "Monoterpene (MOTP)",
                "Sesquiterpene (SETP)",
                "Diterpene (DITP)",
                "Triterpene (TRITP)",
                "Carotenoid (CAR)",
                "Squalene (SQU)",
                "Cholesterol (CHOL)",
                "Vitamin A (VIT_A)",
                "Vitamin E (VIT_E)",
                "Menthol (MEOL)",
                "Limonene (LIM)",
                "Pinene (PIN)",
                "Camphor (CAMP)",
                "Geraniol (GER)",
                "Linalool (LIN)",
                "Citronellol (CIT)",
                "Farnesol (FAR)",
                "Phytol (PHY)",
                "Retinol (RET)",
                "Ubiquinone (UBQ)",
                "Dolichol (DOL)"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Methyl-1,3-butadiene (2M13BD)",
                "Camphor (CAMP)",
                "Carotenoid (CAR)",
                "Cholesterol (CHOL)",
                "Citronellol (CIT)",
                "Diterpene (DITP)",
                "Dolichol (DOL)",
                "Farnesol (FAR)",
                "Geraniol (GER)",
                "Isoprene (ISO)",
                "Isoprenoid (ISNP)",
                "Limonene (LIM)",
                "Linalool (LIN)",
                "Menthol (MEOL)",
                "Monoterpene (MOTP)",
                "Phytol (PHY)",
                "Pinene (PIN)",
                "Retinol (RET)",
                "Sesquiterpene (SETP)",
                "Squalene (SQU)",
                "Triterpene (TRITP)",
                "Ubiquinone (UBQ)",
                "Vitamin A (VIT_A)",
                "Vitamin E (VIT_E)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isopentane (2-Methylbutane) (IP)",
            "Limonene",
            "Pinene",
            "Sabinene",
            "Myrcene",
            "Linalool",
            "Borneol",
            "Terpinen-4-ol",
            "Caryophyllene",
            "Humulene",
            "Nerolidol",
            "Perillyl alcohol",
            "Sativene",
            "Dihydrocarveol",
            "Carveol",
            "Carvone",
            "Dihydrocarvone",
            "Geraniol",
            "Limonene oxide",
            "Borneol acetate",
            "Terpinen-4-ol acetate",
            "Caryophyllene oxide",
            "Humulene epoxide",
            "Nerolidol acetate",
            "Perillyl acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isopentane (2-Methylbutane) (IP)",
                "Limonene",
                "Pinene",
                "Sabinene",
                "Myrcene",
                "Linalool",
                "Borneol",
                "Terpinen-4-ol",
                "Caryophyllene",
                "Humulene",
                "Nerolidol",
                "Perillyl alcohol",
                "Sativene",
                "Dihydrocarveol",
                "Carveol",
                "Carvone",
                "Dihydrocarvone",
                "Geraniol",
                "Limonene oxide",
                "Borneol acetate",
                "Terpinen-4-ol acetate",
                "Caryophyllene oxide",
                "Humulene epoxide",
                "Nerolidol acetate"
            ],
            "mismatches": [
                "Perillyl acetate"
            ],
            "true_referents": [
                "Borneol",
                "Borneol acetate",
                "Carveol",
                "Carvone",
                "Caryophyllene",
                "Caryophyllene oxide",
                "Dihydrocarveol",
                "Dihydrocarvone",
                "Geraniol",
                "Humulene",
                "Humulene epoxide",
                "Isopentane (2-Methylbutane) (IP)",
                "Limonene",
                "Limonene oxide",
                "Linalool",
                "Myrcene",
                "Nerolidol",
                "Nerolidol acetate",
                "Perillyl alcohol",
                "Pinene",
                "Sabinene",
                "Sativene",
                "Terpinen-4-ol",
                "Terpinen-4-ol acetate"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Isoprene (IP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene (IP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Isoprene (IP)"
            ],
            "TP": 1,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Isoprene",
                "Canonical name": "Isoprene",
                "Definition": [
                    "It is an isoprene unit",
                    "It has the chemical formula C5H8",
                    "It forms a terpene"
                ]
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Isoprene"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Referent\": \"Isoprene\", \"Canonical name\": \"Isoprene\", \"Definition\": [\"It is an isoprene unit\", \"It has the chemical formula C5H8\", \"It forms a terpene\"]}"
            ],
            "TP": 1,
            "FP": 0,
            "FN": 0
        }
    }
]