[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene ((LIM))",
            "Pinene ((PIN))",
            "Myrcene ((MYR))",
            "Terpinolene ((TERP))",
            "Citronellol ((CTNL))",
            "Geraniol ((GER))",
            "Nerolidol ((NER))",
            "Borneol ((BOR))",
            "Camphor ((CAM))",
            "Farnesol ((FAR))",
            "Sabinene ((SAB))",
            "Terpineol ((TERP))",
            "Isopulegol ((ISOP))",
            "Thymol ((THYM))",
            "Carvone ((CARV))",
            "Pulegone ((PUL))",
            "Cineole ((CIN))",
            "Linalool ((LIN))",
            "Eucalyptol ((EUC))",
            "Terpinene ((TERP))",
            "Basilicin ((BAS))",
            "Phellandrene ((PHEL))",
            "Spathulenol ((SPAT))",
            "Bornyl Acetate ((BORAC))",
            "Isoborneol ((ISOBOR))"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene ((LIM))",
                "Pinene ((PIN))",
                "Myrcene ((MYR))",
                "Terpinolene ((TERP))",
                "Citronellol ((CTNL))",
                "Geraniol ((GER))",
                "Nerolidol ((NER))",
                "Borneol ((BOR))",
                "Camphor ((CAM))",
                "Farnesol ((FAR))",
                "Sabinene ((SAB))",
                "Terpineol ((TERP))",
                "Isopulegol ((ISOP))",
                "Thymol ((THYM))",
                "Carvone ((CARV))",
                "Pulegone ((PUL))",
                "Cineole ((CIN))",
                "Linalool ((LIN))",
                "Eucalyptol ((EUC))",
                "Terpinene ((TERP))",
                "Basilicin ((BAS))",
                "Phellandrene ((PHEL))",
                "Spathulenol ((SPAT))",
                "Bornyl Acetate ((BORAC))"
            ],
            "mismatches": [
                "Isoborneol ((ISOBOR))"
            ],
            "true_referents": [
                "Basilicin ((BAS))",
                "Borneol ((BOR))",
                "Bornyl Acetate ((BORAC))",
                "Camphor ((CAM))",
                "Carvone ((CARV))",
                "Cineole ((CIN))",
                "Citronellol ((CTNL))",
                "Eucalyptol ((EUC))",
                "Farnesol ((FAR))",
                "Geraniol ((GER))",
                "Isopulegol ((ISOP))",
                "Limonene ((LIM))",
                "Linalool ((LIN))",
                "Myrcene ((MYR))",
                "Nerolidol ((NER))",
                "Phellandrene ((PHEL))",
                "Pinene ((PIN))",
                "Pulegone ((PUL))",
                "Sabinene ((SAB))",
                "Spathulenol ((SPAT))",
                "Terpinene ((TERP))",
                "Terpineol ((TERP))",
                "Terpinolene ((TERP))",
                "Thymol ((THYM))"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Caryophyllene (C15H24)",
            "Linalool (C10H18O)",
            "Geraniol (C10H18O)",
            "Citronellal (C10H18O)",
            "Borneol (C10H18O)",
            "Camphor (C10H16O)",
            "Eucalyptol (C10H18O)",
            "Alpha-terpineol (C10H18O)",
            "Beta-ocimene (C10H16)",
            "Nerol (C10H18O)",
            "Camphene (C10H16)",
            "Phellandrene (C10H16)",
            "Alpha-pinene (C10H16)",
            "Beta-pinene (C10H16)",
            "Alpha-terpinene (C10H14)",
            "Delta-3-carene (C10H16)",
            "Thujene (C10H16)",
            "Sabinene (C10H16)",
            "Terpinolene (C10H18)",
            "Linalyl acetate (C12H20O2)",
            "Geranyl acetate (C12H20O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Myrcene (C10H16)",
                "Caryophyllene (C15H24)",
                "Linalool (C10H18O)",
                "Geraniol (C10H18O)",
                "Citronellal (C10H18O)",
                "Borneol (C10H18O)",
                "Camphor (C10H16O)",
                "Eucalyptol (C10H18O)",
                "Alpha-terpineol (C10H18O)",
                "Beta-ocimene (C10H16)",
                "Nerol (C10H18O)",
                "Camphene (C10H16)",
                "Phellandrene (C10H16)",
                "Alpha-pinene (C10H16)",
                "Beta-pinene (C10H16)",
                "Alpha-terpinene (C10H14)",
                "Delta-3-carene (C10H16)",
                "Thujene (C10H16)",
                "Sabinene (C10H16)",
                "Terpinolene (C10H18)",
                "Linalyl acetate (C12H20O2)",
                "Geranyl acetate (C12H20O2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-pinene (C10H16)",
                "Alpha-terpinene (C10H14)",
                "Alpha-terpineol (C10H18O)",
                "Beta-ocimene (C10H16)",
                "Beta-pinene (C10H16)",
                "Borneol (C10H18O)",
                "Camphene (C10H16)",
                "Camphor (C10H16O)",
                "Caryophyllene (C15H24)",
                "Citronellal (C10H18O)",
                "Delta-3-carene (C10H16)",
                "Eucalyptol (C10H18O)",
                "Geraniol (C10H18O)",
                "Geranyl acetate (C12H20O2)",
                "Limonene (C10H16)",
                "Linalool (C10H18O)",
                "Linalyl acetate (C12H20O2)",
                "Myrcene (C10H16)",
                "Nerol (C10H18O)",
                "Phellandrene (C10H16)",
                "Pinene (C10H16)",
                "Sabinene (C10H16)",
                "Terpinolene (C10H18)",
                "Thujene (C10H16)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (CAS: 5989-27-5)",
            "\u03b1-Pinene (CAS: 80-56-8)",
            "\u03b2-Pinene (CAS: 127-91-3)",
            "Myrcene (CAS: 123-35-3)",
            "Geraniol (CAS: 106-24-1)",
            "Linalool (CAS: 78-70-6)",
            "Citronellol (CAS: 106-22-9)",
            "Citral (CAS: 5392-40-5)",
            "Menthol (CAS: 2216-51-5)",
            "Camphor (CAS: 76-22-2)",
            "Pulegone (CAS: 89-82-7)",
            "Carvone (CAS: 99-48-7)",
            "Terpinen-4-ol (CAS: 562-74-3)",
            "Perillaldehyde (CAS: 2111-75-3)",
            "Thymol (CAS: 89-83-8)",
            "\u03b1-Terpineol (CAS: 98-55-5)",
            "\u03b2-Ocimene (CAS: 13877-91-3)",
            "Terpinolene (CAS: 586-62-9)",
            "Nerol (CAS: 106-25-2)",
            "Geranyl acetate (CAS: 105-87-3)",
            "Nerolidol (CAS: 7212-44-4)",
            "Isopulegol (CAS: 89-79-2)",
            "Eucalyptol (CAS: 470-82-6)",
            "Terpineol (CAS: 98-55-5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (CAS: 5989-27-5)",
                "\u03b1-Pinene (CAS: 80-56-8)",
                "\u03b2-Pinene (CAS: 127-91-3)",
                "Myrcene (CAS: 123-35-3)",
                "Geraniol (CAS: 106-24-1)",
                "Linalool (CAS: 78-70-6)",
                "Citronellol (CAS: 106-22-9)",
                "Citral (CAS: 5392-40-5)",
                "Menthol (CAS: 2216-51-5)",
                "Camphor (CAS: 76-22-2)",
                "Pulegone (CAS: 89-82-7)",
                "Carvone (CAS: 99-48-7)",
                "Terpinen-4-ol (CAS: 562-74-3)",
                "Perillaldehyde (CAS: 2111-75-3)",
                "Thymol (CAS: 89-83-8)",
                "\u03b1-Terpineol (CAS: 98-55-5)",
                "\u03b2-Ocimene (CAS: 13877-91-3)",
                "Terpinolene (CAS: 586-62-9)",
                "Nerol (CAS: 106-25-2)",
                "Geranyl acetate (CAS: 105-87-3)",
                "Nerolidol (CAS: 7212-44-4)",
                "Isopulegol (CAS: 89-79-2)",
                "Eucalyptol (CAS: 470-82-6)",
                "Terpineol (CAS: 98-55-5)"
            ],
            "mismatches": [],
            "true_referents": [
                "Camphor (CAS: 76-22-2)",
                "Carvone (CAS: 99-48-7)",
                "Citral (CAS: 5392-40-5)",
                "Citronellol (CAS: 106-22-9)",
                "Eucalyptol (CAS: 470-82-6)",
                "Geraniol (CAS: 106-24-1)",
                "Geranyl acetate (CAS: 105-87-3)",
                "Isopulegol (CAS: 89-79-2)",
                "Limonene (CAS: 5989-27-5)",
                "Linalool (CAS: 78-70-6)",
                "Menthol (CAS: 2216-51-5)",
                "Myrcene (CAS: 123-35-3)",
                "Nerol (CAS: 106-25-2)",
                "Nerolidol (CAS: 7212-44-4)",
                "Perillaldehyde (CAS: 2111-75-3)",
                "Pulegone (CAS: 89-82-7)",
                "Terpinen-4-ol (CAS: 562-74-3)",
                "Terpineol (CAS: 98-55-5)",
                "Terpinolene (CAS: 586-62-9)",
                "Thymol (CAS: 89-83-8)",
                "\u03b1-Pinene (CAS: 80-56-8)",
                "\u03b1-Terpineol (CAS: 98-55-5)",
                "\u03b2-Ocimene (CAS: 13877-91-3)",
                "\u03b2-Pinene (CAS: 127-91-3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (LIM)",
            "\u03b1-Pinene (PIN)",
            "\u03b2-Pinene (BPN)",
            "Myrcene (MYR)",
            "\u03b3-Terpinene (GTP)",
            "\u03b1-Terpinene (ATP)",
            "p-Cymene (CYM)",
            "Camphene (CMP)",
            "\u03b2-Phellandrene (BPH)",
            "\u03b1-Phellandrene (APH)",
            "Citronellal (CIT)",
            "Geraniol (GER)",
            "Nerol (NER)",
            "Citral (CITRAL)",
            "Linalool (LAL)",
            "Geranyl acetate (GERAC)",
            "Menthol (MEN)",
            "Isomenthol (ISM)",
            "Thujone (THJ)",
            "Camphor (CMPH)",
            "Eucalyptol (EUC)",
            "Perillyl alcohol (PER)",
            "Valencene (VAL)",
            "Bisabolene (BIS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (LIM)",
                "\u03b1-Pinene (PIN)",
                "\u03b2-Pinene (BPN)",
                "Myrcene (MYR)",
                "\u03b3-Terpinene (GTP)",
                "\u03b1-Terpinene (ATP)",
                "p-Cymene (CYM)",
                "Camphene (CMP)",
                "\u03b2-Phellandrene (BPH)",
                "\u03b1-Phellandrene (APH)",
                "Citronellal (CIT)",
                "Geraniol (GER)",
                "Nerol (NER)",
                "Citral (CITRAL)",
                "Linalool (LAL)",
                "Geranyl acetate (GERAC)",
                "Menthol (MEN)",
                "Isomenthol (ISM)",
                "Thujone (THJ)",
                "Camphor (CMPH)",
                "Eucalyptol (EUC)",
                "Perillyl alcohol (PER)",
                "Valencene (VAL)",
                "Bisabolene (BIS)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bisabolene (BIS)",
                "Camphene (CMP)",
                "Camphor (CMPH)",
                "Citral (CITRAL)",
                "Citronellal (CIT)",
                "Eucalyptol (EUC)",
                "Geraniol (GER)",
                "Geranyl acetate (GERAC)",
                "Isomenthol (ISM)",
                "Limonene (LIM)",
                "Linalool (LAL)",
                "Menthol (MEN)",
                "Myrcene (MYR)",
                "Nerol (NER)",
                "Perillyl alcohol (PER)",
                "Thujone (THJ)",
                "Valencene (VAL)",
                "p-Cymene (CYM)",
                "\u03b1-Phellandrene (APH)",
                "\u03b1-Pinene (PIN)",
                "\u03b1-Terpinene (ATP)",
                "\u03b2-Phellandrene (BPH)",
                "\u03b2-Pinene (BPN)",
                "\u03b3-Terpinene (GTP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Camphene (C10H16)",
            "Terpinene (C10H16)",
            "Cymene (C10H14)",
            "Phellandrene (C10H16)",
            "Carene (C10H16)",
            "Menthol (C10H20O)",
            "Geraniol (C10H18O)",
            "Citral (C10H16O)",
            "Thymol (C10H14O)",
            "Borneol (C10H18O)",
            "Eucalyptol (C10H18O)",
            "Terpineol (C10H18O)",
            "Nerol (C10H18O)",
            "Linalool (C10H18O)",
            "Sabinene (C10H16)",
            "Ocimene (C10H16)",
            "Isopulegol (C10H18O)",
            "Carvone (C10H14O)",
            "Alpha-terpinene (C10H16)",
            "Beta-terpinene (C10H16)",
            "Gamma-terpinene (C10H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Myrcene (C10H16)",
                "Camphene (C10H16)",
                "Terpinene (C10H16)",
                "Cymene (C10H14)",
                "Phellandrene (C10H16)",
                "Carene (C10H16)",
                "Menthol (C10H20O)",
                "Geraniol (C10H18O)",
                "Citral (C10H16O)",
                "Thymol (C10H14O)",
                "Borneol (C10H18O)",
                "Eucalyptol (C10H18O)",
                "Terpineol (C10H18O)",
                "Nerol (C10H18O)",
                "Linalool (C10H18O)",
                "Sabinene (C10H16)",
                "Ocimene (C10H16)",
                "Isopulegol (C10H18O)",
                "Carvone (C10H14O)",
                "Alpha-terpinene (C10H16)",
                "Beta-terpinene (C10H16)",
                "Gamma-terpinene (C10H16)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-terpinene (C10H16)",
                "Beta-terpinene (C10H16)",
                "Borneol (C10H18O)",
                "Camphene (C10H16)",
                "Carene (C10H16)",
                "Carvone (C10H14O)",
                "Citral (C10H16O)",
                "Cymene (C10H14)",
                "Eucalyptol (C10H18O)",
                "Gamma-terpinene (C10H16)",
                "Geraniol (C10H18O)",
                "Isopulegol (C10H18O)",
                "Limonene (C10H16)",
                "Linalool (C10H18O)",
                "Menthol (C10H20O)",
                "Myrcene (C10H16)",
                "Nerol (C10H18O)",
                "Ocimene (C10H16)",
                "Phellandrene (C10H16)",
                "Pinene (C10H16)",
                "Sabinene (C10H16)",
                "Terpinene (C10H16)",
                "Terpineol (C10H18O)",
                "Thymol (C10H14O)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Linalool (C10H18O)",
            "Cymene (C10H14)",
            "Geraniol (C10H18O)",
            "Terpineol (C10H18O)",
            "Camphor (C10H16O)",
            "Borneol (C10H18O)",
            "Thujone (C10H14O)",
            "Carvone (C10H14O)",
            "Fenchone (C10H14O)",
            "Nerol (C10H18O)",
            "Ocimene (C10H16)",
            "Sabinene (C10H16)",
            "Valencene (C15H24)",
            "Alpha-terpineol (C10H18O)",
            "Beta-caryophyllene (C15H24)",
            "Humulene (C15H24)",
            "Menthol (C10H20O)",
            "Isopulegol (C10H18O)",
            "Carene (C10H16)",
            "Alpha-pinene (C10H16)",
            "Beta-pinene (C10H16)",
            "Alpha-phellandrene (C10H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (C10H16)",
                "Pinene (C10H16)",
                "Myrcene (C10H16)",
                "Linalool (C10H18O)",
                "Cymene (C10H14)",
                "Geraniol (C10H18O)",
                "Terpineol (C10H18O)",
                "Camphor (C10H16O)",
                "Borneol (C10H18O)",
                "Thujone (C10H14O)",
                "Carvone (C10H14O)",
                "Fenchone (C10H14O)",
                "Nerol (C10H18O)",
                "Ocimene (C10H16)",
                "Sabinene (C10H16)",
                "Valencene (C15H24)",
                "Alpha-terpineol (C10H18O)",
                "Beta-caryophyllene (C15H24)",
                "Humulene (C15H24)",
                "Isopulegol (C10H18O)",
                "Carene (C10H16)",
                "Alpha-pinene (C10H16)",
                "Beta-pinene (C10H16)"
            ],
            "mismatches": [
                "Menthol (C10H20O)",
                "Alpha-phellandrene (C10H16)"
            ],
            "true_referents": [
                "Alpha-pinene (C10H16)",
                "Alpha-terpineol (C10H18O)",
                "Beta-caryophyllene (C15H24)",
                "Beta-pinene (C10H16)",
                "Borneol (C10H18O)",
                "Camphor (C10H16O)",
                "Carene (C10H16)",
                "Carvone (C10H14O)",
                "Cymene (C10H14)",
                "Fenchone (C10H14O)",
                "Geraniol (C10H18O)",
                "Humulene (C15H24)",
                "Isopulegol (C10H18O)",
                "Limonene (C10H16)",
                "Linalool (C10H18O)",
                "Menthol (C10H20O)",
                "Myrcene (C10H16)",
                "Nerol (C10H18O)",
                "Ocimene (C10H16)",
                "Pinene (C10H16)",
                "Sabinene (C10H16)",
                "Terpineol (C10H18O)",
                "Thujone (C10H14O)",
                "Valencene (C15H24)"
            ],
            "TP": 23,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (LIM)",
            "Myrcene (MYR)",
            "Pinene (PIN)",
            "Camphene (CAM)",
            "Pulegone (PUL)",
            "Menthol (MEN)",
            "Geraniol (GER)",
            "Linalool (LIN)",
            "Terpinolene (TER)",
            "Cymene (CYM)",
            "Borneol (BOR)",
            "Carvone (CAR)",
            "Citral (CIT)",
            "Fenchone (FEN)",
            "Thujone (THU)",
            "Eucalyptol (EUC)",
            "Camphor (CAM)",
            "Nerol (NER)",
            "Terpineol (TERP)",
            "Citronellol (CITR)",
            "Sabinene (SAB)",
            "Cineole (CIN)",
            "Carene (CAR)",
            "Phellandrene (PHE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (LIM)",
                "Myrcene (MYR)",
                "Pinene (PIN)",
                "Camphene (CAM)",
                "Pulegone (PUL)",
                "Menthol (MEN)",
                "Geraniol (GER)",
                "Linalool (LIN)",
                "Terpinolene (TER)",
                "Cymene (CYM)",
                "Borneol (BOR)",
                "Carvone (CAR)",
                "Citral (CIT)",
                "Fenchone (FEN)",
                "Thujone (THU)",
                "Eucalyptol (EUC)",
                "Camphor (CAM)",
                "Nerol (NER)",
                "Terpineol (TERP)",
                "Citronellol (CITR)",
                "Sabinene (SAB)",
                "Cineole (CIN)",
                "Carene (CAR)",
                "Phellandrene (PHE)"
            ],
            "mismatches": [],
            "true_referents": [
                "Borneol (BOR)",
                "Camphene (CAM)",
                "Camphor (CAM)",
                "Carene (CAR)",
                "Carvone (CAR)",
                "Cineole (CIN)",
                "Citral (CIT)",
                "Citronellol (CITR)",
                "Cymene (CYM)",
                "Eucalyptol (EUC)",
                "Fenchone (FEN)",
                "Geraniol (GER)",
                "Limonene (LIM)",
                "Linalool (LIN)",
                "Menthol (MEN)",
                "Myrcene (MYR)",
                "Nerol (NER)",
                "Phellandrene (PHE)",
                "Pinene (PIN)",
                "Pulegone (PUL)",
                "Sabinene (SAB)",
                "Terpineol (TERP)",
                "Terpinolene (TER)",
                "Thujone (THU)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene",
            "Pinene",
            "Linalool",
            "Geraniol",
            "Myrcene",
            "Terpinene",
            "Camphene",
            "Citral",
            "Thymol",
            "Carvone",
            "Menthol",
            "Borneol",
            "Cineole",
            "Sabinene",
            "Fenchone",
            "Pulegone",
            "Eucalyptol",
            "Caryophyllene",
            "Humulene",
            "Geranyl acetate",
            "Nerol",
            "Citronellal",
            "Terpinolene",
            "Phellandrene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "Pinene",
                "Linalool",
                "Geraniol",
                "Myrcene",
                "Terpinene",
                "Camphene",
                "Citral",
                "Thymol",
                "Carvone",
                "Menthol",
                "Borneol",
                "Cineole",
                "Sabinene",
                "Fenchone",
                "Pulegone",
                "Eucalyptol",
                "Caryophyllene",
                "Humulene",
                "Geranyl acetate",
                "Nerol",
                "Citronellal",
                "Terpinolene",
                "Phellandrene"
            ],
            "mismatches": [],
            "true_referents": [
                "Borneol",
                "Camphene",
                "Carvone",
                "Caryophyllene",
                "Cineole",
                "Citral",
                "Citronellal",
                "Eucalyptol",
                "Fenchone",
                "Geraniol",
                "Geranyl acetate",
                "Humulene",
                "Limonene",
                "Linalool",
                "Menthol",
                "Myrcene",
                "Nerol",
                "Phellandrene",
                "Pinene",
                "Pulegone",
                "Sabinene",
                "Terpinene",
                "Terpinolene",
                "Thymol"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Alpha-Pinene (\u03b1-Pinene)",
            "Beta-Pinene (\u03b2-Pinene)",
            "Limonene",
            "Myrcene",
            "Linalool",
            "Terpinolene",
            "Camphene",
            "Sabinene",
            "Geraniol",
            "Eucalyptol (1,8-Cineole)",
            "Terpineol",
            "Pulegone",
            "Menthol",
            "Citral",
            "Thymol",
            "Carvone",
            "Bornyl Acetate",
            "Ocimene",
            "Valencene",
            "Nerol",
            "Pinocarveol",
            "Isopulegol",
            "Terpine-4-ol",
            "4-Carene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alpha-Pinene (\u03b1-Pinene)",
                "Beta-Pinene (\u03b2-Pinene)",
                "Limonene",
                "Myrcene",
                "Linalool",
                "Terpinolene",
                "Camphene",
                "Sabinene",
                "Geraniol",
                "Eucalyptol (1,8-Cineole)",
                "Terpineol",
                "Pulegone",
                "Menthol",
                "Citral",
                "Thymol",
                "Carvone",
                "Bornyl Acetate",
                "Ocimene",
                "Valencene",
                "Nerol",
                "Pinocarveol",
                "Isopulegol",
                "Terpine-4-ol",
                "4-Carene"
            ],
            "mismatches": [],
            "true_referents": [
                "4-Carene",
                "Alpha-Pinene (\u03b1-Pinene)",
                "Beta-Pinene (\u03b2-Pinene)",
                "Bornyl Acetate",
                "Camphene",
                "Carvone",
                "Citral",
                "Eucalyptol (1,8-Cineole)",
                "Geraniol",
                "Isopulegol",
                "Limonene",
                "Linalool",
                "Menthol",
                "Myrcene",
                "Nerol",
                "Ocimene",
                "Pinocarveol",
                "Pulegone",
                "Sabinene",
                "Terpine-4-ol",
                "Terpineol",
                "Terpinolene",
                "Thymol",
                "Valencene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "\u03b1-Pinene (PIN)",
            "\u03b2-Pinene",
            "Limonene (LIM)",
            "Myrcene",
            "Camphene",
            "Sabinene",
            "\u03943-Carene",
            "Ocimene",
            "Terpinolene",
            "p-Cymene",
            "\u03b1-Terpinene",
            "\u03b3-Terpinene",
            "\u03b1-Phellandrene",
            "\u03b2-Phellandrene",
            "Geraniol",
            "Linalool",
            "Citronellol",
            "Nerol",
            "Menthol",
            "Eucalyptol (1,8-Cineole)",
            "Borneol",
            "Camphor",
            "Thujone",
            "Citral"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "\u03b1-Pinene (PIN)",
                "\u03b2-Pinene",
                "Limonene (LIM)",
                "Myrcene",
                "Camphene",
                "Sabinene",
                "\u03943-Carene",
                "Ocimene",
                "Terpinolene",
                "p-Cymene",
                "\u03b1-Terpinene",
                "\u03b3-Terpinene",
                "\u03b1-Phellandrene",
                "\u03b2-Phellandrene",
                "Geraniol",
                "Linalool",
                "Citronellol",
                "Nerol",
                "Menthol",
                "Eucalyptol (1,8-Cineole)",
                "Borneol",
                "Camphor",
                "Thujone",
                "Citral"
            ],
            "mismatches": [],
            "true_referents": [
                "Borneol",
                "Camphene",
                "Camphor",
                "Citral",
                "Citronellol",
                "Eucalyptol (1,8-Cineole)",
                "Geraniol",
                "Limonene (LIM)",
                "Linalool",
                "Menthol",
                "Myrcene",
                "Nerol",
                "Ocimene",
                "Sabinene",
                "Terpinolene",
                "Thujone",
                "p-Cymene",
                "\u03943-Carene",
                "\u03b1-Phellandrene",
                "\u03b1-Pinene (PIN)",
                "\u03b1-Terpinene",
                "\u03b2-Phellandrene",
                "\u03b2-Pinene",
                "\u03b3-Terpinene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene",
            "\u03b1-Pinene",
            "\u03b2-Pinene",
            "Myrcene",
            "Camphene",
            "Sabinene",
            "3-Carene",
            "\u03b1-Terpinene",
            "\u03b3-Terpinene",
            "\u03b1-Phellandrene",
            "\u03b2-Phellandrene",
            "Terpinolene",
            "\u03b1-Terpineol",
            "Linalool",
            "Geraniol",
            "Citronellol",
            "Nerol",
            "Citronellal",
            "Perillaldehyde",
            "Menthol",
            "Borneol",
            "Thymol",
            "Carvacrol",
            "p-Cymene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene",
                "\u03b1-Pinene",
                "\u03b2-Pinene",
                "Myrcene",
                "Camphene",
                "Sabinene",
                "3-Carene",
                "\u03b1-Terpinene",
                "\u03b3-Terpinene",
                "\u03b1-Phellandrene",
                "\u03b2-Phellandrene",
                "Terpinolene",
                "\u03b1-Terpineol",
                "Linalool",
                "Geraniol",
                "Citronellol",
                "Nerol",
                "Citronellal",
                "Perillaldehyde",
                "Menthol",
                "Borneol",
                "Thymol",
                "Carvacrol",
                "p-Cymene"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Carene",
                "Borneol",
                "Camphene",
                "Carvacrol",
                "Citronellal",
                "Citronellol",
                "Geraniol",
                "Limonene",
                "Linalool",
                "Menthol",
                "Myrcene",
                "Nerol",
                "Perillaldehyde",
                "Sabinene",
                "Terpinolene",
                "Thymol",
                "p-Cymene",
                "\u03b1-Phellandrene",
                "\u03b1-Pinene",
                "\u03b1-Terpinene",
                "\u03b1-Terpineol",
                "\u03b2-Phellandrene",
                "\u03b2-Pinene",
                "\u03b3-Terpinene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (C10H16)",
            "\u03b1-Pinene (C10H16)",
            "\u03b2-Pinene (C10H16)",
            "Myrcene (C10H16)",
            "Camphene (C10H16)",
            "Sabinene (C10H16)",
            "Terpinolene (C10H16)",
            "\u03b3-Terpinene (C10H16)",
            "\u03b1-Terpinene (C10H16)",
            "Geraniol (C10H18O)",
            "Linalool (C10H18O)",
            "Citronellol (C10H20O)",
            "Menthol (C10H20O)",
            "Thymol (C10H14O)",
            "Carvacrol (C10H14O)",
            "Borneol (C10H18O)",
            "Camphor (C10H16O)",
            "Isoborneol (C10H18O)",
            "Fenchone (C10H16O)",
            "Pulegone (C10H16O)",
            "Perillyl alcohol (C10H16O)",
            "Eucalyptol (C10H18O)",
            "Carvone (C10H14O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (C10H16)",
                "\u03b1-Pinene (C10H16)",
                "\u03b2-Pinene (C10H16)",
                "Myrcene (C10H16)",
                "Camphene (C10H16)",
                "Sabinene (C10H16)",
                "Terpinolene (C10H16)",
                "\u03b3-Terpinene (C10H16)",
                "\u03b1-Terpinene (C10H16)",
                "Geraniol (C10H18O)",
                "Linalool (C10H18O)",
                "Citronellol (C10H20O)",
                "Menthol (C10H20O)",
                "Thymol (C10H14O)",
                "Carvacrol (C10H14O)",
                "Borneol (C10H18O)",
                "Camphor (C10H16O)",
                "Isoborneol (C10H18O)",
                "Fenchone (C10H16O)",
                "Pulegone (C10H16O)",
                "Perillyl alcohol (C10H16O)",
                "Eucalyptol (C10H18O)",
                "Carvone (C10H14O)"
            ],
            "mismatches": [],
            "true_referents": [
                "Borneol (C10H18O)",
                "Camphene (C10H16)",
                "Camphor (C10H16O)",
                "Carvacrol (C10H14O)",
                "Carvone (C10H14O)",
                "Citronellol (C10H20O)",
                "Eucalyptol (C10H18O)",
                "Fenchone (C10H16O)",
                "Geraniol (C10H18O)",
                "Isoborneol (C10H18O)",
                "Limonene (C10H16)",
                "Linalool (C10H18O)",
                "Menthol (C10H20O)",
                "Myrcene (C10H16)",
                "Perillyl alcohol (C10H16O)",
                "Pulegone (C10H16O)",
                "Sabinene (C10H16)",
                "Terpinolene (C10H16)",
                "Thymol (C10H14O)",
                "\u03b1-Pinene (C10H16)",
                "\u03b1-Terpinene (C10H16)",
                "\u03b2-Pinene (C10H16)",
                "\u03b3-Terpinene (C10H16)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Limonene (LIM)",
            "Myrcene (MYR)",
            "Pinene (PIN)",
            "Camphene (CAM)",
            "Geraniol (GER)",
            "Citronellol (CIT)",
            "Linalool (LIN)",
            "Menthol (MEN)",
            "Thymol (THY)",
            "Carvone (CAR)",
            "Eucalyptol (EUC)",
            "Borneol (BOR)",
            "Terpineol (TER)",
            "Nerol (NER)",
            "Citral (CIT)",
            "Sabinene (SAB)",
            "Terpinolene (TER)",
            "Fenchol (FEN)",
            "Perillyl alcohol (PER)",
            "Phellandrene (PHE)",
            "Ocimene (OCI)",
            "Terpinene (TER)",
            "Citronellal (CIT)",
            "Carveol (CAR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Limonene (LIM)",
                "Myrcene (MYR)",
                "Pinene (PIN)",
                "Camphene (CAM)",
                "Geraniol (GER)",
                "Citronellol (CIT)",
                "Linalool (LIN)",
                "Menthol (MEN)",
                "Thymol (THY)",
                "Carvone (CAR)",
                "Eucalyptol (EUC)",
                "Borneol (BOR)",
                "Terpineol (TER)",
                "Nerol (NER)",
                "Citral (CIT)",
                "Sabinene (SAB)",
                "Terpinolene (TER)",
                "Fenchol (FEN)",
                "Perillyl alcohol (PER)",
                "Phellandrene (PHE)",
                "Ocimene (OCI)",
                "Terpinene (TER)",
                "Citronellal (CIT)",
                "Carveol (CAR)"
            ],
            "mismatches": [],
            "true_referents": [
                "Borneol (BOR)",
                "Camphene (CAM)",
                "Carveol (CAR)",
                "Carvone (CAR)",
                "Citral (CIT)",
                "Citronellal (CIT)",
                "Citronellol (CIT)",
                "Eucalyptol (EUC)",
                "Fenchol (FEN)",
                "Geraniol (GER)",
                "Limonene (LIM)",
                "Linalool (LIN)",
                "Menthol (MEN)",
                "Myrcene (MYR)",
                "Nerol (NER)",
                "Ocimene (OCI)",
                "Perillyl alcohol (PER)",
                "Phellandrene (PHE)",
                "Pinene (PIN)",
                "Sabinene (SAB)",
                "Terpinene (TER)",
                "Terpineol (TER)",
                "Terpinolene (TER)",
                "Thymol (THY)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Pinene (\u03b1-pinene)",
            "Limonene",
            "Camphene",
            "Bornyl acetate",
            "Bornyl acetate",
            "Bornyl acetate",
            "Borneol",
            "Camphor",
            "Terpinen-4-ol",
            "Sabinene",
            "Sabinene hydrate",
            "Carvone",
            "Carvacrol",
            "Thujone",
            "Thujone",
            "Cineole",
            "Eucalyptol",
            "Menthol",
            "Menthone",
            "Linalool",
            "Linalool oxide",
            "Perillyl alcohol",
            "Perillyl acetate",
            "Caryophyllene oxide",
            "Geraniol",
            "Geranial",
            "Nerolidol",
            "Viridiflorol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Pinene (\u03b1-pinene)",
                "Limonene",
                "Camphene",
                "Bornyl acetate",
                "Borneol",
                "Camphor",
                "Terpinen-4-ol",
                "Sabinene",
                "Sabinene hydrate",
                "Carvone",
                "Carvacrol",
                "Thujone",
                "Cineole",
                "Eucalyptol",
                "Menthol",
                "Menthone",
                "Linalool",
                "Linalool oxide",
                "Perillyl alcohol",
                "Perillyl acetate",
                "Caryophyllene oxide"
            ],
            "mismatches": [
                "Geraniol",
                "Geranial",
                "Nerolidol",
                "Viridiflorol"
            ],
            "true_referents": [
                "Borneol",
                "Bornyl acetate",
                "Camphene",
                "Camphor",
                "Carvacrol",
                "Carvone",
                "Caryophyllene oxide",
                "Cineole",
                "Eucalyptol",
                "Limonene",
                "Linalool",
                "Linalool oxide",
                "Menthol",
                "Menthone",
                "Perillyl acetate",
                "Perillyl alcohol",
                "Pinene (\u03b1-pinene)",
                "Sabinene",
                "Sabinene hydrate",
                "Terpinen-4-ol",
                "Thujone"
            ],
            "TP": 21,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            "Alpha-pinene (PIN)",
            "Beta-pinene (BIN)",
            "Limonene (LIM)",
            "Gamma-terpinene (GTP)",
            "Terpinolene (TPL)",
            "Linalool (LNL)",
            "Geraniol (GROL)",
            "Citronellol (CITL)",
            "Linalyl acetate (LNAC)",
            "Borneol (BOR)",
            "Camphor (CAM)",
            "Fenchol (FEN)",
            "Myrtenol (MYRT)",
            "Cineole (CIN)",
            "Menthone (MNT)",
            "Pulegone (PUL)",
            "Piperitone (PIP)",
            "Carvone (CAR)",
            "Dihydrocarvone (DHC)",
            "Perillyl alcohol (PER)",
            "Sabinene (SAB)",
            "Terpineol (TERP)",
            "Nerol (NER)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alpha-pinene (PIN)",
                "Beta-pinene (BIN)",
                "Limonene (LIM)",
                "Gamma-terpinene (GTP)",
                "Terpinolene (TPL)",
                "Linalool (LNL)",
                "Geraniol (GROL)",
                "Citronellol (CITL)",
                "Linalyl acetate (LNAC)",
                "Borneol (BOR)",
                "Camphor (CAM)",
                "Fenchol (FEN)",
                "Myrtenol (MYRT)",
                "Cineole (CIN)",
                "Menthone (MNT)",
                "Pulegone (PUL)",
                "Piperitone (PIP)",
                "Carvone (CAR)",
                "Dihydrocarvone (DHC)",
                "Perillyl alcohol (PER)",
                "Sabinene (SAB)",
                "Terpineol (TERP)",
                "Nerol (NER)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-pinene (PIN)",
                "Beta-pinene (BIN)",
                "Borneol (BOR)",
                "Camphor (CAM)",
                "Carvone (CAR)",
                "Cineole (CIN)",
                "Citronellol (CITL)",
                "Dihydrocarvone (DHC)",
                "Fenchol (FEN)",
                "Gamma-terpinene (GTP)",
                "Geraniol (GROL)",
                "Limonene (LIM)",
                "Linalool (LNL)",
                "Linalyl acetate (LNAC)",
                "Menthone (MNT)",
                "Myrtenol (MYRT)",
                "Nerol (NER)",
                "Perillyl alcohol (PER)",
                "Piperitone (PIP)",
                "Pulegone (PUL)",
                "Sabinene (SAB)",
                "Terpineol (TERP)",
                "Terpinolene (TPL)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "monoterpene",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "alpha-pinene",
                "Canonical name": "alpha-pinene"
            },
            {
                "Referent": "beta-pinene",
                "Canonical name": "beta-pinene"
            },
            {
                "Referent": "limonene",
                "Canonical name": "limonene"
            },
            {
                "Referent": "phellandrene",
                "Canonical name": "phellandrene"
            },
            {
                "Referent": "terpinolene",
                "Canonical name": "terpinolene"
            },
            {
                "Referent": "3-carene",
                "Canonical name": "3-carene"
            },
            {
                "Referent": "myrcene",
                "Canonical name": "myrcene"
            },
            {
                "Referent": "ocimene",
                "Canonical name": "ocimene"
            },
            {
                "Referent": "trans-pinocarveol",
                "Canonical name": "trans-pinocarveol"
            },
            {
                "Referent": "bornyl acetate",
                "Canonical name": "bornyl acetate"
            },
            {
                "Referent": "camphene",
                "Canonical name": "camphene"
            },
            {
                "Referent": "verbenone",
                "Canonical name": "verbenone"
            },
            {
                "Referent": "caryophyllene",
                "Canonical name": "caryophyllene"
            },
            {
                "Referent": "thujopsene",
                "Canonical name": "thujopsene"
            },
            {
                "Referent": "carene",
                "Canonical name": "carene"
            },
            {
                "Referent": "valencene",
                "Canonical name": "valencene"
            },
            {
                "Referent": "fenchone",
                "Canonical name": "fenchone"
            },
            {
                "Referent": "p-cymene",
                "Canonical name": "p-cymene"
            },
            {
                "Referent": "borneol",
                "Canonical name": "borneol"
            },
            {
                "Referent": "menthol",
                "Canonical name": "menthol"
            },
            {
                "Referent": "piperitone",
                "Canonical name": "piperitone"
            },
            {
                "Referent": "phellandrene oxide",
                "Canonical name": "phellandrene oxide"
            },
            {
                "Referent": "terpinene",
                "Canonical name": "terpinene"
            },
            {
                "Referent": "thymol",
                "Canonical name": "thymol"
            },
            {
                "Referent": "isopinocamphone",
                "Canonical name": "isopinocamphone"
            },
            {
                "Referent": "sabinene",
                "Canonical name": "sabinene"
            },
            {
                "Referent": "alloaromadendrene",
                "Canonical name": "alloaromadendrene"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "alpha-pinene",
                "beta-pinene",
                "limonene",
                "phellandrene",
                "terpinolene",
                "3-carene",
                "myrcene",
                "ocimene",
                "trans-pinocarveol",
                "bornyl acetate",
                "camphene",
                "verbenone",
                "caryophyllene",
                "thujopsene",
                "carene",
                "valencene",
                "fenchone",
                "p-cymene",
                "borneol",
                "menthol",
                "piperitone",
                "phellandrene oxide",
                "terpinene",
                "thymol"
            ],
            "mismatches": [
                "isopinocamphone",
                "sabinene",
                "alloaromadendrene"
            ],
            "true_referents": [
                "{\"Referent\": \"3-carene\", \"Canonical name\": \"3-carene\"}",
                "{\"Referent\": \"alpha-pinene\", \"Canonical name\": \"alpha-pinene\"}",
                "{\"Referent\": \"beta-pinene\", \"Canonical name\": \"beta-pinene\"}",
                "{\"Referent\": \"borneol\", \"Canonical name\": \"borneol\"}",
                "{\"Referent\": \"bornyl acetate\", \"Canonical name\": \"bornyl acetate\"}",
                "{\"Referent\": \"camphene\", \"Canonical name\": \"camphene\"}",
                "{\"Referent\": \"carene\", \"Canonical name\": \"carene\"}",
                "{\"Referent\": \"caryophyllene\", \"Canonical name\": \"caryophyllene\"}",
                "{\"Referent\": \"fenchone\", \"Canonical name\": \"fenchone\"}",
                "{\"Referent\": \"limonene\", \"Canonical name\": \"limonene\"}",
                "{\"Referent\": \"menthol\", \"Canonical name\": \"menthol\"}",
                "{\"Referent\": \"myrcene\", \"Canonical name\": \"myrcene\"}",
                "{\"Referent\": \"ocimene\", \"Canonical name\": \"ocimene\"}",
                "{\"Referent\": \"p-cymene\", \"Canonical name\": \"p-cymene\"}",
                "{\"Referent\": \"phellandrene oxide\", \"Canonical name\": \"phellandrene oxide\"}",
                "{\"Referent\": \"phellandrene\", \"Canonical name\": \"phellandrene\"}",
                "{\"Referent\": \"piperitone\", \"Canonical name\": \"piperitone\"}",
                "{\"Referent\": \"terpinene\", \"Canonical name\": \"terpinene\"}",
                "{\"Referent\": \"terpinolene\", \"Canonical name\": \"terpinolene\"}",
                "{\"Referent\": \"thujopsene\", \"Canonical name\": \"thujopsene\"}",
                "{\"Referent\": \"thymol\", \"Canonical name\": \"thymol\"}",
                "{\"Referent\": \"trans-pinocarveol\", \"Canonical name\": \"trans-pinocarveol\"}",
                "{\"Referent\": \"valencene\", \"Canonical name\": \"valencene\"}",
                "{\"Referent\": \"verbenone\", \"Canonical name\": \"verbenone\"}"
            ],
            "TP": 24,
            "FP": 3,
            "FN": 0
        }
    }
]