[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (C12H22O11)",
            "Lactose (C12H22O11)",
            "Maltose (C12H22O11)",
            "Trehalose (C12H24O11)",
            "Cellobiose (C12H22O11)",
            "Isomaltose (C12H22O11)",
            "Nigerose (C12H22O11)",
            "Palatinose (C12H24O11)",
            "Turanose (C12H22O11)",
            "Lactulose (C12H24O11)",
            "Melibiose (C12H22O11)",
            "Raffinose (C18H32O16)",
            "Verbascose (C30H58O26)",
            "Melezitose (C18H32O16)",
            "Sorbitol (C6H14O6)",
            "Xylobiose (C6H10O5)",
            "Gentiobiose (C6H10O5)",
            "Melibiulose (C12H22O11)",
            "Isomaltooligosaccharide (C12H22O11)",
            "Isomaltulose (C12H24O11)",
            "Panose (C9H14O6)",
            "Cellotriose (C6H10O5)",
            "Maltulose (C12H22O11)",
            "Kojibiose (C12H22O11)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (C12H22O11)",
                "Lactose (C12H22O11)",
                "Maltose (C12H22O11)",
                "Trehalose (C12H24O11)",
                "Cellobiose (C12H22O11)",
                "Isomaltose (C12H22O11)",
                "Nigerose (C12H22O11)",
                "Turanose (C12H22O11)",
                "Lactulose (C12H24O11)",
                "Melibiose (C12H22O11)",
                "Verbascose (C30H58O26)",
                "Xylobiose (C6H10O5)",
                "Gentiobiose (C6H10O5)",
                "Maltulose (C12H22O11)",
                "Kojibiose (C12H22O11)"
            ],
            "mismatches": [
                "Palatinose (C12H24O11)",
                "Raffinose (C18H32O16)",
                "Melezitose (C18H32O16)",
                "Sorbitol (C6H14O6)",
                "Melibiulose (C12H22O11)",
                "Isomaltooligosaccharide (C12H22O11)",
                "Isomaltulose (C12H24O11)",
                "Panose (C9H14O6)",
                "Cellotriose (C6H10O5)"
            ],
            "true_referents": [
                "1-O-(4-O-beta-D-galactopyranosyl-beta-D-glucopyranosyl)-glycerol",
                "1-O-acetyl-alpha-maltose",
                "1-O-acetyl-beta-maltose",
                "2-O-(4-O-beta-D-galactopyranosyl-beta-D-glucopyranosyl)-glycerol",
                "2-O-(long-chain fatty acyl)-alpha,alpha-trehalose",
                "2-O-palmitoyl-alpha,alpha-trehalose",
                "2-phenylethyl beta-primeveroside",
                "3,6-di-O-methyl-beta-D-glucopyranosyl-(1->4)-alpha-L-rhamnopyranose",
                "6-O-(beta-D-xylopyranosyl)-beta-D-glucopyranoside",
                "6-O-alpha-D-glucopyranosyl-D-fructofuranose",
                "6-O-alpha-D-glucopyranosyl-beta-D-fructofuranose",
                "7-O-[alpha-L-rhamnosyl-(1->2)-beta-D-glucosyl]isoorientin",
                "D-glucopyranosyl-(1->4)-D-mannopyranose",
                "D-glucopyranosyl-(1->4)-aldehydo-D-mannose",
                "D-maltobiono-1,5-lactone",
                "D-xylobiono-1,5-lactone",
                "alpha,beta-trehalose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "beta-D-apiofuranosyl-(1->6)-D-glucopyranose",
                "beta-D-fructofuranosyl 6-O-octanoyl-alpha-D-glucopyranoside",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "gentiobiose octaacetate",
                "glycosylglucose",
                "irilone-4'-O-[beta-D-glucopyranosyl-(1->6)-beta-D-glucopyranoside]",
                "isomaltose",
                "kojibiose",
                "lactose phosphate",
                "lactulose",
                "maltose",
                "maltulose",
                "melibiose",
                "nigerose",
                "sambubiose",
                "sesaminol 2-O-beta-D-gentiobioside",
                "sucrose",
                "trehalose",
                "turanose",
                "verbascose",
                "vicianose",
                "xylobiose"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (C12H22O11)",
            "Lactose (C12H22O11)",
            "Maltose (C12H22O11)",
            "Trehalose (C12H22O11)",
            "Cellobiose (C12H22O11)",
            "Chitobiose (C12H18N2O8)",
            "Lactulose (C12H22O11)",
            "Isomaltose (C12H22O11)",
            "Melibiose (C12H22O11)",
            "Turanose (C12H22O11)",
            "Levulose (C12H22O11)",
            "Nigerose (C12H22O11)",
            "Sophorose (C18H32O16)",
            "Raffinose (C18H32O16)",
            "Stachyose (C24H42O21)",
            "Verbascose (C30H52O25)",
            "Kojibiose (C12H22O11)",
            "Allulose (C12H22O11)",
            "Gentiobiose (C12H22O11)",
            "Melezitose (C18H32O16)",
            "Laminaribiose (C12H22O11)",
            "Nystose (C24H42O21)",
            "Fucosyllactose (C17H32O16)",
            "Lacto-N-tetraose (C24H44O21)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (C12H22O11)",
                "Lactose (C12H22O11)",
                "Maltose (C12H22O11)",
                "Trehalose (C12H22O11)",
                "Cellobiose (C12H22O11)",
                "Chitobiose (C12H18N2O8)",
                "Lactulose (C12H22O11)",
                "Isomaltose (C12H22O11)",
                "Melibiose (C12H22O11)",
                "Turanose (C12H22O11)",
                "Nigerose (C12H22O11)",
                "Sophorose (C18H32O16)",
                "Stachyose (C24H42O21)",
                "Verbascose (C30H52O25)",
                "Kojibiose (C12H22O11)",
                "Gentiobiose (C12H22O11)",
                "Laminaribiose (C12H22O11)"
            ],
            "mismatches": [
                "Levulose (C12H22O11)",
                "Raffinose (C18H32O16)",
                "Allulose (C12H22O11)",
                "Melezitose (C18H32O16)",
                "Nystose (C24H42O21)",
                "Fucosyllactose (C17H32O16)",
                "Lacto-N-tetraose (C24H44O21)"
            ],
            "true_referents": [
                "1-O-acetyl-alpha-maltose",
                "1-O-acetyl-beta-maltose",
                "2'-fucosyllactose",
                "2,2'-difucosyllactose",
                "2-O-(4-O-beta-D-galactopyranosyl-beta-D-glucopyranosyl)-glycerol",
                "2-O-(long-chain fatty acyl)-alpha,alpha-trehalose",
                "2-phenylethyl beta-primeveroside",
                "3'-ketolactose",
                "3,6-di-O-methyl-beta-D-glucopyranosyl-(1->4)-alpha-L-rhamnopyranose",
                "3-fucosyllactose",
                "4-\\{[4-(dimethylamino)phenyl]diazenyl\\}phenyl-beta-lactoside",
                "6-O-alpha-D-glucopyranosyl-D-fructofuranose",
                "6-O-alpha-D-glucopyranosyl-beta-D-fructofuranose",
                "6-hydroxyluteolin 7-O-laminaribioside",
                "6-kestotriose",
                "D-galactosyl-(1->4)-beta-D-glucose",
                "D-glucopyranosyl-(1->4)-aldehydo-D-mannose",
                "D-maltobiono-1,5-lactone",
                "N,N'-diacetylchitobiose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "beta-D-apiofuranosyl-(1->6)-D-glucopyranose",
                "beta-D-fructofuranosyl 6-O-octanoyl-alpha-D-glucopyranoside",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "bis-beta-D-fructofuranose 1,2':2,3'-dianhydride",
                "cellobiose",
                "chitobioses",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "gentiobiose octaacetate",
                "irilone-4'-O-[beta-D-glucopyranosyl-(1->6)-beta-D-glucopyranoside]",
                "isomaltose",
                "kojibiose",
                "lactose phosphate",
                "lactulose",
                "laminarabiose",
                "maltose",
                "maltulose",
                "melibiose",
                "methyl 13-sophorosyloxydocosanoate",
                "nigerose",
                "sambubiose",
                "sesaminol 2-O-beta-D-gentiobioside",
                "sophorose",
                "sophoroside",
                "stachyose",
                "sucrose",
                "trehalose",
                "turanose",
                "verbascose",
                "vicianose"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (table sugar)",
            "Lactose (milk sugar)",
            "Maltose (malt sugar)",
            "Cellobiose",
            "Trebulose",
            "Isomaltose",
            "Kojibiose",
            "Nigerose",
            "Sophorose",
            "Laminaribiose",
            "Gentiobiose",
            "Turanose",
            "Palatinose",
            "Isomaltulose",
            "Maltulose",
            "Leucrose",
            "Lactulose",
            "Melibiose",
            "Raffinose",
            "Stachyose",
            "Verbascose",
            "Acarviose",
            "Lyxinose",
            "Xylobiose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (table sugar)",
                "Lactose (milk sugar)",
                "Maltose (malt sugar)",
                "Cellobiose",
                "Isomaltose",
                "Kojibiose",
                "Nigerose",
                "Sophorose",
                "Laminaribiose",
                "Gentiobiose",
                "Turanose",
                "Maltulose",
                "Leucrose",
                "Lactulose",
                "Melibiose",
                "Stachyose",
                "Verbascose",
                "Xylobiose"
            ],
            "mismatches": [
                "Trebulose",
                "Palatinose",
                "Isomaltulose",
                "Raffinose",
                "Acarviose",
                "Lyxinose"
            ],
            "true_referents": [
                "(2S)-poncirin",
                "3-ketosucrose",
                "6-hydroxyluteolin 7-O-laminaribioside",
                "6-kestotriose",
                "Lyciumoside I",
                "acutoside A",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "beta-L-arabinofuranosyl-(1->2)-beta-L-arabinofuranose",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "galactobiose",
                "gentiobiose",
                "glycosylxylose",
                "inulobiose",
                "isomaltose",
                "kojibiose",
                "lactose",
                "lactulose",
                "laminarabiose",
                "leucrose",
                "lyciumoside II",
                "maltose",
                "maltulose",
                "melibiose",
                "nigerose",
                "primeverose",
                "rutinose",
                "sambubiose",
                "sophorose",
                "sophoroside",
                "stachyose",
                "sucrose",
                "trehalose",
                "trehalulose",
                "turanose",
                "verbascose",
                "vicianose",
                "xylobiose"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (Table Sugar)",
            "Lactose (Milk Sugar)",
            "Maltose (Malt Sugar)",
            "Cellobiose (Cellulose breakdown product)",
            "Isomaltose (Starch breakdown product)",
            "Trehalose (Insect hemolymph sugar)",
            "Turranose (Found in fruits)",
            "Lactulose (Synthetic disaccharide)",
            "Gentiobiose (Gentian root sugar)",
            "Rutinoside (Flavonoid glycoside)",
            "Sophorose (Glycosidase substrate)",
            "Maltotriose (Oligosaccharide related to Maltose)",
            "Panose (Starch degradation product)",
            "Kojibiose (Produced by Aspergillus)",
            "Nigerose (Produced by Aspergillus niger)",
            "Melezitose (Honey dew sugar)",
            "Raffinose (Beet sugar family)",
            "Melibiose (Galactose and Glucose)",
            "Palatinose (Isomer of Sucrose)",
            "Theobromine (Cacao plant disaccharide)",
            "Phlorizin (Apple glycoside)",
            "Hesperidin (Citrus flavonoid glycoside)",
            "Naringin (Grapefruit glycoside)",
            "Rutinoside (Buckwheat glycoside)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (Table Sugar)",
                "Lactose (Milk Sugar)",
                "Maltose (Malt Sugar)",
                "Cellobiose (Cellulose breakdown product)",
                "Isomaltose (Starch breakdown product)",
                "Trehalose (Insect hemolymph sugar)",
                "Lactulose (Synthetic disaccharide)",
                "Gentiobiose (Gentian root sugar)",
                "Sophorose (Glycosidase substrate)",
                "Kojibiose (Produced by Aspergillus)",
                "Nigerose (Produced by Aspergillus niger)",
                "Melibiose (Galactose and Glucose)",
                "Hesperidin (Citrus flavonoid glycoside)",
                "Naringin (Grapefruit glycoside)"
            ],
            "mismatches": [
                "Turranose (Found in fruits)",
                "Rutinoside (Flavonoid glycoside)",
                "Maltotriose (Oligosaccharide related to Maltose)",
                "Panose (Starch degradation product)",
                "Melezitose (Honey dew sugar)",
                "Raffinose (Beet sugar family)",
                "Palatinose (Isomer of Sucrose)",
                "Theobromine (Cacao plant disaccharide)",
                "Phlorizin (Apple glycoside)",
                "Rutinoside (Buckwheat glycoside)"
            ],
            "true_referents": [
                "1-O-acetyl-beta-maltose",
                "3'-dehydromaltose",
                "5,6,7,4'-tetrahydroxyflavonol-3-O-rutinoside",
                "D-galactosyl-(1->4)-beta-D-glucose",
                "alpha,beta-trehalose",
                "alpha-cellobiose",
                "beta-D-fructofuranoside",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "cellobiose",
                "cyanidin 3-O-rutinoside",
                "cyanidin 3-O-rutinoside 5-O-beta-D-glucoside",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "flavonol 3-O-beta-D-glucosyl-(1->2)-beta-D-glucoside",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "gentiobiose octaacetate",
                "glycosylpentose",
                "glycosylxylose",
                "hesperidin",
                "inulobiose",
                "isomaltose",
                "kaempferol-3-rutinoside",
                "kojibiose",
                "lactose",
                "lactulose",
                "maltose",
                "maltulose",
                "melibiose",
                "methyl hesperidin",
                "naringin",
                "nigerose",
                "pelargonidin 3-O-rutinoside",
                "pelargonidin 3-O-rutinoside 5-O-beta-D-glucoside",
                "sambubiose",
                "sophorose",
                "sophoroside",
                "sucralose",
                "sucrose",
                "trehalose",
                "trehalose phosphate",
                "turanose"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (C12H22O11)",
            "Lactose (C12H22O11)",
            "Maltose (C12H22O11)",
            "Trehalose (C12H22O11)",
            "Cellobiose (C12H22O11)",
            "Isomaltose (C12H22O11)",
            "Gentiobiose (C12H22O11)",
            "Melezitose (C18H32O16)",
            "Maltulose (C12H22O11)",
            "Palatinose (C12H22O11)",
            "Lactulose (C12H22O11)",
            "Melibiose (C12H22O11)",
            "Turbinose (C12H22O11)",
            "Sophirose (C12H22O11)",
            "Laminaribiose (C12H22O11)",
            "Leucrose (C12H22O11)",
            "Mannobiose (C12H22O11)",
            "Raffinose (C18H32O16)",
            "Neotrehalose (C12H22O11)",
            "Vicianose (C12H22O11)",
            "Kojibiose (C12H22O11)",
            "Turanose (C12H22O11)",
            "Nigerose (C12H22O11)",
            "Maltotriose (C18H32O16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (C12H22O11)",
                "Lactose (C12H22O11)",
                "Maltose (C12H22O11)",
                "Trehalose (C12H22O11)",
                "Cellobiose (C12H22O11)",
                "Isomaltose (C12H22O11)",
                "Gentiobiose (C12H22O11)",
                "Maltulose (C12H22O11)",
                "Lactulose (C12H22O11)",
                "Melibiose (C12H22O11)",
                "Laminaribiose (C12H22O11)",
                "Leucrose (C12H22O11)",
                "Mannobiose (C12H22O11)",
                "Vicianose (C12H22O11)",
                "Kojibiose (C12H22O11)",
                "Turanose (C12H22O11)",
                "Nigerose (C12H22O11)"
            ],
            "mismatches": [
                "Melezitose (C18H32O16)",
                "Palatinose (C12H22O11)",
                "Turbinose (C12H22O11)",
                "Sophirose (C12H22O11)",
                "Raffinose (C18H32O16)",
                "Neotrehalose (C12H22O11)",
                "Maltotriose (C18H32O16)"
            ],
            "true_referents": [
                "1,3,3'-tri-O-methyl-4alpha-mannobiose",
                "1-O-acetyl-alpha-maltose",
                "1-O-acetyl-beta-maltose",
                "2-O-(4-O-beta-D-galactopyranosyl-beta-D-glucopyranosyl)-glycerol",
                "2-O-(long-chain fatty acyl)-alpha,alpha-trehalose",
                "2-O-palmitoyl-alpha,alpha-trehalose",
                "3,3'-neotrehalosadiamine",
                "3,6-di-O-methyl-beta-D-glucopyranosyl-(1->4)-alpha-L-rhamnopyranose",
                "3-ketosucrose",
                "6-O-alpha-D-glucopyranosyl-D-fructofuranose",
                "6-O-alpha-D-glucopyranosyl-beta-D-fructofuranose",
                "6-hydroxyluteolin 7-O-laminaribioside",
                "7-O-[alpha-L-rhamnosyl-(1->2)-beta-D-glucosyl]isoorientin",
                "7-O-methylvitexin 2''-O-alpha-L-rhamnoside",
                "7-O-methylvitexin 2''-O-beta-L-rhamnoside",
                "D-glucopyranosyl-(1->4)-aldehydo-D-mannose",
                "D-maltobiono-1,5-lactone",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "alpha-mannobiose",
                "beta-D-apiofuranosyl-(1->6)-D-glucopyranose",
                "beta-D-fructofuranosyl 6-O-octanoyl-alpha-D-glucopyranoside",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "gentiobiose octaacetate",
                "irilone-4'-O-[beta-D-glucopyranosyl-(1->6)-beta-D-glucopyranoside]",
                "isomaltose",
                "kojibiose",
                "lactose phosphate",
                "lactulose",
                "laminarabiose",
                "leucrose",
                "maltose",
                "maltulose",
                "mannobiose",
                "melibiose",
                "nigerose",
                "sambubiose",
                "sesaminol 2-O-beta-D-gentiobioside",
                "sophoroside",
                "sucrose",
                "trehalose",
                "turanose",
                "vicianose"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (C12H22O11)",
            "Lactose (C12H22O11)",
            "Maltose (C12H22O11)",
            "Trehalose (C12H22O11)",
            "Cellobiose (C12H22O11)",
            "Raffinose (C18H32O16)",
            "Melezitose (C18H32O16)",
            "Maltotriose (C18H36O18)",
            "Melezitose (C18H32O16)",
            "Mannobiose (C12H22O11)",
            "Gentiobiose (C12H22O11)",
            "Lactulose (C12H22O11)",
            "Sorbitol (C6H14O6)",
            "Isomaltose (C12H22O11)",
            "Manninotriose (C18H36O18)",
            "Maltodextrins (C6H10O5)n",
            "Nigerose (C12H22O11)",
            "Raffinose (C18H32O16)",
            "Melezitose (C18H32O16)",
            "Oligofructose (C6H10O5)n",
            "Lactosucrose (C18H32O16)",
            "Maltotetraose (C24H42O21)",
            "Mannobiose (C12H22O11)",
            "Maltotriulose (C18H36O18)",
            "Mannobiose (C12H22O11)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (C12H22O11)",
                "Lactose (C12H22O11)",
                "Maltose (C12H22O11)",
                "Trehalose (C12H22O11)",
                "Cellobiose (C12H22O11)",
                "Mannobiose (C12H22O11)",
                "Gentiobiose (C12H22O11)",
                "Lactulose (C12H22O11)",
                "Isomaltose (C12H22O11)",
                "Nigerose (C12H22O11)"
            ],
            "mismatches": [
                "Raffinose (C18H32O16)",
                "Melezitose (C18H32O16)",
                "Maltotriose (C18H36O18)",
                "Sorbitol (C6H14O6)",
                "Manninotriose (C18H36O18)",
                "Maltodextrins (C6H10O5)n",
                "Oligofructose (C6H10O5)n",
                "Lactosucrose (C18H32O16)",
                "Maltotetraose (C24H42O21)",
                "Maltotriulose (C18H36O18)"
            ],
            "true_referents": [
                "1,3,3'-tri-O-methyl-4alpha-mannobiose",
                "1,6-kestotetraose",
                "1-O-(4-O-beta-D-galactopyranosyl-beta-D-glucopyranosyl)-glycerol",
                "1-O-acetyl-alpha-maltose",
                "1-O-acetyl-beta-maltose",
                "2-O-(4-O-beta-D-galactopyranosyl-beta-D-glucopyranosyl)-glycerol",
                "2-O-(long-chain fatty acyl)-alpha,alpha-trehalose",
                "3-ketosucrose",
                "6-O-alpha-D-glucopyranosyl-D-fructofuranose",
                "6-O-alpha-D-glucopyranosyl-beta-D-fructofuranose",
                "D-glucopyranosyl-(1->3)-D-mannopyranose",
                "D-glucopyranosyl-(1->4)-aldehydo-D-mannose",
                "D-maltobiono-1,5-lactone",
                "alpha-cellobiose",
                "alpha-lactose",
                "alpha-mannobiose",
                "beta-D-apiofuranosyl-(1->6)-D-glucopyranose",
                "beta-D-fructofuranosyl 6-O-octanoyl-alpha-D-glucopyranoside",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "bis-D-fructose 2',1:2,1'-dianhydride",
                "bis-beta-D-fructofuranose 1,2':2,3'-dianhydride",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "gentiobiose octaacetate",
                "glycosylfructose",
                "isomaltose",
                "lactose phosphate",
                "lactulose",
                "maltose",
                "maltose phosphate",
                "maltulose",
                "mannobiose",
                "nigerose",
                "sucrose",
                "trehalose"
            ],
            "TP": 10,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (Suc)",
            "Lactose (Lac)",
            "Maltose (Mal)",
            "Trehalose (Tre)",
            "Cellobiose (Cel)",
            "Gentiobiose (Gen)",
            "Melibiose (Mel)",
            "Isomaltose (Iso)",
            "Nigerose (Nig)",
            "Maltulose (Mtu)",
            "Turanose (Tur)",
            "Palatinose (Pal)",
            "Lactulose (Ltu)",
            "Melizitose (Mzt)",
            "Raffinose (Raf)",
            "Stachyose (Sta)",
            "Verbascose (Ver)",
            "Maltotriose (Mtr)",
            "Mannobiose (Mnb)",
            "Sophorose (Sop)",
            "Laminaribiose (Lam)",
            "Glycosyl trehalose (Gly)",
            "Galactobiose (Gal)",
            "Kojibiose (Koj)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (Suc)",
                "Lactose (Lac)",
                "Maltose (Mal)",
                "Trehalose (Tre)",
                "Cellobiose (Cel)",
                "Gentiobiose (Gen)",
                "Melibiose (Mel)",
                "Isomaltose (Iso)",
                "Nigerose (Nig)",
                "Maltulose (Mtu)",
                "Turanose (Tur)",
                "Lactulose (Ltu)",
                "Stachyose (Sta)",
                "Verbascose (Ver)",
                "Mannobiose (Mnb)",
                "Sophorose (Sop)",
                "Galactobiose (Gal)",
                "Kojibiose (Koj)"
            ],
            "mismatches": [
                "Palatinose (Pal)",
                "Melizitose (Mzt)",
                "Raffinose (Raf)",
                "Maltotriose (Mtr)",
                "Laminaribiose (Lam)",
                "Glycosyl trehalose (Gly)"
            ],
            "true_referents": [
                "(2S)-poncirin",
                "1,6-kestotetraose",
                "1-O-acetyl-beta-maltose",
                "6-kestotriose",
                "alpha,beta-trehalose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "alpha-mannobiose",
                "beta,beta-trehalose",
                "beta-(1->6)-galactobiose",
                "beta-D-Galf-(1->5)-D-Galf",
                "beta-D-Galp-(1->3)-L-Araf",
                "beta-L-arabinofuranosyl-(1->2)-beta-L-arabinofuranose",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-mannobiose",
                "beta-melibiose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "galactobiose",
                "gentiobiose",
                "glycosylxylose",
                "inulobiose",
                "isomaltose",
                "kojibiose",
                "lactose",
                "lactose 6'-phosphate",
                "lactulose",
                "laminarabiose",
                "maltose",
                "maltose 1-phosphate",
                "maltose phosphate",
                "maltulose",
                "mannobiose",
                "melibionic acid",
                "melibiose",
                "nigerose",
                "rutinose",
                "sambubiose",
                "sophorose",
                "sophoroside",
                "stachyose",
                "streptobiosamine",
                "sucrose",
                "sucrose 6(G)-phosphate",
                "trehalose",
                "trehalose phosphate",
                "turanose",
                "verbascose",
                "vicianose"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose",
            "Lactose",
            "Maltose",
            "Trehalose",
            "Cellobiose",
            "Isomaltose",
            "Gentianose",
            "Turanose",
            "Laminaribiose",
            "Sophorose",
            "Nigerose",
            "Kojibiose",
            "Maltulose",
            "Gentiobiose",
            "Divergose",
            "Isodulcite",
            "Galactosylsucrose",
            "Lactulose",
            "Lactosucrose",
            "Cellobiitol",
            "Laminaribioitol",
            "Maltitol",
            "Isomaltitol",
            "Trehalitol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose",
                "Lactose",
                "Maltose",
                "Trehalose",
                "Cellobiose",
                "Isomaltose",
                "Turanose",
                "Sophorose",
                "Nigerose",
                "Kojibiose",
                "Maltulose",
                "Gentiobiose",
                "Lactulose"
            ],
            "mismatches": [
                "Gentianose",
                "Laminaribiose",
                "Divergose",
                "Isodulcite",
                "Galactosylsucrose",
                "Lactosucrose",
                "Cellobiitol",
                "Laminaribioitol",
                "Maltitol",
                "Isomaltitol",
                "Trehalitol"
            ],
            "true_referents": [
                "3-ketosucrose",
                "6-hydroxyluteolin 7-O-laminaribioside",
                "D-galactosyl-(1->4)-beta-D-glucose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "beta-D-galactosyl-(1->4)-beta-D-glucoside",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "galactobiose",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "glycosylgalactose",
                "glycosylxylose",
                "inulobiose",
                "isobemisiose",
                "isomaltose",
                "kojibiose",
                "lactose",
                "lactulose",
                "laminarabiose",
                "leucrose",
                "maltose",
                "maltulose",
                "melibiose",
                "nigerose",
                "rutinose",
                "sambubiose",
                "scroside D",
                "sesaminol 2-O-beta-D-gentiobioside",
                "sophorose",
                "sophoroside",
                "sucrose",
                "trehalosamine",
                "trehalose",
                "trehalose phosphate",
                "trehalulose",
                "turanose"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (Suc)",
            "Lactose (Lac)",
            "Maltose (Mal)",
            "Cellobiose",
            "Trehalose",
            "Isomaltose",
            "Gentiobiose",
            "Melibiose",
            "Xylobiose",
            "Arabinobiose",
            "Lactulose",
            "Maltulose",
            "Panose",
            "Kojibiose",
            "Laminaribiose",
            "Mannobiose",
            "Galactobiose",
            "Turansose",
            "Vinitol",
            "Allobiose",
            "Neoketobiose",
            "Petroselobiose",
            "Passobiose",
            "Solanesose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (Suc)",
                "Lactose (Lac)",
                "Maltose (Mal)",
                "Cellobiose",
                "Trehalose",
                "Isomaltose",
                "Gentiobiose",
                "Melibiose",
                "Xylobiose",
                "Lactulose",
                "Maltulose",
                "Kojibiose",
                "Mannobiose",
                "Galactobiose"
            ],
            "mismatches": [
                "Arabinobiose",
                "Panose",
                "Turansose",
                "Vinitol",
                "Allobiose",
                "Neoketobiose",
                "Petroselobiose",
                "Passobiose",
                "Solanesose"
            ],
            "true_referents": [
                "6-hydroxyluteolin 7-O-laminaribioside",
                "ajugose",
                "alpha-cellobiose",
                "alpha-lactose",
                "alpha-mannobiose",
                "beta-(1->6)-galactobiose",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-mannobiose",
                "beta-melibiose",
                "cellobiose",
                "galactobiose",
                "gentiobiose",
                "glycosylxylose",
                "inulobiose",
                "isobemisiose",
                "isomaltose",
                "kojibiose",
                "lactose",
                "lactulose",
                "laminarabiose",
                "maltose",
                "maltose phosphate",
                "maltulose",
                "mannobiose",
                "melibiose",
                "nigerose",
                "primeverose",
                "rebaudioside D",
                "rebaudioside E",
                "robinobiose",
                "rutinose",
                "sambubiose",
                "sophorose",
                "sucrose",
                "sucrose 6(G)-phosphate",
                "trehalose",
                "trehalose phosphate",
                "trehalulose",
                "turanose",
                "xylobiose"
            ],
            "TP": 14,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (\u03b1-D-glucopyranosyl-(1\u21922)-\u03b2-D-fructofuranoside)",
            "Lactose (\u03b2-D-galactopyranosyl-(1\u21924)-D-glucose)",
            "Maltose (\u03b1-D-glucopyranosyl-(1\u21924)-D-glucose)",
            "Trehalose (\u03b1-D-glucopyranosyl-(1\u21921)-\u03b1-D-glucopyranoside)",
            "Cellobiose (\u03b2-D-glucopyranosyl-(1\u21924)-D-glucose)",
            "Isomaltose (\u03b1-D-glucopyranosyl-(1\u21926)-D-glucose)",
            "Gentiobiose (\u03b2-D-glucopyranosyl-(1\u21926)-D-glucose)",
            "Lactulose (\u03b2-D-galactopyranosyl-(1\u21924)-D-fructose)",
            "Melibiose (\u03b1-D-galactopyranosyl-(1\u21926)-D-glucose)",
            "Turanose (\u03b1-D-glucopyranosyl-(1\u21923)-D-fructose)",
            "Kojibiose (\u03b1-D-glucopyranosyl-(1\u21922)-D-glucose)",
            "Nigerose (\u03b1-D-glucopyranosyl-(1\u21923)-D-glucose)",
            "Sophorose (\u03b2-D-glucopyranosyl-(1\u21922)-D-glucose)",
            "Laminaribiose (\u03b2-D-glucopyranosyl-(1\u21923)-D-glucose)",
            "Xylobiose (\u03b2-D-xylopyranosyl-(1\u21924)-D-xylose)",
            "Mannobiose (\u03b2-D-mannopyranosyl-(1\u21924)-D-mannose)",
            "Palatinose (\u03b1-D-glucopyranosyl-(1\u21926)-D-fructose)",
            "Leucrose (\u03b1-D-glucopyranosyl-(1\u21925)-D-fructose)",
            "Maltulose (\u03b1-D-glucopyranosyl-(1\u21924)-D-fructose)",
            "Epilactose (\u03b2-D-galactopyranosyl-(1\u21924)-D-mannose)",
            "Allolactose (\u03b2-D-galactopyranosyl-(1\u21926)-D-glucose)",
            "Rutinose (\u03b1-L-rhamnopyranosyl-(1\u21926)-D-glucose)",
            "Primeverose (\u03b2-D-xylopyranosyl-(1\u21926)-D-glucose)",
            "Vicianose (\u03b2-D-arabinopyranosyl-(1\u21926)-D-glucose)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Maltose (\u03b1-D-glucopyranosyl-(1\u21924)-D-glucose)",
                "Trehalose (\u03b1-D-glucopyranosyl-(1\u21921)-\u03b1-D-glucopyranoside)",
                "Cellobiose (\u03b2-D-glucopyranosyl-(1\u21924)-D-glucose)",
                "Isomaltose (\u03b1-D-glucopyranosyl-(1\u21926)-D-glucose)",
                "Gentiobiose (\u03b2-D-glucopyranosyl-(1\u21926)-D-glucose)",
                "Lactulose (\u03b2-D-galactopyranosyl-(1\u21924)-D-fructose)",
                "Turanose (\u03b1-D-glucopyranosyl-(1\u21923)-D-fructose)",
                "Kojibiose (\u03b1-D-glucopyranosyl-(1\u21922)-D-glucose)",
                "Nigerose (\u03b1-D-glucopyranosyl-(1\u21923)-D-glucose)",
                "Sophorose (\u03b2-D-glucopyranosyl-(1\u21922)-D-glucose)",
                "Laminaribiose (\u03b2-D-glucopyranosyl-(1\u21923)-D-glucose)",
                "Xylobiose (\u03b2-D-xylopyranosyl-(1\u21924)-D-xylose)",
                "Mannobiose (\u03b2-D-mannopyranosyl-(1\u21924)-D-mannose)",
                "Leucrose (\u03b1-D-glucopyranosyl-(1\u21925)-D-fructose)",
                "Maltulose (\u03b1-D-glucopyranosyl-(1\u21924)-D-fructose)",
                "Rutinose (\u03b1-L-rhamnopyranosyl-(1\u21926)-D-glucose)",
                "Primeverose (\u03b2-D-xylopyranosyl-(1\u21926)-D-glucose)",
                "Vicianose (\u03b2-D-arabinopyranosyl-(1\u21926)-D-glucose)"
            ],
            "mismatches": [
                "Sucrose (\u03b1-D-glucopyranosyl-(1\u21922)-\u03b2-D-fructofuranoside)",
                "Lactose (\u03b2-D-galactopyranosyl-(1\u21924)-D-glucose)",
                "Melibiose (\u03b1-D-galactopyranosyl-(1\u21926)-D-glucose)",
                "Palatinose (\u03b1-D-glucopyranosyl-(1\u21926)-D-fructose)",
                "Epilactose (\u03b2-D-galactopyranosyl-(1\u21924)-D-mannose)",
                "Allolactose (\u03b2-D-galactopyranosyl-(1\u21926)-D-glucose)"
            ],
            "true_referents": [
                "3,6-di-O-methyl-beta-D-glucopyranosyl-(1->4)-alpha-L-rhamnopyranose",
                "6-O-(beta-D-xylopyranosyl)-beta-D-glucopyranoside",
                "6-O-alpha-D-glucopyranosyl-D-fructofuranose",
                "6-O-alpha-D-glucopyranosyl-alpha-D-fructofuranose",
                "6-O-alpha-D-glucopyranosyl-beta-D-fructofuranose",
                "D-galactosyl-(1->4)-beta-D-glucose",
                "D-glucopyranosyl-(1->3)-D-mannopyranose",
                "D-glucopyranosyl-(1->4)-aldehydo-D-mannose",
                "allolactose",
                "alpha,beta-trehalose",
                "alpha-D-glucopyranosyl alpha-D-galactopyranoside",
                "alpha-L-rhamnopyranosyl-(1->2)-alpha-D-galactopyranose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-allolactose",
                "alpha-cellobiose",
                "alpha-maltose",
                "benzyl beta-primeveroside",
                "beta,beta-trehalose",
                "beta-D-Gal-(1->4)-beta-D-Xyl-OBn",
                "beta-D-fructofuranosyl 6-O-octanoyl-alpha-D-glucopyranoside",
                "beta-D-galactosyl-(1->3)-alpha-D-mannose",
                "beta-D-galactosyl-(1->4)-L-rhamnose",
                "beta-D-galactosyl-(1->4)-beta-D-glucoside",
                "beta-D-glucosyl-(1->4)-D-mannopyranose",
                "beta-allolactose",
                "beta-cellobiose",
                "beta-epilactose",
                "beta-maltose",
                "beta-mannobiose",
                "beta-melibiose",
                "cellobiose",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "gentiobiose octaacetate",
                "isomaltose",
                "kojibiose",
                "lactulose",
                "laminarabiose",
                "leucrose",
                "maltose",
                "maltulose",
                "mannobiose",
                "nigerose",
                "primeverose",
                "rutinose",
                "sophorose",
                "sophoroside",
                "trehalose",
                "turanose",
                "vicianose",
                "xylobiose"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (Suc)",
            "Lactose (Lac)",
            "Maltose (Mal)",
            "Trehalose (Tre)",
            "Cellobiose (Cel)",
            "Gentiobiose (Gen)",
            "Isomaltose (Iso)",
            "Kojibiose (Koj)",
            "Laminaribiose (Lam)",
            "Maltulose (Mlt)",
            "Mannobiose (Man)",
            "Melibiose (Mel)",
            "Nigerose (Nig)",
            "Palatinose (Pal)",
            "Primeverose (Pri)",
            "Rutinose (Rut)",
            "Sophorose (Sop)",
            "Turanose (Tur)",
            "Xylobiose (Xyl)",
            "Chitobiose (Chi)",
            "Galactinol (Gal)",
            "Lactulose (Lct)",
            "Leucrose (Leu)",
            "Vicianose (Vic)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose",
                "Lactose",
                "Maltose",
                "Trehalose",
                "Cellobiose",
                "Gentiobiose",
                "Isomaltose",
                "Kojibiose",
                "Laminarabiose",
                "Maltulose",
                "Mannobiose",
                "Melibiose",
                "Nigerose",
                "Primeverose",
                "Rutinose",
                "Sophorose",
                "Turanose",
                "Xylobiose",
                "Lactulose",
                "Vicianose"
            ],
            "mismatches": [
                "Palatinose",
                "Chitobiose",
                "Galactinol",
                "Leucrose"
            ],
            "true_referents": [
                "(R)-vicianin",
                "1-O-acetyl-beta-maltose",
                "2-phenylethyl beta-primeveroside",
                "3-O-beta-D-galactopyranosyl-D-arabinose",
                "3-ketosucrose",
                "D-galactosyl-(1->4)-beta-D-glucose",
                "D-maltobiono-1,5-lactone",
                "Galactinol dihydrate",
                "N,N'-diacetylchitobiose",
                "alpha,beta-trehalose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "alpha-mannobiose",
                "benzyl beta-primeveroside",
                "beta-D-Gal-(1->4)-beta-D-Xyl-OBn",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-mannobiose",
                "beta-melibiose",
                "cellobiose",
                "chitobioses",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "formononetin 7-O-rutinoside",
                "galactobiose",
                "gentiobiose",
                "inulobiose",
                "isomaltose",
                "isorhamnetin-3-O-rutinoside",
                "kojibiose",
                "lactose",
                "lactose 6'-phosphate",
                "lactulose",
                "laminarabiose",
                "leucrose",
                "maltose",
                "maltose phosphate",
                "maltulose",
                "mannobiose",
                "melibionic acid",
                "melibiose",
                "nigerose",
                "pinen-10-yl vicianoside",
                "primeverose",
                "rutinose",
                "sambubiose",
                "sophorose",
                "sophoroside",
                "sucrose",
                "sucrose 6(G)-phosphate",
                "trehalose",
                "trehalose phosphate",
                "turanose",
                "vicianose",
                "xylobiose"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (Suc)",
            "Lactose (Lac)",
            "Maltose (Mal)",
            "Trehalose (Tre)",
            "Cellobiose (Cel)",
            "Gentiobiose (Gen)",
            "Isomaltose (Iso)",
            "Melibiose (Mel)",
            "Palatinose (Pal)",
            "Turanose (Tur)",
            "Maltulose (Mlu)",
            "Kojibiose (Koj)",
            "Nigerose (Nig)",
            "Laminaribiose (Lam)",
            "Sophorose (Sop)",
            "Rutinose (Rut)",
            "Mannobiose (Man)",
            "Xylobiose (Xyl)",
            "Lactulose (Ltu)",
            "Isomaltulose (Iml)",
            "Gentiobiulose (Glu)",
            "Melibiitol (Mli)",
            "Galactosucroses (Gal)",
            "Leucrose (Leu)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (Suc)",
                "Lactose (Lac)",
                "Maltose (Mal)",
                "Trehalose (Tre)",
                "Cellobiose (Cel)",
                "Gentiobiose (Gen)",
                "Isomaltose (Iso)",
                "Melibiose (Mel)",
                "Turanose (Tur)",
                "Maltulose (Mlu)",
                "Kojibiose (Koj)",
                "Nigerose (Nig)",
                "Sophorose (Sop)",
                "Rutinose (Rut)",
                "Mannobiose (Man)",
                "Xylobiose (Xyl)",
                "Lactulose (Ltu)",
                "Leucrose (Leu)"
            ],
            "mismatches": [
                "Palatinose (Pal)",
                "Laminaribiose (Lam)",
                "Isomaltulose (Iml)",
                "Gentiobiulose (Glu)",
                "Melibiitol (Mli)",
                "Galactosucroses (Gal)"
            ],
            "true_referents": [
                "1-O-acetyl-beta-maltose",
                "3-ketosucrose",
                "alpha,beta-trehalose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "alpha-mannobiose",
                "beta-(1->3)-galactobiose",
                "beta-(1->6)-galactobiose",
                "beta-D-Gal-(1->4)-beta-D-Xyl-OBn",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-mannobiose",
                "beta-melibiose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "formononetin 7-O-rutinoside",
                "galactobiose",
                "gentiobiose",
                "gentiobiose octaacetate",
                "glycosylxylose",
                "inulobiose",
                "isomaltose",
                "isorhamnetin-3-O-rutinoside",
                "kojibiose",
                "lactose",
                "lactose 6'-phosphate",
                "lactulose",
                "laminarabiose",
                "leucrose",
                "maltose",
                "maltose phosphate",
                "maltulose",
                "mannobiose",
                "melibionic acid",
                "melibiose",
                "nigerose",
                "rutinose",
                "sambubiose",
                "sophorose",
                "sophoroside",
                "sucrose",
                "sucrose 6(G)-phosphate",
                "trehalose",
                "trehalose phosphate",
                "turanose",
                "xylobiose"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (Suc)",
            "Lactose (Lac)",
            "Maltose (Mal)",
            "Trehalose (Tre)",
            "Cellobiose (Cel)",
            "Gentiobiose (Gen)",
            "Melibiose (Mel)",
            "Turanose (Tur)",
            "Nigerose (Nig)",
            "Isomaltose (Iso)",
            "Kojibiose (Koj)",
            "Laminaribiose (Lam)",
            "Sophorose (Sop)",
            "Primeverose (Pri)",
            "Xylobiose (Xyl)",
            "Laminaribinose (Lbi)",
            "Mannobiose (Man)",
            "Leucrose (Leu)",
            "Rutinose (Rut)",
            "Palatinose (Pal)",
            "Isomaltulose (Iml)",
            "Lactulose (Lct)",
            "Maltulose (Mlt)",
            "Lactosucrose (Lcs)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (Suc)",
                "Lactose (Lac)",
                "Maltose (Mal)",
                "Trehalose (Tre)",
                "Cellobiose (Cel)",
                "Gentiobiose (Gen)",
                "Melibiose (Mel)",
                "Turanose (Tur)",
                "Nigerose (Nig)",
                "Isomaltose (Iso)",
                "Kojibiose (Koj)",
                "Sophorose (Sop)",
                "Primeverose (Pri)",
                "Xylobiose (Xyl)",
                "Mannobiose (Man)",
                "Leucrose (Leu)",
                "Rutinose (Rut)",
                "Lactulose (Lct)",
                "Maltulose (Mlt)"
            ],
            "mismatches": [
                "Laminaribiose (Lam)",
                "Laminaribinose (Lbi)",
                "Palatinose (Pal)",
                "Isomaltulose (Iml)",
                "Lactosucrose (Lcs)"
            ],
            "true_referents": [
                "1-O-acetyl-beta-maltose",
                "2-phenylethyl beta-primeveroside",
                "3-ketosucrose",
                "6-hydroxyluteolin 7-O-laminaribioside",
                "D-maltobiono-1,5-lactone",
                "alpha,beta-trehalose",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "alpha-mannobiose",
                "benzyl beta-primeveroside",
                "beta-D-Gal-(1->4)-beta-D-Xyl-OBn",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-mannobiose",
                "beta-melibiose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "formononetin 7-O-rutinoside",
                "galactobiose",
                "gentiobiose",
                "inulobiose",
                "isomaltose",
                "isorhamnetin-3-O-rutinoside",
                "kojibiose",
                "lactose",
                "lactose 6'-phosphate",
                "lactulose",
                "laminarabiose",
                "leucrose",
                "maltose",
                "maltose phosphate",
                "maltulose",
                "mannobiose",
                "melibionic acid",
                "melibiose",
                "nigerose",
                "primeverose",
                "rutinose",
                "sambubiose",
                "sophorose",
                "sophoroside",
                "sucrose",
                "sucrose 6(G)-phosphate",
                "trehalose",
                "trehalose phosphate",
                "turanose",
                "xylobiose"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Lactose (Lac)",
            "Maltose (Mal)",
            "Sucrose (Suc)",
            "Trehalose (Tre)",
            "Cellobiose (Cel)",
            "Isomaltose (Iso)",
            "Turanose (Tur)",
            "Melibiose (Mel)",
            "Lactulose (Lacu)",
            "Maltulose (Maltu)",
            "Palatinose (Pal)",
            "Isomaltulose (IsoM)",
            "Nigerose (Nig)",
            "Raffinose (Raf)",
            "Stachyose (Sta)",
            "Verbascose (Ver)",
            "Gentiobiose (Gen)",
            "N-Acetylglucosamine (GlcNAc)",
            "N-Acetylgalactosamine (GalNAc)",
            "Lacto-N-biose (LNB)",
            "Lacto-N-neotetraose (LNnT)",
            "Glycogen (Gly)",
            "Chitin (Chi)",
            "Hyaluronic acid (HA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Lactose (Lac)",
                "Maltose (Mal)",
                "Sucrose (Suc)",
                "Trehalose (Tre)",
                "Cellobiose (Cel)",
                "Isomaltose (Iso)",
                "Turanose (Tur)",
                "Melibiose (Mel)",
                "Lactulose (Lacu)",
                "Maltulose (Maltu)",
                "Nigerose (Nig)",
                "Stachyose (Sta)",
                "Verbascose (Ver)",
                "Gentiobiose (Gen)"
            ],
            "mismatches": [
                "Palatinose (Pal)",
                "Isomaltulose (IsoM)",
                "Raffinose (Raf)",
                "N-Acetylglucosamine (GlcNAc)",
                "N-Acetylgalactosamine (GalNAc)",
                "Lacto-N-biose (LNB)",
                "Lacto-N-neotetraose (LNnT)",
                "Glycogen (Gly)",
                "Chitin (Chi)",
                "Hyaluronic acid (HA)"
            ],
            "true_referents": [
                "(2S)-poncirin",
                "1,6-kestotetraose",
                "1-O-acetyl-beta-maltose",
                "3-(4-deoxy-beta-D-gluc-4-enosyluronic acid)-N-acetyl-D-glucosamine",
                "4-nitrophenyl beta-lactoside",
                "6,6-kestotetraose",
                "6-kestotriose",
                "N-acetyl-beta-D-glucosaminyl-(1->4)-D-glucosamine",
                "N-acetyl-beta-D-glucosaminyl-(1->4)-alpha-D-glucosamine",
                "N-acetyllactosamine",
                "alpha,beta-trehalose",
                "alpha-D-galactosyl-N-acetyllactosamine",
                "alpha-L-rhamnopyranosyl-(1->2)-beta-D-glucopyranose",
                "alpha-cellobiose",
                "alpha-lactose",
                "beta-D-Gal-(1->3)-alpha-D-GalNAc",
                "beta-D-Galf-(1->5)-D-Galf",
                "beta-D-Galp-(1->3)-L-Araf",
                "beta-D-glucosaminyl-(1->4)-D-glucosamine",
                "beta-D-glucosaminyl-(1->4)-beta-D-glucosamine",
                "beta-L-arabinofuranosyl-(1->2)-beta-L-arabinofuranose",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "cellobiose",
                "chitobioses",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "galactobiose",
                "gentiobiose",
                "glycosylfructose",
                "glycosylglucose",
                "glycosylxylose",
                "inulobiose",
                "isomaltose",
                "lactobionic acid",
                "lactose",
                "lactose 6'-phosphate",
                "lactulose",
                "laminarabiose",
                "maltose",
                "maltose phosphate",
                "maltulose",
                "melibionic acid",
                "melibiose",
                "nigerose",
                "rutinose",
                "stachyose",
                "streptobiosamine",
                "sucrose",
                "sucrose 6(G)-phosphate",
                "trehalose",
                "trehalose phosphate",
                "turanose",
                "verbascose",
                "vicianose"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            "Sucrose (C12H22O11)",
            "Lactose (C12H22O11)",
            "Maltose (C12H22O11)",
            "Trehalose (C12H22O11)",
            "Isomaltose (C12H22O11)",
            "Gentiobiose (C14H26O12)",
            "Cellobiose (C12H22O11)",
            "Kojibiose (C12H22O11)",
            "Nigeroza (C12H22O11)",
            "Melibiose (C13H24O12)",
            "Sophorose (C13H24O12)",
            "Raffinose (C18H32O16)",
            " Umbelliferose (C18H32O16)",
            "Planteose (C20H36O18)",
            "Stachyose (C24H42O23)",
            "Verbascose (C26H44O26)",
            "Globoside (C26H44O26)",
            "Fucosyllactose (C19H34O16)",
            "Lactulose (C12H22O11)",
            "Maltitol (C12H24O11)",
            "Isomaltulose (C12H24O11)",
            "Trehalulose (C12H24O11)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sucrose (C12H22O11)",
                "Lactose (C12H22O11)",
                "Maltose (C12H22O11)",
                "Trehalose (C12H22O11)",
                "Isomaltose (C12H22O11)",
                "Gentiobiose (C14H26O12)",
                "Cellobiose (C12H22O11)",
                "Kojibiose (C12H22O11)",
                "Melibiose (C13H24O12)",
                "Sophorose (C13H24O12)",
                "Stachyose (C24H42O23)",
                "Verbascose (C26H44O26)",
                "Lactulose (C12H22O11)",
                "Trehalulose (C12H24O11)"
            ],
            "mismatches": [
                "Nigeroza (C12H22O11)",
                "Raffinose (C18H32O16)",
                "Umbelliferose (C18H32O16)",
                "Planteose (C20H36O18)",
                "Globoside (C26H44O26)",
                "Fucosyllactose (C19H34O16)",
                "Maltitol (C12H24O11)",
                "Isomaltulose (C12H24O11)"
            ],
            "true_referents": [
                "1-O-acetyl-alpha-maltose",
                "1-O-acetyl-beta-maltose",
                "2'-fucosyllactose",
                "2,2'-difucosyllactose",
                "2-O-(4-O-beta-D-galactopyranosyl-beta-D-glucopyranosyl)-glycerol",
                "2-O-(long-chain fatty acyl)-alpha,alpha-trehalose",
                "3-fucosyllactose",
                "6-O-alpha-D-glucopyranosyl-D-fructofuranose",
                "6-O-alpha-D-glucopyranosyl-beta-D-fructofuranose",
                "6-phospho-beta-D-glucosyl-(1->4)-D-glucose",
                "7-O-[alpha-L-rhamnosyl-(1->2)-beta-D-glucosyl]isoorientin",
                "7-O-methylvitexin 2''-O-alpha-L-rhamnoside",
                "7-O-methylvitexin 2''-O-beta-L-rhamnoside",
                "D-glucopyranosyl-(1->4)-aldehydo-D-mannose",
                "alpha-cellobiose",
                "alpha-lactose",
                "beta-D-apiofuranosyl-(1->6)-D-glucopyranose",
                "beta-D-fructofuranosyl 6-O-octanoyl-alpha-D-glucopyranoside",
                "beta-cellobiose",
                "beta-lactose",
                "beta-maltose",
                "beta-melibiose",
                "cellobiose",
                "cyclobis-(1->6)-alpha-nigerosyl",
                "genistin 7-O-gentiobioside",
                "gentiobiose",
                "gentiobiose octaacetate",
                "glochierioside A",
                "glochierioside B",
                "glycosyl glycoside",
                "irilone-4'-O-[beta-D-glucopyranosyl-(1->6)-beta-D-glucopyranoside]",
                "isomaltose",
                "kojibiose",
                "lactose phosphate",
                "lactulose",
                "maltose",
                "maltulose",
                "melibiose",
                "nigerose",
                "sambubiose",
                "sesaminol 2-O-beta-D-gentiobioside",
                "sophorose",
                "sophoroside",
                "stachyose",
                "sucrose",
                "sucrose 6(G)-phosphate",
                "trehalose",
                "trehalulose",
                "verbascose"
            ],
            "TP": 14,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "disaccharide",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "sucrose (C12H22O11)",
                "Canonical name": "sucrose"
            },
            {
                "Referent": "lactose (C12H22O11)",
                "Canonical name": "lactose"
            },
            {
                "Referent": "maltose (C12H22O11)",
                "Canonical name": "maltose"
            },
            {
                "Referent": "cellobiose (C12H22O11)",
                "Canonical name": "cellobiose"
            },
            {
                "Referent": "trehalose (C12H22O11)",
                "Canonical name": "trehalose"
            },
            {
                "Referent": "melezitose (C12H22O11)",
                "Canonical name": "melezitose"
            },
            {
                "Referent": "isomaltose (C12H22O11)",
                "Canonical name": "isomaltose"
            },
            {
                "Referent": "nystose (C12H22O11)",
                "Canonical name": "nystose"
            },
            {
                "Referent": "psicose (C12H22O11)",
                "Canonical name": "psicose"
            },
            {
                "Referent": "gentobiose (C12H22O11)",
                "Canonical name": "gentobiose"
            },
            {
                "Referent": "galactobiose (C12H22O11)",
                "Canonical name": "galactobiose"
            },
            {
                "Referent": "altrose (C12H22O11)",
                "Canonical name": "altrose"
            },
            {
                "Referent": "idose (C12H22O11)",
                "Canonical name": "idose"
            },
            {
                "Referent": "tagatose (C12H22O11)",
                "Canonical name": "tagatose"
            },
            {
                "Referent": "gentiobiose (C12H22O11)",
                "Canonical name": "gentiobiose"
            },
            {
                "Referent": "allose (C12H22O11)",
                "Canonical name": "allose"
            },
            {
                "Referent": "gulose (C12H22O11)",
                "Canonical name": "gulose"
            },
            {
                "Referent": "aldohexose (C12H22O11)",
                "Canonical name": "aldohexose"
            },
            {
                "Referent": "galactofuranose (C12H22O11)",
                "Canonical name": "galactofuranose"
            },
            {
                "Referent": "glucose (C6H12O6)",
                "Canonical name": "glucose"
            },
            {
                "Referent": "fructose (C6H12O6)",
                "Canonical name": "fructose"
            },
            {
                "Referent": "ribose (C5H10O5)",
                "Canonical name": "ribose"
            },
            {
                "Referent": "xylose (C5H10O5)",
                "Canonical name": "xylose"
            },
            {
                "Referent": "arabinose (C5H10O5)",
                "Canonical name": "arabinose"
            },
            {
                "Referent": "lyxose (C5H10O5)",
                "Canonical name": "lyxose"
            },
            {
                "Referent": "erythrose (C4H8O4)",
                "Canonical name": "erythrose"
            },
            {
                "Referent": "threose (C4H8O4)",
                "Canonical name": "threose"
            },
            {
                "Referent": "erythrulose (C4H8O4)",
                "Canonical name": "erythrulose"
            },
            {
                "Referent": "ribulose (C5H10O5)",
                "Canonical name": "ribulose"
            },
            {
                "Referent": "xylulose (C5H10O5)",
                "Canonical name": "xylulose"
            },
            {
                "Referent": "arabinitol (C5H10O5)",
                "Canonical name": "arabinitol"
            },
            {
                "Referent": "lyxulose (C5H10O5)",
                "Canonical name": "lyxulose"
            },
            {
                "Referent": "ribonate (C5H10O5)",
                "Canonical name": "ribonate"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "sucrose",
                "cellobiose",
                "trehalose",
                "isomaltose"
            ],
            "mismatches": [
                "lactose",
                "maltose",
                "melezitose",
                "nystose",
                "psicose",
                "gentobiose",
                "galactobiose",
                "altrose",
                "idose",
                "tagatose",
                "gentiobiose",
                "allose",
                "gulose",
                "aldohexose",
                "galactofuranose",
                "glucose",
                "fructose",
                "ribose",
                "xylose",
                "arabinose",
                "lyxose",
                "erythrose",
                "threose",
                "erythrulose",
                "ribulose",
                "xylulose",
                "arabinitol",
                "lyxulose",
                "ribonate"
            ],
            "true_referents": [
                "1,3,3'-tri-O-methyl-4alpha-mannobiose",
                "1-O-acetyl-beta-maltose",
                "2-(beta-D-glucopyranosyluronic acid)-D-glucuronic acid",
                "2-O-(long-chain fatty acyl)-alpha,alpha-trehalose",
                "2-O-[6-O-octanoyl-alpha-D-glucosyl-(1->6)-alpha-D-glucosyl]-D-glyceric acid",
                "2-O-[alpha-D-glucosyl-(1->6)-alpha-D-glucosyl]-D-glyceric acid",
                "3-O-beta-D-galactopyranosyl-D-arabinose",
                "6-phospho-beta-D-glucosyl-(1->4)-D-glucose",
                "7-O-[alpha-L-rhamnosyl-(1->2)-beta-D-glucosyl]chrysoeriol",
                "D-galactosyl-(1->4)-beta-D-glucose",
                "D-ribitol-(5-P-2)-alpha-D-Galp-(1->3)-alpha-D-Glcp",
                "alpha-D-GlcpA-(1->6)-alpha-D-GlcpO[CH2]5NH2",
                "beta-D-GlcA3S-(1->3)-beta-D-Gal-OC6H4-4-[CH2]2NHC(O)[CH2]3SH",
                "beta-D-galactosyl-(1->4)-beta-D-glucoside",
                "beta-L-arabinofuranosyl-(1->2)-beta-L-arabinofuranose",
                "bis-D-fructose 2',1:2,1'-dianhydride",
                "cellobiose",
                "glycosylglucose",
                "isomaltose",
                "methyl-8-\\{[beta-D-galactosyl-(1->4)-N-acetyl-beta-D-glucosaminyl]oxy\\}nonanoate",
                "sucrose",
                "trehalose"
            ],
            "TP": 4,
            "FP": 29,
            "FN": 0
        }
    }
]