[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "fructose (Fru)",
            "psicose",
            "tagatose",
            "allofructose",
            "sedoheptulose",
            "erythrulose",
            "alloerythrulose",
            "sebastiose",
            "lactalose",
            "dulcitol",
            "allose",
            "galactose (Gal)",
            "glucose (Glc)",
            "mannose (Man)",
            "talose",
            "ribulose",
            "xylulose",
            "sorbose",
            "lyxose",
            "ribose (Rib)",
            "arabinose (Ara)",
            "xylose (Xyl)",
            "lyxofructose",
            "fructofuranose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fructose (Fru)",
                "psicose",
                "tagatose",
                "sedoheptulose",
                "erythrulose",
                "ribulose",
                "xylulose",
                "sorbose",
                "fructofuranose"
            ],
            "mismatches": [
                "allofructose",
                "alloerythrulose",
                "sebastiose",
                "lactalose",
                "dulcitol",
                "allose",
                "galactose (Gal)",
                "glucose (Glc)",
                "mannose (Man)",
                "talose",
                "lyxose",
                "ribose (Rib)",
                "arabinose (Ara)",
                "xylose (Xyl)",
                "lyxofructose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-keto-beta-D-galactose",
                "D-erythrulose",
                "D-fructopyranose",
                "D-fructose",
                "D-manno-heptulose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructofuranose",
                "L-fructopyranose",
                "L-fructose",
                "L-glycero-L-galacto-octuluronate",
                "L-psicose",
                "L-rhamnulose",
                "L-ribulose",
                "L-sorbose",
                "L-tagatose",
                "L-xylulose",
                "alpha-D-manno-heptulopyranose",
                "alpha-D-ribulose",
                "erythrulose",
                "fructofuranose",
                "fructopyranose",
                "fructose",
                "manno-heptulose",
                "psicose",
                "ribulose",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose derivative",
                "sorbofuranose",
                "sorbose",
                "tagatose",
                "xylulose"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Fructose (Fru)",
            "Sorbose (Sor)",
            "Tagatose (Tag)",
            "Xylulose (Xyl)",
            "Ribulose (Rib)",
            "Ribulofuranose (Ribf)",
            "Xylulofuranose (Xylf)",
            "Ribose (Ribose)",
            "Ribofuranose (Ribf)",
            "Arabinose (Ara)",
            "Arabinofuranose (Araf)",
            "Lyxose (Lyx)",
            "Lyxofuranose (Lyxf)",
            "Lyxopyranose (Lyxp)",
            "Lyxarabinose (LyxAra)",
            "Lyxopyranose (Lyxp)",
            "Psicose (Psi)",
            "Erythrulose (Ery)",
            "Erythrulose (Ery)",
            "Tagatofuranose (Tagf)",
            "Gulose (Gul)",
            "Idose (Ido)",
            "Gulofuranose (Gulf)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dihydroxyacetone",
                "Fructose",
                "Sorbose",
                "Tagatose",
                "Xylulose",
                "Ribulose",
                "Psicose",
                "Erythrulose"
            ],
            "mismatches": [
                "Ribulofuranose",
                "Xylulofuranose",
                "Ribose",
                "Ribofuranose",
                "Arabinose",
                "Arabinofuranose",
                "Lyxose",
                "Lyxofuranose",
                "Lyxopyranose",
                "Lyxarabinose",
                "Tagatofuranose",
                "Gulose",
                "Idose",
                "Gulofuranose"
            ],
            "true_referents": [
                "3-dehydro-D-guloside",
                "3-deoxyglucosone",
                "D-erythrulose",
                "D-fructose",
                "D-psicose",
                "D-ribulose 1-phosphate",
                "D-tagatofuranose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-sorbofuranose",
                "L-sorbopyranose",
                "L-tagatose",
                "L-xylulose",
                "alpha-D-ribulose",
                "alpha-D-tagatofuranose",
                "alpha-L-fructofuranose",
                "beta-D-tagatofuranose",
                "beta-L-sorbofuranose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructofuranose",
                "fructose",
                "psicose",
                "ribulose",
                "ribulose 1-phosphate",
                "ribulose phosphate",
                "sorbose",
                "sorbose derivative",
                "tagatose",
                "xylulose"
            ],
            "TP": 8,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Erythrulose",
            "Fructose",
            "Fructose-1,6-bisphosphate",
            "Fructose-6-phosphate",
            "Fructose-1-phosphate",
            "Psicose",
            "Sorbose",
            "Tagatose",
            "Xylulose",
            "D-Xylulose 5-phosphate",
            "Ribulose",
            "Ribulose 5-phosphate",
            "Sedoheptulose 7-phosphate",
            "Sedoheptulose 1,7-bisphosphate",
            "Allulose",
            "Turanose",
            "Allose",
            "Altrose",
            "Gulose",
            "Idose",
            "Talose",
            "Lyxose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dihydroxyacetone",
                "Dihydroxyacetone phosphate",
                "Erythrulose",
                "Fructose",
                "Psicose",
                "Sorbose",
                "Tagatose",
                "Xylulose",
                "D-Xylulose 5-phosphate",
                "Ribulose",
                "Ribulose 5-phosphate",
                "Sedoheptulose 7-phosphate",
                "Sedoheptulose 1,7-bisphosphate"
            ],
            "mismatches": [
                "Fructose-1,6-bisphosphate",
                "Fructose-6-phosphate",
                "Fructose-1-phosphate",
                "Allulose",
                "Turanose",
                "Allose",
                "Altrose",
                "Gulose",
                "Idose",
                "Talose",
                "Lyxose"
            ],
            "true_referents": [
                "3-deoxyglucosone",
                "D-erythrulose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-sorbose",
                "D-tagatopyranose",
                "D-tagatose",
                "D-xylulose",
                "D-xylulose 5-phosphate",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-sorbose",
                "L-tagatose",
                "L-xylulose",
                "alpha-D-tagatopyranose",
                "alpha-chrysopine",
                "beta-D-fructofuranose 1,6-bisphosphate",
                "beta-D-tagatopyranose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructose",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-fructose",
                "ketotetrose",
                "manno-heptulose",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "sedoheptulose 1,7-bisphosphate",
                "sedoheptulose 7-phosphate",
                "sorbose",
                "tagatose",
                "xylulose"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Ribulose (Rul)",
            "Xylulose (Xyl)",
            "Psicose (Psy)",
            "Fructose (Fru)",
            "Sorbose (Sor)",
            "Tagatose (Tag)",
            "Mannose (Man) - as ketoisomer",
            "Sedoheptulose (Sed)",
            "Sedoheptululose (S7U)",
            "D-Threose (Thr)",
            "D-Erythrulose (Ery)",
            "D-Arabulose (Ara)",
            "D-Lyxulose (Lyu)",
            "2-Deoxyribose-5-ketone (2D5K)",
            "3-Deoxy-D-manno-octulosonic acid (KDO)",
            "Fructopyranose (Fpy)",
            "Fuculose (Fuc)",
            "Rhamulose (Rha)",
            "Allulose (Alu)",
            "Galactulose (Gal)",
            "Talose (Tal) - as ketoisomer",
            "Gulose (Gul) - as ketoisomer",
            "Ascorbic acid (Asc) - as metabolic intermediate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dihydroxyacetone",
                "Ribulose",
                "Xylulose",
                "Psicose",
                "Fructose",
                "Sorbose",
                "Tagatose",
                "Sedoheptulose",
                "D-Erythrulose",
                "Fructopyranose"
            ],
            "mismatches": [
                "Mannose (Man) - as ketoisomer",
                "Sedoheptululose (S7U)",
                "D-Threose (Thr)",
                "D-Arabulose (Ara)",
                "D-Lyxulose (Lyu)",
                "2-Deoxyribose-5-ketone (2D5K)",
                "3-Deoxy-D-manno-octulosonic acid (KDO)",
                "Fuculose (Fuc)",
                "Rhamulose (Rha)",
                "Allulose (Alu)",
                "Galactulose (Gal)",
                "Talose (Tal) - as ketoisomer",
                "Gulose (Gul) - as ketoisomer",
                "Ascorbic acid (Asc) - as metabolic intermediate"
            ],
            "true_referents": [
                "(3S,4R)-Ketose 1-phosphate",
                "1-(2-carboxyphenylamino)-1-deoxy-D-ribulose 5-phosphate",
                "3-dehydro-D-guloside",
                "3-deoxy-keto-D-fructose",
                "3-keto-beta-D-galactose",
                "D-erythrulose",
                "D-fructopyranose",
                "D-fructose",
                "D-keto-manno-heptulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-fuculose 1-phosphate",
                "L-psicose",
                "L-rhamnulose",
                "L-ribulose",
                "L-tagatose",
                "L-xylulose",
                "alpha-D-ribulose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructopyranose",
                "fructose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-sorbose",
                "keto-L-tagatose",
                "ketotriose derivative",
                "manno-heptulose",
                "psicose",
                "ribulose",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose 7-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "sorbose derivative",
                "tagatose",
                "xylulose"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Ribulose",
            "Xylulose",
            "Psicose",
            "Fructose",
            "Sorbose",
            "Tagatose",
            "Erythrulose",
            "Sedheptulose",
            "Sedoheptulose",
            "Allulose",
            "D-Arabinulose",
            "L-Arabinulose",
            "D-Threulose",
            "L-Threulose",
            "D-Erythrulose",
            "L-Erythrulose",
            "D-Lyxulose",
            "L-Lyxulose",
            "D-Mannoketose",
            "L-Mannoketose",
            "D-Galactoketose",
            "L-Galactoketose",
            "D-Altrokestose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose",
                "Xylulose",
                "Psicose",
                "Fructose",
                "Sorbose",
                "Tagatose",
                "Erythrulose",
                "Sedoheptulose",
                "Dihydroxyacetone",
                "D-Erythrulose",
                "L-Erythrulose"
            ],
            "mismatches": [
                "Dihydroxyacetone (DHA)",
                "Sedheptulose",
                "Allulose",
                "D-Arabinulose",
                "L-Arabinulose",
                "D-Threulose",
                "L-Threulose",
                "D-Lyxulose",
                "L-Lyxulose",
                "D-Mannoketose",
                "L-Mannoketose",
                "D-Galactoketose",
                "L-Galactoketose",
                "D-Altrokestose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-keto-beta-D-galactose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructopyranose",
                "D-keto-manno-heptulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructofuranose",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-sorbose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "alpha-D-ribulose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-fructose",
                "psicose",
                "ribulose",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose 1-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "tagatose",
                "xylulose"
            ],
            "TP": 11,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-fructose (D-Fru)",
            "D-psicose (D-Psi)",
            "D-sorbose (D-Sor)",
            "D-tagatose (D-Tag)",
            "D-ribulose (D-Rib)",
            "D-xylulose (D-Xyl)",
            "D-erythrulose (D-Ery)",
            "D-arabinulose (D-Arab)",
            "D-lyxose (D-Lyx)",
            "D-idoheptulose (D-Ido)",
            "D-2-keto-D-glucose (2-Keto-D-Glc)",
            "D-fructofuranose (D-FruF)",
            "D-fructopyranose (D-FruP)",
            "D-2-deoxy-D-ribulose (2-Deoxy-D-Rib)",
            "D-2-keto-D-galactose (2-Keto-D-Gal)",
            "D-2-keto-D-mannose (2-Keto-D-Man)",
            "D-2-keto-D-lyxose (2-Keto-D-Lyx)",
            "L-fructose (L-Fru)",
            "L-psicose (L-Psi)",
            "L-sorbose (L-Sor)",
            "D-3-keto-D-hexose (3-Keto-D-Hex)",
            "D-3-deoxy-D-arabino-hexose (3-Deoxy-D-Arab-Hex)",
            "D-3-keto-D-galactose (3-Keto-D-Gal)",
            "D-3-keto-D-mannose (3-Keto-D-Man)",
            "D-3-keto-D-lyxose (3-Keto-D-Lyx)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "D-ribulose",
                "D-xylulose",
                "D-erythrulose",
                "D-fructofuranose",
                "D-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "D-3-keto-D-galactose"
            ],
            "mismatches": [
                "D-arabinulose",
                "D-lyxose",
                "D-idoheptulose",
                "D-2-keto-D-glucose",
                "D-2-deoxy-D-ribulose",
                "D-2-keto-D-galactose",
                "D-2-keto-D-mannose",
                "D-2-keto-D-lyxose",
                "D-3-keto-D-hexose",
                "D-3-deoxy-D-arabino-hexose",
                "D-3-keto-D-mannose",
                "D-3-keto-D-lyxose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-deoxy-keto-D-fructose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructofuranose",
                "D-fructopyranose",
                "D-fructopyranose 1-phosphate",
                "D-fructose",
                "D-keto-manno-heptulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-xylulose",
                "alpha-D-fructofuranose",
                "alpha-D-fructopyranose",
                "alpha-D-ribulose",
                "keto-D-fructose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "ketohexose",
                "psicose"
            ],
            "TP": 13,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Fructose (C6H12O6)",
            "Ribulose (C5H10O5)",
            "Xylulose (C5H10O5)",
            "Seduheptulose (C7H14O7)",
            "Erythrulose (C4H8O4)",
            "Psicose (C6H12O6)",
            "Sorbose (C6H12O6)",
            "Tagatose (C6H12O6)",
            "Ribose (C5H10O5)",
            "Arabinose (C5H10O5)",
            "Xylose (C5H10O5)",
            "Lyxose (C5H10O5)",
            "Allose (C6H12O6)",
            "Altrose (C6H12O6)",
            "Gulose (C6H12O6)",
            "Idose (C6H12O6)",
            "Galactose (C6H12O6)",
            "Talose (C6H12O6)",
            "Threose (C4H8O4)",
            "Erythrose (C4H8O4)",
            "Threofuranose (C4H8O4)",
            "Erythrofuranose (C4H8O4)",
            "Ketohexose (C6H12O6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dihydroxyacetone",
                "Fructose",
                "Ribulose",
                "Xylulose",
                "Erythrulose",
                "Psicose",
                "Sorbose",
                "Ketohexose"
            ],
            "mismatches": [
                "Seduheptulose",
                "Tagatose",
                "Ribose",
                "Arabinose",
                "Xylose",
                "Lyxose",
                "Allose",
                "Altrose",
                "Gulose",
                "Idose",
                "Galactose",
                "Talose",
                "Threose",
                "Erythrose",
                "Threofuranose",
                "Erythrofuranose"
            ],
            "true_referents": [
                "(3S,4R)-Ketose 1-phosphate",
                "1,5-anhydro-D-erythro-hex-1-en-3-ulose",
                "1-deoxy-D-altro-heptulose 7-phosphate",
                "1-deoxy-D-xylulose",
                "1-deoxy-D-xylulose 5-phosphate",
                "3-dehydro-D-guloside",
                "3-keto-beta-D-galactose",
                "6-deoxy-beta-L-fructofuranose",
                "D-erythrulose",
                "D-fructose",
                "D-psicose",
                "D-psicose 6-phosphate(2-)",
                "D-ribulose 5-phosphate",
                "D-sorbofuranose",
                "D-sorbose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-glycero-L-galacto-octuluronate",
                "L-psicose",
                "L-sorbofuranose",
                "L-sorbose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructose",
                "keto-L-tagatose",
                "ketoheptose",
                "ketohexose",
                "ketohexose derivative",
                "ketose derivative",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "sedoheptulose",
                "sedoheptulose 7-phosphate",
                "sorbose",
                "tagatose derivative",
                "xylulose"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose",
            "Ribose",
            "Glyceraldehyde",
            "Dihydroxyacetone",
            "Xylulose",
            "Erythrulose",
            "Sedoheptulose",
            "Psicose",
            "Sorbose",
            "Tagatose",
            "Allose",
            "Altrose",
            "Galactose",
            "Glucose",
            "Mannose",
            "Talose",
            "Idose",
            "Arabinose",
            "Lyxose",
            "Xylose",
            "Rhamnose",
            "Fucose",
            "Deoxyribose",
            "Threose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose",
                "Dihydroxyacetone",
                "Xylulose",
                "Erythrulose",
                "Sedoheptulose",
                "Psicose",
                "Sorbose",
                "Tagatose"
            ],
            "mismatches": [
                "Ribose",
                "Glyceraldehyde",
                "Allose",
                "Altrose",
                "Galactose",
                "Glucose",
                "Mannose",
                "Talose",
                "Idose",
                "Arabinose",
                "Lyxose",
                "Xylose",
                "Rhamnose",
                "Fucose",
                "Deoxyribose",
                "Threose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-dehydropyranoside",
                "3-deoxyglucosone",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "6-deoxy-beta-L-fructofuranose",
                "D-erythrulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-rhamnulose",
                "L-rhamnulose 1-phosphate",
                "L-ribulose",
                "L-sorbose",
                "L-tagatose",
                "L-xylulose",
                "alpha-chrysopine",
                "deoxyketohexose",
                "deoxyribulose phosphate",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructopyranose",
                "fructose",
                "keto-fructose",
                "ketotetrose",
                "manno-heptulose",
                "psicose",
                "ribulose",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose derivative",
                "sorbopyranose",
                "sorbose",
                "tagatose",
                "xylulose"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "D-Fructose",
            "L-Fructose",
            "Sorbose (Sor)",
            "D-Sorbose",
            "L-Sorbose",
            "Ribulose (Rub)",
            "D-Ribulose",
            "L-Ribulose",
            "Xylulose (Xyl)",
            "D-Xylulose",
            "L-Xylulose",
            "Erythrulose",
            "D-Erythrulose",
            "L-Erythrulose",
            "Sedoheptulose (S7P)",
            "D-Sedoheptulose",
            "L-Sedoheptulose",
            "Allulose (Psicose)",
            "D-Allulose",
            "L-Allulose",
            "Tagatose (Tag)",
            "D-Tagatose",
            "L-Tagatose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Fructose",
                "L-Fructose",
                "D-Sorbose",
                "L-Sorbose",
                "D-Ribulose",
                "L-Ribulose",
                "D-Xylulose",
                "L-Xylulose",
                "Erythrulose",
                "D-Erythrulose",
                "L-Erythrulose",
                "Sedoheptulose",
                "D-Tagatose",
                "L-Tagatose",
                "D-Psicose",
                "L-Psicose"
            ],
            "mismatches": [
                "Fructose (Fru)",
                "Sorbose (Sor)",
                "Ribulose (Rub)",
                "Xylulose (Xyl)",
                "Sedoheptulose (S7P)",
                "D-Sedoheptulose",
                "L-Sedoheptulose",
                "Allulose (Psicose)",
                "D-Allulose",
                "L-Allulose",
                "Tagatose (Tag)"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "erythrulose",
                "fructose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "ribulose",
                "ribulose derivative",
                "sedoheptulose",
                "sedoheptulose 7-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "sorbose derivative",
                "tagatose",
                "xylulose"
            ],
            "TP": 16,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fruit sugar)",
            "Ribulose",
            "Psicose (D-Allulose)",
            "Tagatose",
            "Sorbose",
            "Xylulose",
            "Sedoheptulose",
            "Erythrulose",
            "Dihydroxyacetone (DHA)",
            "Allulose",
            "Turanose",
            "Maltulose",
            "Lactulose",
            "Glucosone",
            "Fructose-6-phosphate (F6P)",
            "Ribulose-5-phosphate (Ru5P)",
            "Xylulose-5-phosphate (X5P)",
            "Sedoheptulose-7-phosphate (S7P)",
            "Tagatose-6-phosphate",
            "Psicose-6-phosphate",
            "Sorbose-1-phosphate",
            "Erythrulose-4-phosphate",
            "Dihydroxyacetone phosphate (DHAP)",
            "Fructose-1,6-bisphosphate (F1,6BP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose",
                "Psicose (D-Allulose)",
                "Tagatose",
                "Sorbose",
                "Xylulose",
                "Sedoheptulose",
                "Erythrulose",
                "Dihydroxyacetone (DHA)",
                "Fructose-6-phosphate (F6P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Xylulose-5-phosphate (X5P)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Tagatose-6-phosphate",
                "Psicose-6-phosphate",
                "Sorbose-1-phosphate",
                "Erythrulose-4-phosphate",
                "Dihydroxyacetone phosphate (DHAP)"
            ],
            "mismatches": [
                "Fructose (Fruit sugar)",
                "Allulose",
                "Turanose",
                "Maltulose",
                "Lactulose",
                "Glucosone",
                "Fructose-1,6-bisphosphate (F1,6BP)"
            ],
            "true_referents": [
                "3-dehydro-D-glucoside",
                "D-erythrulose",
                "D-erythrulose 4-phosphate",
                "D-fructose",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-sorbose",
                "D-tagatopyranose",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "D-xylulose",
                "L-erythrulose",
                "L-erythrulose 4-phosphate",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-sorbose",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "L-xylulose",
                "alpha-D-tagatopyranose",
                "beta-D-fructofuranose 1,6-bisphosphate",
                "beta-D-tagatopyranose",
                "beta-L-fructopyranose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "erythrulose 1-phosphate",
                "fructopyranose",
                "fructose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-L-tagatose 6-phosphate",
                "levoglucosenone",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose 7-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "sorbose 1-phosphate",
                "tagatose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Erythrulose",
            "D-Fructose",
            "L-Fructose",
            "D-Psicose",
            "L-Psicose",
            "D-Sorbose",
            "L-Sorbose",
            "D-Tagatose",
            "L-Tagatose",
            "D-Ribulose",
            "L-Ribulose",
            "D-Xylulose",
            "L-Xylulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dihydroxyacetone",
                "Erythrulose",
                "D-Fructose",
                "L-Fructose",
                "D-Psicose",
                "L-Psicose",
                "D-Sorbose",
                "L-Sorbose",
                "D-Tagatose",
                "L-Tagatose",
                "D-Ribulose",
                "L-Ribulose",
                "D-Xylulose",
                "L-Xylulose"
            ],
            "mismatches": [],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-fructose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "ribulose",
                "sorbose",
                "tagatose"
            ],
            "TP": 14,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Psicose",
            "Sorbose",
            "Tagatose",
            "Sedoheptulose",
            "Ribulose",
            "Xylulose",
            "Erythrulose",
            "Galactulose",
            "Acedosamines",
            "Talose",
            "Fuculose",
            "Heptulose",
            "Allose",
            "Idose",
            "Gulose",
            "Altrose",
            "Mannose",
            "Glucose",
            "Arabinose",
            "Xylose",
            "Lyxose",
            "Ribose",
            "Deoxyribose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Psicose",
                "Sorbose",
                "Tagatose",
                "Sedoheptulose",
                "Ribulose",
                "Xylulose",
                "Erythrulose"
            ],
            "mismatches": [
                "Fructose (Fru)",
                "Galactulose",
                "Acedosamines",
                "Talose",
                "Fuculose",
                "Heptulose",
                "Allose",
                "Idose",
                "Gulose",
                "Altrose",
                "Mannose",
                "Glucose",
                "Arabinose",
                "Xylose",
                "Lyxose",
                "Ribose",
                "Deoxyribose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-deoxyglucosone",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-fructose",
                "D-manno-heptulose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-fuculose 1-phosphate",
                "L-psicose",
                "L-ribulose",
                "L-sorbose",
                "L-tagatose",
                "L-xylulose",
                "alpha-chrysopine",
                "deoxyribulose phosphate",
                "erythrulose",
                "fructopyranose",
                "fructose",
                "ketotetrose",
                "manno-heptulose",
                "psicosamine",
                "psicosamine 3-phosphate",
                "psicose",
                "ribulose",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose derivative",
                "sorbose",
                "tagatose",
                "xylulose"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Glucose (Glc)",
            "Fructose (Fru)",
            "Ribulose (Rbu)",
            "Xylulose (Xul)",
            "Erythrulose (Ery)",
            "Dihydroxyacetone (DHA)",
            "Pyruvaldehyde (PVA)",
            "Sedoheptulose (Sed)",
            "Psicose (Psi)",
            "Sorbose (Sor)",
            "Tagatose (Tag)",
            "Deoxyribose (dRib)",
            "Erythrulose-5-phosphate (Ery5P)",
            "Ribulose-5-phosphate (Ru5P)",
            "Fructose-1,6-bisphosphate (F1,6BP)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Sedoheptulose-7-phosphate (S7P)",
            "Erythrulose-4-phosphate (Ery4P)",
            "Xylulose-5-phosphate (X5P)",
            "Fructose-6-phosphate (F6P)",
            "Glucose-6-phosphate (G6P)",
            "Pyruvate (Pyr)",
            "Acetyldihydroxyacetone (AcDHA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Ribulose (Rbu)",
                "Xylulose (Xul)",
                "Erythrulose (Ery)",
                "Dihydroxyacetone (DHA)",
                "Sedoheptulose (Sed)",
                "Psicose (Psi)",
                "Sorbose (Sor)",
                "Tagatose (Tag)",
                "Ribulose-5-phosphate (Ru5P)",
                "Fructose-1,6-bisphosphate (F1,6BP)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Erythrulose-4-phosphate (Ery4P)",
                "Xylulose-5-phosphate (X5P)",
                "Fructose-6-phosphate (F6P)"
            ],
            "mismatches": [
                "Glucose (Glc)",
                "Pyruvaldehyde (PVA)",
                "Deoxyribose (dRib)",
                "Erythrulose-5-phosphate (Ery5P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Glucose-6-phosphate (G6P)",
                "Pyruvate (Pyr)",
                "Acetyldihydroxyacetone (AcDHA)"
            ],
            "true_referents": [
                "1-dodecylglycerone 3-phosphate",
                "1-hydroxy-3-propoxyacetone",
                "D-erythrulose",
                "D-erythrulose 4-phosphate",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-erythrulose 1-phosphate",
                "L-erythrulose 4-phosphate",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-tagatose",
                "L-xylulose",
                "beta-D-fructofuranose 1,6-bisphosphate",
                "deoxyribulose phosphate",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "erythrulose 1-phosphate",
                "fructose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose 1,6-bisphosphate",
                "psicose",
                "ribulose",
                "ribulose 1-phosphate",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose 7-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "sorbose derivative",
                "tagatose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Xylulose",
            "Ribulose",
            "Erythrulose",
            "Psicose",
            "Talose",
            "D-Tagatose",
            "D-Ribulose",
            "D-Xylulose",
            "D-Erythrulose",
            "D-Psicose",
            "D-Talose",
            "L-Ribulose",
            "L-Xylulose",
            "L-Erythrulose",
            "L-Psicose",
            "L-Talose",
            "D-Ribose",
            "D-Ribulose-5-phosphate",
            "Ribulose-5-phosphate",
            "Xylulose-5-phosphate",
            "Erythrulose-5-phosphate",
            "Psicose-5-phosphate",
            "Talose-5-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Xylulose",
                "Ribulose",
                "Erythrulose",
                "Psicose",
                "D-Tagatose",
                "D-Ribulose",
                "D-Xylulose",
                "D-Erythrulose",
                "D-Psicose",
                "L-Ribulose",
                "L-Xylulose",
                "L-Erythrulose",
                "L-Psicose",
                "Ribulose-5-phosphate",
                "Xylulose-5-phosphate"
            ],
            "mismatches": [
                "Ribose (Rib)",
                "Talose",
                "D-Talose",
                "L-Talose",
                "D-Ribose",
                "Erythrulose-5-phosphate",
                "Psicose-5-phosphate",
                "Talose-5-phosphate"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-ribulose 5-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-erythrulose 1-phosphate",
                "L-psicose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "erythrulose",
                "erythrulose 1-phosphate",
                "keto-D-tagatose",
                "manno-heptulose",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "sorbose",
                "tagatose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 15,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Sorbose",
            "Tagatose",
            "Psicose",
            "Fructosamine",
            "D-Fructose-1-phosphate",
            "L-Sorbose",
            "D-Tagatose",
            "D-Psicose",
            "1-Deoxy-fructose",
            "2-C-Hexose",
            "D-Fructose-6-phosphate",
            "Ketose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Sorbose",
                "Tagatose",
                "Psicose",
                "D-Fructose-1-phosphate",
                "L-Sorbose",
                "D-Tagatose",
                "D-Psicose",
                "D-Fructose-6-phosphate"
            ],
            "mismatches": [
                "Fructosamine",
                "1-Deoxy-fructose",
                "2-C-Hexose",
                "Ketose"
            ],
            "true_referents": [
                "5-dehydro-D-fructose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "fructopyranose",
                "fructosamine 3-phosphate",
                "fructose",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-tagatose",
                "ketohexose",
                "ketooctose",
                "ketose derivative",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 9,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "glucose (C6H12O6)",
                "Canonical name": "Glucose"
            },
            {
                "Referent": "fructose (C6H12O6)",
                "Canonical name": "Fructose"
            },
            {
                "Referent": "ribose (C5H10O5)",
                "Canonical name": "Ribose"
            },
            {
                "Referent": "arabinose (C5H10O5)",
                "Canonical name": "Arabinose"
            },
            {
                "Referent": "xylose (C5H10O5)",
                "Canonical name": "Xylose"
            },
            {
                "Referent": "galactose (C6H12O6)",
                "Canonical name": "Galactose"
            },
            {
                "Referent": "mannose (C6H12O6)",
                "Canonical name": "Mannose"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Glucose",
                "Fructose",
                "Ribose",
                "Arabinose",
                "Xylose",
                "Galactose",
                "Mannose"
            ],
            "true_referents": [
                "(3S,4R)-Ketose 1-phosphate",
                "1-deoxy-D-xylulose",
                "1-deoxy-D-xylulose 5-phosphate",
                "3-deoxy-keto-D-fructose",
                "3-keto-beta-D-galactose",
                "D-fructose",
                "D-psicose 6-phosphate(2-)",
                "L-glycero-L-galacto-octuluronate",
                "N-(1-deoxy-1-fructosyl)phenylalanine",
                "deoxyribulose phosphate"
            ],
            "TP": 0,
            "FP": 7,
            "FN": 0
        }
    }
]