[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-fructose (Fru)",
            "D-psicose (Psi)",
            "D-tagatose (Tag)",
            "D-sorbose (Sor)",
            "D-arabinose (Ara)",
            "D-ribose (Rib)",
            "D-xylose (Xyl)",
            "L-fructose",
            "L-psicose",
            "L-sorbose",
            "L-arabinose",
            "L-ribose",
            "L-xylose",
            "D-lyxose (Lyx)",
            "D-ribulose (Ribu)",
            "D-xylulose (Xylu)",
            "D-ribulose-5-phosphate (R5P)",
            "D-xylulose-5-phosphate (X5P)",
            "D-fructose-6-phosphate (F6P)",
            "D-fructose-1,6-bisphosphate (FBP)",
            "D-ribulose-1,5-bisphosphate (RuBP)",
            "D-xylulose-3-phosphate (X3P)",
            "D-sorbose-6-phosphate",
            "D-psicose-3-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-psicose",
                "D-tagatose",
                "D-sorbose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "D-ribulose",
                "D-xylulose",
                "D-ribulose-5-phosphate",
                "D-xylulose-5-phosphate",
                "D-fructose-6-phosphate",
                "D-fructose-1,6-bisphosphate",
                "D-ribulose-1,5-bisphosphate"
            ],
            "mismatches": [
                "D-arabinose",
                "D-ribose",
                "D-xylose",
                "L-arabinose",
                "L-ribose",
                "L-xylose",
                "D-lyxose",
                "D-xylulose-3-phosphate",
                "D-sorbose-6-phosphate",
                "D-psicose-3-phosphate"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "1-deoxy-D-xylulose 5-phosphate",
                "5-dehydro-D-fructose",
                "D-fructofuranose 1,6-bisphosphate",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-ribulose",
                "D-ribulose 1,5-bisphosphate",
                "D-ribulose 1-phosphate",
                "D-ribulose 5-phosphate",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-xylulose",
                "D-xylulose 5-phosphate",
                "L-fructose",
                "L-psicose",
                "L-rhamnulose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "alpha-D-ribulose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-tagatose",
                "keto-L-fructose",
                "psicose",
                "xylulose phosphate"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Ribulose (Rib)",
            "Ribulose 5-phosphate",
            "Ribose 5-phosphate",
            "Xylulose (Xyl)",
            "Lyxose",
            "Sorbose",
            "Psicose",
            "Fructose 1-phosphate",
            "Fructose 6-phosphate",
            "Fructose 1,6-bisphosphate",
            "Ribulose 1,5-bisphosphate",
            "Ribulose 5-phosphate 3-epimerase",
            "Xylulose 5-phosphate",
            "Fructokinase",
            "Fructose-bisphosphate aldolase",
            "Phosphofructokinase",
            "Phosphoketolase",
            "Ketohexokinase",
            "Hexokinase",
            "Fructose-1,6-bisphosphatase",
            "Fructose-1,6-diphosphatase",
            "Sedoheptulose 7-phosphate",
            "Sedoheptulose 1,7-bisphosphatase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose 5-phosphate",
                "Xylulose 5-phosphate",
                "Sorbose",
                "Psicose",
                "Ribulose 1,5-bisphosphate",
                "Sedoheptulose 7-phosphate"
            ],
            "mismatches": [
                "Fructose (Fru)",
                "Ribulose (Rib)",
                "Ribose 5-phosphate",
                "Xylulose (Xyl)",
                "Lyxose",
                "Fructose 1-phosphate",
                "Fructose 6-phosphate",
                "Fructose 1,6-bisphosphate",
                "Ribulose 5-phosphate 3-epimerase",
                "Fructokinase",
                "Fructose-bisphosphate aldolase",
                "Phosphofructokinase",
                "Phosphoketolase",
                "Ketohexokinase",
                "Hexokinase",
                "Fructose-1,6-bisphosphatase",
                "Fructose-1,6-diphosphatase",
                "Sedoheptulose 1,7-bisphosphatase"
            ],
            "true_referents": [
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-ribulose 1,5-bisphosphate",
                "D-sorbose",
                "D-xylulose",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-sorbose",
                "L-xylulose",
                "beta-D-fructofuranose",
                "beta-D-fructofuranose 1,6-bisphosphate",
                "deoxyketohexose",
                "deoxyketohexose phosphate",
                "fructofuranose",
                "fructopyranose",
                "fructose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-tagatose 1,6-bisphosphate",
                "ketoheptose",
                "ketohexose",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "sedoheptulose 1,7-bisphosphate",
                "sedoheptulose 7-phosphate",
                "sorbose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (Fru)",
            "D-Psicose (Psi)",
            "D-Sorbose (Sor)",
            "L-Sorbose",
            "D-Tagatose (Tag)",
            "D-Xylulose",
            "L-Xylulose",
            "D-Ribulose",
            "D-Erythrulose",
            "L-Erythrulose",
            "D-Glyceraldehyde",
            "L-Glyceraldehyde",
            "D-Sedoheptulose",
            "D-Mannoheptulose",
            "D-Altroheptulose",
            "D-Glucosamine",
            "D-Galactosamine",
            "D-Fructosamine",
            "D-Sedoheptulosamine",
            "D-Mannoheptulosamine",
            "D-Altroheptulosamine",
            "D-Fructose-1-phosphate",
            "D-Fructose-6-phosphate",
            "D-Fructose-1,6-bisphosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Psicose (Psi)",
                "D-Sorbose (Sor)",
                "L-Sorbose",
                "D-Tagatose (Tag)",
                "D-Xylulose",
                "L-Xylulose",
                "D-Ribulose",
                "D-Erythrulose",
                "L-Erythrulose",
                "D-Mannoheptulose",
                "D-Fructose-1-phosphate",
                "D-Fructose-6-phosphate",
                "D-Fructose-1,6-bisphosphate"
            ],
            "mismatches": [
                "D-Fructose (Fru)",
                "D-Glyceraldehyde",
                "L-Glyceraldehyde",
                "D-Sedoheptulose",
                "D-Altroheptulose",
                "D-Glucosamine",
                "D-Galactosamine",
                "D-Fructosamine",
                "D-Sedoheptulosamine",
                "D-Mannoheptulosamine",
                "D-Altroheptulosamine"
            ],
            "true_referents": [
                "1-deoxy-D-altro-heptulose 7-phosphate",
                "1-deoxy-D-xylulose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructofuranose 1,6-bisphosphate",
                "D-fructopyranose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-keto-manno-heptulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbopyranose",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-psicose",
                "L-sorbopyranose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "beta-D-sedoheptulopyranose",
                "erythrulose",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-tagatose",
                "manno-heptulose",
                "psicose",
                "sedoheptulose",
                "sedoheptulose derivative"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (Fru)",
            "D-Ribulose (Rib)",
            "D-Xylulose (Xyl)",
            "D-Psicose (Psy)",
            "D-Sorbose (Sor)",
            "D-Tagatose (Tag)",
            "D-Altrose (Alt)",
            "D-Galactulose (Gal)",
            "D-Mannulose (Man)",
            "2-Deoxy-D-ribose (dRib)",
            "Sedoheptulose (Sed)",
            "D-Fuculose (Fuc)",
            "D-Rhamnose (Rha)",
            "Ascorbic Acid (Asc)",
            "Trehalose (Tre)",
            "Sucrose (Suc)",
            "Lactose (Lac)",
            "Maltose (Mal)",
            "Cellobiose (Cel)",
            "Ketopimose",
            "Tartaric acid (Tar)",
            "Dihydroxyacetone (DHA)",
            "Erythrulose (Ery)",
            "Ketose-6-phosphate (K6P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-ribulose",
                "D-xylulose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "sedoheptulose",
                "dihydroxyacetone",
                "erythrulose"
            ],
            "mismatches": [
                "D-altrose",
                "D-galactulose",
                "D-mannulose",
                "2-deoxy-D-ribose",
                "D-fuculose",
                "D-rhamnose",
                "ascorbic acid",
                "trehalose",
                "sucrose",
                "lactose",
                "maltose",
                "cellobiose",
                "ketopimose",
                "tartaric acid",
                "ketose-6-phosphate"
            ],
            "true_referents": [
                "(3S,4R)-Ketose 1-phosphate",
                "1-deoxy-D-altro-heptulose 7-phosphate",
                "1-deoxy-D-xylulose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-fructofuranose",
                "D-fructofuranuronic acid",
                "D-fructose",
                "D-keto-manno-heptulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1,5-bisphosphate",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-fuculose",
                "L-fuculose 1-phosphate",
                "L-psicose",
                "L-rhamnulose",
                "L-rhamnulose 1-phosphate",
                "L-ribulose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "beta-D-fructopyranose",
                "beta-L-fructofuranose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructopyranose",
                "fructose",
                "keto-D-fructose 6-phosphate",
                "keto-D-tagatose",
                "ketopentose",
                "ketopentose derivative",
                "manno-heptulose",
                "psicose",
                "ribulose",
                "sedoheptulose",
                "sedoheptulose derivative",
                "sorbose",
                "xylulose"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Ribulose",
            "Xylulose",
            "Psicose",
            "Fructose",
            "Sorbose",
            "Tagatose",
            "Erythrulose",
            "Sedheptulose",
            "Psyllose",
            "D-erythro-2-hexulose",
            "D-erythro-3-hexulose",
            "D-threo-2-pentulose",
            "L-threo-2-pentulose",
            "D-arabino-2-hexulose",
            "L-arabino-2-hexulose",
            "D-ribo-2-hexulose",
            "L-ribo-2-hexulose",
            "D-xylulose",
            "L-xylulose",
            "D-erythro-2-pentulose",
            "L-erythro-2-pentulose",
            "D-erythro-3-pentulose",
            "L-erythro-3-pentulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose",
                "Xylulose",
                "Psicose",
                "Fructose",
                "Sorbose",
                "Tagatose",
                "Erythrulose",
                "Dihydroxyacetone",
                "D-xylulose",
                "L-xylulose"
            ],
            "mismatches": [
                "Sedheptulose",
                "Psyllose",
                "D-erythro-2-hexulose",
                "D-erythro-3-hexulose",
                "D-threo-2-pentulose",
                "L-threo-2-pentulose",
                "D-arabino-2-hexulose",
                "L-arabino-2-hexulose",
                "D-ribo-2-hexulose",
                "L-ribo-2-hexulose",
                "D-erythro-2-pentulose",
                "L-erythro-2-pentulose",
                "D-erythro-3-pentulose",
                "L-erythro-3-pentulose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructopyranose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-erythrulose 1-phosphate",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-rhamnulose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructose",
                "keto-fructose",
                "psicose",
                "ribulose",
                "ribulose phosphate",
                "sedoheptulose 1-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "tagatose",
                "xylulose"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (C6H12O6)",
            "D-Ribulose (C5H10O5)",
            "D-Xylulose (C5H10O5)",
            "D-Sedoheptulose (C7H14O7)",
            "D-Erythrulose (C4H8O4)",
            "D-Tagatose (C6H12O6)",
            "D-Psicose (C6H12O6)",
            "D-Sorbose (C6H12O6)",
            "D-Mannose (C6H12O6)",
            "D-Altrose (C6H12O6)",
            "D-Galactose (C6H12O6)",
            "D-Glucose (C6H12O6)",
            "D-Fructosamine (C6H12N2O3)",
            "D-2-Keto-D-gluconic acid (C6H10O7)",
            "D-2-Keto-D-galactonic acid (C6H10O7)",
            "D-3-Keto-L-gulonic acid (C6H10O7)",
            "D-3-Keto-D-galactose (C6H10O6)",
            "D-3-Keto-D-glucose (C6H10O6)",
            "D-5-Keto-D-fructose (C6H10O6)",
            "D-6-Keto-D-fructose (C6H10O6)",
            "D-7-Keto-D-fructose (C6H10O6)",
            "D-Methylglyoxal (C3H4O2)",
            "D-Dihydroxyacetone (C3H6O3)",
            "D-Glyceraldehyde (C3H6O3)",
            "D-Dihydroxyacetone phosphate (C3H7O6P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-ribulose",
                "D-xylulose",
                "D-erythrulose",
                "D-tagatose",
                "D-psicose",
                "D-sorbose",
                "3-keto-beta-D-galactose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate"
            ],
            "mismatches": [
                "D-sedoheptulose",
                "D-mannose",
                "D-altrose",
                "D-galactose",
                "D-glucose",
                "D-fructosamine",
                "D-2-keto-D-gluconic acid",
                "D-2-keto-D-galactonic acid",
                "D-3-keto-L-gulonic acid",
                "D-3-keto-D-glucose",
                "D-5-keto-D-fructose",
                "D-6-keto-D-fructose",
                "D-7-keto-D-fructose",
                "D-methylglyoxal",
                "D-glyceraldehyde"
            ],
            "true_referents": [
                "1-deoxy-D-altro-heptulose 7-phosphate",
                "1-deoxy-D-xylulose",
                "1-deoxy-D-xylulose 5-phosphate",
                "1-hydroxy-3-propoxyacetone",
                "3-dehydro-D-glucoside",
                "3-dehydro-D-guloside",
                "3-deoxy-keto-D-fructose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-fructopyranose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-manno-heptulose",
                "D-psicose",
                "D-psicose 6-phosphate(2-)",
                "D-ribulose",
                "D-ribulose 5-phosphate",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose 1,6-bisphosphate",
                "D-tagatose 6-phosphate",
                "D-xylulose",
                "L-erythrulose",
                "L-glycero-L-galacto-octuluronate",
                "L-psicose",
                "N-(1-deoxy-1-fructosyl)phenylalanine",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "keto-D-fructose",
                "keto-D-fructose 6-phosphate",
                "keto-D-sorbose",
                "sedoheptulose 7-phosphate",
                "sorbose"
            ],
            "TP": 10,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Ribulose (C5H10O5)",
            "Xylulose (C5H10O5)",
            "Fructose (C6H12O6)",
            "Sorbose (C6H12O6)",
            "Tagatose (C6H12O6)",
            "Psicose (C6H12O6)",
            "Fructose-6-phosphate (F6P)",
            "Fructose-1,6-bisphosphate (FBP)",
            "Sedoheptulose (C7H14O7)",
            "Sedoheptulose-7-phosphate (S7P)",
            "Erythrulose (C4H8O4)",
            "3-Deoxy-D-erythro-hexos-2-ulose (C6H10O5)",
            "D-erythro-Hex-2-ulose (C6H10O5)",
            "3-Keto-L-gulonic acid (C6H10O7)",
            "2-Keto-L-gulonic acid (C6H10O7)",
            "2-Keto-3-deoxygluconate (C6H10O6)",
            "2-Keto-3-deoxygalactonate (C6H10O6)",
            "2-Keto-3-deoxygluconic acid (C6H10O7)",
            "2-Keto-3-deoxy-D-gluconic acid (C6H10O7)",
            "2-Keto-L-gulonic acid (C6H10O7)",
            "2-Keto-3-deoxy-D-lyxo-heptulosonic acid (C7H12O7)",
            "2-Keto-3-deoxy-D-gluconate (C6H10O6)",
            "2-Keto-3-deoxy-D-galactonate (C6H10O6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "dihydroxyacetone",
                "ribulose",
                "xylulose",
                "fructose",
                "sorbose",
                "psicose",
                "sedoheptulose 7-phosphate",
                "erythrulose"
            ],
            "mismatches": [
                "tagatose",
                "fructose-6-phosphate",
                "fructose-1,6-bisphosphate",
                "sedoheptulose",
                "3-deoxy-D-erythro-hexos-2-ulose",
                "D-erythro-Hex-2-ulose",
                "3-keto-L-gulonic acid",
                "2-keto-L-gulonic acid",
                "2-keto-3-deoxygluconate",
                "2-keto-3-deoxygalactonate",
                "2-keto-3-deoxygluconic acid",
                "2-keto-3-deoxy-D-gluconic acid",
                "2-keto-3-deoxy-D-lyxo-heptulosonic acid",
                "2-keto-3-deoxy-D-gluconate",
                "2-keto-3-deoxy-D-galactonate"
            ],
            "true_referents": [
                "1,5-anhydro-D-erythro-hex-1-en-3-ulose",
                "1-deoxy-D-altro-heptulose 7-phosphate",
                "1-deoxy-D-xylulose",
                "3-deoxy-keto-D-fructose",
                "3-keto-beta-D-galactose",
                "D-erythrulose",
                "D-fructofuranose 1,6-bisphosphate",
                "D-fructose",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-sorbose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-glycero-L-galacto-octuluronate",
                "L-psicose",
                "L-sorbose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "fructose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-fructuronic acid",
                "keto-D-sorbose",
                "keto-L-sorbose",
                "keto-L-tagatose",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "sedoheptulose 7-phosphate",
                "sorbose",
                "tagatose derivative",
                "xylulose"
            ],
            "TP": 8,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-fructose",
            "D-tagatose",
            "D-sorbose",
            "D-psicose",
            "D-ribose",
            "D-lyxose",
            "D-xylose",
            "D-arabinose",
            "D-ribulose",
            "D-xylulose",
            "D-galactose",
            "D-talose",
            "D-lyxulose",
            "D-erythrulose",
            "D-threose",
            "D-glyceraldehyde",
            "D-erythrose",
            "D-ribose-5-phosphate",
            "D-fructose-6-phosphate",
            "D-fructose-1,6-bisphosphate",
            "D-fructose-2,6-bisphosphate",
            "D-fructose-1-phosphate",
            "D-fructose-6-sulfate",
            "D-fructose-2-sulfate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-tagatose",
                "D-sorbose",
                "D-psicose",
                "D-ribulose",
                "D-xylulose",
                "D-erythrulose",
                "D-fructose-6-phosphate",
                "D-fructose-1,6-bisphosphate",
                "D-fructose-2,6-bisphosphate",
                "D-fructose-1-phosphate"
            ],
            "mismatches": [
                "D-ribose",
                "D-lyxose",
                "D-xylose",
                "D-arabinose",
                "D-galactose",
                "D-talose",
                "D-lyxulose",
                "D-threose",
                "D-glyceraldehyde",
                "D-erythrose",
                "D-ribose-5-phosphate",
                "D-fructose-6-sulfate",
                "D-fructose-2-sulfate"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructofuranose 1,6-bisphosphate",
                "D-fructofuranose 2,6-bisphosphate",
                "D-fructopyranose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-ribulose 5-phosphate",
                "D-sorbopyranose",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-psicose",
                "L-xylulose",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (Fru)",
            "L-Fructose",
            "D-Xylulose (Xyl)",
            "L-Xylulose",
            "D-Ribulose (Rib)",
            "L-Ribulose",
            "D-Psicose (Psi)",
            "L-Psicose",
            "D-Tagatose (Tag)",
            "L-Tagatose",
            "D-Sorbose",
            "L-Sorbose",
            "D-Erythrulose",
            "L-Erythrulose",
            "D-Ketose",
            "L-Ketose",
            "D-Tetrosulose",
            "L-Tetrosulose",
            "D-Pentulose",
            "L-Pentulose",
            "D-Hexulose",
            "L-Hexulose",
            "D-Heptulose",
            "L-Heptulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "L-fructose",
                "D-xylulose",
                "L-xylulose",
                "D-ribulose",
                "L-ribulose",
                "D-psicose",
                "L-psicose",
                "D-tagatose",
                "L-tagatose",
                "D-sorbose",
                "L-sorbose",
                "D-erythrulose",
                "L-erythrulose"
            ],
            "mismatches": [
                "D-ketose",
                "L-ketose",
                "D-tetrosulose",
                "L-tetrosulose",
                "D-pentulose",
                "L-pentulose",
                "D-hexulose",
                "L-hexulose",
                "D-heptulose",
                "L-heptulose"
            ],
            "true_referents": [
                "(3S,4R)-Ketose 1-phosphate",
                "1-deoxy-D-xylulose",
                "5-dehydro-D-fructose",
                "6-deoxy-6-sulfo-D-fructofuranose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructopyranose",
                "D-fructose",
                "D-keto-manno-heptulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "erythrulose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "ketose derivative",
                "manno-heptulose",
                "psicose",
                "ribulose",
                "sorbose",
                "tagatose"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose (D-fructose)",
            "Psicose (D-psicose)",
            "Tagatose (D-tagatose)",
            "Sorbose (L-sorbose)",
            "Ribulose",
            "Xylulose",
            "Sedoheptulose",
            "Erythrulose",
            "Dihydroxyacetone",
            "Allulose",
            "Fructose-1,6-bisphosphate",
            "Ribulose-5-phosphate",
            "Xylulose-5-phosphate",
            "Sedoheptulose-7-phosphate",
            "Fructose-6-phosphate",
            "Tagatose-6-phosphate",
            "Ribulose-1,5-bisphosphate",
            "Erythrulose-4-phosphate",
            "Fructose-1-phosphate",
            "Psicose-6-phosphate",
            "Sorbose-1-phosphate",
            "Dihydroxyacetone phosphate (DHAP)",
            "2-Ketogluconate",
            "5-Keto-D-gluconate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-psicose",
                "D-tagatose",
                "L-sorbose",
                "ribulose",
                "xylulose",
                "sedoheptulose",
                "erythrulose",
                "dihydroxyacetone",
                "ribulose 5-phosphate",
                "xylulose 5-phosphate",
                "sedoheptulose 7-phosphate",
                "D-fructose 6-phosphate",
                "D-tagatose 6-phosphate",
                "D-ribulose 1,5-bisphosphate",
                "D-erythrulose 4-phosphate",
                "D-fructose 1-phosphate",
                "D-psicose 6-phosphate",
                "sorbose 1-phosphate",
                "dihydroxyacetone phosphate"
            ],
            "mismatches": [
                "Allulose",
                "Fructose-1,6-bisphosphate",
                "2-Ketogluconate",
                "5-Keto-D-gluconate"
            ],
            "true_referents": [
                "3-deoxy-keto-D-fructose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 4-phosphate",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-ribulose 1,5-bisphosphate",
                "D-sorbose",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "D-xylulose",
                "L-erythrulose",
                "L-erythrulose 4-phosphate",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-sorbose",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "L-xylulose",
                "beta-D-fructofuranose 1,6-bisphosphate",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "erythrulose 1-phosphate",
                "keto-D-fructose",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-L-fructose",
                "keto-L-tagatose 6-phosphate",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose 7-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "sorbose 1-phosphate",
                "tagatose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-fructose",
            "D-psicose",
            "D-sorbose",
            "D-tagatose",
            "D-xylulose",
            "L-fructose",
            "L-psicose",
            "L-sorbose",
            "L-tagatose",
            "L-ribulose",
            "L-xylulose",
            "D-ribulose",
            "Dihydroxyacetone (DHA)",
            "Erythrulose",
            "D-erythro-pentulose",
            "L-erythro-pentulose",
            "D-threo-pentulose",
            "L-threo-pentulose",
            "Lactulose",
            "Sedoheptulose",
            "Octulose",
            "Nonulose",
            "Deculose",
            "Undeculose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "L-tagatose",
                "L-ribulose",
                "L-xylulose",
                "D-ribulose",
                "Erythrulose",
                "Sedoheptulose"
            ],
            "mismatches": [
                "Dihydroxyacetone (DHA)",
                "D-erythro-pentulose",
                "L-erythro-pentulose",
                "D-threo-pentulose",
                "L-threo-pentulose",
                "Lactulose",
                "Octulose",
                "Nonulose",
                "Deculose",
                "Undeculose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-erythrulose 1-phosphate",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-rhamnulose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "dihydroxyacetone",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "octulose 1-phosphate",
                "octulose 8-phosphate",
                "psicose",
                "ribulose",
                "sedoheptulose",
                "sedoheptulose derivative",
                "sorbose",
                "tagatose",
                "xylulose"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-Ribulose (D-Rbu)",
            "D-Xylulose (D-Xlu)",
            "D-Arabinose (D-Ara)",
            "D-Lyxose (D-Lyx)",
            "D-Xylose (D-Xyl)",
            "D-Ribose (D-Rib)",
            "D-Glucose (D-Glc)",
            "D-Mannose (D-Man)",
            "D-Fructose (D-Fru)",
            "D-Sorbose (D-Sor)",
            "D-Tagatose (D-Tag)",
            "D-Psicose (D-Psi)",
            "D-Allose (D-All)",
            "D-Altrose (D-Alt)",
            "D-Idose (D-Ido)",
            "D-Galactose (D-Gal)",
            "D-Gulose (D-Gul)",
            "D-Talose (D-Tal)",
            "D-Erythrulose (D-Eru)",
            "D-Erythrose (D-Ery)",
            "D-Threose (D-Thr)",
            "D-Glycerose (D-Gly)",
            "D-Erythropentulose (D-Epu)",
            "D-Erythropentulose (D-Epu)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Ribulose",
                "D-Xylulose",
                "D-Fructose",
                "D-Sorbose",
                "D-Tagatose",
                "D-Psicose",
                "D-Erythrulose"
            ],
            "mismatches": [
                "D-Arabinose",
                "D-Lyxose",
                "D-Xylose",
                "D-Ribose",
                "D-Glucose",
                "D-Mannose",
                "D-Allose",
                "D-Altrose",
                "D-Idose",
                "D-Galactose",
                "D-Gulose",
                "D-Talose",
                "D-Erythrose",
                "D-Threose",
                "D-Glycerose",
                "D-Erythropentulose"
            ],
            "true_referents": [
                "1-deoxy-D-altro-heptulose 7-phosphate",
                "1-deoxy-D-xylulose",
                "3-dehydro-D-guloside",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructopyranose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-keto-manno-heptulose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "D-xylulose",
                "L-erythrulose",
                "L-fructose",
                "L-psicose",
                "L-xylulose",
                "alpha-D-manno-heptulopyranose",
                "alpha-D-ribulose",
                "keto-D-tagatose",
                "psicose"
            ],
            "TP": 7,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Sorbose (Sor)",
            "Tagatose (Tag)",
            "Psicose (Psi)",
            "Allulose (All)",
            "Erythrulose (Ery)",
            "Ribulose (Rib)",
            "Xylulose (Xyl)",
            "Sedoheptulose (Sed)",
            "Erythrulose-5-phosphate (Ery-5-P)",
            "Ribulose-5-phosphate (Rib-5-P)",
            "Xylulose-5-phosphate (Xyl-5-P)",
            "Fructose-1-phosphate (Fru-1-P)",
            "Fructose-6-phosphate (Fru-6-P)",
            "Fructose-1,6-bisphosphate (Fru-1,6-P2)",
            "Sedoheptulose-7-phosphate (Sed-7-P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Fructose-2,6-bisphosphate (Fru-2,6-P2)",
            "Fructose-1,2,6-trisphosphate (Fru-1,2,6-P3)",
            "Ribulose-1,5-bisphosphate (Rib-1,5-P2)",
            "Xylulose-5-phosphate (Xyl-5-P)",
            "Sedoheptulose-1,7-bisphosphate (Sed-1,7-P2)",
            "Fructose-6-phosphate,2-kinase/fructose-2,6-bisphosphatase (PFK-2/FBPase-2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Sorbose (Sor)",
                "Tagatose (Tag)",
                "Psicose (Psi)",
                "Erythrulose (Ery)",
                "Ribulose (Rib)",
                "Xylulose (Xyl)",
                "Sedoheptulose (Sed)",
                "Ribulose-5-phosphate (Rib-5-P)",
                "Xylulose-5-phosphate (Xyl-5-P)",
                "Fructose-1-phosphate (Fru-1-P)",
                "Fructose-6-phosphate (Fru-6-P)",
                "Sedoheptulose-7-phosphate (Sed-7-P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Fructose-2,6-bisphosphate (Fru-2,6-P2)",
                "Ribulose-1,5-bisphosphate (Rib-1,5-P2)",
                "Sedoheptulose-1,7-bisphosphate (Sed-1,7-P2)"
            ],
            "mismatches": [
                "Allulose (All)",
                "Erythrulose-5-phosphate (Ery-5-P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Fructose-1,2,6-trisphosphate (Fru-1,2,6-P3)",
                "Fructose-6-phosphate,2-kinase/fructose-2,6-bisphosphatase (PFK-2/FBPase-2)"
            ],
            "true_referents": [
                "1-dodecylglycerone 3-phosphate",
                "D-erythrulose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-ribulose 1,5-bisphosphate",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-erythrulose 1-phosphate",
                "L-fructose",
                "L-psicose",
                "L-ribulose",
                "L-tagatose",
                "L-xylulose",
                "beta-D-fructofuranose 2,6-bisphosphate",
                "dihydroxyacetone phosphate",
                "erythrulose",
                "erythrulose 1-phosphate",
                "fructose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "psicose",
                "ribulose",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "sedoheptulose",
                "sedoheptulose 1,7-bisphosphate",
                "sedoheptulose 7-phosphate",
                "sedoheptulose derivative",
                "sorbose",
                "sorbose derivative",
                "tagatose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 17,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-fructose (Fru)",
            "D-ribose",
            "D-tagatose",
            "D-psicose",
            "D-allulose",
            "D-mannose",
            "D-galactose",
            "D-glucose",
            "D-xylose",
            "D-arabinose",
            "D-lyxose",
            "D-altrose",
            "L-fructose",
            "L-ribose",
            "L-tagatose",
            "L-psicose",
            "L-allulose",
            "L-mannose",
            "L-galactose",
            "L-glucose",
            "L-xylose",
            "L-arabinose",
            "L-lyxose",
            "L-altrose",
            "Fructose-1,6-bisphosphate",
            "Fructose-6-phosphate",
            "Fructose-1-phosphate",
            "Fructose-2,6-bisphosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-tagatose",
                "D-psicose",
                "L-fructose",
                "L-tagatose",
                "L-psicose"
            ],
            "mismatches": [
                "D-ribose",
                "D-allulose",
                "D-mannose",
                "D-galactose",
                "D-glucose",
                "D-xylose",
                "D-arabinose",
                "D-lyxose",
                "D-altrose",
                "L-ribose",
                "L-allulose",
                "L-mannose",
                "L-galactose",
                "L-glucose",
                "L-xylose",
                "L-arabinose",
                "L-lyxose",
                "L-altrose",
                "Fructose-1,6-bisphosphate",
                "Fructose-6-phosphate",
                "Fructose-1-phosphate",
                "Fructose-2,6-bisphosphate"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-fructopyranose",
                "D-fructose",
                "D-manno-heptulose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-rhamnulose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-sorbose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "L-xylulose",
                "alpha-chrysopine",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "tagatose"
            ],
            "TP": 6,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "ketose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (Fru)",
            "D-Erythrulose",
            "D-Ribulose",
            "D-Xylulose",
            "L-Fructose",
            "D-Tagatose",
            "D-Sorbose",
            "L-Sorbose",
            "D-Psicose",
            "L-Psicose",
            "D-Talose",
            "L-Talose",
            "D-Idose",
            "L-Idose",
            "D-Gulose",
            "L-Gulose",
            "D-Galactose",
            "L-Galactose",
            "D-Allose",
            "L-Allose",
            "D-Altrose",
            "L-Altrose",
            "D-Arabinese",
            "L-Arabinese",
            "D-Lyxose",
            "L-Lyxose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Fructose",
                "D-Erythrulose",
                "D-Ribulose",
                "D-Xylulose",
                "L-Fructose",
                "D-Sorbose",
                "L-Sorbose",
                "D-Psicose",
                "L-Psicose"
            ],
            "mismatches": [
                "D-Tagatose",
                "D-Talose",
                "L-Talose",
                "D-Idose",
                "L-Idose",
                "D-Gulose",
                "L-Gulose",
                "D-Galactose",
                "L-Galactose",
                "D-Allose",
                "L-Allose",
                "D-Altrose",
                "L-Altrose",
                "D-Arabinese",
                "L-Arabinese",
                "D-Lyxose",
                "L-Lyxose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "3-dehydro-D-guloside",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-erythrulose",
                "D-erythrulose 1-phosphate",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-sorbose",
                "D-tagatose",
                "D-xylulose",
                "L-erythrulose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-rhamnulose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylulose",
                "alpha-chrysopine",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 9,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "ketose",
        "domain": "chemistry",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]