[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Tagatose",
            "Psicose",
            "Sorbose",
            "Ribulose",
            "Xylulose",
            "Erythrulose",
            "Gulose",
            "Sedoheptulose",
            "Aldohexose (not a ketohexose, included for completeness of hexose compounds)",
            "Galactulose",
            "Altriose",
            "Talose",
            "Ribulose-5-phosphate",
            "Xylulose-5-phosphate",
            "Erythrulose-4-phosphate",
            "Gulose-6-phosphate",
            "Fructose-6-phosphate",
            "Fructose-1-phosphate",
            "Tagatose-6-phosphate",
            "Psicose-3-phosphate",
            "Sorbose-4-phosphate",
            "Galactulose-6-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Tagatose",
                "Psicose",
                "Sorbose",
                "Fructose-6-phosphate",
                "Fructose-1-phosphate",
                "Tagatose-6-phosphate"
            ],
            "mismatches": [
                "Ribulose",
                "Xylulose",
                "Erythrulose",
                "Gulose",
                "Sedoheptulose",
                "Aldohexose (not a ketohexose, included for completeness of hexose compounds)",
                "Galactulose",
                "Altriose",
                "Talose",
                "Ribulose-5-phosphate",
                "Xylulose-5-phosphate",
                "Erythrulose-4-phosphate",
                "Gulose-6-phosphate",
                "Psicose-3-phosphate",
                "Sorbose-4-phosphate",
                "Galactulose-6-phosphate"
            ],
            "true_referents": [
                "1,5-anhydro-D-erythro-hex-1-en-3-ulose",
                "3-keto-beta-D-galactose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-sorbose",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "L-fructose",
                "L-psicose",
                "L-rhamnulose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "L-xylo-3-hexulose",
                "alpha-D-fructofuranose 1,6-bisphosphate(4-)",
                "beta-D-fructofuranose 1,6-bisphosphate(4-)",
                "beta-D-fructofuranose 6-phosphate",
                "beta-D-fructopyranose 1-phosphate",
                "deoxyketohexose",
                "fructofuranose",
                "fructopyranose",
                "fructose",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-tagatose",
                "keto-L-tagatose",
                "keto-L-tagatose 6-phosphate",
                "ketohexose derivative",
                "psicosamine 3-phosphate",
                "psicose",
                "sorbose",
                "sorbose 1-phosphate",
                "tagatose"
            ],
            "TP": 7,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Sorbose",
            "Tagatose",
            "Psicose",
            "Raffinose",
            "Phloretose",
            "Sedoheptulose",
            "Fructosamine",
            "Levulose",
            "Dihydroxyacetone fructose",
            "D-Fructose",
            "D-Psicose",
            "D-Tagatose",
            "D-Sorbose",
            "D-Psicose 6-phosphate",
            "D-Tagatose 6-phosphate",
            "D-Sorbose 6-phosphate",
            "D-Fructose 6-phosphate",
            "D-Fructose 1,6-bisphosphate",
            "D-Fructose 1-phosphate",
            "Fructosylglycine",
            "Fructosylamino acids",
            "Fructosyl valine",
            "Fructosyllysine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sorbose",
                "Tagatose",
                "Psicose",
                "D-Fructose",
                "D-Psicose",
                "D-Tagatose",
                "D-Sorbose",
                "D-Psicose 6-phosphate",
                "D-Tagatose 6-phosphate",
                "D-Fructose 6-phosphate",
                "D-Fructose 1,6-bisphosphate",
                "D-Fructose 1-phosphate"
            ],
            "mismatches": [
                "Fructose (Fru)",
                "Raffinose",
                "Phloretose",
                "Sedoheptulose",
                "Fructosamine",
                "Levulose",
                "Dihydroxyacetone fructose",
                "D-Sorbose 6-phosphate",
                "Fructosylglycine",
                "Fructosylamino acids",
                "Fructosyl valine",
                "Fructosyllysine"
            ],
            "true_referents": [
                "5-dehydro-D-fructose",
                "D-fructofuranose 1,6-bisphosphate",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-sorbose",
                "D-sorbose 1,6-bisphosphate",
                "D-sorbose 1-phosphate",
                "D-tagatofuranose",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "L-fructofuranose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "N-(1-deoxy-1-fructosyl)phenylalanine",
                "alpha-D-fructuronic acid",
                "alpha-D-tagatofuranose",
                "beta-D-tagatofuranose",
                "fructofuranose",
                "fructopyranose",
                "fructosamine 3-phosphate",
                "fructose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-D-tagatose 6-phosphate",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (Fru)",
            "D-Psicose",
            "D-Sorbose",
            "D-Tagatose",
            "L-Sorbose",
            "L-Tagatose",
            "D-Fructofuranose",
            "D-Fructopyranose",
            "D-Psicopyranose",
            "L-Fructose",
            "L-Psicose",
            "D-Sorbopyranose",
            "L-Sorbopyranose",
            "D-Tagatopyranose",
            "L-Tagatopyranose",
            "D-Fructose 1-phosphate",
            "D-Fructose 6-phosphate",
            "D-Fructose 1,6-bisphosphate",
            "D-Fructose 2,6-bisphosphate",
            "D-Fructose 2-phosphate",
            "D-Fructose 2,3-bisphosphate",
            "D-Fructose 2,4-bisphosphate",
            "D-Fructose 3-phosphate",
            "D-Fructose 5-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Psicose",
                "D-Sorbose",
                "D-Tagatose",
                "L-Sorbose",
                "L-Tagatose",
                "D-Fructofuranose",
                "D-Fructopyranose",
                "L-Fructose",
                "L-Psicose",
                "D-Tagatopyranose",
                "D-Fructose 1-phosphate",
                "D-Fructose 6-phosphate",
                "D-Fructose 1,6-bisphosphate",
                "D-Fructose 2,6-bisphosphate"
            ],
            "mismatches": [
                "D-Fructose (Fru)",
                "D-Psicopyranose",
                "D-Sorbopyranose",
                "L-Sorbopyranose",
                "L-Tagatopyranose",
                "D-Fructose 2-phosphate",
                "D-Fructose 2,3-bisphosphate",
                "D-Fructose 2,4-bisphosphate",
                "D-Fructose 3-phosphate",
                "D-Fructose 5-phosphate"
            ],
            "true_referents": [
                "5-dehydro-D-fructose",
                "D-fructofuranose",
                "D-fructofuranose 1,6-bisphosphate",
                "D-fructofuranose 2,6-bisphosphate",
                "D-fructopyranose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-sorbopyranose",
                "D-sorbose",
                "D-tagatopyranose",
                "D-tagatose",
                "L-fructose",
                "L-psicose",
                "L-sorbopyranose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "alpha-D-tagatopyranose",
                "beta-D-fructofuranose",
                "beta-D-fructopyranose",
                "beta-D-sorbopyranose",
                "beta-D-tagatopyranose",
                "beta-L-sorbopyranose",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-fructose 6-phosphate",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-Glucose (D-Glc)",
            "D-Fructose (D-Fru)",
            "D-Galactose (D-Gal)",
            "D-Mannose (D-Man)",
            "D-Sorbose (D-Sor)",
            "D-Tagatose (D-Tag)",
            "L-Glucose (L-Glc)",
            "L-Fructose (L-Fru)",
            "L-Galactose (L-Gal)",
            "L-Mannose (L-Man)",
            "D-Allose (D-Alo)",
            "D-Altrose (D-Alt)",
            "D-Gulose (D-Gul)",
            "D-Idose (D-Ido)",
            "D-Talose (D-Tal)",
            "D-Psicose (D-Psi)",
            "2-Deoxy-D-Glucose (2-DG)",
            "3-Deoxy-D-Glucose (3-DG)",
            "6-Deoxy-D-Glucose (6-DG)",
            "D-Glucose-6-Phosphate (Glc-6-P)",
            "D-Fructose-6-Phosphate (Fru-6-P)",
            "D-Galactose-1-Phosphate (Gal-1-P)",
            "D-Mannose-6-Phosphate (Man-6-P)",
            "Sorbitol (D-Glucitol)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Fructose",
                "D-Sorbose",
                "D-Tagatose",
                "L-Fructose",
                "D-Psicose",
                "D-Fructose-6-Phosphate"
            ],
            "mismatches": [
                "D-Glucose (D-Glc)",
                "D-Galactose (D-Gal)",
                "D-Mannose (D-Man)",
                "L-Glucose (L-Glc)",
                "L-Galactose (L-Gal)",
                "L-Mannose (L-Man)",
                "D-Allose (D-Alo)",
                "D-Altrose (D-Alt)",
                "D-Gulose (D-Gul)",
                "D-Idose (D-Ido)",
                "D-Talose (D-Tal)",
                "2-Deoxy-D-Glucose (2-DG)",
                "3-Deoxy-D-Glucose (3-DG)",
                "6-Deoxy-D-Glucose (6-DG)",
                "D-Glucose-6-Phosphate (Glc-6-P)",
                "D-Galactose-1-Phosphate (Gal-1-P)",
                "D-Mannose-6-Phosphate (Man-6-P)",
                "Sorbitol (D-Glucitol)"
            ],
            "true_referents": [
                "3-dehydro-D-glucoside",
                "3-dehydro-D-guloside",
                "3-deoxyglucosone",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "6-deoxy-beta-L-fructofuranose",
                "D-fructofuranose",
                "D-fructofuranose 6-phosphate",
                "D-fructopyranose",
                "D-fructopyranose 1-phosphate",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "deoxyketohexose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "psicose",
                "sorbose"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Sorbose",
            "Tagatose",
            "Psicose",
            "D-Allulose",
            "D-Fructose",
            "D-Sorbose",
            "D-Tagatose",
            "D-Psicose",
            "D-Allulose",
            "L-Fructose",
            "L-Sorbose",
            "L-Tagatose",
            "L-Psicose",
            "L-Allulose",
            "L-Idose",
            "D-Idose",
            "D-Mannoheptulose",
            "L-Mannoheptulose",
            "D-Sedoheptulose",
            "L-Sedoheptulose",
            "D-Glucoheptulose",
            "L-Glucoheptulose",
            "D-Galactoheptulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Sorbose",
                "Tagatose",
                "Psicose",
                "D-Fructose",
                "D-Sorbose",
                "D-Tagatose",
                "D-Psicose",
                "L-Fructose",
                "L-Sorbose",
                "L-Tagatose",
                "L-Psicose"
            ],
            "mismatches": [
                "D-Allulose",
                "L-Allulose",
                "L-Idose",
                "D-Idose",
                "D-Mannoheptulose",
                "L-Mannoheptulose",
                "D-Sedoheptulose",
                "L-Sedoheptulose",
                "D-Glucoheptulose",
                "L-Glucoheptulose",
                "D-Galactoheptulose"
            ],
            "true_referents": [
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-fructofuranose",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "L-fructofuranose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "fructose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 12,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (C6H12O6)",
            "D-Mannose (C6H12O6)",
            "D-Galactose (C6H12O6)",
            "D-Tagatose (C6H12O6)",
            "D-Sorbose (C6H12O6)",
            "D-Psicose (C6H12O6)",
            "D-Ribulose (C5H10O5)",
            "D-Xylulose (C5H10O5)",
            "D-Glucose (C6H12O6)",
            "D-Mannitol (C6H14O6)",
            "D-Fructosamine (C6H12N2O5)",
            "D-Glucosamine (C6H13NO5)",
            "D-Acetylfructose (C6H10O6)",
            "D-Methylfructoside (C9H18O9)",
            "D-2-Keto-D-gluconic acid (C6H10O7)",
            "D-3-Keto-D-galactonic acid (C6H10O7)",
            "D-2-Keto-D-mannonic acid (C6H10O7)",
            "D-2-Keto-D-gluconate (C6H10O7)",
            "D-Fructosaccharides (C6H12O6)",
            "D-1,5-Anhydro-D-fructose (C6H10O5)",
            "D-Lyxose (C5H10O5)",
            "D-Ribose (C5H10O5)",
            "D-Arabinose (C5H10O5)",
            "D-Glucose-6-phosphate (C6H13O9P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Fructose (C6H12O6)",
                "D-Tagatose (C6H12O6)",
                "D-Sorbose (C6H12O6)",
                "D-Psicose (C6H12O6)"
            ],
            "mismatches": [
                "D-Mannose (C6H12O6)",
                "D-Galactose (C6H12O6)",
                "D-Ribulose (C5H10O5)",
                "D-Xylulose (C5H10O5)",
                "D-Glucose (C6H12O6)",
                "D-Mannitol (C6H14O6)",
                "D-Fructosamine (C6H12N2O5)",
                "D-Glucosamine (C6H13NO5)",
                "D-Acetylfructose (C6H10O6)",
                "D-Methylfructoside (C9H18O9)",
                "D-2-Keto-D-gluconic acid (C6H10O7)",
                "D-3-Keto-D-galactonic acid (C6H10O7)",
                "D-2-Keto-D-mannonic acid (C6H10O7)",
                "D-2-Keto-D-gluconate (C6H10O7)",
                "D-Fructosaccharides (C6H12O6)",
                "D-1,5-Anhydro-D-fructose (C6H10O5)",
                "D-Lyxose (C5H10O5)",
                "D-Ribose (C5H10O5)",
                "D-Arabinose (C5H10O5)",
                "D-Glucose-6-phosphate (C6H13O9P)"
            ],
            "true_referents": [
                "3-deoxy-3-methyl-beta-D-fructofuranose",
                "3-deoxy-keto-D-fructose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-fructopyranose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose 1,6-bisphosphate",
                "D-tagatose 6-phosphate",
                "L-psicose",
                "L-xylo-3-hexulose",
                "N-(1-deoxy-1-fructosyl)phenylalanine",
                "alpha-D-fructopyranose",
                "keto-D-fructose 6-phosphate",
                "keto-D-fructuronic acid",
                "keto-D-sorbose",
                "sorbose"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (D-Fru)",
            "L-Fructose (L-Fru)",
            "D-Psicose (D-Psi)",
            "L-Psicose (L-Psi)",
            "D-Sorbose (D-Sor)",
            "L-Sorbose (L-Sor)",
            "D-Tagatose (D-Tag)",
            "L-Tagatose (L-Tag)",
            "D-Mannose (D-Man)",
            "L-Mannose (L-Man)",
            "D-Allose (D-All)",
            "L-Allose (L-All)",
            "D-Gulose (D-Gul)",
            "L-Gulose (L-Gul)",
            "D-Idose (D-Idd)",
            "L-Idose (L-Idd)",
            "D-Galactose (D-Gal)",
            "L-Galactose (L-Gal)",
            "D-Talose (D-Tal)",
            "L-Talose (L-Tal)",
            "D-Altrose (D-Alt)",
            "L-Altrose (L-Alt)",
            "D-Glucose (D-Glc)",
            "L-Glucose (L-Glc)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Fructose (D-Fru)",
                "L-Fructose (L-Fru)",
                "D-Psicose (D-Psi)",
                "L-Psicose (L-Psi)",
                "D-Sorbose (D-Sor)",
                "L-Sorbose (L-Sor)",
                "D-Tagatose (D-Tag)",
                "L-Tagatose (L-Tag)"
            ],
            "mismatches": [
                "D-Mannose (D-Man)",
                "L-Mannose (L-Man)",
                "D-Allose (D-All)",
                "L-Allose (L-All)",
                "D-Gulose (D-Gul)",
                "L-Gulose (L-Gul)",
                "D-Idose (D-Idd)",
                "L-Idose (L-Idd)",
                "D-Galactose (D-Gal)",
                "L-Galactose (L-Gal)",
                "D-Talose (D-Tal)",
                "L-Talose (L-Tal)",
                "D-Altrose (D-Alt)",
                "L-Altrose (L-Alt)",
                "D-Glucose (D-Glc)",
                "L-Glucose (L-Glc)"
            ],
            "true_referents": [
                "3-dehydro-D-guloside",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-fructofuranose",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-rhamnulose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "alpha-chrysopine",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-fructose",
            "D-psicose",
            "D-sorbose",
            "D-tagatose",
            "D-allose",
            "D-altrose",
            "D-glucose",
            "D-mannose",
            "D-gulose",
            "D-idose",
            "D-talose",
            "D-lyxose",
            "L-fructose",
            "L-psicose",
            "L-sorbose",
            "L-tagatose",
            "L-allose",
            "L-altrose",
            "L-glucose",
            "L-mannose",
            "L-gulose",
            "L-idose",
            "L-talose",
            "L-lyxose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "L-tagatose"
            ],
            "mismatches": [
                "D-allose",
                "D-altrose",
                "D-glucose",
                "D-mannose",
                "D-gulose",
                "D-idose",
                "D-talose",
                "D-lyxose",
                "L-allose",
                "L-altrose",
                "L-glucose",
                "L-mannose",
                "L-gulose",
                "L-idose",
                "L-talose",
                "L-lyxose"
            ],
            "true_referents": [
                "3-dehydro-D-guloside",
                "5-dehydro-D-fructose",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-rhamnulose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "alpha-chrysopine",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (Fru)",
            "L-Fructose",
            "D-Psicose (Allulose)",
            "L-Psicose",
            "D-Sorbose (Sor)",
            "L-Sorbose",
            "D-Tagatose (Tag)",
            "L-Tagatose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "L-fructose",
                "D-psicose",
                "L-psicose",
                "D-sorbose",
                "L-sorbose",
                "D-tagatose",
                "L-tagatose"
            ],
            "mismatches": [],
            "true_referents": [
                "5-dehydro-D-fructose",
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "psicose derivative",
                "tagatose"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-Fructose (D-Fru)",
            "L-Fructose (L-Fru)",
            "D-Psicose (D-Psi)",
            "L-Psicose (L-Psi)",
            "D-Sorbose (D-Sor)",
            "L-Sorbose (L-Sor)",
            "D-Tagatose (D-Tag)",
            "L-Tagatose (L-Tag)",
            "2-Ketoglucose",
            "3-Ketoglucose",
            "4-Ketoglucose",
            "5-Ketoglucose",
            "Allofructose",
            "Allosorbose",
            "Allulose",
            "Fructopyranose",
            "Fructofuranose",
            "Psicopyranose",
            "Psicofuranose",
            "Sorbopyranose",
            "Sorbofuranose",
            "Tagatopyranose",
            "Tagatofuranose",
            "1,5-Anhydro-D-fructose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "L-fructose",
                "D-psicose",
                "L-psicose",
                "D-sorbose",
                "L-sorbose",
                "D-tagatose",
                "L-tagatose",
                "fructopyranose",
                "fructofuranose",
                "sorbopyranose",
                "sorbofuranose"
            ],
            "mismatches": [
                "2-Ketoglucose",
                "3-Ketoglucose",
                "4-Ketoglucose",
                "5-Ketoglucose",
                "Allofructose",
                "Allosorbose",
                "Allulose",
                "Psicopyranose",
                "Psicofuranose",
                "Tagatopyranose",
                "Tagatofuranose",
                "1,5-Anhydro-D-fructose"
            ],
            "true_referents": [
                "3-deoxy-keto-D-fructose",
                "5-dehydro-D-fructose",
                "D-fructose",
                "D-psicose",
                "D-sorbofuranose",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatofuranose",
                "D-tagatopyranose",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "L-fructofuranose",
                "L-fructopyranose",
                "L-fructose",
                "L-psicose",
                "L-sorbopyranose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "L-xylo-3-hexulose",
                "alpha-D-tagatofuranose",
                "alpha-D-tagatopyranose",
                "beta-D-tagatofuranose",
                "beta-D-tagatopyranose",
                "fructofuranose",
                "fructopyranose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-sorbose",
                "keto-L-tagatose",
                "psicofuranin",
                "psicose",
                "psicose derivative",
                "sorbofuranose",
                "sorbopyranose",
                "sorbose"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-fructose",
            "L-fructose",
            "D-psicose",
            "L-psicose",
            "D-sorbose",
            "L-sorbose",
            "D-tagatose",
            "L-tagatose",
            "D-allohexulose",
            "L-allohexulose",
            "D-allulohexulose",
            "L-allulohexulose",
            "D-gulohexulose",
            "L-gulohexulose",
            "D-idohexulose",
            "L-idohexulose",
            "D-galactohexulose",
            "L-galactohexulose",
            "D-talohexulose",
            "L-talohexulose",
            "D-glucohexulose",
            "L-glucohexulose",
            "D-mannohexulose",
            "L-mannohexulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "L-fructose",
                "D-psicose",
                "L-psicose",
                "D-sorbose",
                "L-sorbose",
                "D-tagatose",
                "L-tagatose"
            ],
            "mismatches": [
                "D-allohexulose",
                "L-allohexulose",
                "D-allulohexulose",
                "L-allulohexulose",
                "D-gulohexulose",
                "L-gulohexulose",
                "D-idohexulose",
                "L-idohexulose",
                "D-galactohexulose",
                "L-galactohexulose",
                "D-talohexulose",
                "L-talohexulose",
                "D-glucohexulose",
                "L-glucohexulose",
                "D-mannohexulose",
                "L-mannohexulose"
            ],
            "true_referents": [
                "3-dehydro-D-glucoside",
                "3-dehydro-D-guloside",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-fructofuranose",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-tagatose",
                "L-fructofuranose",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "keto-D-sorbose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "D-Glucose (D-Glc)",
            "D-Fructose (D-Fru)",
            "D-Galactose (D-Gal)",
            "D-Mannose (D-Man)",
            "L-Glucose (L-Glc)",
            "L-Fructose (L-Fru)",
            "L-Galactose (L-Gal)",
            "L-Mannose (L-Man)",
            "D-Psicose (D-Psi)",
            "D-Tagatose (D-Tag)",
            "D-Sorbose (D-Sor)",
            "L-Psicose (L-Psi)",
            "L-Tagatose (L-Tag)",
            "L-Sorbose (L-Sor)",
            "D-Idose (D-Ido)",
            "L-Idose (L-Ido)",
            "D-Gulose (D-Gul)",
            "L-Gulose (L-Gul)",
            "D-Altrose (D-Alt)",
            "L-Altrose (L-Alt)",
            "D-Allose (D-All)",
            "L-Allose (L-All)",
            "D-Galactohexodialdose (D-Gal-2,5-dialdose)",
            "L-Galactohexodialdose (L-Gal-2,5-dialdose)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Fructose (D-Fru)",
                "L-Fructose (L-Fru)",
                "D-Psicose (D-Psi)",
                "D-Tagatose (D-Tag)",
                "D-Sorbose (D-Sor)",
                "L-Psicose (L-Psi)",
                "L-Tagatose (L-Tag)",
                "L-Sorbose (L-Sor)"
            ],
            "mismatches": [
                "D-Glucose (D-Glc)",
                "D-Galactose (D-Gal)",
                "D-Mannose (D-Man)",
                "L-Glucose (L-Glc)",
                "L-Galactose (L-Gal)",
                "L-Mannose (L-Man)",
                "D-Idose (D-Ido)",
                "L-Idose (L-Ido)",
                "D-Gulose (D-Gul)",
                "L-Gulose (L-Gul)",
                "D-Altrose (D-Alt)",
                "L-Altrose (L-Alt)",
                "D-Allose (D-All)",
                "L-Allose (L-All)",
                "D-Galactohexodialdose (D-Gal-2,5-dialdose)",
                "L-Galactohexodialdose (L-Gal-2,5-dialdose)"
            ],
            "true_referents": [
                "3-dehydro-D-guloside",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-fructofuranose",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-rhamnulose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "alpha-chrysopine",
                "bis-beta-D-fructofuranose 1,2':2,3'-dianhydride",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "Glucose (Glc)",
            "Fructose (Fru)",
            "Galactose (Gal)",
            "Mannose (Man)",
            "Tagatose (Tag)",
            "Psicose (Psi)",
            "Sorbose (Sor)",
            "Erythrulose (Ery)",
            "Ribulose (Rbu)",
            "Xylulose (Xul)",
            "Sedoheptulose (Sed)",
            "Erythrulose-1-phosphate (Ery-1-P)",
            "Fructose-1-phosphate (Fru-1-P)",
            "Fructose-6-phosphate (Fru-6-P)",
            "Galactose-1-phosphate (Gal-1-P)",
            "Glucose-1-phosphate (Glc-1-P)",
            "Glucose-6-phosphate (Glc-6-P)",
            "Mannose-6-phosphate (Man-6-P)",
            "Ribulose-5-phosphate (Rbu-5-P)",
            "Sedoheptulose-1,7-bisphosphate (Sed-1,7-P2)",
            "Sedoheptulose-7-phosphate (Sed-7-P)",
            "Xylulose-5-phosphate (Xul-5-P)",
            "Fructose-1,6-bisphosphate (Fru-1,6-P2)",
            "Glucose-1,6-bisphosphate (Glc-1,6-P2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Tagatose (Tag)",
                "Psicose (Psi)",
                "Sorbose (Sor)",
                "Fructose-1-phosphate (Fru-1-P)",
                "Fructose-6-phosphate (Fru-6-P)",
                "Fructose-1,6-bisphosphate (Fru-1,6-P2)"
            ],
            "mismatches": [
                "Glucose (Glc)",
                "Galactose (Gal)",
                "Mannose (Man)",
                "Erythrulose (Ery)",
                "Ribulose (Rbu)",
                "Xylulose (Xul)",
                "Sedoheptulose (Sed)",
                "Erythrulose-1-phosphate (Ery-1-P)",
                "Galactose-1-phosphate (Gal-1-P)",
                "Glucose-1-phosphate (Glc-1-P)",
                "Glucose-6-phosphate (Glc-6-P)",
                "Mannose-6-phosphate (Man-6-P)",
                "Ribulose-5-phosphate (Rbu-5-P)",
                "Sedoheptulose-1,7-bisphosphate (Sed-1,7-P2)",
                "Sedoheptulose-7-phosphate (Sed-7-P)",
                "Xylulose-5-phosphate (Xul-5-P)",
                "Glucose-1,6-bisphosphate (Glc-1,6-P2)"
            ],
            "true_referents": [
                "1,5-anhydro-D-erythro-hex-1-en-3-ulose",
                "3-deoxy-3-methyl-beta-D-fructofuranose",
                "3-keto-beta-D-galactose",
                "6-deoxy-beta-L-fructofuranose",
                "D-fructofuranose 6-phosphate",
                "D-fructopyranose 1-phosphate",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-fructose 6-phosphate",
                "D-psicose",
                "D-psicose 6-phosphate",
                "D-psicose 6-phosphate(2-)",
                "D-sorbose",
                "D-sorbose 1,6-bisphosphate",
                "D-tagatose",
                "L-fructose",
                "L-fuculose 1-phosphate",
                "L-psicose",
                "L-rhamnulose",
                "L-rhamnulose 1-phosphate",
                "L-sorbose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "beta-D-fructofuranose 1,6-bisphosphate",
                "beta-D-fructofuranose 2,6-bisphosphate",
                "beta-D-fructofuranose 6-phosphate",
                "beta-D-fructopyranose 1-phosphate",
                "beta-D-tagatofuranose",
                "deoxyketohexose phosphate",
                "fructopyranose",
                "fructose",
                "fructoselysine 6-phosphate",
                "keto-D-fructose 1,6-bisphosphate",
                "keto-D-fructose 1-phosphate",
                "keto-D-tagatose",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "sorbose 1-phosphate",
                "tagatose"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Glucose (Glc)",
            "Arabinose",
            "Xylose",
            "Ribose",
            "Mannose",
            "Galactose (Gal)",
            "Talose",
            "Allose",
            "Altrose",
            "Idose",
            "Psicose",
            "Tagatose",
            "Rhamnose",
            "Fucose",
            "Mannofructose",
            "Isomaltose",
            "Maltose",
            "Maltulose",
            "Isomaltulose",
            "Turanose",
            "Palatinose",
            "Lactose (Lac)",
            "Maltotriose",
            "Isomaltotriose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Psicose",
                "Tagatose"
            ],
            "mismatches": [
                "Glucose (Glc)",
                "Arabinose",
                "Xylose",
                "Ribose",
                "Mannose",
                "Galactose (Gal)",
                "Talose",
                "Allose",
                "Altrose",
                "Idose",
                "Rhamnose",
                "Fucose",
                "Mannofructose",
                "Isomaltose",
                "Maltose",
                "Maltulose",
                "Isomaltulose",
                "Turanose",
                "Palatinose",
                "Lactose (Lac)",
                "Maltotriose",
                "Isomaltotriose"
            ],
            "true_referents": [
                "3-deoxyglucosone",
                "3-keto-beta-D-galactose",
                "6-deoxy-beta-L-fructofuranose",
                "D-fructose",
                "D-fructose 1-phosphate",
                "D-psicose",
                "D-tagatopyranose",
                "D-tagatose",
                "L-fructofuranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-rhamnulose",
                "L-rhamnulose 1-phosphate",
                "L-sorbose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "N-(1-deoxy-1-fructosyl)phenylalanine",
                "alpha-D-tagatopyranose",
                "alpha-chrysopine",
                "beta-D-tagatopyranose",
                "beta-L-fructopyranose",
                "beta-chrysopine",
                "fructofuranose",
                "fructopyranose",
                "fructose",
                "keto-D-tagatose",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 3,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            "Fructose (Fru)",
            "Tagatose (Tag)",
            "Sorbose (Sor)",
            "Psicose (Psi)",
            "L-Fructose (L-Fru)",
            "D-Altratose (D-Alt)",
            "L-Altratose (L-Alt)",
            "D-Idose (D-Ido)",
            "L-Idose (L-Ido)",
            "D-Galactose (D-Gal)",
            "L-Galactose (L-Gal)",
            "D-Talose (D-Tal)",
            "L-Talose (L-Tal)",
            "D-Manose (D-Man)",
            "L-Manose (L-Man)",
            "D-Glucose (D-Glc)",
            "L-Glucose (L-Glc)",
            "Allulose (Psicose)",
            "D-Psicose (D-Psi)",
            "L-Psicose (L-Psi)",
            "D-Tagatose (D-Tag)",
            "L-Tagatose (L-Tag)",
            "D-Sorbose (D-Sor)",
            "L-Sorbose (L-Sor)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Fructose (Fru)",
                "Tagatose (Tag)",
                "Sorbose (Sor)",
                "Psicose (Psi)",
                "L-Fructose (L-Fru)",
                "Allulose (Psicose)",
                "D-Psicose (D-Psi)",
                "L-Psicose (L-Psi)",
                "D-Tagatose (D-Tag)",
                "L-Tagatose (L-Tag)",
                "D-Sorbose (D-Sor)",
                "L-Sorbose (L-Sor)"
            ],
            "mismatches": [
                "D-Altratose (D-Alt)",
                "L-Altratose (L-Alt)",
                "D-Idose (D-Ido)",
                "L-Idose (L-Ido)",
                "D-Galactose (D-Gal)",
                "L-Galactose (L-Gal)",
                "D-Talose (D-Tal)",
                "L-Talose (L-Tal)",
                "D-Manose (D-Man)",
                "L-Manose (L-Man)",
                "D-Glucose (D-Glc)",
                "L-Glucose (L-Glc)"
            ],
            "true_referents": [
                "3-keto-beta-D-galactose",
                "D-fructofuranose",
                "D-fructopyranose",
                "D-fructose",
                "D-psicose",
                "D-sorbose",
                "D-sorbose 1-phosphate",
                "D-tagatose",
                "D-tagatose 6-phosphate",
                "L-fructopyranose",
                "L-fructose",
                "L-fuculose",
                "L-psicose",
                "L-sorbose",
                "L-sorbose 1-phosphate",
                "L-tagatose",
                "L-tagatose 6-phosphate",
                "fructose",
                "keto-D-tagatose",
                "keto-L-fructose",
                "keto-L-tagatose",
                "psicose",
                "sorbose",
                "tagatose"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "ketohexose",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "D-fructose",
                "Canonical name": "D-fructose"
            },
            {
                "Referent": "D-glucose",
                "Canonical name": "D-glucose"
            },
            {
                "Referent": "D-galactose",
                "Canonical name": "D-galactose"
            },
            {
                "Referent": "D-mannose",
                "Canonical name": "D-mannose"
            },
            {
                "Referent": "D-glucuronic acid",
                "Canonical name": "D-glucuronic acid"
            },
            {
                "Referent": "D-idose",
                "Canonical name": "D-idose"
            },
            {
                "Referent": "D-altrose",
                "Canonical name": "D-altrose"
            },
            {
                "Referent": "D-sorbose",
                "Canonical name": "D-sorbose"
            },
            {
                "Referent": "D-tagatose",
                "Canonical name": "D-tagatose"
            },
            {
                "Referent": "D-allose",
                "Canonical name": "D-allose"
            },
            {
                "Referent": "D-gulose",
                "Canonical name": "D-gulose"
            },
            {
                "Referent": "D-talose",
                "Canonical name": "D-talose"
            },
            {
                "Referent": "D-ribose",
                "Canonical name": "D-ribose"
            },
            {
                "Referent": "D-xylose",
                "Canonical name": "D-xylose"
            },
            {
                "Referent": "D-arabinose",
                "Canonical name": "D-arabinose"
            },
            {
                "Referent": "D-lyxose",
                "Canonical name": "D-lyxose"
            },
            {
                "Referent": "D-erythrose",
                "Canonical name": "D-erythrose"
            },
            {
                "Referent": "D-lyxonic acid",
                "Canonical name": "D-lyxonic acid"
            },
            {
                "Referent": "D-ribonic acid",
                "Canonical name": "D-ribonic acid"
            },
            {
                "Referent": "D-arabonic acid",
                "Canonical name": "D-arabonic acid"
            },
            {
                "Referent": "D-xylononic acid",
                "Canonical name": "D-xylononic acid"
            },
            {
                "Referent": "D-lyxonic acid",
                "Canonical name": "D-lyxonic acid"
            },
            {
                "Referent": "D-ribonic acid",
                "Canonical name": "D-ribonic acid"
            },
            {
                "Referent": "D-arabonic acid",
                "Canonical name": "D-arabonic acid"
            },
            {
                "Referent": "D-xylononic acid",
                "Canonical name": "D-xylononic acid"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-fructose",
                "D-sorbose",
                "D-tagatose"
            ],
            "mismatches": [
                "D-glucose",
                "D-galactose",
                "D-mannose",
                "D-glucuronic acid",
                "D-idose",
                "D-altrose",
                "D-allose",
                "D-gulose",
                "D-talose",
                "D-ribose",
                "D-xylose",
                "D-arabinose",
                "D-lyxose",
                "D-erythrose",
                "D-lyxonic acid",
                "D-ribonic acid",
                "D-arabonic acid",
                "D-xylononic acid"
            ],
            "true_referents": [
                "3-deoxy-keto-D-fructose",
                "3-keto-beta-D-galactose",
                "5-dehydro-D-fructose",
                "D-fructofuranuronic acid",
                "D-fructose",
                "D-fructuronic acid",
                "D-psicose",
                "D-psicose 6-phosphate(2-)",
                "D-sorbose",
                "D-tagatose",
                "L-fructose",
                "L-psicose",
                "L-sorbose",
                "L-tagatose",
                "L-xylo-3-hexulose",
                "alpha-chrysopine",
                "beta-D-fructopyranose",
                "bis-beta-D-fructofuranose 1,2':2,3'-dianhydride",
                "deoxyketohexose",
                "keto-D-tagatose",
                "sorbose"
            ],
            "TP": 3,
            "FP": 18,
            "FN": 0
        }
    }
]