[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Glyceraldehyde",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Glyceraldehyde-3-phosphate (GAP)",
            "Lactaldehyde",
            "Dihydroxyacetone phosphate (DHAP)",
            "D-ribulose",
            "D-arabinose",
            "D-fructose-1,6-bisphosphate (FBP)",
            "D-fructose-1,6-diphosphate (FDP)",
            "D-arabinose-3-phosphate",
            "D-ribulose-5-phosphate",
            "D-ribulose-1,5-bisphosphate (RuBP)",
            "D-xylulose",
            "D-xylulose-5-phosphate",
            "D-erythrose",
            "D-erythrose-4-phosphate (E4P)",
            "D-ribulose-3-phosphate (R3P)",
            "D-xylulose-5-phosphate (X5P)",
            "D-ribulose-5-phosphate (R5P)",
            "D-xylulose-1-phosphate",
            "D-ribulose-1-phosphate",
            "D-ribulose-3-phosphate (R3P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dihydroxyacetone (DHA)",
                "Glyceraldehyde",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Glyceraldehyde-3-phosphate (GAP)",
                "Lactaldehyde",
                "Dihydroxyacetone phosphate (DHAP)",
                "D-ribulose",
                "D-arabinose",
                "D-fructose-1,6-bisphosphate (FBP)",
                "D-fructose-1,6-diphosphate (FDP)",
                "D-arabinose-3-phosphate",
                "D-ribulose-5-phosphate",
                "D-ribulose-1,5-bisphosphate (RuBP)",
                "D-xylulose",
                "D-xylulose-5-phosphate",
                "D-erythrose",
                "D-erythrose-4-phosphate (E4P)",
                "D-ribulose-3-phosphate (R3P)",
                "D-xylulose-5-phosphate (X5P)",
                "D-ribulose-5-phosphate (R5P)",
                "D-xylulose-1-phosphate",
                "D-ribulose-1-phosphate"
            ],
            "mismatches": [],
            "true_referents": [
                "D-arabinose",
                "D-arabinose-3-phosphate",
                "D-erythrose",
                "D-erythrose-4-phosphate (E4P)",
                "D-fructose-1,6-bisphosphate (FBP)",
                "D-fructose-1,6-diphosphate (FDP)",
                "D-ribulose",
                "D-ribulose-1,5-bisphosphate (RuBP)",
                "D-ribulose-1-phosphate",
                "D-ribulose-3-phosphate (R3P)",
                "D-ribulose-5-phosphate",
                "D-ribulose-5-phosphate (R5P)",
                "D-xylulose",
                "D-xylulose-1-phosphate",
                "D-xylulose-5-phosphate",
                "D-xylulose-5-phosphate (X5P)",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Glyceraldehyde",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Glyceraldehyde-3-phosphate (GAP)",
                "Lactaldehyde"
            ],
            "TP": 22,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Dihydroxyacetone (DHA)",
            "Glyceraldehyde (G3P)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Glyceraldehyde dehydrogenase",
            "Triose phosphate isomerase",
            "Fructose 1,3-bisphosphate (F1,3BP)",
            "Fructose 1,6-bisphosphate (F1,6BP)",
            "Glycolysis",
            "Calvin cycle",
            "Embden-Meyerhof pathway",
            "Pentose phosphate pathway",
            "Glyceraldehyde-3-phosphate dehydrogenase",
            "Triose phosphate dehydrogenase",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Triose phosphate isomerase reaction",
            "Phosphoglycerate kinase",
            "Phosphoglyceraldehyde",
            "Triose phosphate",
            "3-Phosphoglyceraldehyde",
            "Triose phosphate isomerase deficiency",
            "Glycolytic pathway",
            "Photosynthesis",
            "Aldehyde dehydrogenase",
            "Triose phosphate intermediates"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dihydroxyacetone (DHA)",
                "Glyceraldehyde (G3P)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Glyceraldehyde dehydrogenase",
                "Triose phosphate isomerase",
                "Fructose 1,3-bisphosphate (F1,3BP)",
                "Fructose 1,6-bisphosphate (F1,6BP)",
                "Glycolysis",
                "Calvin cycle",
                "Embden-Meyerhof pathway",
                "Pentose phosphate pathway",
                "Glyceraldehyde-3-phosphate dehydrogenase",
                "Triose phosphate dehydrogenase",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Triose phosphate isomerase reaction",
                "Phosphoglycerate kinase",
                "Phosphoglyceraldehyde",
                "Triose phosphate",
                "3-Phosphoglyceraldehyde",
                "Triose phosphate isomerase deficiency",
                "Glycolytic pathway",
                "Photosynthesis",
                "Aldehyde dehydrogenase",
                "Triose phosphate intermediates"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Phosphoglyceraldehyde",
                "Aldehyde dehydrogenase",
                "Calvin cycle",
                "Dihydroxyacetone (DHA)",
                "Embden-Meyerhof pathway",
                "Fructose 1,3-bisphosphate (F1,3BP)",
                "Fructose 1,6-bisphosphate (F1,6BP)",
                "Glyceraldehyde (G3P)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Glyceraldehyde dehydrogenase",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Glyceraldehyde-3-phosphate dehydrogenase",
                "Glycolysis",
                "Glycolytic pathway",
                "Pentose phosphate pathway",
                "Phosphoglyceraldehyde",
                "Phosphoglycerate kinase",
                "Photosynthesis",
                "Triose phosphate",
                "Triose phosphate dehydrogenase",
                "Triose phosphate intermediates",
                "Triose phosphate isomerase",
                "Triose phosphate isomerase deficiency",
                "Triose phosphate isomerase reaction"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (GAL)",
            "Dihydroxyacetone (DHA)",
            "Glycerone",
            "Triose phosphate",
            "3-Phosphoglyceraldehyde",
            "Dihydroxyacetone phosphate (DHAP)",
            "Triose reductase",
            "Triose phosphate isomerase",
            "Triose kinase",
            "Triose-3-phosphate",
            "Triose-3-phosphate dehydrogenase",
            "Triose-3-phosphate isomerase",
            "Triose-3-phosphate kinase",
            "Triose-3-phosphate phosphatase",
            "Triose-3-phosphate reductase",
            "Triose-3-phosphate synthase",
            "Triose-3-phosphate transaminase",
            "Triose-3-phosphate transketolase",
            "Triose-3-phosphate transferase",
            "Triose-3-phosphate transporter",
            "Triose-3-phosphate mutase",
            "Triose-3-phosphate epimerase",
            "Triose-3-phosphate racemase",
            "Triose-3-phosphate decarboxylase",
            "Triose-3-phosphate dehydrogenase (NADP+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (GAL)",
                "Dihydroxyacetone (DHA)",
                "Glycerone",
                "Triose phosphate",
                "3-Phosphoglyceraldehyde",
                "Dihydroxyacetone phosphate (DHAP)",
                "Triose reductase",
                "Triose phosphate isomerase",
                "Triose kinase",
                "Triose-3-phosphate",
                "Triose-3-phosphate dehydrogenase",
                "Triose-3-phosphate isomerase",
                "Triose-3-phosphate kinase",
                "Triose-3-phosphate phosphatase",
                "Triose-3-phosphate reductase",
                "Triose-3-phosphate synthase",
                "Triose-3-phosphate transaminase",
                "Triose-3-phosphate transketolase",
                "Triose-3-phosphate transferase",
                "Triose-3-phosphate transporter",
                "Triose-3-phosphate mutase",
                "Triose-3-phosphate epimerase",
                "Triose-3-phosphate racemase",
                "Triose-3-phosphate decarboxylase"
            ],
            "mismatches": [
                "Triose-3-phosphate dehydrogenase (NADP+)"
            ],
            "true_referents": [
                "3-Phosphoglyceraldehyde",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Glyceraldehyde (GAL)",
                "Glycerone",
                "Triose kinase",
                "Triose phosphate",
                "Triose phosphate isomerase",
                "Triose reductase",
                "Triose-3-phosphate",
                "Triose-3-phosphate decarboxylase",
                "Triose-3-phosphate dehydrogenase",
                "Triose-3-phosphate epimerase",
                "Triose-3-phosphate isomerase",
                "Triose-3-phosphate kinase",
                "Triose-3-phosphate mutase",
                "Triose-3-phosphate phosphatase",
                "Triose-3-phosphate racemase",
                "Triose-3-phosphate reductase",
                "Triose-3-phosphate synthase",
                "Triose-3-phosphate transaminase",
                "Triose-3-phosphate transferase",
                "Triose-3-phosphate transketolase",
                "Triose-3-phosphate transporter"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (GA)",
            "Dihydroxyacetone (DHA)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Ribulose-5-phosphate (Ru5P)",
            "Xylulose-5-phosphate (Xu5P)",
            "Erythrose-4-phosphate (E4P)",
            "Threose (THR)",
            "Erythrulose (ERY)",
            "Ribulose (RIB)",
            "Xylulose (XYL)",
            "D-Glyceraldehyde",
            "L-Glyceraldehyde",
            "D-Dihydroxyacetone",
            "L-Dihydroxyacetone",
            "Fructose-1,6-bisphosphate (FBP) - cleavage product",
            "Sedoheptulose-1,7-bisphosphate (SBP) - cleavage product",
            "Triose phosphate isomerase substrate (TIM)",
            "Calvin cycle intermediate",
            "Glycolysis intermediate",
            "Gluconeogenesis intermediate",
            "Pentose phosphate pathway intermediate",
            "Fructose metabolism intermediate",
            "Polyhydroxybutyrate precursor",
            "Glycerol metabolic pathway component"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (GA)",
                "Dihydroxyacetone (DHA)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Ribulose-5-phosphate (Ru5P)",
                "Xylulose-5-phosphate (Xu5P)",
                "Erythrose-4-phosphate (E4P)",
                "Threose (THR)",
                "Erythrulose (ERY)",
                "Ribulose (RIB)",
                "Xylulose (XYL)",
                "D-Glyceraldehyde",
                "L-Glyceraldehyde",
                "D-Dihydroxyacetone",
                "L-Dihydroxyacetone",
                "Fructose-1,6-bisphosphate (FBP) - cleavage product",
                "Sedoheptulose-1,7-bisphosphate (SBP) - cleavage product",
                "Triose phosphate isomerase substrate (TIM)",
                "Calvin cycle intermediate",
                "Glycolysis intermediate",
                "Gluconeogenesis intermediate",
                "Pentose phosphate pathway intermediate",
                "Fructose metabolism intermediate",
                "Polyhydroxybutyrate precursor"
            ],
            "mismatches": [
                "Glycerol metabolic pathway component"
            ],
            "true_referents": [
                "Calvin cycle intermediate",
                "D-Dihydroxyacetone",
                "D-Glyceraldehyde",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose-4-phosphate (E4P)",
                "Erythrulose (ERY)",
                "Fructose metabolism intermediate",
                "Fructose-1,6-bisphosphate (FBP) - cleavage product",
                "Gluconeogenesis intermediate",
                "Glyceraldehyde (GA)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Glycolysis intermediate",
                "L-Dihydroxyacetone",
                "L-Glyceraldehyde",
                "Pentose phosphate pathway intermediate",
                "Polyhydroxybutyrate precursor",
                "Ribulose (RIB)",
                "Ribulose-5-phosphate (Ru5P)",
                "Sedoheptulose-1,7-bisphosphate (SBP) - cleavage product",
                "Threose (THR)",
                "Triose phosphate isomerase substrate (TIM)",
                "Xylulose (XYL)",
                "Xylulose-5-phosphate (Xu5P)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (C3H6O3)",
            "Dihydroxyacetone (C3H6O3)",
            "L-Glyceraldehyde",
            "D-Glyceraldehyde",
            "L-Dihydroxyacetone",
            "D-Dihydroxyacetone",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "L-Glyceraldehyde-3-phosphate",
            "D-Glyceraldehyde-3-phosphate",
            "L-Dihydroxyacetone phosphate",
            "D-Dihydroxyacetone phosphate",
            "Glyceraldehyde acetonide",
            "Dihydroxyacetone acetonide",
            "1,3-Dihydroxyacetone",
            "2,3-Dihydroxypropanal",
            "1,3-Dihydroxypropanone",
            "2,3-Dihydroxypropanone",
            "2,3-Dihydroxypropionaldehyde",
            "1,3-Dihydroxypropionaldehyde",
            "3-Hydroxy-2-oxopropanal",
            "3-Hydroxy-2-oxopropanone",
            "2-Hydroxy-3-oxopropanal",
            "2-Hydroxy-3-oxopropanone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (C3H6O3)",
                "Dihydroxyacetone (C3H6O3)",
                "L-Glyceraldehyde",
                "D-Glyceraldehyde",
                "L-Dihydroxyacetone",
                "D-Dihydroxyacetone",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "L-Glyceraldehyde-3-phosphate",
                "D-Glyceraldehyde-3-phosphate",
                "L-Dihydroxyacetone phosphate",
                "D-Dihydroxyacetone phosphate",
                "Glyceraldehyde acetonide",
                "Dihydroxyacetone acetonide",
                "1,3-Dihydroxyacetone",
                "2,3-Dihydroxypropanal",
                "1,3-Dihydroxypropanone",
                "2,3-Dihydroxypropanone",
                "2,3-Dihydroxypropionaldehyde",
                "1,3-Dihydroxypropionaldehyde",
                "3-Hydroxy-2-oxopropanal",
                "3-Hydroxy-2-oxopropanone",
                "2-Hydroxy-3-oxopropanal",
                "2-Hydroxy-3-oxopropanone"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Dihydroxyacetone",
                "1,3-Dihydroxypropanone",
                "1,3-Dihydroxypropionaldehyde",
                "2,3-Dihydroxypropanal",
                "2,3-Dihydroxypropanone",
                "2,3-Dihydroxypropionaldehyde",
                "2-Hydroxy-3-oxopropanal",
                "2-Hydroxy-3-oxopropanone",
                "3-Hydroxy-2-oxopropanal",
                "3-Hydroxy-2-oxopropanone",
                "D-Dihydroxyacetone",
                "D-Dihydroxyacetone phosphate",
                "D-Glyceraldehyde",
                "D-Glyceraldehyde-3-phosphate",
                "Dihydroxyacetone (C3H6O3)",
                "Dihydroxyacetone acetonide",
                "Dihydroxyacetone phosphate (DHAP)",
                "Glyceraldehyde (C3H6O3)",
                "Glyceraldehyde acetonide",
                "Glyceraldehyde-3-phosphate (G3P)",
                "L-Dihydroxyacetone",
                "L-Dihydroxyacetone phosphate",
                "L-Glyceraldehyde",
                "L-Glyceraldehyde-3-phosphate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (C3H6O3)",
            "Dihydroxyacetone (C3H6O3)",
            "D-Glucose (C6H12O6)",
            "D-Fructose (C6H12O6)",
            "L-Glyceraldehyde (C3H6O3)",
            "L-Dihydroxyacetone (C3H6O3)",
            "D-Ribose (C5H10O5)",
            "L-Ribose (C5H10O5)",
            "D-Arabinose (C5H10O5)",
            "D-Xylose (C5H10O5)",
            "D-Glucose-6-phosphate (C6H13O9P)",
            "D-Fructose-6-phosphate (C6H13O9P)",
            "Glycerol-3-phosphate (C3H7O6P)",
            "D-Erythrose (C4H8O4)",
            "D-Threose (C4H8O4)",
            "D-Tagatose (C6H12O6)",
            "D-Sedoheptulose (C7H14O7)",
            "D-Ribulose (C5H10O5)",
            "D-Xylulose (C5H10O5)",
            "Glycerol (C3H8O3)",
            "D-Glucosamine (C6H13N1O5)",
            "D-Mannose (C6H12O6)",
            "D-Galactose (C6H12O6)",
            "D-Sorbitol (C6H14O6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (C3H6O3)",
                "Dihydroxyacetone (C3H6O3)",
                "D-Glucose (C6H12O6)",
                "D-Fructose (C6H12O6)",
                "L-Glyceraldehyde (C3H6O3)",
                "L-Dihydroxyacetone (C3H6O3)",
                "D-Ribose (C5H10O5)",
                "L-Ribose (C5H10O5)",
                "D-Arabinose (C5H10O5)",
                "D-Xylose (C5H10O5)",
                "D-Glucose-6-phosphate (C6H13O9P)",
                "D-Fructose-6-phosphate (C6H13O9P)",
                "Glycerol-3-phosphate (C3H7O6P)",
                "D-Erythrose (C4H8O4)",
                "D-Threose (C4H8O4)",
                "D-Tagatose (C6H12O6)",
                "D-Sedoheptulose (C7H14O7)",
                "D-Ribulose (C5H10O5)",
                "D-Xylulose (C5H10O5)",
                "Glycerol (C3H8O3)",
                "D-Glucosamine (C6H13N1O5)",
                "D-Mannose (C6H12O6)",
                "D-Galactose (C6H12O6)",
                "D-Sorbitol (C6H14O6)"
            ],
            "mismatches": [],
            "true_referents": [
                "D-Arabinose (C5H10O5)",
                "D-Erythrose (C4H8O4)",
                "D-Fructose (C6H12O6)",
                "D-Fructose-6-phosphate (C6H13O9P)",
                "D-Galactose (C6H12O6)",
                "D-Glucosamine (C6H13N1O5)",
                "D-Glucose (C6H12O6)",
                "D-Glucose-6-phosphate (C6H13O9P)",
                "D-Mannose (C6H12O6)",
                "D-Ribose (C5H10O5)",
                "D-Ribulose (C5H10O5)",
                "D-Sedoheptulose (C7H14O7)",
                "D-Sorbitol (C6H14O6)",
                "D-Tagatose (C6H12O6)",
                "D-Threose (C4H8O4)",
                "D-Xylose (C5H10O5)",
                "D-Xylulose (C5H10O5)",
                "Dihydroxyacetone (C3H6O3)",
                "Glyceraldehyde (C3H6O3)",
                "Glycerol (C3H8O3)",
                "Glycerol-3-phosphate (C3H7O6P)",
                "L-Dihydroxyacetone (C3H6O3)",
                "L-Glyceraldehyde (C3H6O3)",
                "L-Ribose (C5H10O5)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (G3P)",
            "Dihydroxyacetone (DHAP)",
            "1,3-Diphosphoglycerate (1,3-BPG)",
            "3-Phosphoglycerate (3PG)",
            "2-Phosphoglycerate (2PG)",
            "Phosphoenolpyruvate (PEP)",
            "1,2-Diphosphoglycerate (1,2-BPG)",
            "1-Phosphoglycerate (1PG)",
            "2,3-Diphosphoglycerate (2,3-BPG)",
            "Glyceraldehyde 3-phosphate (GA3P)",
            "Glyceraldehyde 1-phosphate (GA1P)",
            "Glyceraldehyde 2-phosphate (GA2P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "1,3-Bisphosphoglyceric acid (1,3-BPGA)",
            "3-Phosphoglyceraldehyde (3-PGA)",
            "2-Phosphoglyceraldehyde (2-PGA)",
            "Phosphoenolpyruvic acid (PEPA)",
            "1,2-Bisphosphoglyceric acid (1,2-BPGA)",
            "1-Phosphoglyceraldehyde (1-PGA)",
            "2,3-Bisphosphoglyceric acid (2,3-BPGA)",
            "Glyceraldehyde 3-phosphoric acid (GA3PA)",
            "Glyceraldehyde 1-phosphoric acid (GA1PA)",
            "Glyceraldehyde 2-phosphoric acid (GA2PA)",
            "Dihydroxyacetone phosphoric acid (DHAPA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (G3P)",
                "Dihydroxyacetone (DHAP)",
                "1,3-Diphosphoglycerate (1,3-BPG)",
                "3-Phosphoglycerate (3PG)",
                "2-Phosphoglycerate (2PG)",
                "Phosphoenolpyruvate (PEP)",
                "1,2-Diphosphoglycerate (1,2-BPG)",
                "1-Phosphoglycerate (1PG)",
                "2,3-Diphosphoglycerate (2,3-BPG)",
                "Glyceraldehyde 3-phosphate (GA3P)",
                "Glyceraldehyde 1-phosphate (GA1P)",
                "Glyceraldehyde 2-phosphate (GA2P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "1,3-Bisphosphoglyceric acid (1,3-BPGA)",
                "3-Phosphoglyceraldehyde (3-PGA)",
                "2-Phosphoglyceraldehyde (2-PGA)",
                "Phosphoenolpyruvic acid (PEPA)",
                "1,2-Bisphosphoglyceric acid (1,2-BPGA)",
                "1-Phosphoglyceraldehyde (1-PGA)",
                "2,3-Bisphosphoglyceric acid (2,3-BPGA)",
                "Glyceraldehyde 3-phosphoric acid (GA3PA)",
                "Glyceraldehyde 1-phosphoric acid (GA1PA)",
                "Glyceraldehyde 2-phosphoric acid (GA2PA)",
                "Dihydroxyacetone phosphoric acid (DHAPA)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,2-Bisphosphoglyceric acid (1,2-BPGA)",
                "1,2-Diphosphoglycerate (1,2-BPG)",
                "1,3-Bisphosphoglyceric acid (1,3-BPGA)",
                "1,3-Diphosphoglycerate (1,3-BPG)",
                "1-Phosphoglyceraldehyde (1-PGA)",
                "1-Phosphoglycerate (1PG)",
                "2,3-Bisphosphoglyceric acid (2,3-BPGA)",
                "2,3-Diphosphoglycerate (2,3-BPG)",
                "2-Phosphoglyceraldehyde (2-PGA)",
                "2-Phosphoglycerate (2PG)",
                "3-Phosphoglyceraldehyde (3-PGA)",
                "3-Phosphoglycerate (3PG)",
                "Dihydroxyacetone (DHAP)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Dihydroxyacetone phosphoric acid (DHAPA)",
                "Glyceraldehyde (G3P)",
                "Glyceraldehyde 1-phosphate (GA1P)",
                "Glyceraldehyde 1-phosphoric acid (GA1PA)",
                "Glyceraldehyde 2-phosphate (GA2P)",
                "Glyceraldehyde 2-phosphoric acid (GA2PA)",
                "Glyceraldehyde 3-phosphate (GA3P)",
                "Glyceraldehyde 3-phosphoric acid (GA3PA)",
                "Phosphoenolpyruvate (PEP)",
                "Phosphoenolpyruvic acid (PEPA)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (GADP)",
            "Dihydroxyacetone (DHA)",
            "D-Glyceraldehyde",
            "Dihydroxyacetone phosphate (DHAP)",
            "Erythrose",
            "Erythrose-4-phosphate (E4P)",
            "Threose",
            "Threose-4-phosphate (T4P)",
            "Ribose",
            "Ribose-5-phosphate (R5P)",
            "Xylulose",
            "Xylulose-5-phosphate (X5P)",
            "Lyxose",
            "Lyxose-5-phosphate (L5P)",
            "Arabinose",
            "Arabinose-5-phosphate (A5P)",
            "Ribulose",
            "Ribulose-5-phosphate (Ru5P)",
            "Xylose",
            "Xylose-5-phosphate (Xu5P)",
            "Allose",
            "Allose-6-phosphate (A6P)",
            "Altrose",
            "Altrose-6-phosphate (Alt6P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (GADP)",
                "Dihydroxyacetone (DHA)",
                "D-Glyceraldehyde",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose",
                "Erythrose-4-phosphate (E4P)",
                "Threose",
                "Threose-4-phosphate (T4P)",
                "Ribose",
                "Ribose-5-phosphate (R5P)",
                "Xylulose",
                "Xylulose-5-phosphate (X5P)",
                "Lyxose",
                "Lyxose-5-phosphate (L5P)",
                "Arabinose",
                "Arabinose-5-phosphate (A5P)",
                "Ribulose",
                "Ribulose-5-phosphate (Ru5P)",
                "Xylose",
                "Xylose-5-phosphate (Xu5P)",
                "Allose",
                "Allose-6-phosphate (A6P)",
                "Altrose",
                "Altrose-6-phosphate (Alt6P)"
            ],
            "mismatches": [],
            "true_referents": [
                "Allose",
                "Allose-6-phosphate (A6P)",
                "Altrose",
                "Altrose-6-phosphate (Alt6P)",
                "Arabinose",
                "Arabinose-5-phosphate (A5P)",
                "D-Glyceraldehyde",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose",
                "Erythrose-4-phosphate (E4P)",
                "Glyceraldehyde (GADP)",
                "Lyxose",
                "Lyxose-5-phosphate (L5P)",
                "Ribose",
                "Ribose-5-phosphate (R5P)",
                "Ribulose",
                "Ribulose-5-phosphate (Ru5P)",
                "Threose",
                "Threose-4-phosphate (T4P)",
                "Xylose",
                "Xylose-5-phosphate (Xu5P)",
                "Xylulose",
                "Xylulose-5-phosphate (X5P)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (G3A)",
            "Dihydroxyacetone (DHA)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Erythrose",
            "Threose",
            "Erythrulose",
            "Allose",
            "Glycolaldehyde",
            "Glycolaldehyde phosphate",
            "Glyceraldehyde-1-phosphate",
            "Glyceraldehyde-2-phosphate",
            "Triose",
            "Triose phosphate",
            "L-Glyceraldehyde",
            "D-Glyceraldehyde",
            "L-Threose",
            "D-Threose",
            "L-Erythrose",
            "D-Erythrose",
            "Mesotriose",
            "Leptotriose",
            "Panotriose",
            "Phosphoglycolate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (G3A)",
                "Dihydroxyacetone (DHA)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose",
                "Threose",
                "Erythrulose",
                "Allose",
                "Glycolaldehyde",
                "Glycolaldehyde phosphate",
                "Glyceraldehyde-1-phosphate",
                "Glyceraldehyde-2-phosphate",
                "Triose",
                "Triose phosphate",
                "L-Glyceraldehyde",
                "D-Glyceraldehyde",
                "L-Threose",
                "D-Threose",
                "L-Erythrose",
                "D-Erythrose",
                "Mesotriose",
                "Leptotriose",
                "Panotriose",
                "Phosphoglycolate"
            ],
            "mismatches": [],
            "true_referents": [
                "Allose",
                "D-Erythrose",
                "D-Glyceraldehyde",
                "D-Threose",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose",
                "Erythrulose",
                "Glyceraldehyde (G3A)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Glyceraldehyde-1-phosphate",
                "Glyceraldehyde-2-phosphate",
                "Glycolaldehyde",
                "Glycolaldehyde phosphate",
                "L-Erythrose",
                "L-Glyceraldehyde",
                "L-Threose",
                "Leptotriose",
                "Mesotriose",
                "Panotriose",
                "Phosphoglycolate",
                "Threose",
                "Triose",
                "Triose phosphate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (GLA)",
            "Dihydroxyacetone (DHA)",
            "1,3-Dihydroxyacetone phosphate (DHAP)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "3-Phosphoglyceraldehyde (3PGA)",
            "1,3-Bisphosphoglycerate (1,3-BPG)",
            "Methylglyoxal",
            "Glycerol",
            "Propionaldehyde",
            "Acetol",
            "Hydroxyacetone",
            "Lactaldehyde",
            "3-Hydroxypropionaldehyde",
            "Glyceraldehyde 2-phosphate",
            "2-Phosphoglyceraldehyde",
            "1-Phosphoglyceraldehyde",
            "Glycerone",
            "3-Hydroxypropanal",
            "2,3-Dihydroxypropanal",
            "1,2-Dihydroxypropanal",
            "3-Hydroxy-2-oxopropanal",
            "2-Hydroxy-3-oxopropanal",
            "1-Hydroxy-3-oxopropanal",
            "Triose reductone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (GLA)",
                "Dihydroxyacetone (DHA)",
                "1,3-Dihydroxyacetone phosphate (DHAP)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "3-Phosphoglyceraldehyde (3PGA)",
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "Methylglyoxal",
                "Glycerol",
                "Propionaldehyde",
                "Acetol",
                "Hydroxyacetone",
                "Lactaldehyde",
                "3-Hydroxypropionaldehyde",
                "Glyceraldehyde 2-phosphate",
                "2-Phosphoglyceraldehyde",
                "1-Phosphoglyceraldehyde",
                "Glycerone",
                "3-Hydroxypropanal",
                "2,3-Dihydroxypropanal",
                "1,2-Dihydroxypropanal",
                "3-Hydroxy-2-oxopropanal",
                "2-Hydroxy-3-oxopropanal",
                "1-Hydroxy-3-oxopropanal",
                "Triose reductone"
            ],
            "mismatches": [],
            "true_referents": [
                "1,2-Dihydroxypropanal",
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "1,3-Dihydroxyacetone phosphate (DHAP)",
                "1-Hydroxy-3-oxopropanal",
                "1-Phosphoglyceraldehyde",
                "2,3-Dihydroxypropanal",
                "2-Hydroxy-3-oxopropanal",
                "2-Phosphoglyceraldehyde",
                "3-Hydroxy-2-oxopropanal",
                "3-Hydroxypropanal",
                "3-Hydroxypropionaldehyde",
                "3-Phosphoglyceraldehyde (3PGA)",
                "Acetol",
                "Dihydroxyacetone (DHA)",
                "Glyceraldehyde (GLA)",
                "Glyceraldehyde 2-phosphate",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Glycerol",
                "Glycerone",
                "Hydroxyacetone",
                "Lactaldehyde",
                "Methylglyoxal",
                "Propionaldehyde",
                "Triose reductone"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (GA)",
            "Dihydroxyacetone (DHA)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "1,3-Bisphosphoglycerate (1,3-BPG)",
            "Glycerol",
            "Glycerol 3-phosphate",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Triosephosphate isomerase (TPI)",
            "Fructose 1,6-bisphosphate aldolase (ALDO)",
            "Glycerol-3-phosphate dehydrogenase (GPD)",
            "Glycerol kinase (GK)",
            "Triose phosphate translocator (TPT)",
            "Triose phosphate utilization (TPU)",
            "Methylglyoxal",
            "Pyruvaldehyde",
            "Lactaldehyde",
            "Hydroxypyruvaldehyde",
            "Glyceraldehyde-3-phosphate acetyltransferase (GAPN)",
            "Triose-phosphate isomerase deficiency",
            "Glycerol-3-phosphate shuttle",
            "Glycerol-3-phosphate acyltransferase (GPAT)",
            "Glyceraldehyde-derived advanced glycation end-products (AGEs)",
            "Triose model of Alzheimer's disease"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (GA)",
                "Dihydroxyacetone (DHA)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "Glycerol",
                "Glycerol 3-phosphate",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Triosephosphate isomerase (TPI)",
                "Fructose 1,6-bisphosphate aldolase (ALDO)",
                "Glycerol-3-phosphate dehydrogenase (GPD)",
                "Glycerol kinase (GK)",
                "Triose phosphate translocator (TPT)",
                "Triose phosphate utilization (TPU)",
                "Methylglyoxal",
                "Pyruvaldehyde",
                "Lactaldehyde",
                "Hydroxypyruvaldehyde",
                "Glyceraldehyde-3-phosphate acetyltransferase (GAPN)",
                "Triose-phosphate isomerase deficiency",
                "Glycerol-3-phosphate shuttle",
                "Glycerol-3-phosphate acyltransferase (GPAT)",
                "Glyceraldehyde-derived advanced glycation end-products (AGEs)",
                "Triose model of Alzheimer's disease"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Fructose 1,6-bisphosphate aldolase (ALDO)",
                "Glyceraldehyde (GA)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Glyceraldehyde-3-phosphate acetyltransferase (GAPN)",
                "Glyceraldehyde-derived advanced glycation end-products (AGEs)",
                "Glycerol",
                "Glycerol 3-phosphate",
                "Glycerol kinase (GK)",
                "Glycerol-3-phosphate acyltransferase (GPAT)",
                "Glycerol-3-phosphate dehydrogenase (GPD)",
                "Glycerol-3-phosphate shuttle",
                "Hydroxypyruvaldehyde",
                "Lactaldehyde",
                "Methylglyoxal",
                "Pyruvaldehyde",
                "Triose model of Alzheimer's disease",
                "Triose phosphate translocator (TPT)",
                "Triose phosphate utilization (TPU)",
                "Triose-phosphate isomerase deficiency",
                "Triosephosphate isomerase (TPI)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (GA)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Dihydroxyacetone (DHA)",
            "Dihydroxyacetone phosphate (DHAP)",
            "1,3-Bisphosphoglycerate (1,3-BPG)",
            "3-Phosphoglycerate (3-PG)",
            "2-Phosphoglycerate (2-PG)",
            "Phosphoenolpyruvate (PEP)",
            "Methylglyoxal",
            "D-Glyceraldehyde",
            "D-Erythrulose",
            "D-Ribulose",
            "D-Xylulose",
            "L-Glyceraldehyde",
            "L-Erythrulose",
            "L-Ribulose",
            "L-Xylulose",
            "Sedoheptulose 1,7-bisphosphate",
            "Sedoheptulose 7-phosphate",
            "Fructose 1,6-bisphosphate",
            "Fructose 6-phosphate",
            "Glucose 6-phosphate",
            "Erythrose 4-phosphate",
            "Ribulose 5-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (GA)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "3-Phosphoglycerate (3-PG)",
                "2-Phosphoglycerate (2-PG)",
                "Phosphoenolpyruvate (PEP)",
                "Methylglyoxal",
                "D-Glyceraldehyde",
                "D-Erythrulose",
                "D-Ribulose",
                "D-Xylulose",
                "L-Glyceraldehyde",
                "L-Erythrulose",
                "L-Ribulose",
                "L-Xylulose",
                "Sedoheptulose 1,7-bisphosphate",
                "Sedoheptulose 7-phosphate",
                "Fructose 1,6-bisphosphate",
                "Fructose 6-phosphate",
                "Glucose 6-phosphate",
                "Erythrose 4-phosphate",
                "Ribulose 5-phosphate"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "2-Phosphoglycerate (2-PG)",
                "3-Phosphoglycerate (3-PG)",
                "D-Erythrulose",
                "D-Glyceraldehyde",
                "D-Ribulose",
                "D-Xylulose",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose 4-phosphate",
                "Fructose 1,6-bisphosphate",
                "Fructose 6-phosphate",
                "Glucose 6-phosphate",
                "Glyceraldehyde (GA)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "L-Erythrulose",
                "L-Glyceraldehyde",
                "L-Ribulose",
                "L-Xylulose",
                "Methylglyoxal",
                "Phosphoenolpyruvate (PEP)",
                "Ribulose 5-phosphate",
                "Sedoheptulose 1,7-bisphosphate",
                "Sedoheptulose 7-phosphate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "Glyceraldehyde (Gly)",
            "Dihydroxyacetone (DHA)",
            "Glycerone (Gce)",
            "Glycerol-3-phosphate (G3P)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Erythrulose",
            "Threose",
            "Erythrulose-1-phosphate",
            "Glyceraldehyde-2-phosphate",
            "Glyceraldehyde-1,3-bisphosphate",
            "Glycerol-1-phosphate",
            "Glycerol-2-phosphate",
            "Glycerone-1-phosphate",
            "Glycerone-2-phosphate",
            "Glyceraldehyde-1-phosphate",
            "Glyceraldehyde-2,3-bisphosphate",
            "Glycerol-1,2-bisphosphate",
            "Glycerol-1,3-bisphosphate",
            "Glycerone-1,3-bisphosphate",
            "Glyceraldehyde-1,2,3-trisphosphate",
            "Glycerol-1,2,3-trisphosphate",
            "Glycerone-1,2,3-trisphosphate",
            "Erythrulose-1,4-bisphosphate",
            "Threose-1,4-bisphosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde (Gly)",
                "Dihydroxyacetone (DHA)",
                "Glycerone (Gce)",
                "Glycerol-3-phosphate (G3P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Erythrulose",
                "Threose",
                "Erythrulose-1-phosphate",
                "Glyceraldehyde-2-phosphate",
                "Glyceraldehyde-1,3-bisphosphate",
                "Glycerol-1-phosphate",
                "Glycerol-2-phosphate",
                "Glycerone-1-phosphate",
                "Glycerone-2-phosphate",
                "Glyceraldehyde-1-phosphate",
                "Glyceraldehyde-2,3-bisphosphate",
                "Glycerol-1,2-bisphosphate",
                "Glycerol-1,3-bisphosphate",
                "Glycerone-1,3-bisphosphate",
                "Glyceraldehyde-1,2,3-trisphosphate",
                "Glycerol-1,2,3-trisphosphate",
                "Glycerone-1,2,3-trisphosphate",
                "Erythrulose-1,4-bisphosphate",
                "Threose-1,4-bisphosphate"
            ],
            "mismatches": [],
            "true_referents": [
                "Dihydroxyacetone (DHA)",
                "Erythrulose",
                "Erythrulose-1,4-bisphosphate",
                "Erythrulose-1-phosphate",
                "Glyceraldehyde (Gly)",
                "Glyceraldehyde-1,2,3-trisphosphate",
                "Glyceraldehyde-1,3-bisphosphate",
                "Glyceraldehyde-1-phosphate",
                "Glyceraldehyde-2,3-bisphosphate",
                "Glyceraldehyde-2-phosphate",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Glycerol-1,2,3-trisphosphate",
                "Glycerol-1,2-bisphosphate",
                "Glycerol-1,3-bisphosphate",
                "Glycerol-1-phosphate",
                "Glycerol-2-phosphate",
                "Glycerol-3-phosphate (G3P)",
                "Glycerone (Gce)",
                "Glycerone-1,2,3-trisphosphate",
                "Glycerone-1,3-bisphosphate",
                "Glycerone-1-phosphate",
                "Glycerone-2-phosphate",
                "Threose",
                "Threose-1,4-bisphosphate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "D-Ribulose (Rib)",
            "D-Arabinose (Ara)",
            "D-Xylulose (Xyl)",
            "D-Ructose (Fru)",
            "D-Glucose (Glc)",
            "D-Galactose (Gal)",
            "D-Mannose (Man)",
            "D-Allose (Allo)",
            "D-Talose (Tal)",
            "D-Fructose-3-phosphate",
            "D-Ribulose-5-phosphate",
            "D-Ribose-5-phosphate",
            "D-Xylulose-5-phosphate",
            "D-Glucose-6-phosphate",
            "D-Glucose-1-phosphate",
            "D-Glucose-3-phosphate",
            "D-Glucose-6-phosphatase",
            "D-Ribose-1,5-bisphosphate",
            "D-Ribose-5-phosphate isomerase",
            "D-Xylulose reductase",
            "D-Fructose-1,6-bisphosphate",
            "D-Fructose-2,6-bisphosphate",
            "D-Ribose-1-phosphate",
            "D-Ribose-2,5-bisphosphate",
            "D-Xylulose-1-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Ribulose (Rib)",
                "D-Arabinose (Ara)",
                "D-Xylulose (Xyl)",
                "D-Ructose (Fru)",
                "D-Glucose (Glc)",
                "D-Galactose (Gal)",
                "D-Mannose (Man)",
                "D-Allose (Allo)",
                "D-Talose (Tal)",
                "D-Fructose-3-phosphate",
                "D-Ribulose-5-phosphate",
                "D-Ribose-5-phosphate",
                "D-Xylulose-5-phosphate",
                "D-Glucose-6-phosphate",
                "D-Glucose-1-phosphate",
                "D-Glucose-3-phosphate",
                "D-Glucose-6-phosphatase",
                "D-Ribose-1,5-bisphosphate",
                "D-Ribose-5-phosphate isomerase",
                "D-Xylulose reductase",
                "D-Fructose-1,6-bisphosphate",
                "D-Fructose-2,6-bisphosphate",
                "D-Ribose-1-phosphate",
                "D-Ribose-2,5-bisphosphate"
            ],
            "mismatches": [
                "D-Xylulose-1-phosphate"
            ],
            "true_referents": [
                "D-Allose (Allo)",
                "D-Arabinose (Ara)",
                "D-Fructose-1,6-bisphosphate",
                "D-Fructose-2,6-bisphosphate",
                "D-Fructose-3-phosphate",
                "D-Galactose (Gal)",
                "D-Glucose (Glc)",
                "D-Glucose-1-phosphate",
                "D-Glucose-3-phosphate",
                "D-Glucose-6-phosphatase",
                "D-Glucose-6-phosphate",
                "D-Mannose (Man)",
                "D-Ribose-1,5-bisphosphate",
                "D-Ribose-1-phosphate",
                "D-Ribose-2,5-bisphosphate",
                "D-Ribose-5-phosphate",
                "D-Ribose-5-phosphate isomerase",
                "D-Ribulose (Rib)",
                "D-Ribulose-5-phosphate",
                "D-Ructose (Fru)",
                "D-Talose (Tal)",
                "D-Xylulose (Xyl)",
                "D-Xylulose reductase",
                "D-Xylulose-5-phosphate"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            "D-Glyceraldehyde (GAL)",
            "L-Glyceraldehyde",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Dihydroxyacetone (DHA)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Triosephosphate (TP)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Glyceraldehyde 3-phosphate dehydrogenase (NADP+) (GAPN)",
            "Triosephosphate isomerase (TPI)",
            "Triosephosphate isomerase 1 (TPI1)",
            "Aldolase A (ALDOA)",
            "Aldolase B (ALDOB)",
            "Aldolase C (ALDOC)",
            "Fructose-bisphosphate aldolase (ALDOL)",
            "Glyceraldehyde",
            "D-Glyceraldehyde",
            "L-Glyceraldehyde",
            "Triose sugar",
            "Triose phosphate pathway",
            "Triose phosphate isomerase deficiency",
            "Triosephosphate isomerase 1 deficiency",
            "Glyceraldehyde 3-phosphate dehydrogenase deficiency",
            "Glyceraldehyde 3-phosphate dehydrogenase (NADP+) deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Glyceraldehyde (GAL)",
                "L-Glyceraldehyde",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Triosephosphate (TP)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Glyceraldehyde 3-phosphate dehydrogenase (NADP+) (GAPN)",
                "Triosephosphate isomerase (TPI)",
                "Triosephosphate isomerase 1 (TPI1)",
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Fructose-bisphosphate aldolase (ALDOL)",
                "Glyceraldehyde",
                "Triose sugar",
                "Triose phosphate pathway",
                "Triose phosphate isomerase deficiency",
                "Triosephosphate isomerase 1 deficiency",
                "Glyceraldehyde 3-phosphate dehydrogenase deficiency",
                "Glyceraldehyde 3-phosphate dehydrogenase (NADP+) deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "D-Glyceraldehyde",
                "D-Glyceraldehyde (GAL)",
                "Dihydroxyacetone (DHA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Fructose-bisphosphate aldolase (ALDOL)",
                "Glyceraldehyde",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Glyceraldehyde 3-phosphate dehydrogenase (NADP+) (GAPN)",
                "Glyceraldehyde 3-phosphate dehydrogenase (NADP+) deficiency",
                "Glyceraldehyde 3-phosphate dehydrogenase deficiency",
                "Glyceraldehyde-3-phosphate (G3P)",
                "L-Glyceraldehyde",
                "Triose phosphate isomerase deficiency",
                "Triose phosphate pathway",
                "Triose sugar",
                "Triosephosphate (TP)",
                "Triosephosphate isomerase (TPI)",
                "Triosephosphate isomerase 1 (TPI1)",
                "Triosephosphate isomerase 1 deficiency"
            ],
            "TP": 21,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "triose",
        "domain": "chemistry",
        "response": [
            {
                "Term": "Glyceraldehyde",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedoheptulose 7-phosphate",
                "Abbreviated identifier": "S7P"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            },
            {
                "Term": "Glycerone phosphate",
                "Abbreviated identifier": "G3P"
            },
            {
                "Term": "Dihydroxyacetone phosphate",
                "Abbreviated identifier": "DHAP"
            },
            {
                "Term": "Sedolactose",
                "Abbreviated identifier": "SL"
            },
            {
                "Term": "Erythrose 4-phosphate",
                "Abbreviated identifier": "E4P"
            },
            {
                "Term": "Fructose 6-phosphate",
                "Abbreviated identifier": "F6P"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glyceraldehyde",
                "Dihydroxyacetone phosphate",
                "Sedoheptulose 7-phosphate",
                "Erythrose 4-phosphate",
                "Fructose 6-phosphate",
                "Glycerone phosphate",
                "Sedolactose"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Term\": \"Dihydroxyacetone phosphate\", \"Abbreviated identifier\": \"DHAP\"}",
                "{\"Term\": \"Erythrose 4-phosphate\", \"Abbreviated identifier\": \"E4P\"}",
                "{\"Term\": \"Fructose 6-phosphate\", \"Abbreviated identifier\": \"F6P\"}",
                "{\"Term\": \"Glyceraldehyde\", \"Abbreviated identifier\": \"G3P\"}",
                "{\"Term\": \"Glycerone phosphate\", \"Abbreviated identifier\": \"G3P\"}",
                "{\"Term\": \"Sedoheptulose 7-phosphate\", \"Abbreviated identifier\": \"S7P\"}",
                "{\"Term\": \"Sedolactose\", \"Abbreviated identifier\": \"SL\"}"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 0
        }
    }
]