[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Ribulose 5-phosphate (R5P)",
            "Xylulose 5-phosphate (X5P)",
            "Ribose 5-phosphate (R5P)",
            "Erythrose 4-phosphate (E4P)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Sedoheptulose 7-phosphate (S7P)",
            "D-fructose 6-phosphate",
            "D-glyceraldehyde 3-phosphate",
            "D-ribose 5-phosphate",
            "D-xylulose 5-phosphate",
            "D-ribulose 5-phosphate",
            "D-arabinose 5-phosphate",
            "Erythrulose 4-phosphate",
            "D-sedoheptulose 7-phosphate",
            "D-ribulose 5-phosphate 3-epimerase",
            "D-arabinose 5-phosphate isomerase",
            "Transketolase",
            "Transaldolase",
            "D-fructose 6-phosphate 1-phosphatase",
            "D-fructose 6-phosphate kinase",
            "D-ribulose 5-phosphate 4-epimerase",
            "D-xylulose 5-phosphate reductoisomerase",
            "D-ribulose 5-phosphate 3-epimerase",
            "D-arabinose 5-phosphate isomerase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose 5-phosphate (R5P)",
                "Xylulose 5-phosphate (X5P)",
                "Ribose 5-phosphate (R5P)",
                "Erythrose 4-phosphate (E4P)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Sedoheptulose 7-phosphate (S7P)",
                "D-fructose 6-phosphate",
                "D-glyceraldehyde 3-phosphate",
                "D-ribose 5-phosphate",
                "D-xylulose 5-phosphate",
                "D-ribulose 5-phosphate",
                "D-arabinose 5-phosphate",
                "Erythrulose 4-phosphate",
                "D-sedoheptulose 7-phosphate",
                "D-ribulose 5-phosphate 3-epimerase",
                "D-arabinose 5-phosphate isomerase",
                "Transketolase",
                "Transaldolase",
                "D-fructose 6-phosphate 1-phosphatase",
                "D-fructose 6-phosphate kinase",
                "D-ribulose 5-phosphate 4-epimerase",
                "D-xylulose 5-phosphate reductoisomerase"
            ],
            "mismatches": [],
            "true_referents": [
                "D-arabinose 5-phosphate",
                "D-arabinose 5-phosphate isomerase",
                "D-fructose 6-phosphate",
                "D-fructose 6-phosphate 1-phosphatase",
                "D-fructose 6-phosphate kinase",
                "D-glyceraldehyde 3-phosphate",
                "D-ribose 5-phosphate",
                "D-ribulose 5-phosphate",
                "D-ribulose 5-phosphate 3-epimerase",
                "D-ribulose 5-phosphate 4-epimerase",
                "D-sedoheptulose 7-phosphate",
                "D-xylulose 5-phosphate",
                "D-xylulose 5-phosphate reductoisomerase",
                "Erythrose 4-phosphate (E4P)",
                "Erythrulose 4-phosphate",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Ribose 5-phosphate (R5P)",
                "Ribulose 5-phosphate (R5P)",
                "Sedoheptulose 7-phosphate (S7P)",
                "Transaldolase",
                "Transketolase",
                "Xylulose 5-phosphate (X5P)"
            ],
            "TP": 22,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Ribulose-5-phosphate (Ru5P)",
            "Ribose-5-phosphate (R5P)",
            "Xylulose-5-phosphate (Xu5P)",
            "Sedoheptulose-7-phosphate (S7P)",
            "Erythrose-4-phosphate (E4P)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Fructose-6-phosphate (F6P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose-5-phosphate (Ru5P)",
                "Ribose-5-phosphate (R5P)",
                "Xylulose-5-phosphate (Xu5P)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Erythrose-4-phosphate (E4P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Fructose-6-phosphate (F6P)"
            ],
            "mismatches": [],
            "true_referents": [
                "Erythrose-4-phosphate (E4P)",
                "Fructose-6-phosphate (F6P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Ribose-5-phosphate (R5P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Xylulose-5-phosphate (Xu5P)"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Ribose 5-phosphate (R5P)",
            "Ribulose 5-phosphate (Ru5P)",
            "Xylulose 5-phosphate (Xu5P)",
            "Sedoheptulose 7-phosphate (S7P)",
            "6-Phosphogluconolactone",
            "6-Phosphogluconate",
            "Fructose 6-phosphate (F6P)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Erythrose 4-phosphate (E4P)",
            "Glyceraldehyde",
            "Dihydroxyacetone phosphate (DHAP)",
            "Sedoheptulose 1,7-bisphosphate",
            "Transketolase",
            "Transaldolase",
            "Glucose 6-phosphate (G6P)",
            "6-Phosphogluconate dehydrogenase",
            "Glucose-6-phosphate dehydrogenase (G6PD)",
            "Phosphogluconolactonase",
            "6-Phosphogluconate dehydrogenase",
            "Ribulose 5-phosphate 3-epimerase",
            "Ribose 5-phosphate isomerase",
            "Transketolase",
            "Transaldolase",
            "Phosphoribosyl pyrophosphate (PRPP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose 5-phosphate (R5P)",
                "Ribulose 5-phosphate (Ru5P)",
                "Xylulose 5-phosphate (Xu5P)",
                "Sedoheptulose 7-phosphate (S7P)",
                "6-Phosphogluconolactone",
                "6-Phosphogluconate",
                "Fructose 6-phosphate (F6P)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Erythrose 4-phosphate (E4P)",
                "Glyceraldehyde",
                "Dihydroxyacetone phosphate (DHAP)",
                "Sedoheptulose 1,7-bisphosphate",
                "Transketolase",
                "Transaldolase",
                "Glucose 6-phosphate (G6P)",
                "6-Phosphogluconate dehydrogenase",
                "Glucose-6-phosphate dehydrogenase (G6PD)",
                "Phosphogluconolactonase",
                "Ribulose 5-phosphate 3-epimerase",
                "Ribose 5-phosphate isomerase",
                "Phosphoribosyl pyrophosphate (PRPP)"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphogluconate",
                "6-Phosphogluconate dehydrogenase",
                "6-Phosphogluconolactone",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose 4-phosphate (E4P)",
                "Fructose 6-phosphate (F6P)",
                "Glucose 6-phosphate (G6P)",
                "Glucose-6-phosphate dehydrogenase (G6PD)",
                "Glyceraldehyde",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Phosphogluconolactonase",
                "Phosphoribosyl pyrophosphate (PRPP)",
                "Ribose 5-phosphate (R5P)",
                "Ribose 5-phosphate isomerase",
                "Ribulose 5-phosphate (Ru5P)",
                "Ribulose 5-phosphate 3-epimerase",
                "Sedoheptulose 1,7-bisphosphate",
                "Sedoheptulose 7-phosphate (S7P)",
                "Transaldolase",
                "Transketolase",
                "Xylulose 5-phosphate (Xu5P)"
            ],
            "TP": 21,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Glucose-6-phosphate (G6P)",
            "6-Phosphoglucono-\u03b4-lactone (6-PGL)",
            "Ribulose-5-phosphate (Ru5P)",
            "Xylulose-5-phosphate (Xu5P)",
            "Ribose-5-phosphate (R5P)",
            "Fructose-6-phosphate (F6P)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Erythrose-4-phosphate (E4P)",
            "Sedoheptulose-1,7-bisphosphate (S7BP)",
            "Fructose-1,6-bisphosphate (F1,6BP)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Phosphoglycerate (PGA)",
            "3-Phosphoglycerate (3PG)",
            "1,3-Bisphosphoglycerate (1,3BPG)",
            "Phosphoenolpyruvate (PEP)",
            "Pyruvate (Pyr)",
            "Acetyl-CoA (AcCoA)",
            "NADP+ (NADP)",
            "NADPH (NADPH)",
            "Glucose (Glc)",
            "Fructose (Frc)",
            "Mannose (Man)",
            "Galactose (Gal)",
            "Xylose (Xyl)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose-6-phosphate (G6P)",
                "6-Phosphoglucono-\u03b4-lactone (6-PGL)",
                "Ribulose-5-phosphate (Ru5P)",
                "Xylulose-5-phosphate (Xu5P)",
                "Ribose-5-phosphate (R5P)",
                "Fructose-6-phosphate (F6P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Erythrose-4-phosphate (E4P)",
                "Sedoheptulose-1,7-bisphosphate (S7BP)",
                "Fructose-1,6-bisphosphate (F1,6BP)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Phosphoglycerate (PGA)",
                "3-Phosphoglycerate (3PG)",
                "1,3-Bisphosphoglycerate (1,3BPG)",
                "Phosphoenolpyruvate (PEP)",
                "Pyruvate (Pyr)",
                "Acetyl-CoA (AcCoA)",
                "NADP+ (NADP)",
                "NADPH (NADPH)",
                "Glucose (Glc)",
                "Fructose (Frc)",
                "Mannose (Man)",
                "Galactose (Gal)",
                "Xylose (Xyl)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Bisphosphoglycerate (1,3BPG)",
                "3-Phosphoglycerate (3PG)",
                "6-Phosphoglucono-\u03b4-lactone (6-PGL)",
                "Acetyl-CoA (AcCoA)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose-4-phosphate (E4P)",
                "Fructose (Frc)",
                "Fructose-1,6-bisphosphate (F1,6BP)",
                "Fructose-6-phosphate (F6P)",
                "Galactose (Gal)",
                "Glucose (Glc)",
                "Glucose-6-phosphate (G6P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Mannose (Man)",
                "NADP+ (NADP)",
                "NADPH (NADPH)",
                "Phosphoenolpyruvate (PEP)",
                "Phosphoglycerate (PGA)",
                "Pyruvate (Pyr)",
                "Ribose-5-phosphate (R5P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Sedoheptulose-1,7-bisphosphate (S7BP)",
                "Xylose (Xyl)",
                "Xylulose-5-phosphate (Xu5P)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "6-Phosphogluconolactone (6PGL)",
            "6-Phosphogluconate (6PG)",
            "Ribulose-5-phosphate (Ru5P)",
            "Ribose-5-phosphate (R5P)",
            "Xylulose-5-phosphate (Xu5P)",
            "Sedoheptulose-7-phosphate (S7P)",
            "Erythrose-4-phosphate (E4P)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Fructose-6-phosphate (F6P)",
            "NADPH",
            "NADP+",
            "CO2",
            "ATP",
            "ADP",
            "Pi (Inorganic Phosphate)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Gluconate",
            "Gluconolactone",
            "Phosphoribosyl pyrophosphate (PRPP)",
            "Sedoheptulose",
            "Erythrose",
            "Ribulose",
            "Ribose",
            "Xylulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6-Phosphogluconolactone (6PGL)",
                "6-Phosphogluconate (6PG)",
                "Ribulose-5-phosphate (Ru5P)",
                "Ribose-5-phosphate (R5P)",
                "Xylulose-5-phosphate (Xu5P)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Erythrose-4-phosphate (E4P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Fructose-6-phosphate (F6P)",
                "NADPH",
                "NADP+",
                "CO2",
                "ATP",
                "ADP",
                "Pi (Inorganic Phosphate)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Gluconate",
                "Gluconolactone",
                "Phosphoribosyl pyrophosphate (PRPP)",
                "Sedoheptulose",
                "Erythrose",
                "Ribulose",
                "Ribose",
                "Xylulose"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphogluconate (6PG)",
                "6-Phosphogluconolactone (6PGL)",
                "ADP",
                "ATP",
                "CO2",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose",
                "Erythrose-4-phosphate (E4P)",
                "Fructose-6-phosphate (F6P)",
                "Gluconate",
                "Gluconolactone",
                "Glyceraldehyde-3-phosphate (G3P)",
                "NADP+",
                "NADPH",
                "Phosphoribosyl pyrophosphate (PRPP)",
                "Pi (Inorganic Phosphate)",
                "Ribose",
                "Ribose-5-phosphate (R5P)",
                "Ribulose",
                "Ribulose-5-phosphate (Ru5P)",
                "Sedoheptulose",
                "Sedoheptulose-7-phosphate (S7P)",
                "Xylulose",
                "Xylulose-5-phosphate (Xu5P)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Ribulose-5-phosphate (Ru5P)",
            "Ribose-5-phosphate (R5P)",
            "Xylulose-5-phosphate (Xu5P)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Fructose-6-phosphate (F6P)",
            "Erythrose-4-phosphate (E4P)",
            "Sedulose-7-phosphate (S7P)",
            "Transaldolase product (TAP)",
            "Transketolase product (TKP)",
            "D-ribulose (D-Ribulose)",
            "D-xylulose (D-Xylulose)",
            "D-erythrose (D-Erythrose)",
            "D-glyceraldehyde (D-Glyceraldehyde)",
            "D-fructose (D-Fructose)",
            "D-sedoheptulose (D-Sedoheptulose)",
            "D-xylose (D-Xylose)",
            "D-ribose (D-Ribose)",
            "D-sedoheptulose-7-phosphate (D-S7P)",
            "D-xylulose-5-phosphate (D-Xu5P)",
            "D-ribulose-5-phosphate (D-Ru5P)",
            "D-erythrose-4-phosphate (D-E4P)",
            "D-fructose-6-phosphate (D-F6P)",
            "D-glyceraldehyde-3-phosphate (D-G3P)",
            "D-sedoheptulose-7-phosphate (D-S7P)",
            "D-xylulose-5-phosphate (D-Xu5P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose-5-phosphate (Ru5P)",
                "Ribose-5-phosphate (R5P)",
                "Xylulose-5-phosphate (Xu5P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Fructose-6-phosphate (F6P)",
                "Erythrose-4-phosphate (E4P)",
                "Sedulose-7-phosphate (S7P)",
                "Transaldolase product (TAP)",
                "Transketolase product (TKP)",
                "D-ribulose (D-Ribulose)",
                "D-xylulose (D-Xylulose)",
                "D-erythrose (D-Erythrose)",
                "D-glyceraldehyde (D-Glyceraldehyde)",
                "D-fructose (D-Fructose)",
                "D-sedoheptulose (D-Sedoheptulose)",
                "D-xylose (D-Xylose)",
                "D-ribose (D-Ribose)",
                "D-sedoheptulose-7-phosphate (D-S7P)",
                "D-xylulose-5-phosphate (D-Xu5P)",
                "D-ribulose-5-phosphate (D-Ru5P)",
                "D-erythrose-4-phosphate (D-E4P)",
                "D-fructose-6-phosphate (D-F6P)",
                "D-glyceraldehyde-3-phosphate (D-G3P)"
            ],
            "mismatches": [],
            "true_referents": [
                "D-erythrose (D-Erythrose)",
                "D-erythrose-4-phosphate (D-E4P)",
                "D-fructose (D-Fructose)",
                "D-fructose-6-phosphate (D-F6P)",
                "D-glyceraldehyde (D-Glyceraldehyde)",
                "D-glyceraldehyde-3-phosphate (D-G3P)",
                "D-ribose (D-Ribose)",
                "D-ribulose (D-Ribulose)",
                "D-ribulose-5-phosphate (D-Ru5P)",
                "D-sedoheptulose (D-Sedoheptulose)",
                "D-sedoheptulose-7-phosphate (D-S7P)",
                "D-xylose (D-Xylose)",
                "D-xylulose (D-Xylulose)",
                "D-xylulose-5-phosphate (D-Xu5P)",
                "Erythrose-4-phosphate (E4P)",
                "Fructose-6-phosphate (F6P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Ribose-5-phosphate (R5P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Sedulose-7-phosphate (S7P)",
                "Transaldolase product (TAP)",
                "Transketolase product (TKP)",
                "Xylulose-5-phosphate (Xu5P)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Glucose-6-phosphate (G6P)",
            "6-Phosphoglucono-\u03b4-lactone (6PGL)",
            "6-Phosphogluconate (6PG)",
            "Ribulose 5-phosphate (Ru5P)",
            "Xylulose 5-phosphate (Xu5P)",
            "Ribose 5-phosphate (R5P)",
            "Sedoheptulose 7-phosphate (S7P)",
            "Erythrose 4-phosphate (E4P)",
            "Fructose 6-phosphate (F6P)",
            "Glyceraldehyde 3-phosphate (G3P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "1,3-Bisphosphoglycerate (1,3-BPG)",
            "3-Phosphoglycerate (3PG)",
            "2-Phosphoglycerate (2PG)",
            "Phosphoenolpyruvate (PEP)",
            "Pyruvate (Pyr)",
            "Citrate (Cit)",
            "Isocitrate (Isocit)",
            "\u03b1-Ketoglutarate (\u03b1-KG)",
            "Succinyl-CoA (Succ-CoA)",
            "Succinate (Succ)",
            "Fumarate (Fum)",
            "Malate (Mal)",
            "Oxaloacetate (OAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose-6-phosphate (G6P)",
                "6-Phosphoglucono-\u03b4-lactone (6PGL)",
                "6-Phosphogluconate (6PG)",
                "Ribulose 5-phosphate (Ru5P)",
                "Xylulose 5-phosphate (Xu5P)",
                "Ribose 5-phosphate (R5P)",
                "Sedoheptulose 7-phosphate (S7P)",
                "Erythrose 4-phosphate (E4P)",
                "Fructose 6-phosphate (F6P)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "3-Phosphoglycerate (3PG)",
                "2-Phosphoglycerate (2PG)",
                "Phosphoenolpyruvate (PEP)",
                "Pyruvate (Pyr)",
                "Citrate (Cit)",
                "Isocitrate (Isocit)",
                "\u03b1-Ketoglutarate (\u03b1-KG)",
                "Succinyl-CoA (Succ-CoA)",
                "Succinate (Succ)",
                "Fumarate (Fum)",
                "Malate (Mal)",
                "Oxaloacetate (OAA)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Bisphosphoglycerate (1,3-BPG)",
                "2-Phosphoglycerate (2PG)",
                "3-Phosphoglycerate (3PG)",
                "6-Phosphogluconate (6PG)",
                "6-Phosphoglucono-\u03b4-lactone (6PGL)",
                "Citrate (Cit)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose 4-phosphate (E4P)",
                "Fructose 6-phosphate (F6P)",
                "Fumarate (Fum)",
                "Glucose-6-phosphate (G6P)",
                "Glyceraldehyde 3-phosphate (G3P)",
                "Isocitrate (Isocit)",
                "Malate (Mal)",
                "Oxaloacetate (OAA)",
                "Phosphoenolpyruvate (PEP)",
                "Pyruvate (Pyr)",
                "Ribose 5-phosphate (R5P)",
                "Ribulose 5-phosphate (Ru5P)",
                "Sedoheptulose 7-phosphate (S7P)",
                "Succinate (Succ)",
                "Succinyl-CoA (Succ-CoA)",
                "Xylulose 5-phosphate (Xu5P)",
                "\u03b1-Ketoglutarate (\u03b1-KG)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Glucose-6-phosphate",
            "Ribulose-5-phosphate",
            "Xylulose-5-phosphate",
            "Sedoheptulose-7-phosphate",
            "Erythrose-4-phosphate",
            "Fructose-6-phosphate",
            "Glyceraldehyde-3-phosphate",
            "Dihydroxyacetone phosphate",
            "6-Phosphogluconate",
            "Ribose-5-phosphate",
            "Xylulose-5-phosphate",
            "Sedoheptulose-7-phosphate",
            "Erythrose-4-phosphate",
            "Fructose-6-phosphate",
            "Glyceraldehyde-3-phosphate",
            "Dihydroxyacetone phosphate",
            "6-Phosphogluconate",
            "Ribose-5-phosphate",
            "Xylulose-5-phosphate",
            "Sedoheptulose-7-phosphate",
            "Erythrose-4-phosphate",
            "Fructose-6-phosphate",
            "Glyceraldehyde-3-phosphate",
            "Dihydroxyacetone phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose-6-phosphate",
                "Ribulose-5-phosphate",
                "Xylulose-5-phosphate",
                "Sedoheptulose-7-phosphate",
                "Erythrose-4-phosphate",
                "Fructose-6-phosphate",
                "Glyceraldehyde-3-phosphate",
                "Dihydroxyacetone phosphate",
                "6-Phosphogluconate",
                "Ribose-5-phosphate"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphogluconate",
                "Dihydroxyacetone phosphate",
                "Erythrose-4-phosphate",
                "Fructose-6-phosphate",
                "Glucose-6-phosphate",
                "Glyceraldehyde-3-phosphate",
                "Ribose-5-phosphate",
                "Ribulose-5-phosphate",
                "Sedoheptulose-7-phosphate",
                "Xylulose-5-phosphate"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Glucose-6-phosphate (G6P)",
            "6-Phosphogluconolactone (6-PGL)",
            "6-Phosphogluconate (6-PG)",
            "Ribulose-5-phosphate (Ru5P)",
            "Ribose-5-phosphate (R5P)",
            "Xylulose-5-phosphate (Xu5P)",
            "Sedoheptulose-7-phosphate (S7P)",
            "Erythrose-4-phosphate (E4P)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Fructose-6-phosphate (F6P)",
            "Sedoheptulose-1,7-bisphosphate",
            "Transketolase-bound intermediates",
            "Transaldolase-bound intermediates",
            "Dihydroxyacetone phosphate (DHAP)",
            "Glycolaldehyde phosphate",
            "Sedoheptulose-1-phosphate",
            "Fructose-1,6-bisphosphate",
            "Erythrose-5-phosphate",
            "Xylulose-1-phosphate",
            "Sedoseptulose-7-phosphate",
            "Sedoheptulose-7-phosphate",
            "Phosphoribosyl pyrophosphate (PRPP)",
            "Phosphoribulose-5-phosphate",
            "Phosphogluconolactone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose-6-phosphate (G6P)",
                "6-Phosphogluconolactone (6-PGL)",
                "6-Phosphogluconate (6-PG)",
                "Ribulose-5-phosphate (Ru5P)",
                "Ribose-5-phosphate (R5P)",
                "Xylulose-5-phosphate (Xu5P)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Erythrose-4-phosphate (E4P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Fructose-6-phosphate (F6P)",
                "Sedoheptulose-1,7-bisphosphate",
                "Transketolase-bound intermediates",
                "Transaldolase-bound intermediates",
                "Dihydroxyacetone phosphate (DHAP)",
                "Glycolaldehyde phosphate",
                "Sedoheptulose-1-phosphate",
                "Fructose-1,6-bisphosphate",
                "Erythrose-5-phosphate",
                "Xylulose-1-phosphate",
                "Sedoseptulose-7-phosphate",
                "Phosphoribosyl pyrophosphate (PRPP)",
                "Phosphoribulose-5-phosphate",
                "Phosphogluconolactone"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphogluconate (6-PG)",
                "6-Phosphogluconolactone (6-PGL)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrose-4-phosphate (E4P)",
                "Erythrose-5-phosphate",
                "Fructose-1,6-bisphosphate",
                "Fructose-6-phosphate (F6P)",
                "Glucose-6-phosphate (G6P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Glycolaldehyde phosphate",
                "Phosphogluconolactone",
                "Phosphoribosyl pyrophosphate (PRPP)",
                "Phosphoribulose-5-phosphate",
                "Ribose-5-phosphate (R5P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Sedoheptulose-1,7-bisphosphate",
                "Sedoheptulose-1-phosphate",
                "Sedoheptulose-7-phosphate",
                "Sedoheptulose-7-phosphate (S7P)",
                "Sedoseptulose-7-phosphate",
                "Transaldolase-bound intermediates",
                "Transketolase-bound intermediates",
                "Xylulose-1-phosphate",
                "Xylulose-5-phosphate (Xu5P)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Glucose 6-phosphate (G6P)",
            "6-Phosphogluconolactone",
            "6-Phosphogluconate",
            "Ribulose 5-phosphate",
            "Ribose 5-phosphate (R5P)",
            "Xylulose 5-phosphate (X5P)",
            "Sedoheptulose 7-phosphate (S7P)",
            "Erythrose 4-phosphate (E4P)",
            "Glyceraldehyde 3-phosphate (GAP)",
            "Fructose 6-phosphate (F6P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose 6-phosphate (G6P)",
                "6-Phosphogluconolactone",
                "6-Phosphogluconate",
                "Ribulose 5-phosphate",
                "Ribose 5-phosphate (R5P)",
                "Xylulose 5-phosphate (X5P)",
                "Sedoheptulose 7-phosphate (S7P)",
                "Erythrose 4-phosphate (E4P)",
                "Glyceraldehyde 3-phosphate (GAP)",
                "Fructose 6-phosphate (F6P)"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphogluconate",
                "6-Phosphogluconolactone",
                "Erythrose 4-phosphate (E4P)",
                "Fructose 6-phosphate (F6P)",
                "Glucose 6-phosphate (G6P)",
                "Glyceraldehyde 3-phosphate (GAP)",
                "Ribose 5-phosphate (R5P)",
                "Ribulose 5-phosphate",
                "Sedoheptulose 7-phosphate (S7P)",
                "Xylulose 5-phosphate (X5P)"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "6-Phosphogluconic acid",
            "Ribulose 5-phosphate",
            "Ribose 5-phosphate",
            "Xylulose 5-phosphate",
            "Sedoheptulose 7-phosphate",
            "Erythrose 4-phosphate",
            "Glyceraldehyde 3-phosphate (GA3P)",
            "Fructose 6-phosphate",
            "Glucose 6-phosphate (G6P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6-Phosphogluconic acid",
                "Ribulose 5-phosphate",
                "Ribose 5-phosphate",
                "Xylulose 5-phosphate",
                "Sedoheptulose 7-phosphate",
                "Erythrose 4-phosphate",
                "Glyceraldehyde 3-phosphate (GA3P)",
                "Fructose 6-phosphate",
                "Glucose 6-phosphate (G6P)"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphogluconic acid",
                "Erythrose 4-phosphate",
                "Fructose 6-phosphate",
                "Glucose 6-phosphate (G6P)",
                "Glyceraldehyde 3-phosphate (GA3P)",
                "Ribose 5-phosphate",
                "Ribulose 5-phosphate",
                "Sedoheptulose 7-phosphate",
                "Xylulose 5-phosphate"
            ],
            "TP": 9,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Glucose-6-phosphate (G6P)",
            "6-Phosphogluconolactone",
            "6-Phosphogluconate",
            "Ribulose-5-phosphate",
            "Ribose-5-phosphate",
            "Xylulose-5-phosphate",
            "Sedoheptulose-7-phosphate",
            "Erythrose-4-phosphate",
            "Fructose-6-phosphate",
            "Glyceraldehyde-3-phosphate",
            "Dihydroxyacetone phosphate",
            "2-Keto-3-deoxy-6-phosphogluconate",
            "Gluconolactone",
            "Gluconate",
            "Ribulose-1,5-bisphosphate",
            "Xylulose-1,5-bisphosphate",
            "1,3-Bisphospho-D-glycerate",
            "3-Phosphoglycerate",
            "2-Phosphoglycerate",
            "Phosphoenolpyruvate",
            "6-Phosphogluconate dehydrogenase",
            "Glucose-6-phosphate dehydrogenase",
            "Transketolase",
            "Transaldolase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose-6-phosphate (G6P)",
                "6-Phosphogluconolactone",
                "6-Phosphogluconate",
                "Ribulose-5-phosphate",
                "Ribose-5-phosphate",
                "Xylulose-5-phosphate",
                "Sedoheptulose-7-phosphate",
                "Erythrose-4-phosphate",
                "Fructose-6-phosphate",
                "Glyceraldehyde-3-phosphate",
                "Dihydroxyacetone phosphate",
                "2-Keto-3-deoxy-6-phosphogluconate",
                "Gluconolactone",
                "Gluconate",
                "Ribulose-1,5-bisphosphate",
                "Xylulose-1,5-bisphosphate",
                "1,3-Bisphospho-D-glycerate",
                "3-Phosphoglycerate",
                "2-Phosphoglycerate",
                "Phosphoenolpyruvate",
                "6-Phosphogluconate dehydrogenase",
                "Glucose-6-phosphate dehydrogenase",
                "Transketolase",
                "Transaldolase"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Bisphospho-D-glycerate",
                "2-Keto-3-deoxy-6-phosphogluconate",
                "2-Phosphoglycerate",
                "3-Phosphoglycerate",
                "6-Phosphogluconate",
                "6-Phosphogluconate dehydrogenase",
                "6-Phosphogluconolactone",
                "Dihydroxyacetone phosphate",
                "Erythrose-4-phosphate",
                "Fructose-6-phosphate",
                "Gluconate",
                "Gluconolactone",
                "Glucose-6-phosphate (G6P)",
                "Glucose-6-phosphate dehydrogenase",
                "Glyceraldehyde-3-phosphate",
                "Phosphoenolpyruvate",
                "Ribose-5-phosphate",
                "Ribulose-1,5-bisphosphate",
                "Ribulose-5-phosphate",
                "Sedoheptulose-7-phosphate",
                "Transaldolase",
                "Transketolase",
                "Xylulose-1,5-bisphosphate",
                "Xylulose-5-phosphate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Glucose-6-phosphate (G6P)",
            "Fructose-6-phosphate (F6P)",
            "Ribose-5-phosphate (R5P)",
            "Ribulose-5-phosphate (Ru5P)",
            "Xylulose-5-phosphate (X5P)",
            "Sedoheptulose-7-phosphate (S7P)",
            "Erythrulose-4-phosphate (E4P)",
            "6-Phosphogluconate (6PG)",
            "2-Keto-3-deoxy-6-phosphogluconate (KDPG)",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Dihydroxyacetone phosphate (DHAP)",
            "Fructose-1,6-bisphosphate (FBP)",
            "Sedoheptulose-1,7-bisphosphate (SBP)",
            "Ribulose-1,5-bisphosphate (RuBP)",
            "Phosphoenolpyruvate (PEP)",
            "Pyruvate (Pyr)",
            "Acetyl-CoA (AcCoA)",
            "Oxaloacetate (OAA)",
            "Citrate (Cit)",
            "Isocitrate (ICit)",
            "\u03b1-Ketoglutarate (\u03b1KG)",
            "Succinyl-CoA (SucCoA)",
            "Succinate (Suc)",
            "Fumarate (Fum)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose-6-phosphate (G6P)",
                "Fructose-6-phosphate (F6P)",
                "Ribose-5-phosphate (R5P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Xylulose-5-phosphate (X5P)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Erythrulose-4-phosphate (E4P)",
                "6-Phosphogluconate (6PG)",
                "2-Keto-3-deoxy-6-phosphogluconate (KDPG)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Fructose-1,6-bisphosphate (FBP)",
                "Sedoheptulose-1,7-bisphosphate (SBP)",
                "Ribulose-1,5-bisphosphate (RuBP)",
                "Phosphoenolpyruvate (PEP)",
                "Pyruvate (Pyr)",
                "Acetyl-CoA (AcCoA)",
                "Oxaloacetate (OAA)",
                "Citrate (Cit)",
                "Isocitrate (ICit)",
                "\u03b1-Ketoglutarate (\u03b1KG)",
                "Succinyl-CoA (SucCoA)",
                "Succinate (Suc)",
                "Fumarate (Fum)"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Keto-3-deoxy-6-phosphogluconate (KDPG)",
                "6-Phosphogluconate (6PG)",
                "Acetyl-CoA (AcCoA)",
                "Citrate (Cit)",
                "Dihydroxyacetone phosphate (DHAP)",
                "Erythrulose-4-phosphate (E4P)",
                "Fructose-1,6-bisphosphate (FBP)",
                "Fructose-6-phosphate (F6P)",
                "Fumarate (Fum)",
                "Glucose-6-phosphate (G6P)",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Isocitrate (ICit)",
                "Oxaloacetate (OAA)",
                "Phosphoenolpyruvate (PEP)",
                "Pyruvate (Pyr)",
                "Ribose-5-phosphate (R5P)",
                "Ribulose-1,5-bisphosphate (RuBP)",
                "Ribulose-5-phosphate (Ru5P)",
                "Sedoheptulose-1,7-bisphosphate (SBP)",
                "Sedoheptulose-7-phosphate (S7P)",
                "Succinate (Suc)",
                "Succinyl-CoA (SucCoA)",
                "Xylulose-5-phosphate (X5P)",
                "\u03b1-Ketoglutarate (\u03b1KG)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "Ribose-5-phosphate (R5P)",
            "Ribulose-5-phosphate (Ru5P)",
            "Xylulose-5-phosphate (X5P)",
            "Erythrose-4-phosphate (E4P)",
            "Fructose-6-phosphate (F6P)",
            "Glucose-6-phosphate (G6P)",
            "Glucose-1-phosphate (G1P)",
            "Galactose-1-phosphate (G1P)",
            "Galactose-6-phosphate (G6P)",
            "Ribose-1-phosphate (R1P)",
            "Ribose-2-phosphate (R2P)",
            "Ribose-3-phosphate (R3P)",
            "Ribose-4-phosphate (R4P)",
            "Ribose-6-phosphate (R6P)",
            "Ribulose-1-phosphate (Ru1P)",
            "Ribulose-2-phosphate (Ru2P)",
            "Ribulose-3-phosphate (Ru3P)",
            "Ribulose-4-phosphate (Ru4P)",
            "Ribulose-6-phosphate (Ru6P)",
            "Xylulose-1-phosphate (X1P)",
            "Xylulose-2-phosphate (X2P)",
            "Xylulose-3-phosphate (X3P)",
            "Xylulose-4-phosphate (X4P)",
            "Xylulose-6-phosphate (X6P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose-5-phosphate (R5P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Xylulose-5-phosphate (X5P)",
                "Erythrose-4-phosphate (E4P)",
                "Fructose-6-phosphate (F6P)",
                "Glucose-6-phosphate (G6P)",
                "Glucose-1-phosphate (G1P)",
                "Galactose-1-phosphate (G1P)",
                "Galactose-6-phosphate (G6P)",
                "Ribose-1-phosphate (R1P)",
                "Ribose-2-phosphate (R2P)",
                "Ribose-3-phosphate (R3P)",
                "Ribose-4-phosphate (R4P)",
                "Ribose-6-phosphate (R6P)",
                "Ribulose-1-phosphate (Ru1P)",
                "Ribulose-2-phosphate (Ru2P)",
                "Ribulose-3-phosphate (Ru3P)",
                "Ribulose-4-phosphate (Ru4P)",
                "Ribulose-6-phosphate (Ru6P)",
                "Xylulose-1-phosphate (X1P)",
                "Xylulose-2-phosphate (X2P)",
                "Xylulose-3-phosphate (X3P)",
                "Xylulose-4-phosphate (X4P)",
                "Xylulose-6-phosphate (X6P)"
            ],
            "mismatches": [],
            "true_referents": [
                "Erythrose-4-phosphate (E4P)",
                "Fructose-6-phosphate (F6P)",
                "Galactose-1-phosphate (G1P)",
                "Galactose-6-phosphate (G6P)",
                "Glucose-1-phosphate (G1P)",
                "Glucose-6-phosphate (G6P)",
                "Ribose-1-phosphate (R1P)",
                "Ribose-2-phosphate (R2P)",
                "Ribose-3-phosphate (R3P)",
                "Ribose-4-phosphate (R4P)",
                "Ribose-5-phosphate (R5P)",
                "Ribose-6-phosphate (R6P)",
                "Ribulose-1-phosphate (Ru1P)",
                "Ribulose-2-phosphate (Ru2P)",
                "Ribulose-3-phosphate (Ru3P)",
                "Ribulose-4-phosphate (Ru4P)",
                "Ribulose-5-phosphate (Ru5P)",
                "Ribulose-6-phosphate (Ru6P)",
                "Xylulose-1-phosphate (X1P)",
                "Xylulose-2-phosphate (X2P)",
                "Xylulose-3-phosphate (X3P)",
                "Xylulose-4-phosphate (X4P)",
                "Xylulose-5-phosphate (X5P)",
                "Xylulose-6-phosphate (X6P)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            "D-Ribose-5-phosphate (R5P)",
            "D-Ribulose-5-phosphate (R5P)",
            "D-Xylulose-5-phosphate (X5P)",
            "D-Arabitol-1-phosphate",
            "D-Arabinitol-1-phosphate",
            "D-Ribitol-1-phosphate",
            "D-Ribitol-5-phosphate",
            "D-Arabinose-5-phosphate",
            "2-Deoxy-D-ribose-5-phosphate",
            "D-Ribose-1-phosphate",
            "D-Ribulose-1-phosphate",
            "D-Xylulose-1-phosphate",
            "D-Arabinitol-5-phosphate",
            "6-Phospho-D-glucono-1,5-lactone",
            "6-Phosphogluconate",
            "D-Gluconate-6-phosphate",
            "D-Glucono-1,5-lactone-6-phosphate",
            "D-Gluconic acid-6-phosphate",
            "6-Phosphoglucono-Delta-lactone",
            "6-Phosphoglucono-1,5-lactone",
            "D-Glucono-1,5-lactone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "D-Ribose-5-phosphate (R5P)",
                "D-Ribulose-5-phosphate (R5P)",
                "D-Xylulose-5-phosphate (X5P)",
                "D-Arabitol-1-phosphate",
                "D-Arabinitol-1-phosphate",
                "D-Ribitol-1-phosphate",
                "D-Ribitol-5-phosphate",
                "D-Arabinose-5-phosphate",
                "2-Deoxy-D-ribose-5-phosphate",
                "D-Ribose-1-phosphate",
                "D-Ribulose-1-phosphate",
                "D-Xylulose-1-phosphate",
                "D-Arabinitol-5-phosphate",
                "6-Phospho-D-glucono-1,5-lactone",
                "6-Phosphogluconate",
                "D-Gluconate-6-phosphate",
                "D-Glucono-1,5-lactone-6-phosphate",
                "D-Gluconic acid-6-phosphate",
                "6-Phosphoglucono-Delta-lactone",
                "6-Phosphoglucono-1,5-lactone",
                "D-Glucono-1,5-lactone"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Deoxy-D-ribose-5-phosphate",
                "6-Phospho-D-glucono-1,5-lactone",
                "6-Phosphogluconate",
                "6-Phosphoglucono-1,5-lactone",
                "6-Phosphoglucono-Delta-lactone",
                "D-Arabinitol-1-phosphate",
                "D-Arabinitol-5-phosphate",
                "D-Arabinose-5-phosphate",
                "D-Arabitol-1-phosphate",
                "D-Gluconate-6-phosphate",
                "D-Gluconic acid-6-phosphate",
                "D-Glucono-1,5-lactone",
                "D-Glucono-1,5-lactone-6-phosphate",
                "D-Ribitol-1-phosphate",
                "D-Ribitol-5-phosphate",
                "D-Ribose-1-phosphate",
                "D-Ribose-5-phosphate (R5P)",
                "D-Ribulose-1-phosphate",
                "D-Ribulose-5-phosphate (R5P)",
                "D-Xylulose-1-phosphate",
                "D-Xylulose-5-phosphate (X5P)"
            ],
            "TP": 21,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "pentose phosphate pathway organic intermediate",
        "domain": "biology",
        "response": [
            [
                "Canonical name",
                "ribulose-5-phosphate"
            ],
            [
                "Canonical name",
                "xylulose-5-phosphate"
            ]
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "[\"Canonical name\", \"ribulose-5-phosphate\"]",
                "[\"Canonical name\", \"xylulose-5-phosphate\"]"
            ],
            "mismatches": [],
            "true_referents": [
                "[\"Canonical name\", \"ribulose-5-phosphate\"]",
                "[\"Canonical name\", \"xylulose-5-phosphate\"]"
            ],
            "TP": 2,
            "FP": 0,
            "FN": 0
        }
    }
]