[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "oxaloacetate (OAA)",
            "citrate",
            "cis-aconitate",
            "isocitrate",
            "alpha-ketoglutarate (AKG)",
            "succinyl-CoA",
            "succinate",
            "fumarate",
            "malate",
            "alpha-keto\u620a\u4e8c\u9178",
            "citrato",
            "aconitato",
            "isocitrato",
            "succinil-CoA",
            "fumaro",
            "malato",
            "oxaloac\u00e9tico",
            "citrico",
            "aconitico",
            "isocitrico",
            "succin\u00edlico-CoA",
            "succ\u00ednico",
            "fum\u00e1rico",
            "mal\u00ednico"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "oxaloacetate (OAA)",
                "citrate",
                "cis-aconitate",
                "isocitrate",
                "alpha-ketoglutarate (AKG)",
                "succinyl-CoA",
                "succinate",
                "fumarate",
                "malate",
                "alpha-keto\u620a\u4e8c\u9178",
                "citrato",
                "aconitato",
                "isocitrato",
                "succinil-CoA",
                "fumaro",
                "malato",
                "oxaloac\u00e9tico",
                "citrico",
                "aconitico",
                "isocitrico",
                "succin\u00edlico-CoA",
                "succ\u00ednico",
                "fum\u00e1rico",
                "mal\u00ednico"
            ],
            "mismatches": [],
            "true_referents": [
                "aconitato",
                "aconitico",
                "alpha-ketoglutarate (AKG)",
                "alpha-keto\u620a\u4e8c\u9178",
                "cis-aconitate",
                "citrate",
                "citrato",
                "citrico",
                "fumarate",
                "fumaro",
                "fum\u00e1rico",
                "isocitrate",
                "isocitrato",
                "isocitrico",
                "malate",
                "malato",
                "mal\u00ednico",
                "oxaloacetate (OAA)",
                "oxaloac\u00e9tico",
                "succinate",
                "succinil-CoA",
                "succinyl-CoA",
                "succin\u00edlico-CoA",
                "succ\u00ednico"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (C6H8O7)",
            "Isocitrate (C6H8O7)",
            "\u03b1-Ketoglutarate (C5H6O5)",
            "Succinyl-CoA (C25H38N7O19P3S)",
            "Succinate (C4H6O4)",
            "Fumarate (C4H4O4)",
            "Malate (C4H6O5)",
            "Oxaloacetate (C4H4O5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (C6H8O7)",
                "Isocitrate (C6H8O7)",
                "\u03b1-Ketoglutarate (C5H6O5)",
                "Succinyl-CoA (C25H38N7O19P3S)",
                "Succinate (C4H6O4)",
                "Fumarate (C4H4O4)",
                "Malate (C4H6O5)",
                "Oxaloacetate (C4H4O5)"
            ],
            "mismatches": [],
            "true_referents": [
                "Citrate (C6H8O7)",
                "Fumarate (C4H4O4)",
                "Isocitrate (C6H8O7)",
                "Malate (C4H6O5)",
                "Oxaloacetate (C4H4O5)",
                "Succinate (C4H6O4)",
                "Succinyl-CoA (C25H38N7O19P3S)",
                "\u03b1-Ketoglutarate (C5H6O5)"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (Citric Acid)",
            "Cis-Aconitate",
            "Isocitrate",
            "\u03b1-Ketoglutarate",
            "Succinyl-CoA",
            "Succinate",
            "Fumarate",
            "Malate",
            "Oxaloacetate",
            "Acetyl-CoA",
            "Coenzyme A",
            "NAD+",
            "NADH",
            "FAD",
            "FADH2",
            "GTP",
            "CoA",
            "ADP",
            "ATP",
            "CO2",
            "H2O",
            "Nicotinamide",
            "Riboflavin",
            "Thiamine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (Citric Acid)",
                "Cis-Aconitate",
                "Isocitrate",
                "\u03b1-Ketoglutarate",
                "Succinyl-CoA",
                "Succinate",
                "Fumarate",
                "Malate",
                "Oxaloacetate",
                "Acetyl-CoA",
                "Coenzyme A",
                "NAD+",
                "NADH",
                "FAD",
                "FADH2",
                "GTP",
                "CoA",
                "ADP",
                "ATP",
                "CO2",
                "H2O",
                "Nicotinamide",
                "Riboflavin",
                "Thiamine"
            ],
            "mismatches": [],
            "true_referents": [
                "ADP",
                "ATP",
                "Acetyl-CoA",
                "CO2",
                "Cis-Aconitate",
                "Citrate (Citric Acid)",
                "CoA",
                "Coenzyme A",
                "FAD",
                "FADH2",
                "Fumarate",
                "GTP",
                "H2O",
                "Isocitrate",
                "Malate",
                "NAD+",
                "NADH",
                "Nicotinamide",
                "Oxaloacetate",
                "Riboflavin",
                "Succinate",
                "Succinyl-CoA",
                "Thiamine",
                "\u03b1-Ketoglutarate"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (CIT)",
            "Isocitrate (ISO)",
            "\u03b1-Ketoglutarate (\u03b1-KG)",
            "Succinyl-CoA (SUCOA)",
            "Succinate (SUCC)",
            "Fumarate (FUM)",
            "Malate (MAL)",
            "Oxaloacetate (OAA)",
            "Cis-aconitate (CIS-ACON)",
            "Citryl-CoA (CIT-COA)",
            "Threonyl-CoA (THR-COA)",
            "Glyoxylate (GLY)",
            "Glycolate (GLC)",
            "Methylmalonate semialdehyde (MMSA)",
            "Propionyl-CoA (PROP-COA)",
            "Acetyl-CoA (ACoA)",
            "Succinyl-ACP (SUC-ACP)",
            "Formyl-CoA (FORM-COA)",
            "2-Ketobutyrate (2KB)",
            "\u03b1-Hydroxybutyrate (AHB)",
            "Crotonyl-CoA (CRO-COA)",
            "Butyryl-CoA (BUT-COA)",
            "Glutarate (GLUT)",
            "Adipate (ADIP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (CIT)",
                "Isocitrate (ISO)",
                "\u03b1-Ketoglutarate (\u03b1-KG)",
                "Succinyl-CoA (SUCOA)",
                "Succinate (SUCC)",
                "Fumarate (FUM)",
                "Malate (MAL)",
                "Oxaloacetate (OAA)",
                "Cis-aconitate (CIS-ACON)",
                "Citryl-CoA (CIT-COA)",
                "Threonyl-CoA (THR-COA)",
                "Glyoxylate (GLY)",
                "Glycolate (GLC)",
                "Methylmalonate semialdehyde (MMSA)",
                "Propionyl-CoA (PROP-COA)",
                "Acetyl-CoA (ACoA)",
                "Succinyl-ACP (SUC-ACP)",
                "Formyl-CoA (FORM-COA)",
                "2-Ketobutyrate (2KB)",
                "\u03b1-Hydroxybutyrate (AHB)",
                "Crotonyl-CoA (CRO-COA)",
                "Butyryl-CoA (BUT-COA)",
                "Glutarate (GLUT)",
                "Adipate (ADIP)"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Ketobutyrate (2KB)",
                "Acetyl-CoA (ACoA)",
                "Adipate (ADIP)",
                "Butyryl-CoA (BUT-COA)",
                "Cis-aconitate (CIS-ACON)",
                "Citrate (CIT)",
                "Citryl-CoA (CIT-COA)",
                "Crotonyl-CoA (CRO-COA)",
                "Formyl-CoA (FORM-COA)",
                "Fumarate (FUM)",
                "Glutarate (GLUT)",
                "Glycolate (GLC)",
                "Glyoxylate (GLY)",
                "Isocitrate (ISO)",
                "Malate (MAL)",
                "Methylmalonate semialdehyde (MMSA)",
                "Oxaloacetate (OAA)",
                "Propionyl-CoA (PROP-COA)",
                "Succinate (SUCC)",
                "Succinyl-ACP (SUC-ACP)",
                "Succinyl-CoA (SUCOA)",
                "Threonyl-CoA (THR-COA)",
                "\u03b1-Hydroxybutyrate (AHB)",
                "\u03b1-Ketoglutarate (\u03b1-KG)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (C6H5O7)",
            "Isocitrate (C6H5O7)",
            "Alpha-Ketoglutarate (C5H4O5)",
            "Succinyl-CoA (C25H40N7O19P3S)",
            "Succinate (C4H6O4)",
            "Fumarate (C4H4O4)",
            "Malate (C4H6O5)",
            "Oxaloacetate (C4H4O5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (C6H5O7)",
                "Isocitrate (C6H5O7)",
                "Alpha-Ketoglutarate (C5H4O5)",
                "Succinyl-CoA (C25H40N7O19P3S)",
                "Succinate (C4H6O4)",
                "Fumarate (C4H4O4)",
                "Malate (C4H6O5)",
                "Oxaloacetate (C4H4O5)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-Ketoglutarate (C5H4O5)",
                "Citrate (C6H5O7)",
                "Fumarate (C4H4O4)",
                "Isocitrate (C6H5O7)",
                "Malate (C4H6O5)",
                "Oxaloacetate (C4H4O5)",
                "Succinate (C4H6O4)",
                "Succinyl-CoA (C25H40N7O19P3S)"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (C6H5O7)",
            "Isocitrate (C6H7O7)",
            "Alpha-Ketoglutarate (C5H5O5)",
            "Succinyl-CoA (C4H5O-CoA)",
            "Succinate (C4H6O4)",
            "Fumarate (C4H4O4)",
            "Malate (C4H6O5)",
            "Oxaloacetate (C4H4O5)",
            "Acetyl-CoA (C2H3O-CoA)",
            "Pyruvate (C3H4O3)",
            "Citrate Synthase (C6H5O7)",
            "Isocitrate Dehydrogenase (C6H7O7)",
            "Alpha-Ketoglutarate Dehydrogenase (C5H5O5)",
            "Succinyl-CoA Synthetase (C4H5O-CoA)",
            "Succinate Dehydrogenase (C4H6O4)",
            "Fumarase (C4H4O4)",
            "Malate Dehydrogenase (C4H6O5)",
            "Aconitase (C6H7O7)",
            "Citrate Lyase (C6H5O7)",
            "Oxaloacetate Decarboxylase (C4H4O5)",
            "Acetyl-CoA Carboxylase (C2H3O-CoA)",
            "Pyruvate Dehydrogenase (C3H4O3)",
            "Dihydrolipoamide Dehydrogenase (C4H5O-CoA)",
            "Lactate (C3H6O3)",
            "2-Oxoglutarate (C5H5O5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (C6H5O7)",
                "Isocitrate (C6H7O7)",
                "Alpha-Ketoglutarate (C5H5O5)",
                "Succinyl-CoA (C4H5O-CoA)",
                "Succinate (C4H6O4)",
                "Fumarate (C4H4O4)",
                "Malate (C4H6O5)",
                "Oxaloacetate (C4H4O5)",
                "Acetyl-CoA (C2H3O-CoA)",
                "Pyruvate (C3H4O3)",
                "Citrate Synthase (C6H5O7)",
                "Isocitrate Dehydrogenase (C6H7O7)",
                "Alpha-Ketoglutarate Dehydrogenase (C5H5O5)",
                "Succinyl-CoA Synthetase (C4H5O-CoA)",
                "Succinate Dehydrogenase (C4H6O4)",
                "Fumarase (C4H4O4)",
                "Malate Dehydrogenase (C4H6O5)",
                "Aconitase (C6H7O7)",
                "Citrate Lyase (C6H5O7)",
                "Oxaloacetate Decarboxylase (C4H4O5)",
                "Acetyl-CoA Carboxylase (C2H3O-CoA)",
                "Pyruvate Dehydrogenase (C3H4O3)",
                "Dihydrolipoamide Dehydrogenase (C4H5O-CoA)",
                "Lactate (C3H6O3)"
            ],
            "mismatches": [
                "2-Oxoglutarate (C5H5O5)"
            ],
            "true_referents": [
                "Acetyl-CoA (C2H3O-CoA)",
                "Acetyl-CoA Carboxylase (C2H3O-CoA)",
                "Aconitase (C6H7O7)",
                "Alpha-Ketoglutarate (C5H5O5)",
                "Alpha-Ketoglutarate Dehydrogenase (C5H5O5)",
                "Citrate (C6H5O7)",
                "Citrate Lyase (C6H5O7)",
                "Citrate Synthase (C6H5O7)",
                "Dihydrolipoamide Dehydrogenase (C4H5O-CoA)",
                "Fumarase (C4H4O4)",
                "Fumarate (C4H4O4)",
                "Isocitrate (C6H7O7)",
                "Isocitrate Dehydrogenase (C6H7O7)",
                "Lactate (C3H6O3)",
                "Malate (C4H6O5)",
                "Malate Dehydrogenase (C4H6O5)",
                "Oxaloacetate (C4H4O5)",
                "Oxaloacetate Decarboxylase (C4H4O5)",
                "Pyruvate (C3H4O3)",
                "Pyruvate Dehydrogenase (C3H4O3)",
                "Succinate (C4H6O4)",
                "Succinate Dehydrogenase (C4H6O4)",
                "Succinyl-CoA (C4H5O-CoA)",
                "Succinyl-CoA Synthetase (C4H5O-CoA)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citric Acid (CA)",
            "Isocitric Acid (IA)",
            "Aconitic Acid (AA)",
            "Alpha-Ketoglutaric Acid (AKA)",
            "Succinyl-CoA (SC)",
            "Succinic Acid (SA)",
            "Fumaric Acid (FA)",
            "Malic Acid (MA)",
            "Oxaloacetic Acid (OA)",
            "Dihydroxyacetone Phosphate (DHAP)",
            "Glyceraldehyde 3-Phosphate (G3P)",
            "1,3-Bisphosphoglyceric Acid (1,3-BPG)",
            "3-Phosphoglyceric Acid (3-PG)",
            "2-Phosphoglyceric Acid (2-PG)",
            "Phosphoenolpyruvic Acid (PEP)",
            "Pyruvic Acid (PA)",
            "Acetyl CoA (AcCoA)",
            "Oxalosuccinic Acid (OSA)",
            "Hydroxyethyl-TPP (HETPP)",
            "2-Hydroxyglutarate (2-HG)",
            "Glutamate (Glu)",
            "Glutamine (Gln)",
            "Alpha-Ketoglutarate (\u03b1-KG)",
            "Isocitrate (ICT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citric Acid (CA)",
                "Isocitric Acid (IA)",
                "Aconitic Acid (AA)",
                "Alpha-Ketoglutaric Acid (AKA)",
                "Succinyl-CoA (SC)",
                "Succinic Acid (SA)",
                "Fumaric Acid (FA)",
                "Malic Acid (MA)",
                "Oxaloacetic Acid (OA)",
                "Dihydroxyacetone Phosphate (DHAP)",
                "Glyceraldehyde 3-Phosphate (G3P)",
                "1,3-Bisphosphoglyceric Acid (1,3-BPG)",
                "3-Phosphoglyceric Acid (3-PG)",
                "2-Phosphoglyceric Acid (2-PG)",
                "Phosphoenolpyruvic Acid (PEP)",
                "Pyruvic Acid (PA)",
                "Acetyl CoA (AcCoA)",
                "Oxalosuccinic Acid (OSA)",
                "Hydroxyethyl-TPP (HETPP)",
                "2-Hydroxyglutarate (2-HG)",
                "Glutamate (Glu)",
                "Glutamine (Gln)",
                "Alpha-Ketoglutarate (\u03b1-KG)",
                "Isocitrate (ICT)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Bisphosphoglyceric Acid (1,3-BPG)",
                "2-Hydroxyglutarate (2-HG)",
                "2-Phosphoglyceric Acid (2-PG)",
                "3-Phosphoglyceric Acid (3-PG)",
                "Acetyl CoA (AcCoA)",
                "Aconitic Acid (AA)",
                "Alpha-Ketoglutarate (\u03b1-KG)",
                "Alpha-Ketoglutaric Acid (AKA)",
                "Citric Acid (CA)",
                "Dihydroxyacetone Phosphate (DHAP)",
                "Fumaric Acid (FA)",
                "Glutamate (Glu)",
                "Glutamine (Gln)",
                "Glyceraldehyde 3-Phosphate (G3P)",
                "Hydroxyethyl-TPP (HETPP)",
                "Isocitrate (ICT)",
                "Isocitric Acid (IA)",
                "Malic Acid (MA)",
                "Oxaloacetic Acid (OA)",
                "Oxalosuccinic Acid (OSA)",
                "Phosphoenolpyruvic Acid (PEP)",
                "Pyruvic Acid (PA)",
                "Succinic Acid (SA)",
                "Succinyl-CoA (SC)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate",
            "Isocitrate",
            "Alpha-Ketoglutarate",
            "Succinyl-CoA",
            "Succinate",
            "Fumarate",
            "Malate",
            "Oxaloacetate",
            "Cis-Aconitate",
            "Dihydroxyacetone phosphate",
            "Glycerol-3-phosphate",
            "1,3-Bisphosphoglycerate",
            "3-Phosphoglycerate",
            "2-Phosphoglycerate",
            "Phosphoenolpyruvate",
            "Pyruvate",
            "Acetyl-CoA",
            "L-Malic acid",
            "L-Aspartic acid",
            "L-Glutamic acid",
            "L-Lysine",
            "L-Arginine",
            "L-Isoleucine",
            "L-Valine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate",
                "Isocitrate",
                "Alpha-Ketoglutarate",
                "Succinyl-CoA",
                "Succinate",
                "Fumarate",
                "Malate",
                "Oxaloacetate",
                "Cis-Aconitate",
                "Dihydroxyacetone phosphate",
                "Glycerol-3-phosphate",
                "1,3-Bisphosphoglycerate",
                "3-Phosphoglycerate",
                "2-Phosphoglycerate",
                "Phosphoenolpyruvate",
                "Pyruvate",
                "Acetyl-CoA",
                "L-Malic acid",
                "L-Aspartic acid",
                "L-Glutamic acid",
                "L-Lysine",
                "L-Arginine",
                "L-Isoleucine",
                "L-Valine"
            ],
            "mismatches": [],
            "true_referents": [
                "1,3-Bisphosphoglycerate",
                "2-Phosphoglycerate",
                "3-Phosphoglycerate",
                "Acetyl-CoA",
                "Alpha-Ketoglutarate",
                "Cis-Aconitate",
                "Citrate",
                "Dihydroxyacetone phosphate",
                "Fumarate",
                "Glycerol-3-phosphate",
                "Isocitrate",
                "L-Arginine",
                "L-Aspartic acid",
                "L-Glutamic acid",
                "L-Isoleucine",
                "L-Lysine",
                "L-Malic acid",
                "L-Valine",
                "Malate",
                "Oxaloacetate",
                "Phosphoenolpyruvate",
                "Pyruvate",
                "Succinate",
                "Succinyl-CoA"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (CIT)",
            "Isocitrate (ICT)",
            "\u03b1-Ketoglutarate (AKG)",
            "Succinyl-CoA (SucCoA)",
            "Succinate (SUCC)",
            "Fumarate (FUM)",
            "Malate (MAL)",
            "Oxaloacetate (OAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (CIT)",
                "Isocitrate (ICT)",
                "\u03b1-Ketoglutarate (AKG)",
                "Succinyl-CoA (SucCoA)",
                "Succinate (SUCC)",
                "Fumarate (FUM)",
                "Malate (MAL)",
                "Oxaloacetate (OAA)"
            ],
            "mismatches": [],
            "true_referents": [
                "Citrate (CIT)",
                "Fumarate (FUM)",
                "Isocitrate (ICT)",
                "Malate (MAL)",
                "Oxaloacetate (OAA)",
                "Succinate (SUCC)",
                "Succinyl-CoA (SucCoA)",
                "\u03b1-Ketoglutarate (AKG)"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citric acid (C6H8O7)",
            "\u03b1-Ketoglutaric acid (\u03b1-KG)",
            "Succinic acid (C4H6O4)",
            "Fumaric acid (C4H4O4)",
            "Malic acid (C4H6O5)",
            "Oxaloacetic acid (OAA)",
            "Isocitric acid (C6H8O7)",
            "cis-Aconitic acid (C6H6O6)",
            "Oxalosuccinic acid",
            "Succinyl-CoA"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citric acid (C6H8O7)",
                "\u03b1-Ketoglutaric acid (\u03b1-KG)",
                "Succinic acid (C4H6O4)",
                "Fumaric acid (C4H4O4)",
                "Malic acid (C4H6O5)",
                "Oxaloacetic acid (OAA)",
                "Isocitric acid (C6H8O7)",
                "cis-Aconitic acid (C6H6O6)",
                "Oxalosuccinic acid",
                "Succinyl-CoA"
            ],
            "mismatches": [],
            "true_referents": [
                "Citric acid (C6H8O7)",
                "Fumaric acid (C4H4O4)",
                "Isocitric acid (C6H8O7)",
                "Malic acid (C4H6O5)",
                "Oxaloacetic acid (OAA)",
                "Oxalosuccinic acid",
                "Succinic acid (C4H6O4)",
                "Succinyl-CoA",
                "cis-Aconitic acid (C6H6O6)",
                "\u03b1-Ketoglutaric acid (\u03b1-KG)"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (CIT)",
            "Isocitrate (ICIT)",
            "\u03b1-Ketoglutarate (AKG)",
            "Succinyl-CoA",
            "Succinate (SUC)",
            "Fumarate (FUM)",
            "Malate (MAL)",
            "Oxaloacetate (OAA)",
            "cis-Aconitate",
            "Acetyl-CoA",
            "Pyruvate",
            "Phosphoenolpyruvate (PEP)",
            "2-Oxoglutarate",
            "Succinic semialdehyde",
            "L-Glutamate",
            "L-Aspartate",
            "Coenzyme A (CoA)",
            "Guanosine triphosphate (GTP)",
            "Guanosine diphosphate (GDP)",
            "Flavin adenine dinucleotide (FAD)",
            "Flavin adenine dinucleotide reduced (FADH2)",
            "Nicotinamide adenine dinucleotide (NAD+)",
            "Nicotinamide adenine dinucleotide reduced (NADH)",
            "Thiamine pyrophosphate (TPP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (CIT)",
                "Isocitrate (ICIT)",
                "\u03b1-Ketoglutarate (AKG)",
                "Succinyl-CoA",
                "Succinate (SUC)",
                "Fumarate (FUM)",
                "Malate (MAL)",
                "Oxaloacetate (OAA)",
                "cis-Aconitate",
                "Acetyl-CoA",
                "Pyruvate",
                "Phosphoenolpyruvate (PEP)",
                "2-Oxoglutarate",
                "Succinic semialdehyde",
                "L-Glutamate",
                "L-Aspartate",
                "Coenzyme A (CoA)",
                "Guanosine triphosphate (GTP)",
                "Guanosine diphosphate (GDP)",
                "Flavin adenine dinucleotide (FAD)",
                "Flavin adenine dinucleotide reduced (FADH2)",
                "Nicotinamide adenine dinucleotide (NAD+)",
                "Nicotinamide adenine dinucleotide reduced (NADH)",
                "Thiamine pyrophosphate (TPP)"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Oxoglutarate",
                "Acetyl-CoA",
                "Citrate (CIT)",
                "Coenzyme A (CoA)",
                "Flavin adenine dinucleotide (FAD)",
                "Flavin adenine dinucleotide reduced (FADH2)",
                "Fumarate (FUM)",
                "Guanosine diphosphate (GDP)",
                "Guanosine triphosphate (GTP)",
                "Isocitrate (ICIT)",
                "L-Aspartate",
                "L-Glutamate",
                "Malate (MAL)",
                "Nicotinamide adenine dinucleotide (NAD+)",
                "Nicotinamide adenine dinucleotide reduced (NADH)",
                "Oxaloacetate (OAA)",
                "Phosphoenolpyruvate (PEP)",
                "Pyruvate",
                "Succinate (SUC)",
                "Succinic semialdehyde",
                "Succinyl-CoA",
                "Thiamine pyrophosphate (TPP)",
                "cis-Aconitate",
                "\u03b1-Ketoglutarate (AKG)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "citric acid (C6H8O7)",
            "isocitric acid",
            "cis-aconitic acid",
            "aconitic acid",
            "oxalosuccinic acid",
            "alpha-ketoglutaric acid (\u03b1-KG)",
            "succinyl-CoA",
            "succinic acid",
            "fumaric acid",
            "malic acid (C4H6O5)",
            "oxaloacetic acid (OAA)",
            "acetyl-CoA",
            "pyruvic acid",
            "glyoxylic acid",
            "alpha-ketoadipic acid",
            "glutamic acid",
            "aspartic acid",
            "succinylglutamic acid",
            "succinylglutamate",
            "succinylglutamine",
            "succinylglycine",
            "succinylornithine",
            "succinylputrescine",
            "succinylspermidine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "citric acid (C6H8O7)",
                "isocitric acid",
                "cis-aconitic acid",
                "aconitic acid",
                "oxalosuccinic acid",
                "alpha-ketoglutaric acid (\u03b1-KG)",
                "succinyl-CoA",
                "succinic acid",
                "fumaric acid",
                "malic acid (C4H6O5)",
                "oxaloacetic acid (OAA)",
                "acetyl-CoA",
                "pyruvic acid",
                "glyoxylic acid",
                "alpha-ketoadipic acid",
                "glutamic acid",
                "aspartic acid",
                "succinylglutamic acid",
                "succinylglutamate",
                "succinylglutamine",
                "succinylglycine",
                "succinylornithine",
                "succinylputrescine",
                "succinylspermidine"
            ],
            "mismatches": [],
            "true_referents": [
                "acetyl-CoA",
                "aconitic acid",
                "alpha-ketoadipic acid",
                "alpha-ketoglutaric acid (\u03b1-KG)",
                "aspartic acid",
                "cis-aconitic acid",
                "citric acid (C6H8O7)",
                "fumaric acid",
                "glutamic acid",
                "glyoxylic acid",
                "isocitric acid",
                "malic acid (C4H6O5)",
                "oxaloacetic acid (OAA)",
                "oxalosuccinic acid",
                "pyruvic acid",
                "succinic acid",
                "succinyl-CoA",
                "succinylglutamate",
                "succinylglutamic acid",
                "succinylglutamine",
                "succinylglycine",
                "succinylornithine",
                "succinylputrescine",
                "succinylspermidine"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citric acid (CA)",
            "Isocitric acid (ICA)",
            "\u03b1-Ketoglutaric acid (\u03b1-KGA)",
            "Succinic acid (SA)",
            "Fumaric acid (FA)",
            "Malic acid (MA)",
            "Oxaloacetic acid (OAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citric acid (CA)",
                "Isocitric acid (ICA)",
                "\u03b1-Ketoglutaric acid (\u03b1-KGA)",
                "Succinic acid (SA)",
                "Fumaric acid (FA)",
                "Malic acid (MA)",
                "Oxaloacetic acid (OAA)"
            ],
            "mismatches": [],
            "true_referents": [
                "Citric acid (CA)",
                "Fumaric acid (FA)",
                "Isocitric acid (ICA)",
                "Malic acid (MA)",
                "Oxaloacetic acid (OAA)",
                "Succinic acid (SA)",
                "\u03b1-Ketoglutaric acid (\u03b1-KGA)"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Citrate (Cit)",
            "Isocitrate",
            "Alpha-ketoglutarate (AKG)",
            "Succinyl-CoA",
            "Succinate",
            "Fumarate",
            "Malate",
            "Oxaloacetate (OAA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Citrate (Cit)",
                "Isocitrate",
                "Alpha-ketoglutarate (AKG)",
                "Succinyl-CoA",
                "Succinate",
                "Fumarate",
                "Malate",
                "Oxaloacetate (OAA)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-ketoglutarate (AKG)",
                "Citrate (Cit)",
                "Fumarate",
                "Isocitrate",
                "Malate",
                "Oxaloacetate (OAA)",
                "Succinate",
                "Succinyl-CoA"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            "Acetyl-CoA (Acetyl-CoA)",
            "Citrate",
            "isocitrate",
            "\u03b1-Ketoglutarate (\u03b1-KG)",
            "NADH",
            "NAD+",
            "Oxaloacetate",
            "Oxoglutarate",
            "Malate",
            "Fumarate",
            "Succinate",
            "Succinyl-CoA (Succinyl-CoA)",
            "L-Malate",
            "Fumaric acid",
            "Cis-Aconitic acid",
            "Isocitric acid",
            "\u03b1-Ketoglutaric acid",
            "Oxalacetic acid",
            "Oxoglutaric acid",
            "Malic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetyl-CoA (Acetyl-CoA)",
                "Citrate",
                "isocitrate",
                "\u03b1-Ketoglutarate (\u03b1-KG)",
                "NADH",
                "NAD+",
                "Oxaloacetate",
                "Oxoglutarate",
                "Malate",
                "Fumarate",
                "Succinate",
                "Succinyl-CoA (Succinyl-CoA)",
                "L-Malate",
                "Fumaric acid",
                "Cis-Aconitic acid",
                "Isocitric acid",
                "\u03b1-Ketoglutaric acid",
                "Oxalacetic acid",
                "Oxoglutaric acid",
                "Malic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetyl-CoA (Acetyl-CoA)",
                "Cis-Aconitic acid",
                "Citrate",
                "Fumarate",
                "Fumaric acid",
                "Isocitric acid",
                "L-Malate",
                "Malate",
                "Malic acid",
                "NAD+",
                "NADH",
                "Oxalacetic acid",
                "Oxaloacetate",
                "Oxoglutarate",
                "Oxoglutaric acid",
                "Succinate",
                "Succinyl-CoA (Succinyl-CoA)",
                "isocitrate",
                "\u03b1-Ketoglutarate (\u03b1-KG)",
                "\u03b1-Ketoglutaric acid"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "TCA cycle organic intermediate",
        "domain": "chemistry",
        "response": [
            [
                "Referent",
                "citric acid"
            ],
            [
                "Referent",
                "cis-aconitate"
            ],
            [
                "Referent",
                "isocitrate"
            ],
            [
                "Referent",
                "alpha-ketoglutarate"
            ],
            [
                "Referent",
                "succinate"
            ]
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "citric acid",
                "cis-aconitate",
                "isocitrate",
                "alpha-ketoglutarate",
                "succinate"
            ],
            "mismatches": [],
            "true_referents": [
                "[\"Referent\", \"alpha-ketoglutarate\"]",
                "[\"Referent\", \"cis-aconitate\"]",
                "[\"Referent\", \"citric acid\"]",
                "[\"Referent\", \"isocitrate\"]",
                "[\"Referent\", \"succinate\"]"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 0
        }
    }
]