[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (C6H12O6)",
            "Adenosine Triphosphate (ATP)",
            "NAD+ (Nicotinamide adenine dinucleotide)",
            "NADH",
            "NADP+",
            "NADPH",
            "FAD (Flavin adenine dinucleotide)",
            "FADH2",
            "Pyruvate",
            "Acetyl-CoA",
            "Oxaloacetate",
            "Citrate",
            "Malate",
            "Isocitrate",
            "\u03b1-Ketoglutarate",
            "\u03b1-Ketoacid",
            "Glyceraldehyde 3-phosphate",
            "Fructose 1,6-bisphosphate",
            "Glucose 6-phosphate",
            "Fructose 6-phosphate",
            "Glycogen",
            "Lactate",
            "Alanine",
            "Pyridoxal 5'-phosphate (PLP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP",
                "NADH",
                "NADP+",
                "NADPH",
                "Pyruvate",
                "acetyl-CoA",
                "Citrate",
                "Malate",
                "Isocitrate",
                "Fructose 1,6-bisphosphate",
                "Glucose 6-phosphate",
                "Fructose 6-phosphate",
                "Alanine"
            ],
            "mismatches": [
                "Glucose (C6H12O6)",
                "NAD+ (Nicotinamide adenine dinucleotide)",
                "FAD (Flavin adenine dinucleotide)",
                "FADH2",
                "Oxaloacetate",
                "\u03b1-Ketoglutarate",
                "\u03b1-Ketoacid",
                "Glyceraldehyde 3-phosphate",
                "Glycogen",
                "Lactate",
                "Pyridoxal 5'-phosphate (PLP)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "6-phospho-D-gluconic acid",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-fructofuranose 1,6-bisphosphate(4\u2212)",
                "D-fructose 1,6-bisphosphate",
                "D-fructose 6-phosphate",
                "D-glucose 6-phosphate",
                "D-threo-isocitrate(3\u2212)",
                "L-alaninate",
                "L-alanine",
                "L-aspartate(1\u2212)",
                "L-erythro-isocitrate(3\u2212)",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "S-adenosyl-L-homocysteinate",
                "S-adenosyl-L-homocysteine",
                "acetyl-CoA",
                "alanine",
                "aldehydo-D-ribose 5-phosphate(2\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "fructose 1-phosphate",
                "glucose",
                "glutamic acid",
                "glyceraldehyde",
                "glycerate",
                "isocitric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "pyruvate",
                "pyruvic acid",
                "riboflavin",
                "thiamine(1+) diphosphate",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (C6H12O6)",
            "Fructose (C6H12O6)",
            "Galactose (C6H12O6)",
            "Pyruvate (C3H4O3)",
            "Lactate (C3H6O3)",
            "Acetyl-CoA (C23H38N7O17P3S)",
            "Citrate (C6H8O7)",
            "\u03b1-Ketoglutarate (C5H6O5)",
            "Succinate (C4H6O4)",
            "Fumarate (C4H4O4)",
            "Malate (C4H6O5)",
            "Oxaloacetate (C4H4O5)",
            "Urea (CH4N2O)",
            "Creatine (C4H9N3O2)",
            "Adenosine triphosphate (ATP, C10H16N5O13P3)",
            "Nicotinamide adenine dinucleotide (NAD+, C21H27N7O14P2)",
            "Flavin adenine dinucleotide (FAD, C27H33N9O15P2)",
            "Glutathione (C10H17N3O6S)",
            "Cholesterol (C27H46O)",
            "Amino acids (e.g., L-Glutamine, C5H10N2O3)",
            "Fatty acids (e.g., Palmitic acid, C16H32O2)",
            "Nucleotides (e.g., Adenosine monophosphate, AMP, C10H14N5O7P)",
            "Glycerol (C3H8O3)",
            "Serotonin (C10H12N2O)",
            "Histamine (C5H9N3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (C6H12O6)",
                "Fructose (C6H12O6)",
                "Galactose (C6H12O6)",
                "Pyruvate (C3H4O3)",
                "Acetyl-CoA (C23H38N7O17P3S)",
                "Citrate (C6H8O7)",
                "Succinate (C4H6O4)",
                "Fumarate (C4H4O4)",
                "Malate (C4H6O5)",
                "Adenosine triphosphate (ATP, C10H16N5O13P3)",
                "Nicotinamide adenine dinucleotide (NAD+, C21H27N7O14P2)",
                "Amino acids (e.g., L-Glutamine, C5H10N2O3)",
                "Nucleotides (e.g., Adenosine monophosphate, AMP, C10H14N5O7P)"
            ],
            "mismatches": [
                "Lactate (C3H6O3)",
                "Oxaloacetate (C4H4O5)",
                "Urea (CH4N2O)",
                "Creatine (C4H9N3O2)",
                "Flavin adenine dinucleotide (FAD, C27H33N9O15P2)",
                "Glutathione (C10H17N3O6S)",
                "Cholesterol (C27H46O)",
                "Fatty acids (e.g., Palmitic acid, C16H32O2)",
                "Glycerol (C3H8O3)",
                "Serotonin (C10H12N2O)",
                "Histamine (C5H9N3)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "6-phospho-D-gluconic acid",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-cysteinate(1\u2212)",
                "L-cysteinate(1\u2212)",
                "L-erythro-isocitrate(3\u2212)",
                "L-galactose 1-phosphate(2\u2212)",
                "L-tyrosinate(1\u2212)",
                "L-tyrosinate(2\u2212)",
                "L-tyrosine",
                "NAD",
                "NADH",
                "NADP+",
                "acetyl-CoA",
                "adenosine 5'-monophosphate",
                "adenosine 5'-monophosphate(1+)",
                "aspartate(1\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "cysteine",
                "fructose",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "fumaric acid",
                "galactose",
                "glucose",
                "glutamic acid",
                "glutamine",
                "glycerate",
                "glyceric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "purine",
                "pyruvate",
                "pyruvic acid",
                "riboflavin",
                "succinate(1\u2212)",
                "succinic acid",
                "trans-aconitate(3\u2212)",
                "uridine",
                "\u03b1-L-galactose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (C6H12O6)",
            "Oxygen (O2)",
            "Carbon Dioxide (CO2)",
            "Water (H2O)",
            "Adenosine Triphosphate (ATP)",
            "Nicotinamide Adenine Dinucleotide (NAD+)",
            "Nicotinamide Adenine Dinucleotide Phosphate (NADPH)",
            "Flavin Adenine Dinucleotide (FAD)",
            "Flavin Mononucleotide (FMN)",
            "Coenzyme A (CoA)",
            "Acetyl-CoA",
            "Glutathione (GSH)",
            "Pyruvate",
            "Lactate",
            "Glyceraldehyde 3-Phosphate (G3P)",
            "Citrate",
            "Succinate",
            "Malate",
            "Alpha-Ketoglutarate",
            "Oxaloacetate",
            "Acetyl-CoA",
            "Glutamine",
            "Glycine",
            "Serine",
            "Tryptophan"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (C6H12O6)",
                "Adenosine Triphosphate (ATP)",
                "Nicotinamide Adenine Dinucleotide Phosphate (NADPH)",
                "Acetyl-CoA",
                "Pyruvate",
                "Citrate",
                "Succinate",
                "Malate",
                "Glutamine",
                "Glycine",
                "Serine"
            ],
            "mismatches": [
                "Oxygen (O2)",
                "Carbon Dioxide (CO2)",
                "Water (H2O)",
                "Nicotinamide Adenine Dinucleotide (NAD+)",
                "Flavin Adenine Dinucleotide (FAD)",
                "Flavin Mononucleotide (FMN)",
                "Coenzyme A (CoA)",
                "Glutathione (GSH)",
                "Lactate",
                "Glyceraldehyde 3-Phosphate (G3P)",
                "Alpha-Ketoglutarate",
                "Oxaloacetate",
                "Tryptophan"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "acetyl-CoA",
                "arginine",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "cysteine",
                "fructose 1-phosphate",
                "glucose",
                "glutamate(1\u2212)",
                "glutamic acid",
                "glutamine",
                "glyceraldehyde",
                "glycerate",
                "glycinate",
                "glycine",
                "homocysteine",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "pyruvate",
                "pyruvic acid",
                "riboflavin",
                "serinate",
                "serine",
                "succinate(1\u2212)",
                "succinic acid",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (GLC)",
            "Fructose (FRU)",
            "Pyruvate (PYR)",
            "Lactate (LAC)",
            "Citrate (CIT)",
            "Acetyl-CoA (ACoA)",
            "Alpha-Ketoglutarate (AKG)",
            "Succinyl-CoA (SUCoA)",
            "Malate (MAL)",
            "Oxaloacetate (OAA)",
            "Glutamine (GLN)",
            "Glutamate (GLU)",
            "Aspartate (ASP)",
            "Alanine (ALA)",
            "Glycine (GLY)",
            "Serine (SER)",
            "Threonine (THR)",
            "Lysine (LYS)",
            "Arginine (ARG)",
            "Proline (PRO)",
            "Fatty Acids (FA)",
            "Glycerol (GLY3P)",
            "Cholesterol (CHOL)",
            "Urea (UREA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (GLC)",
                "Fructose (FRU)",
                "Pyruvate (PYR)",
                "Citrate (CIT)",
                "Acetyl-CoA (ACoA)",
                "Malate (MAL)",
                "Glutamine (GLN)",
                "Glutamate (GLU)",
                "Aspartate (ASP)",
                "Alanine (ALA)",
                "Glycine (GLY)",
                "Serine (SER)",
                "Arginine (ARG)"
            ],
            "mismatches": [
                "Lactate (LAC)",
                "Alpha-Ketoglutarate (AKG)",
                "Succinyl-CoA (SUCoA)",
                "Oxaloacetate (OAA)",
                "Threonine (THR)",
                "Lysine (LYS)",
                "Proline (PRO)",
                "Fatty Acids (FA)",
                "Glycerol (GLY3P)",
                "Cholesterol (CHOL)",
                "Urea (UREA)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "2-oxoglutaric acid",
                "3-phosphoglyceric acid",
                "D-threo-isocitrate(3\u2212)",
                "L-alaninate",
                "L-alanine",
                "L-aspartate(1\u2212)",
                "L-threo-isocitrate(3\u2212)",
                "acetyl-CoA",
                "alanine",
                "arginine",
                "aspartate(1\u2212)",
                "aspartate(2\u2212)",
                "aspartic acid",
                "biotin",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "fructose",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "glucose",
                "glutamate(1\u2212)",
                "glutamate(2\u2212)",
                "glutamic acid",
                "glutamine",
                "glycerate",
                "glyceric acid",
                "glycinate",
                "glycine",
                "glycine betaine",
                "homocysteine",
                "isocitric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "pyruvate",
                "pyruvic acid",
                "serinate",
                "serine",
                "succinate(1\u2212)",
                "succinic acid",
                "trans-aconitate(3\u2212)",
                "trans-aconitic acid",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (C6H12O6)",
            "Pyruvate (C3H3O3-)",
            "Lactic acid (C3H6O3)",
            "Acetyl-CoA (C23H38N7O17P3S)",
            "Citric acid (C6H8O7)",
            "Alpha-Ketoglutarate (C5H6O5)",
            "Oxaloacetate (C4H4O5)",
            "Malate (C4H6O5)",
            "Fumarate (C4H4O4)",
            "Succinate (C4H6O4)",
            "NAD+ (C21H27N7O14P2)",
            "NADH (C21H28N7O14P2)",
            "FAD (C27H33N9O15P2)",
            "FADH2 (C27H35N9O15P2)",
            "ATP (C10H16N5O13P3)",
            "ADP (C10H15N5O10P2)",
            "AMP (C10H14N5O7P)",
            "Ribose (C5H10O5)",
            "Fructose 1,6-bisphosphate (C6H14O12P2)",
            "Glyceraldehyde 3-phosphate (C3H7O6P)",
            "Dihydroxyacetone phosphate (C3H7O6P)",
            "Urea (CH4N2O)",
            "Glutamine (C5H10N2O3)",
            "Arginine (C6H14N4O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (C6H12O6)",
                "Pyruvate (C3H3O3-)",
                "Acetyl-CoA (C23H38N7O17P3S)",
                "Citric acid (C6H8O7)",
                "Malate (C4H6O5)",
                "Fumarate (C4H4O4)",
                "Succinate (C4H6O4)",
                "NADH (C21H28N7O14P2)",
                "ATP (C10H16N5O13P3)",
                "ADP (C10H15N5O10P2)",
                "Ribose (C5H10O5)",
                "Fructose 1,6-bisphosphate (C6H14O12P2)",
                "Glutamine (C5H10N2O3)",
                "Arginine (C6H14N4O2)"
            ],
            "mismatches": [
                "Lactic acid (C3H6O3)",
                "Alpha-Ketoglutarate (C5H6O5)",
                "Oxaloacetate (C4H4O5)",
                "NAD+ (C21H27N7O14P2)",
                "FAD (C27H33N9O15P2)",
                "FADH2 (C27H35N9O15P2)",
                "AMP (C10H14N5O7P)",
                "Glyceraldehyde 3-phosphate (C3H7O6P)",
                "Dihydroxyacetone phosphate (C3H7O6P)",
                "Urea (CH4N2O)"
            ],
            "true_referents": [
                "2'-deoxyadenosine 5'-monophosphate(2\u2212)",
                "2-oxoglutarate(1\u2212)",
                "3-phosphonato-D-glycerate(3\u2212)",
                "6-phospho-D-gluconic acid",
                "ADP(3\u2212)",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-fructofuranose 1,6-bisphosphate(4\u2212)",
                "D-fructose 1,6-bisphosphate",
                "D-fructose 6-phosphate",
                "D-glucose 6-phosphate",
                "D-ribose",
                "D-ribose 5-phosphate",
                "D-ribulose 5-phosphate(2\u2212)",
                "N6-(1,2-dicarboxyethyl)-AMP",
                "NAD",
                "NADH",
                "NADP+",
                "acetyl-CoA",
                "adenosine 5'-monophosphate(1+)",
                "adenosine 5'-monophosphate(2\u2212)",
                "aldehydo-D-ribose 5-phosphate(2\u2212)",
                "arginine",
                "aspartate(1\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glutamate(1\u2212)",
                "glutamic acid",
                "glutamine",
                "glyceraldehyde",
                "glycerate",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "pyruvate",
                "pyruvic acid",
                "succinate(1\u2212)",
                "succinic acid",
                "trans-aconitate(3\u2212)",
                "uridine",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Amino Acids (AA)",
            "Glucose (C6H12O6)",
            "Fructose (C6H12O6)",
            "Lactate (C3H6O3)",
            "Pyruvate (C3H4O3)",
            "Citric Acid (C6H8O7)",
            "Acetyl-CoA (AcCoA)",
            "Glycerol (C3H8O3)",
            "Fatty Acids (FAs)",
            "Nucleotides (NTPs)",
            "Cholesterol (C27H46O)",
            "Urea (CH4N2O)",
            "ATP (Adenosine Triphosphate)",
            "NAD+ (Nicotinamide Adenine Dinucleotide)",
            "NADPH (Nicotinamide Adenine Dinucleotide Phosphate)",
            "Coenzyme A (CoA)",
            "Glutathione (GSH)",
            "Sodium Bicarbonate (NaHCO3)",
            "Creatine (C4H9N3O2)",
            "Beta-Hydroxybutyrate (BHB)",
            "Sphingolipids (SLs)",
            "Inositol (C6H12O6)",
            "Lipid (Triglycerides)",
            "Serotonin (5-HT)",
            "Histamine (C5H9N3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose",
                "Fructose",
                "Pyruvate",
                "Citric Acid",
                "Acetyl-CoA",
                "ATP",
                "NADPH"
            ],
            "mismatches": [
                "Amino Acids (AA)",
                "Lactate (C3H6O3)",
                "Glycerol (C3H8O3)",
                "Fatty Acids (FAs)",
                "Nucleotides (NTPs)",
                "Cholesterol (C27H46O)",
                "Urea (CH4N2O)",
                "NAD+ (Nicotinamide Adenine Dinucleotide)",
                "Coenzyme A (CoA)",
                "Glutathione (GSH)",
                "Sodium Bicarbonate (NaHCO3)",
                "Creatine (C4H9N3O2)",
                "Beta-Hydroxybutyrate (BHB)",
                "Sphingolipids (SLs)",
                "Inositol (C6H12O6)",
                "Lipid (Triglycerides)",
                "Serotonin (5-HT)",
                "Histamine (C5H9N3)"
            ],
            "true_referents": [
                "3-phosphoglyceric acid",
                "6-phospho-D-gluconic acid",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "L-alanine",
                "L-erythro-isocitrate(3\u2212)",
                "L-tyrosine",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "acetyl-CoA",
                "adenosine 5'-monophosphate",
                "aspartate(1\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "cysteine",
                "fructose",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "glucose",
                "glutamic acid",
                "glycerate",
                "glyceric acid",
                "glycine",
                "guanosine",
                "homocysteine",
                "isocitric acid",
                "lipoic acid",
                "purine",
                "pyruvate",
                "pyruvic acid",
                "trans-aconitate(3\u2212)",
                "uridine",
                "\u03b1-D-glucose 6-phosphate(2\u2212)",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 7,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Adenosine Triphosphate (ATP)",
            "Glucose (C6H12O6)",
            "Pyruvate (C3H4O3)",
            "Nicotinamide Adenine Dinucleotide (NAD)",
            "Nicotinamide Adenine Dinucleotide Phosphate (NADP)",
            "Acetyl-CoA (C23H38N7O17P3S)",
            "Oxaloacetate (C4H4O5)",
            "Alpha-ketoglutarate (C5H6O5)",
            "Flavin Adenine Dinucleotide (FAD)",
            "Coenzyme A (CoA)",
            "Guanosine Triphosphate (GTP)",
            "Citrate (C6H8O7)",
            "Succinate (C4H6O4)",
            "Fumarate (C4H4O4)",
            "Malate (C4H6O5)",
            "Glyceraldehyde 3-phosphate (C3H7O6P)",
            "Fructose 1,6-bisphosphate (C6H14O12P2)",
            "3-Phosphoglycerate (C3H7O7P)",
            "Phosphoenolpyruvate (C3H5O6P)",
            "Ribose 5-phosphate (C5H11O8P)",
            "Deoxyribose 5-phosphate (C5H11O7P)",
            "Uridine Triphosphate (UTP)",
            "Cytidine Triphosphate (CTP)",
            "Thymidine Triphosphate (TTP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adenosine Triphosphate (ATP)",
                "Glucose (C6H12O6)",
                "Pyruvate (C3H4O3)",
                "Nicotinamide Adenine Dinucleotide (NAD)",
                "Acetyl-CoA (C23H38N7O17P3S)",
                "Citrate (C6H8O7)",
                "Succinate (C4H6O4)",
                "Fumarate (C4H4O4)",
                "Malate (C4H6O5)",
                "Fructose 1,6-bisphosphate (C6H14O12P2)",
                "3-Phosphoglycerate (C3H7O7P)",
                "Phosphoenolpyruvate (C3H5O6P)",
                "Ribose 5-phosphate (C5H11O8P)"
            ],
            "mismatches": [
                "Nicotinamide Adenine Dinucleotide Phosphate (NADP)",
                "Oxaloacetate (C4H4O5)",
                "Alpha-ketoglutarate (C5H6O5)",
                "Flavin Adenine Dinucleotide (FAD)",
                "Coenzyme A (CoA)",
                "Guanosine Triphosphate (GTP)",
                "Glyceraldehyde 3-phosphate (C3H7O6P)",
                "Deoxyribose 5-phosphate (C5H11O7P)",
                "Uridine Triphosphate (UTP)",
                "Cytidine Triphosphate (CTP)",
                "Thymidine Triphosphate (TTP)"
            ],
            "true_referents": [
                "2'-deoxyadenosine 5'-monophosphate",
                "2'-deoxyadenosine 5'-monophosphate(2\u2212)",
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "3-phosphonato-D-glycerate(3\u2212)",
                "6-phospho-D-gluconic acid",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-fructofuranose 1,6-bisphosphate(4\u2212)",
                "D-fructose 1,6-bisphosphate",
                "D-ribose 5-phosphate",
                "D-ribulose 5-phosphate(2\u2212)",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "acetyl-CoA",
                "adenosine 5'-monophosphate",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glyceraldehyde",
                "glycerate",
                "guanosine",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "phosphoenolpyruvic acid",
                "purine",
                "pyruvate",
                "pyruvic acid",
                "riboflavin",
                "ribulose 5-phosphate",
                "succinate(1\u2212)",
                "succinic acid",
                "trans-aconitate(3\u2212)",
                "uridine",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose",
            "Fructose",
            "Pyruvate",
            "Lactate",
            "Acetyl-CoA",
            "Glycerol",
            "Fatty acid",
            "Amino acid",
            "Nucleotide",
            "ATP (Adenosine triphosphate)",
            "NAD+ (Nicotinamide adenine dinucleotide)",
            "NADH (Nicotinamide adenine dinucleotide)",
            "NADPH (Nicotinamide adenine dinucleotide phosphate)",
            "FAD (Flavin adenine dinucleotide)",
            "FADH2 (Flavin adenine dinucleotide)",
            "Coenzyme A",
            "Citrate",
            "Oxaloacetate",
            "Malate",
            "Succinate",
            "Glyceraldehyde 3-phosphate",
            "Ribose-5-phosphate",
            "Uridine diphosphate glucose",
            "S-Adenosylmethionine (SAM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose",
                "Fructose",
                "Pyruvate",
                "Acetyl-CoA",
                "ATP (Adenosine triphosphate)",
                "NADH (Nicotinamide adenine dinucleotide)",
                "NADPH (Nicotinamide adenine dinucleotide phosphate)",
                "Citrate",
                "Malate",
                "Succinate"
            ],
            "mismatches": [
                "Lactate",
                "Glycerol",
                "Fatty acid",
                "Amino acid",
                "Nucleotide",
                "NAD+ (Nicotinamide adenine dinucleotide)",
                "FAD (Flavin adenine dinucleotide)",
                "FADH2 (Flavin adenine dinucleotide)",
                "Coenzyme A",
                "Oxaloacetate",
                "Glyceraldehyde 3-phosphate",
                "Ribose-5-phosphate",
                "Uridine diphosphate glucose",
                "S-Adenosylmethionine (SAM)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-ribose 5-phosphate",
                "D-ribulose 5-phosphate(2\u2212)",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "S-adenosyl-L-homocysteinate",
                "S-adenosyl-L-homocysteine",
                "acetyl-CoA",
                "adenosine 5'-monophosphate",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "fructose",
                "fructose 1-phosphate",
                "glucose",
                "glutamic acid",
                "glyceraldehyde",
                "glycerate",
                "glyceric acid",
                "glycine",
                "guanosine",
                "isocitric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "purine",
                "pyruvate",
                "pyruvic acid",
                "riboflavin",
                "ribulose 5-phosphate",
                "succinate(1\u2212)",
                "succinic acid",
                "uridine",
                "\u03b2-D-glucose 1-phosphate(2\u2212)"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (C6H12O6)",
            "Fructose-6-phosphate (F6P)",
            "Pyruvate (C3H4O3)",
            "Acetyl-CoA",
            "Adenosine triphosphate (ATP)",
            "Nicotinamide adenine dinucleotide (NAD+)",
            "Citrate",
            "Oxaloacetate",
            "3-Phosphoglycerate",
            "Ribose-5-phosphate (R5P)",
            "Lactate",
            "Glutamate",
            "Fumarate",
            "Malate",
            "Succinate",
            "Glyceraldehyde-3-phosphate (G3P)",
            "Phosphoenolpyruvate (PEP)",
            "Urea",
            "Ethanolamine",
            "Serine",
            "Methionine",
            "Choline",
            "Sphingosine",
            "Uridine diphosphate glucose (UDP-Glc)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (C6H12O6)",
                "Fructose-6-phosphate (F6P)",
                "Pyruvate (C3H4O3)",
                "Acetyl-CoA",
                "Adenosine triphosphate (ATP)",
                "Citrate",
                "3-Phosphoglycerate",
                "Ribose-5-phosphate (R5P)",
                "Glutamate",
                "Fumarate",
                "Malate",
                "Succinate",
                "Phosphoenolpyruvate (PEP)",
                "Serine",
                "Uridine diphosphate glucose (UDP-Glc)"
            ],
            "mismatches": [
                "Nicotinamide adenine dinucleotide (NAD+)",
                "Oxaloacetate",
                "Lactate",
                "Glyceraldehyde-3-phosphate (G3P)",
                "Urea",
                "Ethanolamine",
                "Methionine",
                "Choline",
                "Sphingosine"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "3-phosphonato-D-glycerate(3\u2212)",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-fructose 1,6-bisphosphate",
                "D-fructose 6-phosphate",
                "D-ribose 5-phosphate",
                "D-ribulose 5-phosphate(2\u2212)",
                "N6-(1,2-dicarboxyethyl)-AMP",
                "NAD",
                "NADH",
                "NADP+",
                "UDP-\u03b1-D-glucose",
                "UDP-\u03b1-D-xylose",
                "acetyl-CoA",
                "alanine",
                "arginine",
                "biotin",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glutamate(1\u2212)",
                "glutamic acid",
                "glutamine",
                "glyceraldehyde",
                "glycerate",
                "glyceric acid",
                "glycinate",
                "glycine",
                "glycine betaine",
                "homocysteine",
                "isocitric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "phosphoenolpyruvic acid",
                "pyruvate",
                "pyruvic acid",
                "ribulose 5-phosphate",
                "serinate",
                "serine",
                "spermine",
                "succinate(1\u2212)",
                "succinic acid",
                "uridine",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (Glc)",
            "Adenosine triphosphate (ATP)",
            "Nicotinamide adenine dinucleotide (NAD+)",
            "Acetyl-CoA",
            "Pyruvate",
            "Oxaloacetate",
            "\u03b1-Ketoglutarate",
            "Glutamate",
            "Aspartate",
            "Glycine",
            "Serine",
            "Alanine",
            "Glutamine",
            "Citrate",
            "Succinate",
            "Fumarate",
            "Malate",
            "Ribose 5-phosphate",
            "Fructose 6-phosphate",
            "Glyceraldehyde 3-phosphate",
            "3-Phosphoglycerate",
            "Phosphoenolpyruvate (PEP)",
            "Coenzyme A (CoA)",
            "Flavin adenine dinucleotide (FAD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (Glc)",
                "Adenosine triphosphate (ATP)",
                "Acetyl-CoA",
                "Pyruvate",
                "Glutamate",
                "Aspartate",
                "Glycine",
                "Serine",
                "Alanine",
                "Glutamine",
                "Citrate",
                "Succinate",
                "Fumarate",
                "Malate",
                "Ribose 5-phosphate",
                "Fructose 6-phosphate",
                "3-Phosphoglycerate",
                "Phosphoenolpyruvate (PEP)"
            ],
            "mismatches": [
                "Nicotinamide adenine dinucleotide (NAD+)",
                "Oxaloacetate",
                "\u03b1-Ketoglutarate",
                "Glyceraldehyde 3-phosphate",
                "Coenzyme A (CoA)",
                "Flavin adenine dinucleotide (FAD)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "3-phosphonato-D-glycerate(3\u2212)",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-fructose 1,6-bisphosphate",
                "D-fructose 6-phosphate",
                "D-ribose 5-phosphate",
                "D-ribulose 5-phosphate(2\u2212)",
                "L-alaninate",
                "L-alanine",
                "NAD",
                "NADH",
                "NADP+",
                "acetyl-CoA",
                "alanine",
                "arginine",
                "aspartate(1\u2212)",
                "aspartate(2\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glutamate(1\u2212)",
                "glutamic acid",
                "glutamine",
                "glyceraldehyde",
                "glycerate",
                "glycinate",
                "glycine",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "phosphoenolpyruvic acid",
                "pyruvate",
                "pyruvic acid",
                "riboflavin",
                "ribulose 5-phosphate",
                "serinate",
                "serine",
                "succinate(1\u2212)",
                "succinic acid",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (Glc)",
            "Adenosine triphosphate (ATP)",
            "Guanosine triphosphate (GTP)",
            "Nicotinamide adenine dinucleotide (NAD+)",
            "Nicotinamide adenine dinucleotide phosphate (NADP+)",
            "Coenzyme A (CoA)",
            "Acetyl-CoA",
            "Pyruvate",
            "Oxaloacetate",
            "Citrate",
            "\u03b1-Ketoglutarate",
            "Succinyl-CoA",
            "Succinate",
            "Fumarate",
            "Malate",
            "Glutamate",
            "Glutamine",
            "Aspartate",
            "Alanine",
            "Serine",
            "Glycine",
            "Cysteine",
            "Methionine",
            "Threonine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (Glc)",
                "Adenosine triphosphate (ATP)",
                "Nicotinamide adenine dinucleotide phosphate (NADP+)",
                "Acetyl-CoA",
                "Pyruvate",
                "Citrate",
                "Succinate",
                "Fumarate",
                "Malate",
                "Glutamate",
                "Glutamine",
                "Aspartate",
                "Alanine",
                "Serine",
                "Glycine",
                "Cysteine"
            ],
            "mismatches": [
                "Guanosine triphosphate (GTP)",
                "Nicotinamide adenine dinucleotide (NAD+)",
                "Coenzyme A (CoA)",
                "Oxaloacetate",
                "\u03b1-Ketoglutarate",
                "Succinyl-CoA",
                "Methionine",
                "Threonine"
            ],
            "true_referents": [
                "2'-deoxyadenosine 5'-monophosphate",
                "2-oxoglutarate(1\u2212)",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "L-alaninate",
                "L-alanine",
                "L-cysteinium",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "acetyl-CoA",
                "adenosine 5'-monophosphate",
                "alanine",
                "arginine",
                "aspartate(1\u2212)",
                "aspartate(2\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "cysteine",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glutamate(1\u2212)",
                "glutamic acid",
                "glutamine",
                "glycerate",
                "glycinate",
                "glycine",
                "guanosine",
                "homocysteine",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "pyruvate",
                "pyruvic acid",
                "serinate",
                "serine",
                "succinate(1\u2212)",
                "succinic acid",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (Glc)",
            "Adenosine triphosphate (ATP)",
            "Nicotinamide adenine dinucleotide (NAD+)",
            "Nicotinamide adenine dinucleotide phosphate (NADP+)",
            "Pyruvate (Pyr)",
            "Acetyl-CoA (AcCoA)",
            "Citrate",
            "Oxaloacetate (OAA)",
            "\u03b1-Ketoglutarate (\u03b1-KG)",
            "Succinyl-CoA (SucCoA)",
            "Succinate (Suc)",
            "Fumarate (Fum)",
            "Malate (Mal)",
            "Glutamate (Glu)",
            "Glutamine (Gln)",
            "Aspartate (Asp)",
            "Alanine (Ala)",
            "Glycerol-3-phosphate (G3P)",
            "Fatty acids (e.g., Palmitate)",
            "Amino acids (e.g., Leucine, Isoleucine, Valine)",
            "Nucleotides (e.g., ATP, GTP, CTP, UTP)",
            "Coenzymes (e.g., Thiamine pyrophosphate, Flavin adenine dinucleotide)",
            "Lipids (e.g., Triglycerides, Phospholipids)",
            "Carbohydrates (e.g., Glycogen, Cellulose)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP",
                "NADP+",
                "pyruvate",
                "acetyl-CoA",
                "citrate",
                "succinate(1\u2212)",
                "fumarate(1\u2212)",
                "malate(2\u2212)",
                "glutamate(1\u2212)",
                "glutamine",
                "aspartate(1\u2212)",
                "alanine",
                "glucose"
            ],
            "mismatches": [
                "oxaloacetate",
                "\u03b1-ketoglutarate",
                "succinyl-CoA",
                "glycerol-3-phosphate",
                "fatty acids (e.g., palmitate)",
                "amino acids (e.g., leucine, isoleucine, valine)",
                "nucleotides (e.g., ATP, GTP, CTP, UTP)",
                "coenzymes (e.g., thiamine pyrophosphate, flavin adenine dinucleotide)",
                "lipids (e.g., triglycerides, phospholipids)",
                "carbohydrates (e.g., glycogen, cellulose)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "3-phosphoglyceric acid",
                "3-phosphonato-D-glycerate(3\u2212)",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "L-alaninate",
                "L-alanine",
                "L-aspartate(1\u2212)",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "acetyl-CoA",
                "alanine",
                "aspartate(1\u2212)",
                "aspartate(2\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "fructose",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glutamate(1\u2212)",
                "glutamate(2\u2212)",
                "glutamic acid",
                "glutamine",
                "glyceric acid",
                "glycine",
                "guanosine",
                "isocitric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "purine",
                "pyruvate",
                "pyruvic acid",
                "succinate(1\u2212)",
                "succinic acid",
                "thiamine(1+) diphosphate",
                "trans-aconitate(3\u2212)",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)",
                "\u03b2-alanine"
            ],
            "TP": 13,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (Glc)",
            "Fructose (Fru)",
            "Galactose (Gal)",
            "Pyruvate (Pyr)",
            "Lactate (Lac)",
            "Acetyl-CoA (AcCoA)",
            "Citrate (Cit)",
            "Oxaloacetate (OAA)",
            "Fumarate (Fum)",
            "Malate (Mal)",
            "Glutamate (Glu)",
            "Glutamine (Gln)",
            "Aspartate (Asp)",
            "Alanine (Ala)",
            "Glycine (Gly)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Phenylalanine (Phe)",
            "Tyrosine (Tyr)",
            "Tryptophan (Trp)",
            "Arginine (Arg)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (Glc)",
                "Fructose (Fru)",
                "Galactose (Gal)",
                "Pyruvate (Pyr)",
                "Acetyl-CoA (AcCoA)",
                "Citrate (Cit)",
                "Fumarate (Fum)",
                "Malate (Mal)",
                "Glutamate (Glu)",
                "Glutamine (Gln)",
                "Aspartate (Asp)",
                "Alanine (Ala)",
                "Glycine (Gly)",
                "Serine (Ser)",
                "Tyrosine (Tyr)",
                "Arginine (Arg)"
            ],
            "mismatches": [
                "Lactate (Lac)",
                "Oxaloacetate (OAA)",
                "Threonine (Thr)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "D-threo-isocitrate(3\u2212)",
                "L-alaninate",
                "L-alanine",
                "L-aspartate(1\u2212)",
                "L-erythro-isocitrate(3\u2212)",
                "L-galactose 1-phosphate(2\u2212)",
                "L-threo-isocitrate(3\u2212)",
                "L-tyrosinate(1\u2212)",
                "L-tyrosinate(2\u2212)",
                "L-tyrosine",
                "acetyl-CoA",
                "alanine",
                "arginine",
                "aspartate(1\u2212)",
                "aspartate(2\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "fructose",
                "fructose 1-phosphate",
                "fumarate(1\u2212)",
                "fumaric acid",
                "galactose",
                "glucose",
                "glutamate(1\u2212)",
                "glutamate(2\u2212)",
                "glutamic acid",
                "glutamine",
                "glycerate",
                "glycinate",
                "glycine",
                "glycine betaine",
                "isocitric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "pyruvate",
                "pyruvic acid",
                "serinate",
                "serine",
                "succinate(1\u2212)",
                "trans-aconitate(3\u2212)",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)",
                "\u03b2-L-galactose 1-phosphate(2\u2212)",
                "\u03b2-alanine"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Adenosine triphosphate (ATP)",
            "Glucose",
            "Glycogen",
            "Lactic acid",
            "Pyruvate",
            "Acetyl-CoA",
            "Citrate",
            "Isocitrate",
            "Alpha-ketoglutarate",
            "Succinyl-CoA",
            "Succinate",
            "Fumarate",
            "Malate",
            "Oxaloacetate",
            "Aspartate",
            "Glutamate",
            "Glutathione",
            "Coenzyme A",
            "NAD+",
            "NADH",
            "FAD",
            "FADH2",
            "Coenzyme Q",
            "ATP synthase",
            "Glyceraldehyde 3-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose",
                "Pyruvate",
                "Acetyl-CoA",
                "Citrate",
                "Succinate",
                "Fumarate",
                "Malate",
                "Aspartate",
                "Glutamate",
                "NADH"
            ],
            "mismatches": [
                "Adenosine triphosphate (ATP)",
                "Glycogen",
                "Lactic acid",
                "Isocitrate",
                "Alpha-ketoglutarate",
                "Succinyl-CoA",
                "Oxaloacetate",
                "Glutathione",
                "Coenzyme A",
                "NAD+",
                "FAD",
                "FADH2",
                "Coenzyme Q",
                "ATP synthase",
                "Glyceraldehyde 3-phosphate"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-threo-isocitrate(3\u2212)",
                "L-erythro-isocitrate(3\u2212)",
                "NAD",
                "NADH",
                "NADP+",
                "NADPH",
                "S-adenosyl-L-homocysteinate",
                "S-adenosyl-L-homocysteine",
                "acetyl-CoA",
                "aldehydo-D-ribose 5-phosphate(2\u2212)",
                "aspartate(1\u2212)",
                "aspartate(2\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "citric acid",
                "cysteine",
                "fructose",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glutamate(1\u2212)",
                "glutamic acid",
                "glutamine",
                "glycerate",
                "glyceric acid",
                "glycinate",
                "homocysteine",
                "isocitric acid",
                "lipoate",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "pyruvate",
                "pyruvic acid",
                "succinate(1\u2212)",
                "succinic acid",
                "\u03b2-D-glucose 1-phosphate(2\u2212)"
            ],
            "TP": 10,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            "Glucose (Glc)",
            "Pyruvate (PYR)",
            "Acetyl-CoA (AcCoA)",
            "Oxaloacetate (OAA)",
            "Alpha-Ketoglutarate (\u03b1-KG)",
            "Citrate",
            "Isocitrate",
            "Alpha-Ketoglutarate (\u03b1-KG)",
            "Oxoglutarate",
            "Fumarate",
            "Malate",
            "Glutamate (Glu)",
            "Aspartate (Asp)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Lysine (Lys)",
            "Phenylalanine (Phe)",
            "Tyrosine (Tyr)",
            "Trytophan (Trp)",
            "Glycine (Gly)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Asparagine (Asn)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose (Glc)",
                "Pyruvate (PYR)",
                "Acetyl-CoA (AcCoA)",
                "Citrate",
                "Isocitrate",
                "Fumarate",
                "Malate",
                "Glutamate (Glu)",
                "Aspartate (Asp)",
                "Alanine (Ala)",
                "Tyrosine (Tyr)",
                "Glycine (Gly)",
                "Serine (Ser)"
            ],
            "mismatches": [
                "Oxaloacetate (OAA)",
                "Alpha-Ketoglutarate (\u03b1-KG)",
                "Oxoglutarate",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Lysine (Lys)",
                "Phenylalanine (Phe)",
                "Trytophan (Trp)",
                "Threonine (Thr)",
                "Asparagine (Asn)"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "2-oxoglutaric acid",
                "ADP(3\u2212)",
                "D-threo-isocitrate(3\u2212)",
                "L-alaninate",
                "L-alanine",
                "L-aspartate(1\u2212)",
                "L-erythro-isocitrate(3\u2212)",
                "L-threo-isocitrate(3\u2212)",
                "L-tyrosinate(1\u2212)",
                "L-tyrosinate(2\u2212)",
                "L-tyrosine",
                "acetyl-CoA",
                "alanine",
                "arginine",
                "aspartate(1\u2212)",
                "aspartate(2\u2212)",
                "aspartic acid",
                "citrate(1\u2212)",
                "citrate(2\u2212)",
                "citrate(3\u2212)",
                "fumarate(1\u2212)",
                "fumaric acid",
                "glucose",
                "glutamate(1\u2212)",
                "glutamate(2\u2212)",
                "glutamic acid",
                "glycerate",
                "glycinate",
                "glycine",
                "glycine betaine",
                "isocitric acid",
                "malate(2\u2212)",
                "malic acid",
                "oxalatosuccinate(3\u2212)",
                "oxalosuccinic acid",
                "phosphoenolpyruvate",
                "pyruvate",
                "pyruvic acid",
                "serinate",
                "serine",
                "trans-aconitate(3\u2212)",
                "\u03b2-D-glucose 1-phosphate(2\u2212)",
                "\u03b2-D-glucose 6-phosphate(2\u2212)",
                "\u03b2-alanine"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "fundamental metabolite",
        "domain": "biology",
        "response": [
            {
                "Referent": "Acetyl-CoA",
                "Canonical Name": "(CH3)2CHCO2H"
            },
            {
                "Referent": "Adenosine triphosphate",
                "Canonical Name": "C10H16N5O13P3"
            },
            {
                "Referent": "Alanine",
                "Canonical Name": "C3H7NO2"
            },
            {
                "Referent": "Alkaloid",
                "Canonical Name": "(C21H29NO)N"
            },
            {
                "Referent": "Amino acid",
                "Canonical Name": "(CH2)nNH2"
            },
            {
                "Referent": "Amino acid",
                "Canonical Name": "(CH2)nNH2"
            },
            {
                "Referent": "Aspartate",
                "Canonical Name": "C4H7NO4"
            },
            {
                "Referent": "ATP",
                "Canonical Name": "C10H16N5O13P3"
            },
            {
                "Referent": "Bile acid",
                "Canonical Name": "C24H40O2"
            },
            {
                "Referent": "Caffeine",
                "Canonical Name": "C8H10N4O2"
            },
            {
                "Referent": "Cholesterol",
                "Canonical Name": "C27H46O"
            },
            {
                "Referent": "Corticosterone",
                "Canonical Name": "C21H30O4"
            },
            {
                "Referent": "Cytidine",
                "Canonical Name": "C10H14N4O4"
            },
            {
                "Referent": "Deoxycorticosterone",
                "Canonical Name": "C21H30O4"
            },
            {
                "Referent": "Erythritol",
                "Canonical Name": "C4H10O4"
            },
            {
                "Referent": "Glucose",
                "Canonical Name": "C6H12O6"
            },
            {
                "Referent": "Guanine",
                "Canonical Name": "C5H5N5O"
            },
            {
                "Referent": "Hemoglobin",
                "Canonical Name": "C34H46FeN4O4"
            },
            {
                "Referent": "Histamine",
                "Canonical Name": "C5H9N3"
            },
            {
                "Referent": "Isoleucine",
                "Canonical Name": "C3H7NO2"
            },
            {
                "Referent": "Leucine",
                "Canonical Name": "C6H13NO2"
            },
            {
                "Referent": "Lysine",
                "Canonical Name": "C6H14N2O2"
            },
            {
                "Referent": "Methionine",
                "Canonical Name": "C5H11NO2S"
            },
            {
                "Referent": "Nicotine",
                "Canonical Name": "C10H14N2"
            },
            {
                "Referent": "Norepinephrine",
                "Canonical Name": "C8H11NO3"
            },
            {
                "Referent": "Nucleotide",
                "Canonical Name": "C10H14N5O13P3"
            },
            {
                "Referent": "Phenylalanine",
                "Canonical Name": "C9H11NO2"
            },
            {
                "Referent": "Phenylalanine",
                "Canonical Name": "C9H11NO2"
            },
            {
                "Referent": "Proline",
                "Canonical Name": "C5H9NO2"
            },
            {
                "Referent": "Protein",
                "Canonical Name": "(C21H29NO)N"
            },
            {
                "Referent": "Serine",
                "Canonical Name": "C3H7NO2"
            },
            {
                "Referent": "Steroids",
                "Canonical Name": "(C21H30O4)n"
            },
            {
                "Referent": "Succinyl-CoA",
                "Canonical Name": "(CH3)2CHCO2CH2CO2H"
            },
            {
                "Referent": "Threonine",
                "Canonical Name": "C3H7NO2"
            },
            {
                "Referent": "Tyrosine",
                "Canonical Name": "C9H11NO3"
            },
            {
                "Referent": "Uridine",
                "Canonical Name": "C10H12N4O4"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetyl-CoA",
                "ATP",
                "glucose",
                "uridine"
            ],
            "mismatches": [
                "alanine",
                "alkaloid",
                "amino acid",
                "aspartate",
                "bile acid",
                "caffeine",
                "cholesterol",
                "corticosterone",
                "cytidine",
                "deoxycorticosterone",
                "erythritol",
                "guanine",
                "hemoglobin",
                "histamine",
                "isoleucine",
                "leucine",
                "lysine",
                "methionine",
                "nicotine",
                "norepinephrine",
                "nucleotide",
                "phenylalanine",
                "proline",
                "protein",
                "serine",
                "steroids",
                "succinyl-CoA",
                "threonine",
                "tyrosine"
            ],
            "true_referents": [
                "2-oxoglutarate(1\u2212)",
                "6-phospho-D-gluconic acid",
                "ATP",
                "ATP(3\u2212)",
                "ATP(4\u2212)",
                "D-aspartate(1\u2212)",
                "L-alanine",
                "L-aspartate(1\u2212)",
                "L-cysteinate(1\u2212)",
                "L-erythro-isocitrate(3\u2212)",
                "L-tyrosinate(1\u2212)",
                "L-tyrosinate(2\u2212)",
                "L-xylulose 5-phosphate(2\u2212)",
                "acetyl-CoA",
                "aspartate(1\u2212)",
                "citric acid",
                "fructose",
                "fumarate(1\u2212)",
                "glucose",
                "glutamate(1\u2212)",
                "glyceric acid",
                "glycine",
                "guanosine",
                "purine",
                "pyruvate",
                "succinate(1\u2212)",
                "trans-aconitate(3\u2212)",
                "uridine",
                "\u03b2-D-glucose 1-phosphate(2\u2212)"
            ],
            "TP": 4,
            "FP": 29,
            "FN": 0
        }
    }
]