[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Valine (Val)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Histidine (His)",
            "Arginine (Arg)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)",
            "Alanine (Ala)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Glycine (Gly)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Valine (Val)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Histidine (His)",
                "Arginine (Arg)",
                "Cysteine (Cys)",
                "Tyrosine (Tyr)",
                "Alanine (Ala)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Proline (Pro)",
                "Serine (Ser)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)",
            "Arginine (Arg) - essential in infants",
            "Cysteine (Cys) - semi-essential",
            "Glycine (Gly) - semi-essential",
            "Proline (Pro) - semi-essential",
            "Tyrosine (Tyr) - semi-essential",
            "Alanine (Ala)",
            "Asparagine (Asn)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Glutamine (Gln)",
            "Serine (Ser)",
            "Glutamate (Glu)",
            "Aspartate (Asp)",
            "Carnitine",
            "Creatine",
            "Choline"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)",
                "Arginine (Arg) - essential in infants",
                "Cysteine (Cys) - semi-essential",
                "Glycine (Gly) - semi-essential",
                "Proline (Pro) - semi-essential",
                "Tyrosine (Tyr) - semi-essential",
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Serine (Ser)",
                "Glutamate (Glu)",
                "Aspartate (Asp)",
                "Carnitine",
                "Creatine"
            ],
            "mismatches": [
                "Choline"
            ],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg) - essential in infants",
                "Asparagine (Asn)",
                "Aspartate (Asp)",
                "Aspartic acid (Asp)",
                "Carnitine",
                "Creatine",
                "Cysteine (Cys) - semi-essential",
                "Glutamate (Glu)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly) - semi-essential",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro) - semi-essential",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr) - semi-essential",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Arginine (Arg)",
            "Asparagine (Asn)",
            "Aspartic acid (Asp)",
            "Cysteine (Cys)",
            "Glutamic acid (Glu)",
            "Glutamine (Gln)",
            "Glycine (Gly)",
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Tyrosine (Tyr)",
            "Valine (Val)",
            "Histidine (His)",
            "Methionine (Met)",
            "Threonine (Thr)",
            "Tryptophan (Trp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)",
            "Arginine (Arg) - conditionally essential",
            "Cysteine (Cys) - conditionally essential",
            "Tyrosine (Tyr) - conditionally essential",
            "Proline (Pro) - conditionally essential",
            "Glutamine (Gln) - conditionally essential",
            "Glycine (Gly) - conditionally essential",
            "Serine (Ser) - conditionally essential",
            "Asparagine (Asn) - conditionally essential",
            "Alanine (Ala) - conditionally essential",
            "Aspartic Acid (Asp) - conditionally essential",
            "Glutamic Acid (Glu) - conditionally essential",
            "Human dietary requirement for Histidine",
            "Infant dietary requirement for Histidine",
            "Branched-chain amino acids (BCAAs) - Leucine, Isoleucine, Valine",
            "Aromatic amino acids - Phenylalanine, Tyrosine, Tryptophan"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)",
                "Arginine (Arg) - conditionally essential",
                "Cysteine (Cys) - conditionally essential",
                "Tyrosine (Tyr) - conditionally essential",
                "Proline (Pro) - conditionally essential",
                "Glutamine (Gln) - conditionally essential",
                "Glycine (Gly) - conditionally essential",
                "Serine (Ser) - conditionally essential",
                "Asparagine (Asn) - conditionally essential",
                "Alanine (Ala) - conditionally essential",
                "Aspartic Acid (Asp) - conditionally essential",
                "Glutamic Acid (Glu) - conditionally essential",
                "Human dietary requirement for Histidine",
                "Infant dietary requirement for Histidine",
                "Branched-chain amino acids (BCAAs) - Leucine, Isoleucine, Valine",
                "Aromatic amino acids - Phenylalanine, Tyrosine, Tryptophan"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala) - conditionally essential",
                "Arginine (Arg) - conditionally essential",
                "Aromatic amino acids - Phenylalanine, Tyrosine, Tryptophan",
                "Asparagine (Asn) - conditionally essential",
                "Aspartic Acid (Asp) - conditionally essential",
                "Branched-chain amino acids (BCAAs) - Leucine, Isoleucine, Valine",
                "Cysteine (Cys) - conditionally essential",
                "Glutamic Acid (Glu) - conditionally essential",
                "Glutamine (Gln) - conditionally essential",
                "Glycine (Gly) - conditionally essential",
                "Histidine (His)",
                "Human dietary requirement for Histidine",
                "Infant dietary requirement for Histidine",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro) - conditionally essential",
                "Serine (Ser) - conditionally essential",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr) - conditionally essential",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "mismatches": [],
            "true_referents": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 9,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Valine (Val)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Histidine (His)",
            "Arginine (Arg)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)",
            "Glutamine (Gln)",
            "Asparagine (Asn)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Selenocysteine (Sec)",
            "Pyrrolysine (Pyl)",
            "Ornithine (Orn)",
            "Citrulline (Cit)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Valine (Val)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Histidine (His)",
                "Arginine (Arg)",
                "Cysteine (Cys)",
                "Tyrosine (Tyr)",
                "Glutamine (Gln)",
                "Asparagine (Asn)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)",
                "Selenocysteine (Sec)",
                "Pyrrolysine (Pyl)",
                "Ornithine (Orn)",
                "Citrulline (Cit)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Citrulline (Cit)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine (Pyl)",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)",
            "Arginine (Arg)",
            "Tyrosine (Tyr)",
            "Cysteine (Cys)",
            "Glycine (Gly)",
            "Glutamine (Gln)",
            "Asparagine (Asn)",
            "Serine (Ser)",
            "Proline (Pro)",
            "Alanine (Ala)",
            "Glutamic acid (Glu)",
            "Aspartic acid (Asp)",
            "Selenocysteine (Sec)",
            "Pyrolysine (Pyl)",
            "Ornithine (Orn)",
            "Citrulline (Cit)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)",
                "Arginine (Arg)",
                "Tyrosine (Tyr)",
                "Cysteine (Cys)",
                "Glycine (Gly)",
                "Glutamine (Gln)",
                "Asparagine (Asn)",
                "Serine (Ser)",
                "Proline (Pro)",
                "Alanine (Ala)",
                "Glutamic acid (Glu)",
                "Aspartic acid (Asp)",
                "Selenocysteine (Sec)",
                "Pyrolysine (Pyl)",
                "Ornithine (Orn)",
                "Citrulline (Cit)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Citrulline (Cit)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrolysine (Pyl)",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Phenylalanine (Phe)",
            "Valine (Val)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Methionine (Met)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Lysine (Lys)",
            "Histidine (His)",
            "Arginine (Arg)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)",
            "Alanine (Ala)",
            "Asparagine (Asn)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Glycine (Gly)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Carnitine",
            "Ornithine",
            "Taurine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylalanine (Phe)",
                "Valine (Val)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Methionine (Met)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Lysine (Lys)",
                "Histidine (His)",
                "Arginine (Arg)",
                "Cysteine (Cys)",
                "Tyrosine (Tyr)",
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)",
                "Glycine (Gly)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Carnitine",
                "Ornithine",
                "Taurine"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Carnitine",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Taurine",
                "Threonine (Thr)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)",
            "Arginine (Arg)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)",
            "Taurine (Tau)",
            "Beta-Alanine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)",
                "Arginine (Arg)",
                "Cysteine (Cys)",
                "Tyrosine (Tyr)",
                "Taurine (Tau)",
                "Beta-Alanine"
            ],
            "mismatches": [],
            "true_referents": [
                "Arginine (Arg)",
                "Beta-Alanine",
                "Cysteine (Cys)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Taurine (Tau)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 14,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His, H)",
            "Isoleucine (Ile, I)",
            "Leucine (Leu, L)",
            "Lysine (Lys, K)",
            "Methionine (Met, M)",
            "Phenylalanine (Phe, F)",
            "Threonine (Thr, T)",
            "Tryptophan (Trp, W)",
            "Valine (Val, V)",
            "Arginine (Arg, R)",
            "Cysteine (Cys, C)",
            "Glycine (Gly, G)",
            "Proline (Pro, P)",
            "Tyrosine (Tyr, Y)",
            "Glutamine (Gln, Q)",
            "Asparagine (Asn, N)",
            "Selenocysteine (Sec, U)",
            "Pyrrolysine (Pyl, O)",
            "2-Aminoisobutyric acid (Aib)",
            "Norvaline (Nva)",
            "Norleucine (Nle)",
            "Ornithine (Orn)",
            "Citrulline (Cit)",
            "Homocysteine (Hcy)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His, H)",
                "Isoleucine (Ile, I)",
                "Leucine (Leu, L)",
                "Lysine (Lys, K)",
                "Methionine (Met, M)",
                "Phenylalanine (Phe, F)",
                "Threonine (Thr, T)",
                "Tryptophan (Trp, W)",
                "Valine (Val, V)",
                "Arginine (Arg, R)",
                "Cysteine (Cys, C)",
                "Glycine (Gly, G)",
                "Proline (Pro, P)",
                "Tyrosine (Tyr, Y)",
                "Glutamine (Gln, Q)",
                "Asparagine (Asn, N)",
                "Selenocysteine (Sec, U)",
                "Pyrrolysine (Pyl, O)",
                "2-Aminoisobutyric acid (Aib)",
                "Norvaline (Nva)",
                "Norleucine (Nle)",
                "Ornithine (Orn)",
                "Citrulline (Cit)",
                "Homocysteine (Hcy)"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Aminoisobutyric acid (Aib)",
                "Arginine (Arg, R)",
                "Asparagine (Asn, N)",
                "Citrulline (Cit)",
                "Cysteine (Cys, C)",
                "Glutamine (Gln, Q)",
                "Glycine (Gly, G)",
                "Histidine (His, H)",
                "Homocysteine (Hcy)",
                "Isoleucine (Ile, I)",
                "Leucine (Leu, L)",
                "Lysine (Lys, K)",
                "Methionine (Met, M)",
                "Norleucine (Nle)",
                "Norvaline (Nva)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe, F)",
                "Proline (Pro, P)",
                "Pyrrolysine (Pyl, O)",
                "Selenocysteine (Sec, U)",
                "Threonine (Thr, T)",
                "Tryptophan (Trp, W)",
                "Tyrosine (Tyr, Y)",
                "Valine (Val, V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "mismatches": [],
            "true_referents": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 9,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)",
            "Arginine (Arg)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)",
                "Arginine (Arg)"
            ],
            "mismatches": [],
            "true_referents": [
                "Arginine (Arg)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Valine (Val)",
            "Arginine (Arg)",
            "Cysteine (Cys)",
            "Glutamine (Gln)",
            "Glycine (Gly)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Tyrosine (Tyr)",
            "Alanine (Ala)",
            "Aspartic acid (Asp)",
            "Asparagine (Asn)",
            "Glutamic acid (Glu)",
            "Hydroxyproline (Hyp)",
            "Hydroxylysine (Hyl)",
            "Ornithine (Orn)",
            "Pyrrolysine (Pyl)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Valine (Val)",
                "Arginine (Arg)",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Tyrosine (Tyr)",
                "Alanine (Ala)",
                "Aspartic acid (Asp)",
                "Asparagine (Asn)",
                "Glutamic acid (Glu)",
                "Hydroxyproline (Hyp)",
                "Hydroxylysine (Hyl)",
                "Ornithine (Orn)",
                "Pyrrolysine (Pyl)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Hydroxylysine (Hyl)",
                "Hydroxyproline (Hyp)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine (Pyl)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala or A)",
            "Arginine (Arg or R)",
            "Asparagine (Asn or N)",
            "Aspartic acid (Asp or D)",
            "Cysteine (Cys or C)",
            "Glutamic acid (Glu or E)",
            "Glutamine (Gln or Q)",
            "Glycine (Gly or G)",
            "Histidine (His or H)",
            "Isoleucine (Ile or I)",
            "Leucine (Leu or L)",
            "Lysine (Lys or K)",
            "Methionine (Met or M)",
            "Phenylalanine (Phe or F)",
            "Proline (Pro or P)",
            "Serine (Ser or S)",
            "Threonine (Thr or T)",
            "Tryptophan (Trp or W)",
            "Tyrosine (Tyr or Y)",
            "Valine (Val or V)",
            "Amino acid building blocks of proteins",
            "Protein synthesis precursors",
            "Nutrient essential for human health",
            "Amino acid supplement for athletes",
            "Key component of neurotransmitters",
            "Essential for collagen synthesis",
            "Amino acid found in breast milk",
            "Important for muscle growth and repair",
            "Found in many plant-based foods",
            "Amino acid involved in detoxification",
            "Important for immune system function",
            "Found in many animal-based foods",
            "Amino acid involved in neurotransmission",
            "Essential for skin and hair health",
            "Found in many fermented foods",
            "Amino acid involved in energy metabolism"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala or A)",
                "Arginine (Arg or R)",
                "Asparagine (Asn or N)",
                "Aspartic acid (Asp or D)",
                "Cysteine (Cys or C)",
                "Glutamic acid (Glu or E)",
                "Glutamine (Gln or Q)",
                "Glycine (Gly or G)",
                "Histidine (His or H)",
                "Isoleucine (Ile or I)",
                "Leucine (Leu or L)",
                "Lysine (Lys or K)",
                "Methionine (Met or M)",
                "Phenylalanine (Phe or F)",
                "Proline (Pro or P)",
                "Serine (Ser or S)",
                "Threonine (Thr or T)",
                "Tryptophan (Trp or W)",
                "Tyrosine (Tyr or Y)",
                "Valine (Val or V)",
                "Amino acid building blocks of proteins",
                "Protein synthesis precursors",
                "Nutrient essential for human health",
                "Amino acid supplement for athletes"
            ],
            "mismatches": [
                "Key component of neurotransmitters",
                "Essential for collagen synthesis",
                "Amino acid found in breast milk",
                "Important for muscle growth and repair",
                "Found in many plant-based foods",
                "Amino acid involved in detoxification",
                "Important for immune system function",
                "Found in many animal-based foods",
                "Amino acid involved in neurotransmission",
                "Essential for skin and hair health",
                "Found in many fermented foods",
                "Amino acid involved in energy metabolism"
            ],
            "true_referents": [
                "Alanine (Ala or A)",
                "Amino acid building blocks of proteins",
                "Amino acid supplement for athletes",
                "Arginine (Arg or R)",
                "Asparagine (Asn or N)",
                "Aspartic acid (Asp or D)",
                "Cysteine (Cys or C)",
                "Glutamic acid (Glu or E)",
                "Glutamine (Gln or Q)",
                "Glycine (Gly or G)",
                "Histidine (His or H)",
                "Isoleucine (Ile or I)",
                "Leucine (Leu or L)",
                "Lysine (Lys or K)",
                "Methionine (Met or M)",
                "Nutrient essential for human health",
                "Phenylalanine (Phe or F)",
                "Proline (Pro or P)",
                "Protein synthesis precursors",
                "Serine (Ser or S)",
                "Threonine (Thr or T)",
                "Tryptophan (Trp or W)",
                "Tyrosine (Tyr or Y)",
                "Valine (Val or V)"
            ],
            "TP": 24,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            "Histidine (H)",
            "Isoleucine (I)",
            "Leucine (L)",
            "Lysine (K)",
            "Methionine (M)",
            "Phenylalanine (F)",
            "Threonine (T)",
            "Tryptophan (W)",
            "Valine (V)",
            "Arginine (R)",
            "Tyrosine (Y)",
            "Alanine (A)",
            "Asparagine (N)",
            "Aspartic acid (D)",
            "Cysteine (C)",
            "Glutamic acid (E)",
            "Glutamine (Q)",
            "Glycine (G)",
            "Proline (P)",
            "Serine (S)",
            "Prolin (O)",
            "Hydroxyproline (O)",
            "Ornithine",
            "Citrulline",
            "Homocysteine",
            "Taurine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Histidine (H)",
                "Isoleucine (I)",
                "Leucine (L)",
                "Lysine (K)",
                "Methionine (M)",
                "Phenylalanine (F)",
                "Threonine (T)",
                "Tryptophan (W)",
                "Valine (V)",
                "Arginine (R)",
                "Tyrosine (Y)",
                "Alanine (A)",
                "Asparagine (N)",
                "Aspartic acid (D)",
                "Cysteine (C)",
                "Glutamic acid (E)",
                "Glutamine (Q)",
                "Glycine (G)",
                "Proline (P)",
                "Serine (S)",
                "Prolin (O)",
                "Hydroxyproline (O)",
                "Ornithine",
                "Citrulline"
            ],
            "mismatches": [
                "Homocysteine",
                "Taurine"
            ],
            "true_referents": [
                "Alanine (A)",
                "Arginine (R)",
                "Asparagine (N)",
                "Aspartic acid (D)",
                "Citrulline",
                "Cysteine (C)",
                "Glutamic acid (E)",
                "Glutamine (Q)",
                "Glycine (G)",
                "Histidine (H)",
                "Hydroxyproline (O)",
                "Isoleucine (I)",
                "Leucine (L)",
                "Lysine (K)",
                "Methionine (M)",
                "Ornithine",
                "Phenylalanine (F)",
                "Prolin (O)",
                "Proline (P)",
                "Serine (S)",
                "Threonine (T)",
                "Tryptophan (W)",
                "Tyrosine (Y)",
                "Valine (V)"
            ],
            "TP": 24,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "essential amino acid",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Alanine",
                "Canonical name": "(Ala, A)"
            },
            {
                "Referent": "Arginine",
                "Canonical name": "(Arg, R)"
            },
            {
                "Referent": "Asparagine",
                "Canonical name": "(Asn, N)"
            },
            {
                "Referent": "Aspartic acid",
                "Canonical name": "(Asp, D)"
            },
            {
                "Referent": "Cysteine",
                "Canonical name": "(Cys, C)"
            },
            {
                "Referent": "Glutamic acid",
                "Canonical name": "(Glu, E)"
            },
            {
                "Referent": "Glutamine",
                "Canonical name": "(Gln, Q)"
            },
            {
                "Referent": "Glycine",
                "Canonical name": "(Gly, G)"
            },
            {
                "Referent": "Histidine",
                "Canonical name": "(His, H)"
            },
            {
                "Referent": "Isoleucine",
                "Canonical name": "(Ile, I)"
            },
            {
                "Referent": "Leucine",
                "Canonical name": "(Leu, L)"
            },
            {
                "Referent": "Lysine",
                "Canonical name": "(Lys, K)"
            },
            {
                "Referent": "Methionine",
                "Canonical name": "(Met, M)"
            },
            {
                "Referent": "Phenylalanine",
                "Canonical name": "(Phe, F)"
            },
            {
                "Referent": "Proline",
                "Canonical name": "(Pro, P)"
            },
            {
                "Referent": "Serine",
                "Canonical name": "(Ser, S)"
            },
            {
                "Referent": "Threonine",
                "Canonical name": "(Thr, T)"
            },
            {
                "Referent": "Tryptophan",
                "Canonical name": "(Trp, W)"
            },
            {
                "Referent": "Tyrosine",
                "Canonical name": "(Tyr, Y)"
            },
            {
                "Referent": "Valine",
                "Canonical name": "(Val, V)"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine",
                "Arginine",
                "Asparagine",
                "Aspartic acid",
                "Cysteine",
                "Glutamic acid",
                "Glutamine",
                "Glycine",
                "Histidine",
                "Isoleucine",
                "Leucine",
                "Lysine",
                "Methionine",
                "Phenylalanine",
                "Proline",
                "Serine",
                "Threonine",
                "Tryptophan",
                "Tyrosine",
                "Valine"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Referent\": \"Alanine\", \"Canonical name\": \"(Ala, A)\"}",
                "{\"Referent\": \"Arginine\", \"Canonical name\": \"(Arg, R)\"}",
                "{\"Referent\": \"Asparagine\", \"Canonical name\": \"(Asn, N)\"}",
                "{\"Referent\": \"Aspartic acid\", \"Canonical name\": \"(Asp, D)\"}",
                "{\"Referent\": \"Cysteine\", \"Canonical name\": \"(Cys, C)\"}",
                "{\"Referent\": \"Glutamic acid\", \"Canonical name\": \"(Glu, E)\"}",
                "{\"Referent\": \"Glutamine\", \"Canonical name\": \"(Gln, Q)\"}",
                "{\"Referent\": \"Glycine\", \"Canonical name\": \"(Gly, G)\"}",
                "{\"Referent\": \"Histidine\", \"Canonical name\": \"(His, H)\"}",
                "{\"Referent\": \"Isoleucine\", \"Canonical name\": \"(Ile, I)\"}",
                "{\"Referent\": \"Leucine\", \"Canonical name\": \"(Leu, L)\"}",
                "{\"Referent\": \"Lysine\", \"Canonical name\": \"(Lys, K)\"}",
                "{\"Referent\": \"Methionine\", \"Canonical name\": \"(Met, M)\"}",
                "{\"Referent\": \"Phenylalanine\", \"Canonical name\": \"(Phe, F)\"}",
                "{\"Referent\": \"Proline\", \"Canonical name\": \"(Pro, P)\"}",
                "{\"Referent\": \"Serine\", \"Canonical name\": \"(Ser, S)\"}",
                "{\"Referent\": \"Threonine\", \"Canonical name\": \"(Thr, T)\"}",
                "{\"Referent\": \"Tryptophan\", \"Canonical name\": \"(Trp, W)\"}",
                "{\"Referent\": \"Tyrosine\", \"Canonical name\": \"(Tyr, Y)\"}",
                "{\"Referent\": \"Valine\", \"Canonical name\": \"(Val, V)\"}"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    }
]