[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Cysteine (Cys)",
            "Glutamine (Gln)",
            "Asparagine (Asn)",
            "Glycine (Gly)",
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Lysine (Lys)",
            "Leucine (Leu)",
            "Methionine (Met)",
            "Aspartic Acid (Asp)",
            "Phenylalanine (Phe)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Tyrosine (Tyr)",
            "Valine (Val)",
            "Arginine (Arg)",
            "Glutamic Acid (Glu)",
            "Cystine",
            "Homocysteine",
            "Homocysteine Cysteine",
            "Selenocysteine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Asparagine (Asn)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Lysine (Lys)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Aspartic Acid (Asp)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)",
                "Arginine (Arg)",
                "Glutamic Acid (Glu)",
                "Cystine",
                "Homocysteine",
                "Homocysteine Cysteine",
                "Selenocysteine"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic Acid (Asp)",
                "Cysteine (Cys)",
                "Cystine",
                "Glutamic Acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Homocysteine",
                "Homocysteine Cysteine",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Selenocysteine",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala, A)",
            "Asparagine (Asn, N)",
            "Glutamine (Gln, Q)",
            "Serine (Ser, S)",
            "Threonine (Thr, T)",
            "Tyrosine (Tyr, Y)",
            "Cysteine (Cys, C)",
            "Glycine (Gly, G)",
            "Histidine (His, H)",
            "Methionine (Met, M)",
            "Proline (Pro, P)",
            "Valine (Val, V)",
            "Isoleucine (Ile, I)",
            "Leucine (Leu, L)",
            "Phenylalanine (Phe, F)",
            "Tryptophan (Trp, W)",
            "Aspartic acid (Asp, D)",
            "Glutamic acid (Glu, E)",
            "Lysine (Lys, K)",
            "Arginine (Arg, R)",
            "Glutamate (Glu, E)",
            "Aspartate (Asp, D)",
            "Argininosuccinate (Arg-Succ)",
            "Ornithine (Orn)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala, A)",
                "Asparagine (Asn, N)",
                "Glutamine (Gln, Q)",
                "Serine (Ser, S)",
                "Threonine (Thr, T)",
                "Tyrosine (Tyr, Y)",
                "Cysteine (Cys, C)",
                "Glycine (Gly, G)",
                "Histidine (His, H)",
                "Methionine (Met, M)",
                "Proline (Pro, P)",
                "Valine (Val, V)",
                "Isoleucine (Ile, I)",
                "Leucine (Leu, L)",
                "Phenylalanine (Phe, F)",
                "Tryptophan (Trp, W)",
                "Aspartic acid (Asp, D)",
                "Glutamic acid (Glu, E)",
                "Lysine (Lys, K)",
                "Arginine (Arg, R)",
                "Glutamate (Glu, E)",
                "Aspartate (Asp, D)",
                "Argininosuccinate (Arg-Succ)",
                "Ornithine (Orn)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala, A)",
                "Arginine (Arg, R)",
                "Argininosuccinate (Arg-Succ)",
                "Asparagine (Asn, N)",
                "Aspartate (Asp, D)",
                "Aspartic acid (Asp, D)",
                "Cysteine (Cys, C)",
                "Glutamate (Glu, E)",
                "Glutamic acid (Glu, E)",
                "Glutamine (Gln, Q)",
                "Glycine (Gly, G)",
                "Histidine (His, H)",
                "Isoleucine (Ile, I)",
                "Leucine (Leu, L)",
                "Lysine (Lys, K)",
                "Methionine (Met, M)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe, F)",
                "Proline (Pro, P)",
                "Serine (Ser, S)",
                "Threonine (Thr, T)",
                "Tryptophan (Trp, W)",
                "Tyrosine (Tyr, Y)",
                "Valine (Val, V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Cysteine (Cys)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Tyrosine (Tyr)",
            "Lysine (Lys)",
            "Glutamic acid (Glu)",
            "Aspartic acid (Asp)",
            "Histidine (His)",
            "Arginine (Arg)",
            "Citrulline",
            "Selenocysteine (Sec)",
            "Pyrrolysine",
            "Ornithine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Cysteine (Cys)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Tyrosine (Tyr)",
                "Lysine (Lys)",
                "Glutamic acid (Glu)",
                "Aspartic acid (Asp)",
                "Histidine (His)",
                "Arginine (Arg)",
                "Citrulline",
                "Selenocysteine (Sec)",
                "Pyrrolysine",
                "Ornithine"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Citrulline",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Proline (Pro)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Methionine (Met)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Histidine (His) at pH 7.4",
            "Lysine (Lys) at pH 7.4",
            "Arginine (Arg) at pH 7.4",
            "Creatine",
            "Beta-Alanine",
            "Homoserine",
            "Norvaline",
            "Sarcosine (N-Methylglycine)",
            "Canavanine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Proline (Pro)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Methionine (Met)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Cysteine (Cys)",
                "Tyrosine (Tyr)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Histidine (His) at pH 7.4",
                "Lysine (Lys) at pH 7.4",
                "Arginine (Arg) at pH 7.4",
                "Creatine",
                "Beta-Alanine",
                "Homoserine",
                "Norvaline",
                "Sarcosine (N-Methylglycine)",
                "Canavanine"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg) at pH 7.4",
                "Asparagine (Asn)",
                "Beta-Alanine",
                "Canavanine",
                "Creatine",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His) at pH 7.4",
                "Homoserine",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys) at pH 7.4",
                "Methionine (Met)",
                "Norvaline",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Sarcosine (N-Methylglycine)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Phenylalanine (Phe)",
            "Tyrosine (Tyr)",
            "Tryptophan (Trp)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Cysteine (Cys)",
            "Methionine (Met)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Proline (Pro)",
            "Histidine (His)",
            "Selenocysteine (Sec)",
            "Pyrrolysine (Pyl)",
            "Ornithine",
            "Citrulline",
            "Homoserine",
            "Hydroxyproline",
            "Sarcosine",
            "Taurine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Phenylalanine (Phe)",
                "Tyrosine (Tyr)",
                "Tryptophan (Trp)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Cysteine (Cys)",
                "Methionine (Met)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Proline (Pro)",
                "Histidine (His)",
                "Selenocysteine (Sec)",
                "Pyrrolysine (Pyl)",
                "Ornithine",
                "Citrulline",
                "Homoserine",
                "Hydroxyproline",
                "Sarcosine",
                "Taurine"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Citrulline",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Homoserine",
                "Hydroxyproline",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Ornithine",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine (Pyl)",
                "Sarcosine",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Taurine",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Cysteine (Cys)",
            "Methionine (Met)",
            "Proline (Pro)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Phenylalanine (Phe)",
            "Tyrosine (Tyr)",
            "Tryptophan (Trp)",
            "Histidine (His)",
            "Arginine (Arg)",
            "Lysine (Lys)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Ornithine (Orn)",
            "Selenocysteine (Sec)",
            "Pyrrolysine (Pyl)",
            "Dihydroxyphenylalanine (DOPA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Cysteine (Cys)",
                "Methionine (Met)",
                "Proline (Pro)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Phenylalanine (Phe)",
                "Tyrosine (Tyr)",
                "Tryptophan (Trp)",
                "Histidine (His)",
                "Arginine (Arg)",
                "Lysine (Lys)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)",
                "Ornithine (Orn)",
                "Selenocysteine (Sec)",
                "Pyrrolysine (Pyl)",
                "Dihydroxyphenylalanine (DOPA)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Dihydroxyphenylalanine (DOPA)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine (Pyl)",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Cysteine (Cys)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Glycine (Gly)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Tyrosine (Tyr)",
            "Valine (Val)",
            "Arginine (Arg)",
            "Histidine (His)",
            "Lysine (Lys)",
            "Aspartic Acid (Asp)",
            "Glutamic Acid (Glu)",
            "Selenocysteine (Sec)",
            "Pyrrolysine (Pyl)",
            "Ornithine (Orn)",
            "Citrulline (Cit)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Cysteine (Cys)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)",
                "Arginine (Arg)",
                "Histidine (His)",
                "Lysine (Lys)",
                "Aspartic Acid (Asp)",
                "Glutamic Acid (Glu)",
                "Selenocysteine (Sec)",
                "Pyrrolysine (Pyl)",
                "Ornithine (Orn)",
                "Citrulline (Cit)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic Acid (Asp)",
                "Citrulline (Cit)",
                "Cysteine (Cys)",
                "Glutamic Acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine (Pyl)",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Glycine (Gly)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Methionine (Met)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Cysteine (Cys)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tyrosine (Tyr)",
            "Tryptophan (Trp)",
            "Histidine (His)",
            "Lysine (Lys)",
            "Arginine (Arg)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Cysteine (Cys)",
            "Serine (Ser)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Glycine (Gly)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Methionine (Met)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Cysteine (Cys)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tyrosine (Tyr)",
                "Tryptophan (Trp)",
                "Histidine (His)",
                "Lysine (Lys)",
                "Arginine (Arg)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Asparagine (Asn)",
            "Cysteine (Cys)",
            "Glutamine (Gln)",
            "Glycine (Gly)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Tyrosine (Tyr)",
            "Valine (Val)",
            "Selenocysteine (Sec)",
            "Pyrrolysine (Pyl)",
            "Hydroxyproline (Hyp)",
            "Ornithine",
            "Citrocytosine",
            "Homocysteine",
            "Lanthionine",
            "Sarcosine",
            "Trans-4-hydroxy-L-proline (Hyp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)",
                "Selenocysteine (Sec)",
                "Pyrrolysine (Pyl)",
                "Hydroxyproline (Hyp)",
                "Ornithine",
                "Citrocytosine",
                "Homocysteine",
                "Lanthionine",
                "Sarcosine",
                "Trans-4-hydroxy-L-proline (Hyp)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Citrocytosine",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Homocysteine",
                "Hydroxyproline (Hyp)",
                "Isoleucine (Ile)",
                "Lanthionine",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Ornithine",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine (Pyl)",
                "Sarcosine",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Trans-4-hydroxy-L-proline (Hyp)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Glycine (Gly)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Proline (Pro)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Glycine (Gly)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Cysteine (Cys)",
                "Tyrosine (Tyr)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Proline (Pro)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 15,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tyrosine (Tyr)",
            "Tryptophan (Trp)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Cysteine (Cys)",
            "Methionine (Met)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Selenocysteine (Sec)",
            "Pyrrolysine (Pyl)",
            "Homocysteine",
            "Citrulline",
            "Ornithine",
            "\u03b2-Alanine",
            "\u03b3-Aminobutyric acid (GABA)",
            "Taurine",
            "Hydroxyproline (Hyp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tyrosine (Tyr)",
                "Tryptophan (Trp)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Cysteine (Cys)",
                "Methionine (Met)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Selenocysteine (Sec)",
                "Pyrrolysine (Pyl)",
                "Homocysteine",
                "Citrulline",
                "Ornithine",
                "\u03b2-Alanine",
                "\u03b3-Aminobutyric acid (GABA)",
                "Taurine",
                "Hydroxyproline (Hyp)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Citrulline",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Homocysteine",
                "Hydroxyproline (Hyp)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Ornithine",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Pyrrolysine (Pyl)",
                "Selenocysteine (Sec)",
                "Serine (Ser)",
                "Taurine",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)",
                "\u03b2-Alanine",
                "\u03b3-Aminobutyric acid (GABA)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Tryptophan (Trp)",
            "Phenylalanine (Phe)",
            "Methionine (Met)",
            "Cysteine (Cys)",
            "Tyrosine (Tyr)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Histidine (His)",
            "5-Hydroxylysine",
            "3-Methylhistidine",
            "Norleucine",
            "Norvaline",
            "Homoserine",
            "Homocysteine",
            "Citrulline",
            "Ornithine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Tryptophan (Trp)",
                "Phenylalanine (Phe)",
                "Methionine (Met)",
                "Cysteine (Cys)",
                "Tyrosine (Tyr)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Histidine (His)",
                "5-Hydroxylysine",
                "3-Methylhistidine",
                "Norleucine",
                "Norvaline",
                "Homoserine",
                "Homocysteine",
                "Citrulline",
                "Ornithine"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylhistidine",
                "5-Hydroxylysine",
                "Alanine (Ala)",
                "Asparagine (Asn)",
                "Citrulline",
                "Cysteine (Cys)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Homocysteine",
                "Homoserine",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Norleucine",
                "Norvaline",
                "Ornithine",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Arginine (Arg)",
            "Asparagine (Asn)",
            "Aspartic acid (Asp)",
            "Cysteine (Cys)",
            "Glutamic acid (Glu)",
            "Glutamine (Gln)",
            "Glycine (Gly)",
            "Histidine (His)",
            "Isoleucine (Ile)",
            "Leucine (Leu)",
            "Lysine (Lys)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Proline (Pro)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tryptophan (Trp)",
            "Tyrosine (Tyr)",
            "Valine (Val)",
            "Ornithine (Orn)",
            "Citrulline (Cit)",
            "Homocysteine (Hcy)",
            "Homoserine (Hse)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)",
                "Ornithine (Orn)",
                "Citrulline (Cit)",
                "Homocysteine (Hcy)",
                "Homoserine (Hse)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Citrulline (Cit)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Homocysteine (Hcy)",
                "Homoserine (Hse)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Ornithine (Orn)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala or A)",
            "Arginine (Arg or R)",
            "Asparagine (Asn or N)",
            "Aspartic acid (Asp or D)",
            "Cysteine (Cys or C)",
            "Glutamic acid (Glu or E)",
            "Glutamine (Gln or Q)",
            "Glycine (Gly or G)",
            "Histidine (His or H)",
            "Isoleucine (Ile or I)",
            "Leucine (Leu or L)",
            "Lysine (Lys or K)",
            "Methionine (Met or M)",
            "Phenylalanine (Phe or F)",
            "Proline (Pro or P)",
            "Serine (Ser or S)",
            "Threonine (Thr or T)",
            "Tryptophan (Trp or W)",
            "Tyrosine (Tyr or Y)",
            "Valine (Val or V)",
            "Asparagine (Asn or N) in proteins",
            "Glycine (Gly or G) in collagen",
            "Lysine (Lys or K) in keratin",
            "Serine (Ser or S) in casein",
            "Tyrosine (Tyr or Y) in dopamine",
            "Alanine (Ala or A) in hemoglobin",
            "Glycine (Gly or G) in DNA",
            "Histidine (His or H) in histamine",
            "Lysine (Lys or K) in lysozyme"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala or A)",
                "Arginine (Arg or R)",
                "Asparagine (Asn or N)",
                "Aspartic acid (Asp or D)",
                "Cysteine (Cys or C)",
                "Glutamic acid (Glu or E)",
                "Glutamine (Gln or Q)",
                "Glycine (Gly or G)",
                "Histidine (His or H)",
                "Isoleucine (Ile or I)",
                "Leucine (Leu or L)",
                "Lysine (Lys or K)",
                "Methionine (Met or M)",
                "Phenylalanine (Phe or F)",
                "Proline (Pro or P)",
                "Serine (Ser or S)",
                "Threonine (Thr or T)",
                "Tryptophan (Trp or W)",
                "Tyrosine (Tyr or Y)",
                "Valine (Val or V)",
                "Asparagine (Asn or N) in proteins",
                "Glycine (Gly or G) in collagen",
                "Lysine (Lys or K) in keratin",
                "Serine (Ser or S) in casein"
            ],
            "mismatches": [
                "Tyrosine (Tyr or Y) in dopamine",
                "Alanine (Ala or A) in hemoglobin",
                "Glycine (Gly or G) in DNA",
                "Histidine (His or H) in histamine",
                "Lysine (Lys or K) in lysozyme"
            ],
            "true_referents": [
                "Alanine (Ala or A)",
                "Arginine (Arg or R)",
                "Asparagine (Asn or N)",
                "Asparagine (Asn or N) in proteins",
                "Aspartic acid (Asp or D)",
                "Cysteine (Cys or C)",
                "Glutamic acid (Glu or E)",
                "Glutamine (Gln or Q)",
                "Glycine (Gly or G)",
                "Glycine (Gly or G) in collagen",
                "Histidine (His or H)",
                "Isoleucine (Ile or I)",
                "Leucine (Leu or L)",
                "Lysine (Lys or K)",
                "Lysine (Lys or K) in keratin",
                "Methionine (Met or M)",
                "Phenylalanine (Phe or F)",
                "Proline (Pro or P)",
                "Serine (Ser or S)",
                "Serine (Ser or S) in casein",
                "Threonine (Thr or T)",
                "Tryptophan (Trp or W)",
                "Tyrosine (Tyr or Y)",
                "Valine (Val or V)"
            ],
            "TP": 24,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala or A)",
            "Serine (Ser or S)",
            "Threonine (Thr or T)",
            "Cysteine (Cys or C)",
            "Asparagine (Asn or N)",
            "Glutamine (Gln or Q)",
            "Glycine (Gly or G)",
            "Tyrosine (Tyr or Y)",
            "Valine (Val or V)",
            "Isoleucine (Ile or I)",
            "Leucine (Leu or L)",
            "Methionine (Met or M)",
            "Proline (Pro or P)",
            "Phenylalanine (Phe or F)",
            "Aspartic acid (Asp or D) at pH 7.4",
            "Glutamic acid (Glu or E) at pH 7.4",
            "Tyrosine (Tyr or Y) at pH 7.4",
            "Citrulline",
            "Alpha-aminobutyric acid (AABA)",
            "Alpha-aminoisobutyric acid (AIB)",
            "2-Aminobutyric acid (2AB)",
            "Beta-alanine",
            "Gamma-aminobutyric acid (GABA)",
            "Taurine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala or A)",
                "Serine (Ser or S)",
                "Threonine (Thr or T)",
                "Cysteine (Cys or C)",
                "Asparagine (Asn or N)",
                "Glutamine (Gln or Q)",
                "Glycine (Gly or G)",
                "Tyrosine (Tyr or Y)",
                "Valine (Val or V)",
                "Isoleucine (Ile or I)",
                "Leucine (Leu or L)",
                "Methionine (Met or M)",
                "Proline (Pro or P)",
                "Phenylalanine (Phe or F)",
                "Aspartic acid (Asp or D) at pH 7.4",
                "Glutamic acid (Glu or E) at pH 7.4",
                "Tyrosine (Tyr or Y) at pH 7.4",
                "Citrulline",
                "Alpha-aminobutyric acid (AABA)",
                "Alpha-aminoisobutyric acid (AIB)",
                "2-Aminobutyric acid (2AB)",
                "Beta-alanine",
                "Gamma-aminobutyric acid (GABA)",
                "Taurine"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Aminobutyric acid (2AB)",
                "Alanine (Ala or A)",
                "Alpha-aminobutyric acid (AABA)",
                "Alpha-aminoisobutyric acid (AIB)",
                "Asparagine (Asn or N)",
                "Aspartic acid (Asp or D) at pH 7.4",
                "Beta-alanine",
                "Citrulline",
                "Cysteine (Cys or C)",
                "Gamma-aminobutyric acid (GABA)",
                "Glutamic acid (Glu or E) at pH 7.4",
                "Glutamine (Gln or Q)",
                "Glycine (Gly or G)",
                "Isoleucine (Ile or I)",
                "Leucine (Leu or L)",
                "Methionine (Met or M)",
                "Phenylalanine (Phe or F)",
                "Proline (Pro or P)",
                "Serine (Ser or S)",
                "Taurine",
                "Threonine (Thr or T)",
                "Tyrosine (Tyr or Y)",
                "Tyrosine (Tyr or Y) at pH 7.4",
                "Valine (Val or V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "neutral amino acid at physiological pH",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Alanine",
                "Canonical Name": "Ala"
            },
            {
                "Referent": "Arginine",
                "Canonical Name": "Arg"
            },
            {
                "Referent": "Asparagine",
                "Canonical Name": "Asn"
            },
            {
                "Referent": "Aspartic acid",
                "Canonical Name": "Asp"
            },
            {
                "Referent": "Cysteine",
                "Canonical Name": "Cys"
            },
            {
                "Referent": "Glutamic acid",
                "Canonical Name": "Glu"
            },
            {
                "Referent": "Glutamine",
                "Canonical Name": "Gln"
            },
            {
                "Referent": "Glycine",
                "Canonical Name": "Gly"
            },
            {
                "Referent": "Histidine",
                "Canonical Name": "His"
            },
            {
                "Referent": "Isoleucine",
                "Canonical Name": "Ile"
            },
            {
                "Referent": "Leucine",
                "Canonical Name": "Leu"
            },
            {
                "Referent": "Lysine",
                "Canonical Name": "Lys"
            },
            {
                "Referent": "Methionine",
                "Canonical Name": "Met"
            },
            {
                "Referent": "Methionine sulfoxide",
                "Canonical Name": "MetO"
            },
            {
                "Referent": "Phenylalanine",
                "Canonical Name": "Phe"
            },
            {
                "Referent": "Proline",
                "Canonical Name": "Pro"
            },
            {
                "Referent": "Serine",
                "Canonical Name": "Ser"
            },
            {
                "Referent": "Threonine",
                "Canonical Name": "Thr"
            },
            {
                "Referent": "Tryptophan",
                "Canonical Name": "Trp"
            },
            {
                "Referent": "Tyrosine",
                "Canonical Name": "Tyr"
            },
            {
                "Referent": "Valine",
                "Canonical Name": "Val"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine",
                "Arginine",
                "Asparagine",
                "Aspartic acid",
                "Cysteine",
                "Glutamic acid",
                "Glutamine",
                "Glycine",
                "Histidine",
                "Isoleucine",
                "Leucine",
                "Lysine",
                "Methionine",
                "Methionine sulfoxide",
                "Phenylalanine",
                "Proline",
                "Serine",
                "Threonine",
                "Tryptophan",
                "Tyrosine",
                "Valine"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Referent\": \"Alanine\", \"Canonical Name\": \"Ala\"}",
                "{\"Referent\": \"Arginine\", \"Canonical Name\": \"Arg\"}",
                "{\"Referent\": \"Asparagine\", \"Canonical Name\": \"Asn\"}",
                "{\"Referent\": \"Aspartic acid\", \"Canonical Name\": \"Asp\"}",
                "{\"Referent\": \"Cysteine\", \"Canonical Name\": \"Cys\"}",
                "{\"Referent\": \"Glutamic acid\", \"Canonical Name\": \"Glu\"}",
                "{\"Referent\": \"Glutamine\", \"Canonical Name\": \"Gln\"}",
                "{\"Referent\": \"Glycine\", \"Canonical Name\": \"Gly\"}",
                "{\"Referent\": \"Histidine\", \"Canonical Name\": \"His\"}",
                "{\"Referent\": \"Isoleucine\", \"Canonical Name\": \"Ile\"}",
                "{\"Referent\": \"Leucine\", \"Canonical Name\": \"Leu\"}",
                "{\"Referent\": \"Lysine\", \"Canonical Name\": \"Lys\"}",
                "{\"Referent\": \"Methionine sulfoxide\", \"Canonical Name\": \"MetO\"}",
                "{\"Referent\": \"Methionine\", \"Canonical Name\": \"Met\"}",
                "{\"Referent\": \"Phenylalanine\", \"Canonical Name\": \"Phe\"}",
                "{\"Referent\": \"Proline\", \"Canonical Name\": \"Pro\"}",
                "{\"Referent\": \"Serine\", \"Canonical Name\": \"Ser\"}",
                "{\"Referent\": \"Threonine\", \"Canonical Name\": \"Thr\"}",
                "{\"Referent\": \"Tryptophan\", \"Canonical Name\": \"Trp\"}",
                "{\"Referent\": \"Tyrosine\", \"Canonical Name\": \"Tyr\"}",
                "{\"Referent\": \"Valine\", \"Canonical Name\": \"Val\"}"
            ],
            "TP": 21,
            "FP": 0,
            "FN": 0
        }
    }
]