[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Methionine (Met)",
            "Glycine (Gly)",
            "Cysteine (Cys)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tyrosine (Tyr)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Histidine (His)",
            "Lysine (Lys)",
            "Arginine (Arg)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Citrulline",
            "Homocysteine",
            "Norvaline",
            "Norleucine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Methionine (Met)",
                "Glycine (Gly)",
                "Cysteine (Cys)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tyrosine (Tyr)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Histidine (His)",
                "Lysine (Lys)",
                "Arginine (Arg)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)",
                "Citrulline",
                "Homocysteine",
                "Norvaline",
                "Norleucine"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Citrulline",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Homocysteine",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Norleucine",
                "Norvaline",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala, A)",
            "Glycine (Gly, G)",
            "Isoleucine (Ile, I)",
            "Leucine (Leu, L)",
            "Methionine (Met, M)",
            "Phenylalanine (Phe, F)",
            "Proline (Pro, P)",
            "Valine (Val, V)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala, A)",
                "Glycine (Gly, G)",
                "Isoleucine (Ile, I)",
                "Leucine (Leu, L)",
                "Methionine (Met, M)",
                "Phenylalanine (Phe, F)",
                "Proline (Pro, P)",
                "Valine (Val, V)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala, A)",
                "Glycine (Gly, G)",
                "Isoleucine (Ile, I)",
                "Leucine (Leu, L)",
                "Methionine (Met, M)",
                "Phenylalanine (Phe, F)",
                "Proline (Pro, P)",
                "Valine (Val, V)"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 9,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Methionine (Met)",
            "Serine (Ser) - sometimes considered borderline, but often grouped due to small size",
            "Threonine (Thr) - similarly borderline to Serine",
            "Cysteine (Cys) - can be nonpolar depending on environment",
            "Tyrosine (Tyr) - can be nonpolar depending on environment",
            "Asparagine (Asn) - amide group reduces polarity",
            "Glutamine (Gln) - amide group reduces polarity",
            "Norleucine",
            "Norvaline",
            "Caproic acid amino acid analog",
            "\u03b1-Aminoisobutyric acid (Aib)",
            "Cycloleucine",
            "Allothreonine",
            "\u03b2-Alanine",
            "D-Alanine",
            "D-Leucine",
            "Selenocysteine (Sec) - although containing selenium, the side chain is relatively nonpolar"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Methionine (Met)",
                "Serine (Ser) - sometimes considered borderline, but often grouped due to small size",
                "Threonine (Thr) - similarly borderline to Serine",
                "Cysteine (Cys) - can be nonpolar depending on environment",
                "Tyrosine (Tyr) - can be nonpolar depending on environment",
                "Asparagine (Asn) - amide group reduces polarity",
                "Glutamine (Gln) - amide group reduces polarity",
                "Norleucine",
                "Norvaline",
                "Caproic acid amino acid analog",
                "\u03b1-Aminoisobutyric acid (Aib)",
                "Cycloleucine",
                "Allothreonine",
                "\u03b2-Alanine",
                "D-Alanine",
                "D-Leucine"
            ],
            "mismatches": [
                "Selenocysteine (Sec) - although containing selenium, the side chain is relatively nonpolar"
            ],
            "true_referents": [
                "Alanine (Ala)",
                "Allothreonine",
                "Asparagine (Asn) - amide group reduces polarity",
                "Caproic acid amino acid analog",
                "Cycloleucine",
                "Cysteine (Cys) - can be nonpolar depending on environment",
                "D-Alanine",
                "D-Leucine",
                "Glutamine (Gln) - amide group reduces polarity",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Norleucine",
                "Norvaline",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser) - sometimes considered borderline, but often grouped due to small size",
                "Threonine (Thr) - similarly borderline to Serine",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr) - can be nonpolar depending on environment",
                "Valine (Val)",
                "\u03b1-Aminoisobutyric acid (Aib)",
                "\u03b2-Alanine"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 9,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Methionine (Met)",
            "Proline (Pro)",
            "Glycine (Gly)",
            "Tyrosine (Tyr)",
            "Cysteine (Cys)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Aspartic Acid (Asp)",
            "Glutamic Acid (Glu)",
            "Histidine (His)",
            "Lysine (Lys)",
            "Arginine (Arg)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Methionine (Met)",
                "Proline (Pro)",
                "Glycine (Gly)",
                "Tyrosine (Tyr)",
                "Cysteine (Cys)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Aspartic Acid (Asp)",
                "Glutamic Acid (Glu)",
                "Histidine (His)",
                "Lysine (Lys)",
                "Arginine (Arg)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic Acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic Acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala, A)",
            "Valine (Val, V)",
            "Leucine (Leu, L)",
            "Isoleucine (Ile, I)",
            "Methionine (Met, M)",
            "Phenylalanine (Phe, F)",
            "Tryptophan (Trp, W)",
            "Proline (Pro, P)",
            "Glycine (Gly, G)",
            "Cysteine (Cys, C)",
            "Tyrosine (Tyr, Y)",
            "Serine (Ser, S)",
            "Threonine (Thr, T)",
            "Asparagine (Asn, N)",
            "Glutamine (Gln, Q)",
            "Aspartic acid (Asp, D)",
            "Glutamic acid (Glu, E)",
            "Lysine (Lys, K)",
            "Arginine (Arg, R)",
            "Histidine (His, H)",
            "Phenylglycine",
            "3,4-Dihydroxyphenylalanine (DOPA)",
            "3-Iodo-L-tyrosine",
            "3-Nitro-L-tyrosine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala, A)",
                "Valine (Val, V)",
                "Leucine (Leu, L)",
                "Isoleucine (Ile, I)",
                "Methionine (Met, M)",
                "Phenylalanine (Phe, F)",
                "Tryptophan (Trp, W)",
                "Proline (Pro, P)",
                "Glycine (Gly, G)",
                "Cysteine (Cys, C)",
                "Tyrosine (Tyr, Y)",
                "Serine (Ser, S)",
                "Threonine (Thr, T)",
                "Asparagine (Asn, N)",
                "Glutamine (Gln, Q)",
                "Aspartic acid (Asp, D)",
                "Glutamic acid (Glu, E)",
                "Lysine (Lys, K)",
                "Arginine (Arg, R)",
                "Histidine (His, H)",
                "Phenylglycine",
                "3,4-Dihydroxyphenylalanine (DOPA)",
                "3-Iodo-L-tyrosine",
                "3-Nitro-L-tyrosine"
            ],
            "mismatches": [],
            "true_referents": [
                "3,4-Dihydroxyphenylalanine (DOPA)",
                "3-Iodo-L-tyrosine",
                "3-Nitro-L-tyrosine",
                "Alanine (Ala, A)",
                "Arginine (Arg, R)",
                "Asparagine (Asn, N)",
                "Aspartic acid (Asp, D)",
                "Cysteine (Cys, C)",
                "Glutamic acid (Glu, E)",
                "Glutamine (Gln, Q)",
                "Glycine (Gly, G)",
                "Histidine (His, H)",
                "Isoleucine (Ile, I)",
                "Leucine (Leu, L)",
                "Lysine (Lys, K)",
                "Methionine (Met, M)",
                "Phenylalanine (Phe, F)",
                "Phenylglycine",
                "Proline (Pro, P)",
                "Serine (Ser, S)",
                "Threonine (Thr, T)",
                "Tryptophan (Trp, W)",
                "Tyrosine (Tyr, Y)",
                "Valine (Val, V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Methionine (Met)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Proline (Pro)",
            "Cysteine (Cys)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Tyrosine (Tyr)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Histidine (His)",
            "Lysine (Lys)",
            "Arginine (Arg)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Proline (Pro)",
                "Cysteine (Cys)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tyrosine (Tyr)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Histidine (His)",
                "Lysine (Lys)",
                "Arginine (Arg)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly, G)",
            "Alanine (Ala, A)",
            "Valine (Val, V)",
            "Leucine (Leu, L)",
            "Isoleucine (Ile, I)",
            "Methionine (Met, M)",
            "Phenylalanine (Phe, F)",
            "Tryptophan (Trp, W)",
            "Proline (Pro, P)",
            "Norvaline (Nva)",
            "Norleucine",
            "N-Methylalanine",
            "\u03b1-Aminoisobutyric Acid (\u03b1-AIB)",
            "O-Methyltyrosine",
            "p-Methylphenylalanine",
            "L-4-Fluorophenylalanine",
            "N-Isopropylglycine",
            "N,N-Dimethylalanine",
            "\u03b1-Methylvaline",
            "3-Methylalanine",
            "N-Methylleucine",
            "N-Methylvaline",
            "N-Methylmethionine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly, G)",
                "Alanine (Ala, A)",
                "Valine (Val, V)",
                "Leucine (Leu, L)",
                "Isoleucine (Ile, I)",
                "Methionine (Met, M)",
                "Phenylalanine (Phe, F)",
                "Tryptophan (Trp, W)",
                "Proline (Pro, P)",
                "Norvaline (Nva)",
                "Norleucine",
                "N-Methylalanine",
                "\u03b1-Aminoisobutyric Acid (\u03b1-AIB)",
                "O-Methyltyrosine",
                "p-Methylphenylalanine",
                "L-4-Fluorophenylalanine",
                "N-Isopropylglycine",
                "N,N-Dimethylalanine",
                "\u03b1-Methylvaline",
                "3-Methylalanine",
                "N-Methylleucine",
                "N-Methylvaline",
                "N-Methylmethionine"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylalanine",
                "Alanine (Ala, A)",
                "Glycine (Gly, G)",
                "Isoleucine (Ile, I)",
                "L-4-Fluorophenylalanine",
                "Leucine (Leu, L)",
                "Methionine (Met, M)",
                "N,N-Dimethylalanine",
                "N-Isopropylglycine",
                "N-Methylalanine",
                "N-Methylleucine",
                "N-Methylmethionine",
                "N-Methylvaline",
                "Norleucine",
                "Norvaline (Nva)",
                "O-Methyltyrosine",
                "Phenylalanine (Phe, F)",
                "Proline (Pro, P)",
                "Tryptophan (Trp, W)",
                "Valine (Val, V)",
                "p-Methylphenylalanine",
                "\u03b1-Aminoisobutyric Acid (\u03b1-AIB)",
                "\u03b1-Methylvaline"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Methionine (Met)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Glycine (Gly)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Methionine (Met)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Glycine (Gly)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 9,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Methionine (Met)",
            "Cysteine (Cys)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Methionine (Met)",
                "Cysteine (Cys)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Cysteine (Cys)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Glycine (Gly)",
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Methionine (Met)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycine (Gly)",
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Methionine (Met)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Tryptophan (Trp)",
                "Valine (Val)"
            ],
            "TP": 9,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Proline (Pro)",
            "Phenylalanine (Phe)",
            "Tryptophan (Trp)",
            "Tyrosine (Tyr)",
            "Cysteine (Cys)",
            "Methionine (Met)",
            "Glycine (Gly)",
            "Serine (Ser)",
            "Threonine (Thr)",
            "Asparagine (Asn)",
            "Glutamine (Gln)",
            "Aspartic acid (Asp)",
            "Glutamic acid (Glu)",
            "Lysine (Lys)",
            "Arginine (Arg)",
            "Histidine (His)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Proline (Pro)",
                "Phenylalanine (Phe)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Cysteine (Cys)",
                "Methionine (Met)",
                "Glycine (Gly)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Asparagine (Asn)",
                "Glutamine (Gln)",
                "Aspartic acid (Asp)",
                "Glutamic acid (Glu)",
                "Lysine (Lys)",
                "Arginine (Arg)",
                "Histidine (His)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Arginine (Arg)",
                "Asparagine (Asn)",
                "Aspartic acid (Asp)",
                "Cysteine (Cys)",
                "Glutamic acid (Glu)",
                "Glutamine (Gln)",
                "Glycine (Gly)",
                "Histidine (His)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Lysine (Lys)",
                "Methionine (Met)",
                "Phenylalanine (Phe)",
                "Proline (Pro)",
                "Serine (Ser)",
                "Threonine (Thr)",
                "Tryptophan (Trp)",
                "Tyrosine (Tyr)",
                "Valine (Val)"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala or A)",
            "Glycine (Gly or G)",
            "Proline (Pro or P)",
            "Valine (Val or V)",
            "Leucine (Leu or L)",
            "Isoleucine (Ile or I)",
            "Methionine (Met or M)",
            "Phenylalanine (Phe or F)",
            "Tyrosine (Tyr or Y)",
            "Tryptophan (Trp or W)",
            "Aspartic acid (Asp or D)",
            "Glutamic acid (Glu or E)",
            "Asparagine (Asn or N)",
            "Glutamine (Gln or Q)",
            "Serine (Ser or S)",
            "Threonine (Thr or T)",
            "Cysteine (Cys or C)",
            "Alanine (Ala or A)",
            "Proline (Pro or P)",
            "Valine (Val or V)",
            "Leucine (Leu or L)",
            "Isoleucine (Ile or I)",
            "Methionine (Met or M)",
            "Phenylalanine (Phe or F)",
            "Tyrosine (Tyr or Y)",
            "Tryptophan (Trp or W)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala or A)",
                "Glycine (Gly or G)",
                "Proline (Pro or P)",
                "Valine (Val or V)",
                "Leucine (Leu or L)",
                "Isoleucine (Ile or I)",
                "Methionine (Met or M)",
                "Phenylalanine (Phe or F)",
                "Tyrosine (Tyr or Y)",
                "Tryptophan (Trp or W)",
                "Aspartic acid (Asp or D)",
                "Glutamic acid (Glu or E)",
                "Asparagine (Asn or N)",
                "Glutamine (Gln or Q)",
                "Serine (Ser or S)",
                "Threonine (Thr or T)",
                "Cysteine (Cys or C)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala or A)",
                "Asparagine (Asn or N)",
                "Aspartic acid (Asp or D)",
                "Cysteine (Cys or C)",
                "Glutamic acid (Glu or E)",
                "Glutamine (Gln or Q)",
                "Glycine (Gly or G)",
                "Isoleucine (Ile or I)",
                "Leucine (Leu or L)",
                "Methionine (Met or M)",
                "Phenylalanine (Phe or F)",
                "Proline (Pro or P)",
                "Serine (Ser or S)",
                "Threonine (Thr or T)",
                "Tryptophan (Trp or W)",
                "Tyrosine (Tyr or Y)",
                "Valine (Val or V)"
            ],
            "TP": 17,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine (Ala)",
            "Valine (Val)",
            "Leucine (Leu)",
            "Isoleucine (Ile)",
            "Glycine (Gly)",
            "Methionine (Met)",
            "Proline (Pro)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine (Ala)",
                "Valine (Val)",
                "Leucine (Leu)",
                "Isoleucine (Ile)",
                "Glycine (Gly)",
                "Methionine (Met)",
                "Proline (Pro)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine (Ala)",
                "Glycine (Gly)",
                "Isoleucine (Ile)",
                "Leucine (Leu)",
                "Methionine (Met)",
                "Proline (Pro)",
                "Valine (Val)"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "nonpolar amino acid",
        "domain": "chemistry",
        "response": [
            "Alanine",
            "Arginine",
            "Asparagine",
            "Aspartic acid",
            "Cysteine",
            "Glutamic acid",
            "Glutamine",
            "Glycine",
            "Histidine",
            "Isoleucine",
            "Leucine",
            "Lysine",
            "Methionine",
            "Phenylalanine",
            "Proline",
            "Serine",
            "Threonine",
            "Tryptophan",
            "Tyrosine",
            "Valine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alanine",
                "Arginine",
                "Asparagine",
                "Aspartic acid",
                "Cysteine",
                "Glutamic acid",
                "Glutamine",
                "Glycine",
                "Histidine",
                "Isoleucine",
                "Leucine",
                "Lysine",
                "Methionine",
                "Phenylalanine",
                "Proline",
                "Serine",
                "Threonine",
                "Tryptophan",
                "Tyrosine",
                "Valine"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine",
                "Arginine",
                "Asparagine",
                "Aspartic acid",
                "Cysteine",
                "Glutamic acid",
                "Glutamine",
                "Glycine",
                "Histidine",
                "Isoleucine",
                "Leucine",
                "Lysine",
                "Methionine",
                "Phenylalanine",
                "Proline",
                "Serine",
                "Threonine",
                "Tryptophan",
                "Tyrosine",
                "Valine"
            ],
            "TP": 20,
            "FP": 0,
            "FN": 0
        }
    }
]