[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "MT-ATP6 (ATP6)",
            "MT-ATP8 (ATP8)",
            "MT-CO1 (CO1)",
            "MT-CO2 (CO2)",
            "MT-CO3 (CO3)",
            "MT-ND1 (ND1)",
            "MT-ND2 (ND2)",
            "MT-ND3 (ND3)",
            "MT-ND4 (ND4)",
            "MT-ND4L (ND4L)",
            "MT-ND5 (ND5)",
            "MT-ND6 (ND6)",
            "MT-CYB (CYB)",
            "MT-ATP4 (ATP4)",
            "MT-ATP5M (ATP5M)",
            "MT-ATP5L (ATP5L)",
            "MT-ATP5O (ATP5O)",
            "MT-ATP5P (ATP5P)",
            "MT-ATP5H (ATP5H)",
            "MT-ATP5G (ATP5G)",
            "MT-ATP5F1A (ATP5F1A)",
            "MT-ATP5F1B (ATP5F1B)",
            "MT-ATP5F1C (ATP5F1C)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "MT-ATP6 (ATP6)",
                "MT-ATP8 (ATP8)",
                "MT-CO1 (CO1)",
                "MT-CO2 (CO2)",
                "MT-CO3 (CO3)",
                "MT-ND1 (ND1)",
                "MT-ND2 (ND2)",
                "MT-ND3 (ND3)",
                "MT-ND4 (ND4)",
                "MT-ND4L (ND4L)",
                "MT-ND5 (ND5)",
                "MT-ND6 (ND6)",
                "MT-CYB (CYB)",
                "MT-ATP4 (ATP4)",
                "MT-ATP5M (ATP5M)",
                "MT-ATP5L (ATP5L)",
                "MT-ATP5O (ATP5O)",
                "MT-ATP5P (ATP5P)",
                "MT-ATP5H (ATP5H)",
                "MT-ATP5G (ATP5G)",
                "MT-ATP5F1A (ATP5F1A)",
                "MT-ATP5F1B (ATP5F1B)",
                "MT-ATP5F1C (ATP5F1C)"
            ],
            "mismatches": [],
            "true_referents": [
                "MT-ATP4 (ATP4)",
                "MT-ATP5F1A (ATP5F1A)",
                "MT-ATP5F1B (ATP5F1B)",
                "MT-ATP5F1C (ATP5F1C)",
                "MT-ATP5G (ATP5G)",
                "MT-ATP5H (ATP5H)",
                "MT-ATP5L (ATP5L)",
                "MT-ATP5M (ATP5M)",
                "MT-ATP5O (ATP5O)",
                "MT-ATP5P (ATP5P)",
                "MT-ATP6 (ATP6)",
                "MT-ATP8 (ATP8)",
                "MT-CO1 (CO1)",
                "MT-CO2 (CO2)",
                "MT-CO3 (CO3)",
                "MT-CYB (CYB)",
                "MT-ND1 (ND1)",
                "MT-ND2 (ND2)",
                "MT-ND3 (ND3)",
                "MT-ND4 (ND4)",
                "MT-ND4L (ND4L)",
                "MT-ND5 (ND5)",
                "MT-ND6 (ND6)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "ND1 (MT-ND1)",
            "ND2 (MT-ND2)",
            "ND3 (MT-ND3)",
            "ND4 (MT-ND4)",
            "ND4L (MT-ND4L)",
            "ND5 (MT-ND5)",
            "ND6 (MT-ND6)",
            "CYTB (MT-CYB)",
            "ATP6 (MT-ATP6)",
            "ATP8 (MT-ATP8)",
            "COX1 (MT-CO1)",
            "COX2 (MT-CO2)",
            "COX3 (MT-CO3)",
            "CYTb (MT-CYB)",
            "ND1 (MT-ND1)",
            "ND2 (MT-ND2)",
            "ND3 (MT-ND3)",
            "ND4 (MT-ND4)",
            "ND4L (MT-ND4L)",
            "ND5 (MT-ND5)",
            "ND6 (MT-ND6)",
            "ATPase 6 (MT-ATP6)",
            "ATPase 8 (MT-ATP8)",
            "Cytochrome c oxidase subunit 1 (MT-CO1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)",
                "CYTB (MT-CYB)",
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "COX1 (MT-CO1)",
                "COX2 (MT-CO2)",
                "COX3 (MT-CO3)",
                "CYTb (MT-CYB)",
                "ATPase 6 (MT-ATP6)",
                "ATPase 8 (MT-ATP8)",
                "Cytochrome c oxidase subunit 1 (MT-CO1)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "ATPase 6 (MT-ATP6)",
                "ATPase 8 (MT-ATP8)",
                "COX1 (MT-CO1)",
                "COX2 (MT-CO2)",
                "COX3 (MT-CO3)",
                "CYTB (MT-CYB)",
                "CYTb (MT-CYB)",
                "Cytochrome c oxidase subunit 1 (MT-CO1)",
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)"
            ],
            "TP": 17,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "ATP6 (MT-ATP6)",
            "ATP8 (MT-ATP8)",
            "COX1 (MT-CO1)",
            "COX2 (MT-CO2)",
            "COX3 (MT-CO3)",
            "CYTB (MT-CYB)",
            "ND1 (MT-ND1)",
            "ND2 (MT-ND2)",
            "ND3 (MT-ND3)",
            "ND4 (MT-ND4)",
            "ND4L (MT-ND4L)",
            "ND5 (MT-ND5)",
            "ND6 (MT-ND6)",
            "MT-TP",
            "MT-RNR1",
            "MT-RNR2",
            "MT-TA",
            "MT-TC",
            "MT-TD",
            "MT-TE",
            "MT-TF",
            "MT-TG",
            "MT-TH",
            "MT-TI",
            "MT-TK",
            "MT-TL1",
            "MT-TL2",
            "MT-TM",
            "MT-TN",
            "MT-TP",
            "MT-TQ",
            "MT-TR",
            "MT-TS1",
            "MT-TS2",
            "MT-TT",
            "MT-TV",
            "MT-TW",
            "MT-TY"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "COX1 (MT-CO1)",
                "COX2 (MT-CO2)",
                "COX3 (MT-CO3)",
                "CYTB (MT-CYB)",
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)",
                "MT-TP",
                "MT-RNR1",
                "MT-RNR2",
                "MT-TA",
                "MT-TC",
                "MT-TD",
                "MT-TE",
                "MT-TF",
                "MT-TG",
                "MT-TH",
                "MT-TI"
            ],
            "mismatches": [
                "MT-TK",
                "MT-TL1",
                "MT-TL2",
                "MT-TM",
                "MT-TN",
                "MT-TQ",
                "MT-TR",
                "MT-TS1",
                "MT-TS2",
                "MT-TT",
                "MT-TV",
                "MT-TW",
                "MT-TY"
            ],
            "true_referents": [
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "COX1 (MT-CO1)",
                "COX2 (MT-CO2)",
                "COX3 (MT-CO3)",
                "CYTB (MT-CYB)",
                "MT-RNR1",
                "MT-RNR2",
                "MT-TA",
                "MT-TC",
                "MT-TD",
                "MT-TE",
                "MT-TF",
                "MT-TG",
                "MT-TH",
                "MT-TI",
                "MT-TP",
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)"
            ],
            "TP": 24,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "ND1 (MT-ND1)",
            "ND2 (MT-ND2)",
            "ND3 (MT-ND3)",
            "ND4 (MT-ND4)",
            "ND4L (MT-ND4L)",
            "ND5 (MT-ND5)",
            "ND6 (MT-ND6)",
            "CYTB (MT-CO1)",
            "COII (MT-CO2)",
            "COIII (MT-CO3)",
            "ATP6 (MT-ATP6)",
            "ATP8 (MT-ATP8)",
            "ATP9 (MT-ATP9)",
            "Complex I Subunit N1 (MT-ND1)",
            "Complex I Subunit N2 (MT-ND2)",
            "Complex I Subunit N3 (MT-ND3)",
            "Complex I Subunit N4 (MT-ND4)",
            "Complex I Subunit N4L (MT-ND4L)",
            "Complex I Subunit N5 (MT-ND5)",
            "Complex I Subunit N6 (MT-ND6)",
            "Cytochrome b (MT-CYB)",
            "Cytochrome c Oxidase Subunit II (MT-CO2)",
            "Cytochrome c Oxidase Subunit III (MT-CO3)",
            "ATP Synthase Subunit 6 (MT-ATP6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)",
                "CYTB (MT-CO1)",
                "COII (MT-CO2)",
                "COIII (MT-CO3)",
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "ATP9 (MT-ATP9)",
                "Complex I Subunit N1 (MT-ND1)",
                "Complex I Subunit N2 (MT-ND2)",
                "Complex I Subunit N3 (MT-ND3)",
                "Complex I Subunit N4 (MT-ND4)",
                "Complex I Subunit N4L (MT-ND4L)",
                "Complex I Subunit N5 (MT-ND5)",
                "Complex I Subunit N6 (MT-ND6)",
                "Cytochrome b (MT-CYB)",
                "Cytochrome c Oxidase Subunit II (MT-CO2)",
                "Cytochrome c Oxidase Subunit III (MT-CO3)",
                "ATP Synthase Subunit 6 (MT-ATP6)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP Synthase Subunit 6 (MT-ATP6)",
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "ATP9 (MT-ATP9)",
                "COII (MT-CO2)",
                "COIII (MT-CO3)",
                "CYTB (MT-CO1)",
                "Complex I Subunit N1 (MT-ND1)",
                "Complex I Subunit N2 (MT-ND2)",
                "Complex I Subunit N3 (MT-ND3)",
                "Complex I Subunit N4 (MT-ND4)",
                "Complex I Subunit N4L (MT-ND4L)",
                "Complex I Subunit N5 (MT-ND5)",
                "Complex I Subunit N6 (MT-ND6)",
                "Cytochrome b (MT-CYB)",
                "Cytochrome c Oxidase Subunit II (MT-CO2)",
                "Cytochrome c Oxidase Subunit III (MT-CO3)",
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "NADH-ubiquinone oxidoreductase chain 1 (MT-ND1)",
            "NADH-ubiquinone oxidoreductase chain 2 (MT-ND2)",
            "NADH-ubiquinone oxidoreductase chain 3 (MT-ND3)",
            "NADH-ubiquinone oxidoreductase chain 4 (MT-ND4)",
            "NADH-ubiquinone oxidoreductase chain 4L (MT-ND4L)",
            "NADH-ubiquinone oxidoreductase chain 5 (MT-ND5)",
            "NADH-ubiquinone oxidoreductase chain 6 (MT-ND6)",
            "Cytochrome c oxidase subunit I (MT-CO1)",
            "Cytochrome c oxidase subunit II (MT-CO2)",
            "Cytochrome c oxidase subunit III (MT-CO3)",
            "Cytochrome b (MT-CYB)",
            "ATP synthase membrane subunit 6 (MT-ATP6)",
            "ATP synthase membrane subunit 8 (MT-ATP8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH-ubiquinone oxidoreductase chain 1 (MT-ND1)",
                "NADH-ubiquinone oxidoreductase chain 2 (MT-ND2)",
                "NADH-ubiquinone oxidoreductase chain 3 (MT-ND3)",
                "NADH-ubiquinone oxidoreductase chain 4 (MT-ND4)",
                "NADH-ubiquinone oxidoreductase chain 4L (MT-ND4L)",
                "NADH-ubiquinone oxidoreductase chain 5 (MT-ND5)",
                "NADH-ubiquinone oxidoreductase chain 6 (MT-ND6)",
                "Cytochrome c oxidase subunit I (MT-CO1)",
                "Cytochrome c oxidase subunit II (MT-CO2)",
                "Cytochrome c oxidase subunit III (MT-CO3)",
                "Cytochrome b (MT-CYB)",
                "ATP synthase membrane subunit 6 (MT-ATP6)",
                "ATP synthase membrane subunit 8 (MT-ATP8)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase membrane subunit 6 (MT-ATP6)",
                "ATP synthase membrane subunit 8 (MT-ATP8)",
                "Cytochrome b (MT-CYB)",
                "Cytochrome c oxidase subunit I (MT-CO1)",
                "Cytochrome c oxidase subunit II (MT-CO2)",
                "Cytochrome c oxidase subunit III (MT-CO3)",
                "NADH-ubiquinone oxidoreductase chain 1 (MT-ND1)",
                "NADH-ubiquinone oxidoreductase chain 2 (MT-ND2)",
                "NADH-ubiquinone oxidoreductase chain 3 (MT-ND3)",
                "NADH-ubiquinone oxidoreductase chain 4 (MT-ND4)",
                "NADH-ubiquinone oxidoreductase chain 4L (MT-ND4L)",
                "NADH-ubiquinone oxidoreductase chain 5 (MT-ND5)",
                "NADH-ubiquinone oxidoreductase chain 6 (MT-ND6)"
            ],
            "TP": 13,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "MT-ATP6 (MT-ATP6)",
            "MT-ATP8 (MT-ATP8)",
            "MT-CO1 (MT-CO1)",
            "MT-CO2 (MT-CO2)",
            "MT-CO3 (MT-CO3)",
            "MT-CYB (MT-CYB)",
            "MT-ND1 (MT-ND1)",
            "MT-ND2 (MT-ND2)",
            "MT-ND3 (MT-ND3)",
            "MT-ND4 (MT-ND4)",
            "MT-ND4L (MT-ND4L)",
            "MT-ND5 (MT-ND5)",
            "MT-ND6 (MT-ND6)",
            "MT-RNR1 (MT-RNR1)",
            "MT-RNR2 (MT-RNR2)",
            "MT-TL1 (MT-TL1)",
            "MT-TL2 (MT-TL2)",
            "MT-TH (MT-TH)",
            "MT-TY (MT-TY)",
            "MT-TF (MT-TF)",
            "MT-TS1 (MT-TS1)",
            "MT-TS2 (MT-TS2)",
            "MT-TQ (MT-TQ)",
            "MT-TG (MT-TG)",
            "MT-TC (MT-TC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "MT-ATP6 (MT-ATP6)",
                "MT-ATP8 (MT-ATP8)",
                "MT-CO1 (MT-CO1)",
                "MT-CO2 (MT-CO2)",
                "MT-CO3 (MT-CO3)",
                "MT-CYB (MT-CYB)",
                "MT-ND1 (MT-ND1)",
                "MT-ND2 (MT-ND2)",
                "MT-ND3 (MT-ND3)",
                "MT-ND4 (MT-ND4)",
                "MT-ND4L (MT-ND4L)",
                "MT-ND5 (MT-ND5)",
                "MT-ND6 (MT-ND6)",
                "MT-RNR1 (MT-RNR1)",
                "MT-RNR2 (MT-RNR2)",
                "MT-TL1 (MT-TL1)",
                "MT-TL2 (MT-TL2)",
                "MT-TH (MT-TH)",
                "MT-TY (MT-TY)",
                "MT-TF (MT-TF)",
                "MT-TS1 (MT-TS1)",
                "MT-TS2 (MT-TS2)",
                "MT-TQ (MT-TQ)",
                "MT-TG (MT-TG)"
            ],
            "mismatches": [
                "MT-TC (MT-TC)"
            ],
            "true_referents": [
                "MT-ATP6 (MT-ATP6)",
                "MT-ATP8 (MT-ATP8)",
                "MT-CO1 (MT-CO1)",
                "MT-CO2 (MT-CO2)",
                "MT-CO3 (MT-CO3)",
                "MT-CYB (MT-CYB)",
                "MT-ND1 (MT-ND1)",
                "MT-ND2 (MT-ND2)",
                "MT-ND3 (MT-ND3)",
                "MT-ND4 (MT-ND4)",
                "MT-ND4L (MT-ND4L)",
                "MT-ND5 (MT-ND5)",
                "MT-ND6 (MT-ND6)",
                "MT-RNR1 (MT-RNR1)",
                "MT-RNR2 (MT-RNR2)",
                "MT-TF (MT-TF)",
                "MT-TG (MT-TG)",
                "MT-TH (MT-TH)",
                "MT-TL1 (MT-TL1)",
                "MT-TL2 (MT-TL2)",
                "MT-TQ (MT-TQ)",
                "MT-TS1 (MT-TS1)",
                "MT-TS2 (MT-TS2)",
                "MT-TY (MT-TY)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "ATP synthase F0 subunit 6 (MT-ATP6)",
            "ATP synthase F0 subunit 8 (MT-ATP8)",
            "Cytochrome c oxidase I (MT-CO1)",
            "Cytochrome c oxidase II (MT-CO2)",
            "Cytochrome c oxidase III (MT-CO3)",
            "Cytochrome b (MT-CYB)",
            "NADH dehydrogenase 1 (MT-ND1)",
            "NADH dehydrogenase 2 (MT-ND2)",
            "NADH dehydrogenase 3 (MT-ND3)",
            "NADH dehydrogenase 4 (MT-ND4)",
            "NADH dehydrogenase 4L (MT-ND4L)",
            "NADH dehydrogenase 5 (MT-ND5)",
            "NADH dehydrogenase 6 (MT-ND6)",
            "Ribosomal protein S12 (MT-RPS12)",
            "Ribosomal protein S7 (MT-RPS7)",
            "Ribosomal RNA 12S (MT-RNR1)",
            "Ribosomal RNA 16S (MT-RNR2)",
            "Transfer RNA leucine 1 (MT-TL1)",
            "Transfer RNA leucine 2 (MT-TL2)",
            "Transfer RNA lysine (MT-TK)",
            "Transfer RNA methionine (MT-TM)",
            "Transfer RNA proline (MT-TP)",
            "Transfer RNA serine 1 (MT-TS1)",
            "Transfer RNA serine 2 (MT-TS2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP synthase F0 subunit 6 (MT-ATP6)",
                "ATP synthase F0 subunit 8 (MT-ATP8)",
                "Cytochrome c oxidase I (MT-CO1)",
                "Cytochrome c oxidase II (MT-CO2)",
                "Cytochrome c oxidase III (MT-CO3)",
                "Cytochrome b (MT-CYB)",
                "NADH dehydrogenase 1 (MT-ND1)",
                "NADH dehydrogenase 2 (MT-ND2)",
                "NADH dehydrogenase 3 (MT-ND3)",
                "NADH dehydrogenase 4 (MT-ND4)",
                "NADH dehydrogenase 4L (MT-ND4L)",
                "NADH dehydrogenase 5 (MT-ND5)",
                "NADH dehydrogenase 6 (MT-ND6)",
                "Ribosomal protein S12 (MT-RPS12)",
                "Ribosomal protein S7 (MT-RPS7)",
                "Ribosomal RNA 12S (MT-RNR1)",
                "Ribosomal RNA 16S (MT-RNR2)",
                "Transfer RNA leucine 1 (MT-TL1)",
                "Transfer RNA leucine 2 (MT-TL2)",
                "Transfer RNA lysine (MT-TK)",
                "Transfer RNA methionine (MT-TM)",
                "Transfer RNA proline (MT-TP)",
                "Transfer RNA serine 1 (MT-TS1)",
                "Transfer RNA serine 2 (MT-TS2)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase F0 subunit 6 (MT-ATP6)",
                "ATP synthase F0 subunit 8 (MT-ATP8)",
                "Cytochrome b (MT-CYB)",
                "Cytochrome c oxidase I (MT-CO1)",
                "Cytochrome c oxidase II (MT-CO2)",
                "Cytochrome c oxidase III (MT-CO3)",
                "NADH dehydrogenase 1 (MT-ND1)",
                "NADH dehydrogenase 2 (MT-ND2)",
                "NADH dehydrogenase 3 (MT-ND3)",
                "NADH dehydrogenase 4 (MT-ND4)",
                "NADH dehydrogenase 4L (MT-ND4L)",
                "NADH dehydrogenase 5 (MT-ND5)",
                "NADH dehydrogenase 6 (MT-ND6)",
                "Ribosomal RNA 12S (MT-RNR1)",
                "Ribosomal RNA 16S (MT-RNR2)",
                "Ribosomal protein S12 (MT-RPS12)",
                "Ribosomal protein S7 (MT-RPS7)",
                "Transfer RNA leucine 1 (MT-TL1)",
                "Transfer RNA leucine 2 (MT-TL2)",
                "Transfer RNA lysine (MT-TK)",
                "Transfer RNA methionine (MT-TM)",
                "Transfer RNA proline (MT-TP)",
                "Transfer RNA serine 1 (MT-TS1)",
                "Transfer RNA serine 2 (MT-TS2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "ATP synthase F0 subunit 6 (MT-ATP6)",
            "Cytochrome c oxidase subunit I (MT-CO1)",
            "Cytochrome c oxidase subunit II (MT-CO2)",
            "Cytochrome c oxidase subunit III (MT-CO3)",
            "Cytochrome b (MT-CYB)",
            "NADH dehydrogenase subunit 1 (MT-ND1)",
            "NADH dehydrogenase subunit 2 (MT-ND2)",
            "NADH dehydrogenase subunit 3 (MT-ND3)",
            "NADH dehydrogenase subunit 4 (MT-ND4)",
            "NADH dehydrogenase subunit 4L (MT-ND4L)",
            "NADH dehydrogenase subunit 5 (MT-ND5)",
            "NADH dehydrogenase subunit 6 (MT-ND6)",
            "Cytochrome c oxidase subunit IV (MT-CO4)",
            "Cytochrome c oxidase subunit V (MT-CO5)",
            "Cytochrome c oxidase subunit VIa (MT-CO6)",
            "Cytochrome c oxidase subunit VIb (MT-CO7)",
            "Cytochrome c oxidase subunit VIIa (MT-CO8)",
            "Cytochrome c oxidase subunit VIII (MT-CO8)",
            "Cytochrome c oxidase subunit IX (MT-CO9)",
            "Cytochrome c oxidase subunit X (MT-CO10)",
            "Cytochrome c oxidase subunit XI (MT-CO11)",
            "Cytochrome c oxidase subunit XII (MT-CO12)",
            "Cytochrome c oxidase subunit XIII (MT-CO13)",
            "Cytochrome c oxidase subunit XIV (MT-CO14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP synthase F0 subunit 6 (MT-ATP6)",
                "Cytochrome c oxidase subunit I (MT-CO1)",
                "Cytochrome c oxidase subunit II (MT-CO2)",
                "Cytochrome c oxidase subunit III (MT-CO3)",
                "Cytochrome b (MT-CYB)",
                "NADH dehydrogenase subunit 1 (MT-ND1)",
                "NADH dehydrogenase subunit 2 (MT-ND2)",
                "NADH dehydrogenase subunit 3 (MT-ND3)",
                "NADH dehydrogenase subunit 4 (MT-ND4)",
                "NADH dehydrogenase subunit 4L (MT-ND4L)",
                "NADH dehydrogenase subunit 5 (MT-ND5)",
                "NADH dehydrogenase subunit 6 (MT-ND6)",
                "Cytochrome c oxidase subunit IV (MT-CO4)",
                "Cytochrome c oxidase subunit V (MT-CO5)",
                "Cytochrome c oxidase subunit VIa (MT-CO6)",
                "Cytochrome c oxidase subunit VIb (MT-CO7)",
                "Cytochrome c oxidase subunit VIIa (MT-CO8)",
                "Cytochrome c oxidase subunit VIII (MT-CO8)",
                "Cytochrome c oxidase subunit IX (MT-CO9)",
                "Cytochrome c oxidase subunit X (MT-CO10)",
                "Cytochrome c oxidase subunit XI (MT-CO11)",
                "Cytochrome c oxidase subunit XII (MT-CO12)",
                "Cytochrome c oxidase subunit XIII (MT-CO13)",
                "Cytochrome c oxidase subunit XIV (MT-CO14)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase F0 subunit 6 (MT-ATP6)",
                "Cytochrome b (MT-CYB)",
                "Cytochrome c oxidase subunit I (MT-CO1)",
                "Cytochrome c oxidase subunit II (MT-CO2)",
                "Cytochrome c oxidase subunit III (MT-CO3)",
                "Cytochrome c oxidase subunit IV (MT-CO4)",
                "Cytochrome c oxidase subunit IX (MT-CO9)",
                "Cytochrome c oxidase subunit V (MT-CO5)",
                "Cytochrome c oxidase subunit VIII (MT-CO8)",
                "Cytochrome c oxidase subunit VIIa (MT-CO8)",
                "Cytochrome c oxidase subunit VIa (MT-CO6)",
                "Cytochrome c oxidase subunit VIb (MT-CO7)",
                "Cytochrome c oxidase subunit X (MT-CO10)",
                "Cytochrome c oxidase subunit XI (MT-CO11)",
                "Cytochrome c oxidase subunit XII (MT-CO12)",
                "Cytochrome c oxidase subunit XIII (MT-CO13)",
                "Cytochrome c oxidase subunit XIV (MT-CO14)",
                "NADH dehydrogenase subunit 1 (MT-ND1)",
                "NADH dehydrogenase subunit 2 (MT-ND2)",
                "NADH dehydrogenase subunit 3 (MT-ND3)",
                "NADH dehydrogenase subunit 4 (MT-ND4)",
                "NADH dehydrogenase subunit 4L (MT-ND4L)",
                "NADH dehydrogenase subunit 5 (MT-ND5)",
                "NADH dehydrogenase subunit 6 (MT-ND6)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase subunit 1 (MT-ND1)",
            "NADH dehydrogenase subunit 2 (MT-ND2)",
            "Cytochrome b (MT-CYB)",
            "Cytochrome c oxidase subunit 1 (MT-CO1)",
            "Cytochrome c oxidase subunit 2 (MT-CO2)",
            "Cytochrome c oxidase subunit 3 (MT-CO3)",
            "ATP synthase F0 subunit 6 (MT-ATP6)",
            "ATP synthase F0 subunit 8 (MT-ATP8)",
            "NADH dehydrogenase subunit 3 (MT-ND3)",
            "NADH dehydrogenase subunit 4 (MT-ND4)",
            "NADH dehydrogenase subunit 4L (MT-ND4L)",
            "NADH dehydrogenase subunit 5 (MT-ND5)",
            "NADH dehydrogenase subunit 6 (MT-ND6)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase subunit 1 (MT-ND1)",
                "NADH dehydrogenase subunit 2 (MT-ND2)",
                "Cytochrome b (MT-CYB)",
                "Cytochrome c oxidase subunit 1 (MT-CO1)",
                "Cytochrome c oxidase subunit 2 (MT-CO2)",
                "Cytochrome c oxidase subunit 3 (MT-CO3)",
                "ATP synthase F0 subunit 6 (MT-ATP6)",
                "ATP synthase F0 subunit 8 (MT-ATP8)",
                "NADH dehydrogenase subunit 3 (MT-ND3)",
                "NADH dehydrogenase subunit 4 (MT-ND4)",
                "NADH dehydrogenase subunit 4L (MT-ND4L)",
                "NADH dehydrogenase subunit 5 (MT-ND5)",
                "NADH dehydrogenase subunit 6 (MT-ND6)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase F0 subunit 6 (MT-ATP6)",
                "ATP synthase F0 subunit 8 (MT-ATP8)",
                "Cytochrome b (MT-CYB)",
                "Cytochrome c oxidase subunit 1 (MT-CO1)",
                "Cytochrome c oxidase subunit 2 (MT-CO2)",
                "Cytochrome c oxidase subunit 3 (MT-CO3)",
                "NADH dehydrogenase subunit 1 (MT-ND1)",
                "NADH dehydrogenase subunit 2 (MT-ND2)",
                "NADH dehydrogenase subunit 3 (MT-ND3)",
                "NADH dehydrogenase subunit 4 (MT-ND4)",
                "NADH dehydrogenase subunit 4L (MT-ND4L)",
                "NADH dehydrogenase subunit 5 (MT-ND5)",
                "NADH dehydrogenase subunit 6 (MT-ND6)"
            ],
            "TP": 13,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase subunit 1 (ND1)",
            "NADH dehydrogenase subunit 2 (ND2)",
            "NADH dehydrogenase subunit 3 (ND3)",
            "NADH dehydrogenase subunit 4 (ND4)",
            "NADH dehydrogenase subunit 4L (ND4L)",
            "NADH dehydrogenase subunit 5 (ND5)",
            "NADH dehydrogenase subunit 6 (ND6)",
            "Cytochrome c oxidase subunit I (COI)",
            "Cytochrome c oxidase subunit II (COII)",
            "Cytochrome c oxidase subunit III (COIII)",
            "ATP synthase F0 subunit 6 (ATP6)",
            "ATP synthase F0 subunit 8 (ATP8)",
            "Cytochrome b (CYTB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase subunit 1 (ND1)",
                "NADH dehydrogenase subunit 2 (ND2)",
                "NADH dehydrogenase subunit 3 (ND3)",
                "NADH dehydrogenase subunit 4 (ND4)",
                "NADH dehydrogenase subunit 4L (ND4L)",
                "NADH dehydrogenase subunit 5 (ND5)",
                "NADH dehydrogenase subunit 6 (ND6)",
                "Cytochrome c oxidase subunit I (COI)",
                "Cytochrome c oxidase subunit II (COII)",
                "Cytochrome c oxidase subunit III (COIII)",
                "ATP synthase F0 subunit 6 (ATP6)",
                "ATP synthase F0 subunit 8 (ATP8)",
                "Cytochrome b (CYTB)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase F0 subunit 6 (ATP6)",
                "ATP synthase F0 subunit 8 (ATP8)",
                "Cytochrome b (CYTB)",
                "Cytochrome c oxidase subunit I (COI)",
                "Cytochrome c oxidase subunit II (COII)",
                "Cytochrome c oxidase subunit III (COIII)",
                "NADH dehydrogenase subunit 1 (ND1)",
                "NADH dehydrogenase subunit 2 (ND2)",
                "NADH dehydrogenase subunit 3 (ND3)",
                "NADH dehydrogenase subunit 4 (ND4)",
                "NADH dehydrogenase subunit 4L (ND4L)",
                "NADH dehydrogenase subunit 5 (ND5)",
                "NADH dehydrogenase subunit 6 (ND6)"
            ],
            "TP": 13,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase subunit 1 (ND1)",
            "NADH dehydrogenase subunit 2 (ND2)",
            "NADH dehydrogenase subunit 3 (ND3)",
            "NADH dehydrogenase subunit 4 (ND4)",
            "NADH dehydrogenase subunit 4L (ND4L)",
            "NADH dehydrogenase subunit 5 (ND5)",
            "NADH dehydrogenase subunit 6 (ND6)",
            "Cytochrome c oxidase subunit I (COX1)",
            "Cytochrome c oxidase subunit II (COX2)",
            "Cytochrome c oxidase subunit III (COX3)",
            "ATP synthase F0 subunit 6 (ATP6)",
            "ATP synthase F0 subunit 8 (ATP8)",
            "Cytochrome b (CYTB)",
            "Humanin (HN)",
            "MOTS-c",
            "Small humanin-like peptide 1 (SHLP1)",
            "Small humanin-like peptide 2 (SHLP2)",
            "Small humanin-like peptide 3 (SHLP3)",
            "Small humanin-like peptide 4 (SHLP4)",
            "Small humanin-like peptide 5 (SHLP5)",
            "Small humanin-like peptide 6 (SHLP6)",
            "Mitochondrial open reading frame of the 12S rRNA-c (MOTS-c)",
            "Mitochondrial-derived peptide 1 (MDP1)",
            "Mitochondrial-derived peptide 2 (MDP2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase subunit 1 (ND1)",
                "NADH dehydrogenase subunit 2 (ND2)",
                "NADH dehydrogenase subunit 3 (ND3)",
                "NADH dehydrogenase subunit 4 (ND4)",
                "NADH dehydrogenase subunit 4L (ND4L)",
                "NADH dehydrogenase subunit 5 (ND5)",
                "NADH dehydrogenase subunit 6 (ND6)",
                "Cytochrome c oxidase subunit I (COX1)",
                "Cytochrome c oxidase subunit II (COX2)",
                "Cytochrome c oxidase subunit III (COX3)",
                "ATP synthase F0 subunit 6 (ATP6)",
                "ATP synthase F0 subunit 8 (ATP8)",
                "Cytochrome b (CYTB)",
                "Humanin (HN)",
                "MOTS-c",
                "Small humanin-like peptide 1 (SHLP1)",
                "Small humanin-like peptide 2 (SHLP2)",
                "Small humanin-like peptide 3 (SHLP3)",
                "Small humanin-like peptide 4 (SHLP4)",
                "Small humanin-like peptide 5 (SHLP5)",
                "Small humanin-like peptide 6 (SHLP6)",
                "Mitochondrial open reading frame of the 12S rRNA-c (MOTS-c)",
                "Mitochondrial-derived peptide 1 (MDP1)",
                "Mitochondrial-derived peptide 2 (MDP2)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase F0 subunit 6 (ATP6)",
                "ATP synthase F0 subunit 8 (ATP8)",
                "Cytochrome b (CYTB)",
                "Cytochrome c oxidase subunit I (COX1)",
                "Cytochrome c oxidase subunit II (COX2)",
                "Cytochrome c oxidase subunit III (COX3)",
                "Humanin (HN)",
                "MOTS-c",
                "Mitochondrial open reading frame of the 12S rRNA-c (MOTS-c)",
                "Mitochondrial-derived peptide 1 (MDP1)",
                "Mitochondrial-derived peptide 2 (MDP2)",
                "NADH dehydrogenase subunit 1 (ND1)",
                "NADH dehydrogenase subunit 2 (ND2)",
                "NADH dehydrogenase subunit 3 (ND3)",
                "NADH dehydrogenase subunit 4 (ND4)",
                "NADH dehydrogenase subunit 4L (ND4L)",
                "NADH dehydrogenase subunit 5 (ND5)",
                "NADH dehydrogenase subunit 6 (ND6)",
                "Small humanin-like peptide 1 (SHLP1)",
                "Small humanin-like peptide 2 (SHLP2)",
                "Small humanin-like peptide 3 (SHLP3)",
                "Small humanin-like peptide 4 (SHLP4)",
                "Small humanin-like peptide 5 (SHLP5)",
                "Small humanin-like peptide 6 (SHLP6)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase subunit 1 (ND1)",
            "NADH dehydrogenase subunit 2 (ND2)",
            "NADH dehydrogenase subunit 3 (ND3)",
            "NADH dehydrogenase subunit 4 (ND4)",
            "NADH dehydrogenase subunit 4L (ND4L)",
            "NADH dehydrogenase subunit 5 (ND5)",
            "NADH dehydrogenase subunit 6 (ND6)",
            "Cytochrome b (CYTB)",
            "Cytochrome c oxidase subunit 1 (COX1)",
            "Cytochrome c oxidase subunit 2 (COX2)",
            "Cytochrome c oxidase subunit 3 (COX3)",
            "ATP synthase F0 subunit 6 (ATP6)",
            "ATP synthase F0 subunit 8 (ATP8)",
            "Cytochrome c oxidase subunit 4 (COX4)",
            "Cytochrome c oxidase subunit 5A (COX5A)",
            "Cytochrome c oxidase subunit 5B (COX5B)",
            "Cytochrome c oxidase subunit 6A (COX6A)",
            "Cytochrome c oxidase subunit 6B (COX6B)",
            "Cytochrome c oxidase subunit 7A (COX7A)",
            "Cytochrome c oxidase subunit 7B (COX7B)",
            "Cytochrome c oxidase subunit 7C (COX7C)",
            "Cytochrome c oxidase subunit 8A (COX8A)",
            "Cytochrome c oxidase subunit 8B (COX8B)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase subunit 1 (ND1)",
                "NADH dehydrogenase subunit 2 (ND2)",
                "NADH dehydrogenase subunit 3 (ND3)",
                "NADH dehydrogenase subunit 4 (ND4)",
                "NADH dehydrogenase subunit 4L (ND4L)",
                "NADH dehydrogenase subunit 5 (ND5)",
                "NADH dehydrogenase subunit 6 (ND6)",
                "Cytochrome b (CYTB)",
                "Cytochrome c oxidase subunit 1 (COX1)",
                "Cytochrome c oxidase subunit 2 (COX2)",
                "Cytochrome c oxidase subunit 3 (COX3)",
                "ATP synthase F0 subunit 6 (ATP6)",
                "ATP synthase F0 subunit 8 (ATP8)",
                "Cytochrome c oxidase subunit 4 (COX4)",
                "Cytochrome c oxidase subunit 5A (COX5A)",
                "Cytochrome c oxidase subunit 5B (COX5B)",
                "Cytochrome c oxidase subunit 6A (COX6A)",
                "Cytochrome c oxidase subunit 6B (COX6B)",
                "Cytochrome c oxidase subunit 7A (COX7A)",
                "Cytochrome c oxidase subunit 7B (COX7B)",
                "Cytochrome c oxidase subunit 7C (COX7C)",
                "Cytochrome c oxidase subunit 8A (COX8A)",
                "Cytochrome c oxidase subunit 8B (COX8B)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase F0 subunit 6 (ATP6)",
                "ATP synthase F0 subunit 8 (ATP8)",
                "Cytochrome b (CYTB)",
                "Cytochrome c oxidase subunit 1 (COX1)",
                "Cytochrome c oxidase subunit 2 (COX2)",
                "Cytochrome c oxidase subunit 3 (COX3)",
                "Cytochrome c oxidase subunit 4 (COX4)",
                "Cytochrome c oxidase subunit 5A (COX5A)",
                "Cytochrome c oxidase subunit 5B (COX5B)",
                "Cytochrome c oxidase subunit 6A (COX6A)",
                "Cytochrome c oxidase subunit 6B (COX6B)",
                "Cytochrome c oxidase subunit 7A (COX7A)",
                "Cytochrome c oxidase subunit 7B (COX7B)",
                "Cytochrome c oxidase subunit 7C (COX7C)",
                "Cytochrome c oxidase subunit 8A (COX8A)",
                "Cytochrome c oxidase subunit 8B (COX8B)",
                "NADH dehydrogenase subunit 1 (ND1)",
                "NADH dehydrogenase subunit 2 (ND2)",
                "NADH dehydrogenase subunit 3 (ND3)",
                "NADH dehydrogenase subunit 4 (ND4)",
                "NADH dehydrogenase subunit 4L (ND4L)",
                "NADH dehydrogenase subunit 5 (ND5)",
                "NADH dehydrogenase subunit 6 (ND6)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "NADH Dehydrogenase Subunit 1 (ND1)",
            "NADH Dehydrogenase Subunit 2 (ND2)",
            "NADH Dehydrogenase Subunit 3 (ND3)",
            "NADH Dehydrogenase Subunit 4 (ND4)",
            "NADH Dehydrogenase Subunit 4L (ND4L)",
            "NADH Dehydrogenase Subunit 5 (ND5)",
            "NADH Dehydrogenase Subunit 6 (ND6)",
            "Cytochrome c Oxidase Subunit I (COX1)",
            "Cytochrome c Oxidase Subunit II (COX2)",
            "Cytochrome c Oxidase Subunit III (COX3)",
            "Cytochrome b (CYTB)",
            "ATP Synthase Subunit 6 (ATP6)",
            "ATP Synthase Subunit 8 (ATP8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH Dehydrogenase Subunit 1 (ND1)",
                "NADH Dehydrogenase Subunit 2 (ND2)",
                "NADH Dehydrogenase Subunit 3 (ND3)",
                "NADH Dehydrogenase Subunit 4 (ND4)",
                "NADH Dehydrogenase Subunit 4L (ND4L)",
                "NADH Dehydrogenase Subunit 5 (ND5)",
                "NADH Dehydrogenase Subunit 6 (ND6)",
                "Cytochrome c Oxidase Subunit I (COX1)",
                "Cytochrome c Oxidase Subunit II (COX2)",
                "Cytochrome c Oxidase Subunit III (COX3)",
                "Cytochrome b (CYTB)",
                "ATP Synthase Subunit 6 (ATP6)",
                "ATP Synthase Subunit 8 (ATP8)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP Synthase Subunit 6 (ATP6)",
                "ATP Synthase Subunit 8 (ATP8)",
                "Cytochrome b (CYTB)",
                "Cytochrome c Oxidase Subunit I (COX1)",
                "Cytochrome c Oxidase Subunit II (COX2)",
                "Cytochrome c Oxidase Subunit III (COX3)",
                "NADH Dehydrogenase Subunit 1 (ND1)",
                "NADH Dehydrogenase Subunit 2 (ND2)",
                "NADH Dehydrogenase Subunit 3 (ND3)",
                "NADH Dehydrogenase Subunit 4 (ND4)",
                "NADH Dehydrogenase Subunit 4L (ND4L)",
                "NADH Dehydrogenase Subunit 5 (ND5)",
                "NADH Dehydrogenase Subunit 6 (ND6)"
            ],
            "TP": 13,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "ATP synthase subunit alpha (ATPSA)",
            "ATP synthase subunit beta (ATPSB)",
            "ATP synthase subunit gamma (ATPSG)",
            "ATP synthase subunit delta (ATPSD)",
            "ATP synthase subunit epsilon (ATPSE)",
            "ATP synthase subunit F0 subunit 1 (ATP6)",
            "ATP synthase subunit F0 subunit 6 (ATP8)",
            "ATP synthase subunit F1 subunit alpha (ATP5A)",
            "ATP synthase subunit F1 subunit beta (ATP5B)",
            "ATP synthase subunit F1 subunit gamma (ATP5G)",
            "ATP synthase subunit F1 subunit delta (ATP5D)",
            "ATP synthase subunit F1 subunit epsilon (ATP5E)",
            "COX1 (cytochrome c oxidase subunit 1)",
            "COX2 (cytochrome c oxidase subunit 2)",
            "COX3 (cytochrome c oxidase subunit 3)",
            "Cytb (cytochrome b)",
            "ND1 (NADH dehydrogenase subunit 1)",
            "ND2 (NADH dehydrogenase subunit 2)",
            "ND3 (NADH dehydrogenase subunit 3)",
            "ND4 (NADH dehydrogenase subunit 4)",
            "ND4L (NADH dehydrogenase subunit 4L)",
            "ND5 (NADH dehydrogenase subunit 5)",
            "ND6 (NADH dehydrogenase subunit 6)",
            "RNR2 (ribonucleotide reductase subunit 2)",
            "RNR3 (ribonucleotide reductase subunit 3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP synthase subunit alpha (ATPSA)",
                "ATP synthase subunit beta (ATPSB)",
                "ATP synthase subunit gamma (ATPSG)",
                "ATP synthase subunit delta (ATPSD)",
                "ATP synthase subunit epsilon (ATPSE)",
                "ATP synthase subunit F0 subunit 1 (ATP6)",
                "ATP synthase subunit F0 subunit 6 (ATP8)",
                "ATP synthase subunit F1 subunit alpha (ATP5A)",
                "ATP synthase subunit F1 subunit beta (ATP5B)",
                "ATP synthase subunit F1 subunit gamma (ATP5G)",
                "ATP synthase subunit F1 subunit delta (ATP5D)",
                "ATP synthase subunit F1 subunit epsilon (ATP5E)",
                "COX1 (cytochrome c oxidase subunit 1)",
                "COX2 (cytochrome c oxidase subunit 2)",
                "COX3 (cytochrome c oxidase subunit 3)",
                "Cytb (cytochrome b)",
                "ND1 (NADH dehydrogenase subunit 1)",
                "ND2 (NADH dehydrogenase subunit 2)",
                "ND3 (NADH dehydrogenase subunit 3)",
                "ND4 (NADH dehydrogenase subunit 4)",
                "ND4L (NADH dehydrogenase subunit 4L)",
                "ND5 (NADH dehydrogenase subunit 5)",
                "ND6 (NADH dehydrogenase subunit 6)",
                "RNR2 (ribonucleotide reductase subunit 2)"
            ],
            "mismatches": [
                "RNR3 (ribonucleotide reductase subunit 3)"
            ],
            "true_referents": [
                "ATP synthase subunit F0 subunit 1 (ATP6)",
                "ATP synthase subunit F0 subunit 6 (ATP8)",
                "ATP synthase subunit F1 subunit alpha (ATP5A)",
                "ATP synthase subunit F1 subunit beta (ATP5B)",
                "ATP synthase subunit F1 subunit delta (ATP5D)",
                "ATP synthase subunit F1 subunit epsilon (ATP5E)",
                "ATP synthase subunit F1 subunit gamma (ATP5G)",
                "ATP synthase subunit alpha (ATPSA)",
                "ATP synthase subunit beta (ATPSB)",
                "ATP synthase subunit delta (ATPSD)",
                "ATP synthase subunit epsilon (ATPSE)",
                "ATP synthase subunit gamma (ATPSG)",
                "COX1 (cytochrome c oxidase subunit 1)",
                "COX2 (cytochrome c oxidase subunit 2)",
                "COX3 (cytochrome c oxidase subunit 3)",
                "Cytb (cytochrome b)",
                "ND1 (NADH dehydrogenase subunit 1)",
                "ND2 (NADH dehydrogenase subunit 2)",
                "ND3 (NADH dehydrogenase subunit 3)",
                "ND4 (NADH dehydrogenase subunit 4)",
                "ND4L (NADH dehydrogenase subunit 4L)",
                "ND5 (NADH dehydrogenase subunit 5)",
                "ND6 (NADH dehydrogenase subunit 6)",
                "RNR2 (ribonucleotide reductase subunit 2)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            "ATP6 (MT-ATP6)",
            "ATP8 (MT-ATP8)",
            "CYB (MTCYB)",
            "CO1 (MT-CO1)",
            "CO2 (MT-CO2)",
            "CO3 (MT-CO3)",
            "ND1 (MT-ND1)",
            "ND2 (MT-ND2)",
            "ND3 (MT-ND3)",
            "ND4 (MT-ND4)",
            "ND4L (MT-ND4L)",
            "ND5 (MT-ND5)",
            "ND6 (MT-ND6)",
            "COX1 (MT-CO1)",
            "COX2 (MT-CO2)",
            "COX3 (MT-CO3)",
            "ATP9 (MT-ATP9)",
            "NDIFV3 (MT-NDIFV3)",
            "MT-CYB (CYB)",
            "MT-TF (MT-TF)",
            "MT-RNR1 (MT-RNR1)",
            "MT-RNR2 (MT-RNR2)",
            "MT-TL1 (MT-TL1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "CYB (MTCYB)",
                "CO1 (MT-CO1)",
                "CO2 (MT-CO2)",
                "CO3 (MT-CO3)",
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)",
                "COX1 (MT-CO1)",
                "COX2 (MT-CO2)",
                "COX3 (MT-CO3)",
                "ATP9 (MT-ATP9)",
                "NDIFV3 (MT-NDIFV3)",
                "MT-CYB (CYB)",
                "MT-TF (MT-TF)",
                "MT-RNR1 (MT-RNR1)",
                "MT-RNR2 (MT-RNR2)",
                "MT-TL1 (MT-TL1)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP6 (MT-ATP6)",
                "ATP8 (MT-ATP8)",
                "ATP9 (MT-ATP9)",
                "CO1 (MT-CO1)",
                "CO2 (MT-CO2)",
                "CO3 (MT-CO3)",
                "COX1 (MT-CO1)",
                "COX2 (MT-CO2)",
                "COX3 (MT-CO3)",
                "CYB (MTCYB)",
                "MT-CYB (CYB)",
                "MT-RNR1 (MT-RNR1)",
                "MT-RNR2 (MT-RNR2)",
                "MT-TF (MT-TF)",
                "MT-TL1 (MT-TL1)",
                "ND1 (MT-ND1)",
                "ND2 (MT-ND2)",
                "ND3 (MT-ND3)",
                "ND4 (MT-ND4)",
                "ND4L (MT-ND4L)",
                "ND5 (MT-ND5)",
                "ND6 (MT-ND6)",
                "NDIFV3 (MT-NDIFV3)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "mitochondrial DNA encoded protein-coding gene",
        "domain": "biology",
        "response": [
            {
                "Referent": "ATP synthase subunit beta (ATP5B)",
                "Canonical name": "ATP5B"
            },
            {
                "Referent": "ATP synthase subunit alpha (ATP5A)",
                "Canonical name": "ATP5A"
            },
            {
                "Referent": "ATP synthase subunit O (ATP5O)",
                "Canonical name": "ATP5O"
            },
            {
                "Referent": "ATP synthase subunit F6 (ATP5F6)",
                "Canonical name": "ATP5F6"
            },
            {
                "Referent": "ATP synthase subunit D (ATP5D)",
                "Canonical name": "ATP5D"
            },
            {
                "Referent": "ATP synthase subunit E (ATP5E)",
                "Canonical name": "ATP5E"
            },
            {
                "Referent": "ATP synthase subunit C (ATP5C)",
                "Canonical name": "ATP5C"
            },
            {
                "Referent": "ATP synthase subunit G (ATP5G)",
                "Canonical name": "ATP5G"
            },
            {
                "Referent": "ATP synthase subunit I (ATP5I)",
                "Canonical name": "ATP5I"
            },
            {
                "Referent": "ATP synthase subunit H (ATP5H)",
                "Canonical name": "ATP5H"
            },
            {
                "Referent": "ATP synthase subunit J (ATP5J)",
                "Canonical name": "ATP5J"
            },
            {
                "Referent": "ATP synthase subunit K (ATP5K)",
                "Canonical name": "ATP5K"
            },
            {
                "Referent": "ATP synthase subunit N (ATP5N)",
                "Canonical name": "ATP5N"
            },
            {
                "Referent": "ATP synthase subunit M (ATP5M)",
                "Canonical name": "ATP5M"
            },
            {
                "Referent": "ATP synthase subunit F8 (ATP5F8)",
                "Canonical name": "ATP5F8"
            },
            {
                "Referent": "ATP synthase subunit C1 (ATP5C1)",
                "Canonical name": "ATP5C1"
            },
            {
                "Referent": "ATP synthase subunit C2 (ATP5C2)",
                "Canonical name": "ATP5C2"
            },
            {
                "Referent": "ATP synthase subunit C3 (ATP5C3)",
                "Canonical name": "ATP5C3"
            },
            {
                "Referent": "ATP synthase subunit C4 (ATP5C4)",
                "Canonical name": "ATP5C4"
            },
            {
                "Referent": "ATP synthase subunit C5 (ATP5C5)",
                "Canonical name": "ATP5C5"
            },
            {
                "Referent": "ATP synthase subunit C6 (ATP5C6)",
                "Canonical name": "ATP5C6"
            },
            {
                "Referent": "ATP synthase subunit C7 (ATP5C7)",
                "Canonical name": "ATP5C7"
            },
            {
                "Referent": "ATP synthase subunit C8 (ATP5C8)",
                "Canonical name": "ATP5C8"
            },
            {
                "Referent": "ATP synthase subunit C9 (ATP5C9)",
                "Canonical name": "ATP5C9"
            },
            {
                "Referent": "ATP synthase subunit C10 (ATP5C10)",
                "Canonical name": "ATP5C10"
            },
            {
                "Referent": "ATP synthase subunit C11 (ATP5C11)",
                "Canonical name": "ATP5C11"
            },
            {
                "Referent": "ATP synthase subunit C12 (ATP5C12)",
                "Canonical name": "ATP5C12"
            },
            {
                "Referent": "ATP synthase subunit C13 (ATP5C13)",
                "Canonical name": "ATP5C13"
            },
            {
                "Referent": "ATP synthase subunit C14 (ATP5C14)",
                "Canonical name": "ATP5C14"
            },
            {
                "Referent": "ATP synthase subunit C15 (ATP5C15)",
                "Canonical name": "ATP5C15"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ATP5B",
                "ATP5A",
                "ATP5O",
                "ATP5F6",
                "ATP5D",
                "ATP5E",
                "ATP5C",
                "ATP5G",
                "ATP5I",
                "ATP5H",
                "ATP5J",
                "ATP5K",
                "ATP5N",
                "ATP5M",
                "ATP5F8",
                "ATP5C1",
                "ATP5C2",
                "ATP5C3",
                "ATP5C4",
                "ATP5C5",
                "ATP5C6",
                "ATP5C7",
                "ATP5C8",
                "ATP5C9"
            ],
            "mismatches": [
                "ATP5C10",
                "ATP5C11",
                "ATP5C12",
                "ATP5C13",
                "ATP5C14",
                "ATP5C15"
            ],
            "true_referents": [
                "{\"Referent\": \"ATP synthase subunit C (ATP5C)\", \"Canonical name\": \"ATP5C\"}",
                "{\"Referent\": \"ATP synthase subunit C1 (ATP5C1)\", \"Canonical name\": \"ATP5C1\"}",
                "{\"Referent\": \"ATP synthase subunit C2 (ATP5C2)\", \"Canonical name\": \"ATP5C2\"}",
                "{\"Referent\": \"ATP synthase subunit C3 (ATP5C3)\", \"Canonical name\": \"ATP5C3\"}",
                "{\"Referent\": \"ATP synthase subunit C4 (ATP5C4)\", \"Canonical name\": \"ATP5C4\"}",
                "{\"Referent\": \"ATP synthase subunit C5 (ATP5C5)\", \"Canonical name\": \"ATP5C5\"}",
                "{\"Referent\": \"ATP synthase subunit C6 (ATP5C6)\", \"Canonical name\": \"ATP5C6\"}",
                "{\"Referent\": \"ATP synthase subunit C7 (ATP5C7)\", \"Canonical name\": \"ATP5C7\"}",
                "{\"Referent\": \"ATP synthase subunit C8 (ATP5C8)\", \"Canonical name\": \"ATP5C8\"}",
                "{\"Referent\": \"ATP synthase subunit C9 (ATP5C9)\", \"Canonical name\": \"ATP5C9\"}",
                "{\"Referent\": \"ATP synthase subunit D (ATP5D)\", \"Canonical name\": \"ATP5D\"}",
                "{\"Referent\": \"ATP synthase subunit E (ATP5E)\", \"Canonical name\": \"ATP5E\"}",
                "{\"Referent\": \"ATP synthase subunit F6 (ATP5F6)\", \"Canonical name\": \"ATP5F6\"}",
                "{\"Referent\": \"ATP synthase subunit F8 (ATP5F8)\", \"Canonical name\": \"ATP5F8\"}",
                "{\"Referent\": \"ATP synthase subunit G (ATP5G)\", \"Canonical name\": \"ATP5G\"}",
                "{\"Referent\": \"ATP synthase subunit H (ATP5H)\", \"Canonical name\": \"ATP5H\"}",
                "{\"Referent\": \"ATP synthase subunit I (ATP5I)\", \"Canonical name\": \"ATP5I\"}",
                "{\"Referent\": \"ATP synthase subunit J (ATP5J)\", \"Canonical name\": \"ATP5J\"}",
                "{\"Referent\": \"ATP synthase subunit K (ATP5K)\", \"Canonical name\": \"ATP5K\"}",
                "{\"Referent\": \"ATP synthase subunit M (ATP5M)\", \"Canonical name\": \"ATP5M\"}",
                "{\"Referent\": \"ATP synthase subunit N (ATP5N)\", \"Canonical name\": \"ATP5N\"}",
                "{\"Referent\": \"ATP synthase subunit O (ATP5O)\", \"Canonical name\": \"ATP5O\"}",
                "{\"Referent\": \"ATP synthase subunit alpha (ATP5A)\", \"Canonical name\": \"ATP5A\"}",
                "{\"Referent\": \"ATP synthase subunit beta (ATP5B)\", \"Canonical name\": \"ATP5B\"}"
            ],
            "TP": 24,
            "FP": 6,
            "FN": 0
        }
    }
]