[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "Complex I (NADH:ubiquinone oxidoreductase)",
            "Complex II (Succinate:ubiquinone oxidoreductase)",
            "Complex III (Ubiquinol-cytochrome c reductase)",
            "Complex IV (Cytochrome c oxidase)",
            "Complex V (ATP synthase)",
            "Subunit 9 (Complex III subunit 9)",
            "Subunit 1 (Complex I subunit 1)",
            "Subunit 2 (Complex I subunit 2)",
            "Subunit 3 (Complex I subunit 3)",
            "Subunit 4 (Complex I subunit 4)",
            "Subunit 5 (Complex I subunit 5)",
            "Subunit 6 (Complex I subunit 6)",
            "Subunit 7 (Complex I subunit 7)",
            "Subunit 8 (Complex I subunit 8)",
            "Subunit A (Complex IV subunit 1)",
            "Subunit B (Complex IV subunit 2)",
            "Subunit C (Complex IV subunit 3)",
            "Subunit D (Complex IV subunit 4)",
            "Subunit E (Complex IV subunit 5)",
            "Subunit F (Complex IV subunit 6)",
            "Subunit G (Complex IV subunit 7)",
            "Subunit H (Complex IV subunit 8)",
            "Subunit I (Complex V subunit 1)",
            "Subunit J (Complex V subunit 2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Complex I (NADH:ubiquinone oxidoreductase)",
                "Complex II (Succinate:ubiquinone oxidoreductase)",
                "Complex III (Ubiquinol-cytochrome c reductase)",
                "Complex IV (Cytochrome c oxidase)",
                "Complex V (ATP synthase)",
                "Subunit 9 (Complex III subunit 9)",
                "Subunit 1 (Complex I subunit 1)",
                "Subunit 2 (Complex I subunit 2)",
                "Subunit 3 (Complex I subunit 3)",
                "Subunit 4 (Complex I subunit 4)",
                "Subunit 5 (Complex I subunit 5)",
                "Subunit 6 (Complex I subunit 6)",
                "Subunit 7 (Complex I subunit 7)",
                "Subunit 8 (Complex I subunit 8)",
                "Subunit A (Complex IV subunit 1)",
                "Subunit B (Complex IV subunit 2)",
                "Subunit C (Complex IV subunit 3)",
                "Subunit D (Complex IV subunit 4)",
                "Subunit E (Complex IV subunit 5)",
                "Subunit F (Complex IV subunit 6)",
                "Subunit G (Complex IV subunit 7)",
                "Subunit H (Complex IV subunit 8)",
                "Subunit I (Complex V subunit 1)",
                "Subunit J (Complex V subunit 2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Complex I (NADH:ubiquinone oxidoreductase)",
                "Complex II (Succinate:ubiquinone oxidoreductase)",
                "Complex III (Ubiquinol-cytochrome c reductase)",
                "Complex IV (Cytochrome c oxidase)",
                "Complex V (ATP synthase)",
                "Subunit 1 (Complex I subunit 1)",
                "Subunit 2 (Complex I subunit 2)",
                "Subunit 3 (Complex I subunit 3)",
                "Subunit 4 (Complex I subunit 4)",
                "Subunit 5 (Complex I subunit 5)",
                "Subunit 6 (Complex I subunit 6)",
                "Subunit 7 (Complex I subunit 7)",
                "Subunit 8 (Complex I subunit 8)",
                "Subunit 9 (Complex III subunit 9)",
                "Subunit A (Complex IV subunit 1)",
                "Subunit B (Complex IV subunit 2)",
                "Subunit C (Complex IV subunit 3)",
                "Subunit D (Complex IV subunit 4)",
                "Subunit E (Complex IV subunit 5)",
                "Subunit F (Complex IV subunit 6)",
                "Subunit G (Complex IV subunit 7)",
                "Subunit H (Complex IV subunit 8)",
                "Subunit I (Complex V subunit 1)",
                "Subunit J (Complex V subunit 2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH:ubiquinone oxidoreductase (Complex I)",
            "Succinate:ubiquinone oxidoreductase (Complex II)",
            "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "Cytochrome b-c1 complex",
            "Cytochrome bc1 complex (Complex III)",
            "NADH dehydrogenase",
            "Succinate dehydrogenase (Complex II)",
            "Iron-sulfur protein",
            "Cytochrome c1",
            "Cytochrome c",
            "Ubiquinone (Coenzyme Q)",
            "Cytochrome a",
            "Cytochrome a3",
            "Heme a",
            "Heme a3",
            "Oxygen binding site",
            "Proton pump",
            "ATPase activity",
            "Proton translocation",
            "Electron carrier",
            "Catalytic subunit",
            "Regulatory subunit"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "Cytochrome b-c1 complex",
                "Cytochrome bc1 complex (Complex III)",
                "NADH dehydrogenase",
                "Succinate dehydrogenase (Complex II)",
                "Iron-sulfur protein",
                "Cytochrome c1",
                "Cytochrome c",
                "Ubiquinone (Coenzyme Q)",
                "Cytochrome a",
                "Cytochrome a3",
                "Heme a",
                "Heme a3",
                "Oxygen binding site",
                "Proton pump",
                "ATPase activity",
                "Proton translocation",
                "Electron carrier",
                "Catalytic subunit",
                "Regulatory subunit"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "ATPase activity",
                "Catalytic subunit",
                "Cytochrome a",
                "Cytochrome a3",
                "Cytochrome b-c1 complex",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome c",
                "Cytochrome c oxidase (Complex IV)",
                "Cytochrome c1",
                "Electron carrier",
                "Heme a",
                "Heme a3",
                "Iron-sulfur protein",
                "NADH dehydrogenase",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Oxygen binding site",
                "Proton pump",
                "Proton translocation",
                "Regulatory subunit",
                "Succinate dehydrogenase (Complex II)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Ubiquinone (Coenzyme Q)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "Complex I (NADH dehydrogenase)",
            "Complex II (Succinate dehydrogenase)",
            "Complex III (Cytochrome bc1 complex)",
            "Complex IV (Cytochrome c oxidase)",
            "Complex V (ATP synthase)",
            "Cytochrome c",
            "Ubiquinone (Coenzyme Q)",
            "Ferredoxin",
            "Ferredoxin reductase",
            "Ferredoxin-NADP+ reductase",
            "Cytochrome b5",
            "Cytochrome b5 reductase",
            "Cytochrome c1",
            "Cytochrome c oxidase subunit I",
            "Cytochrome c oxidase subunit II",
            "Cytochrome c oxidase subunit III",
            "Cytochrome c oxidase subunit IV",
            "Cytochrome c oxidase subunit V",
            "Cytochrome c oxidase subunit VI",
            "Cytochrome c oxidase subunit VII",
            "Cytochrome c oxidase subunit VIII",
            "Cytochrome c oxidase subunit IX",
            "Cytochrome c oxidase subunit X",
            "Cytochrome c oxidase subunit XI"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Complex I (NADH dehydrogenase)",
                "Complex II (Succinate dehydrogenase)",
                "Complex III (Cytochrome bc1 complex)",
                "Complex IV (Cytochrome c oxidase)",
                "Complex V (ATP synthase)",
                "Cytochrome c",
                "Ubiquinone (Coenzyme Q)",
                "Ferredoxin",
                "Ferredoxin reductase",
                "Ferredoxin-NADP+ reductase",
                "Cytochrome b5",
                "Cytochrome b5 reductase",
                "Cytochrome c1",
                "Cytochrome c oxidase subunit I",
                "Cytochrome c oxidase subunit II",
                "Cytochrome c oxidase subunit III",
                "Cytochrome c oxidase subunit IV",
                "Cytochrome c oxidase subunit V",
                "Cytochrome c oxidase subunit VI",
                "Cytochrome c oxidase subunit VII",
                "Cytochrome c oxidase subunit VIII",
                "Cytochrome c oxidase subunit IX",
                "Cytochrome c oxidase subunit X",
                "Cytochrome c oxidase subunit XI"
            ],
            "mismatches": [],
            "true_referents": [
                "Complex I (NADH dehydrogenase)",
                "Complex II (Succinate dehydrogenase)",
                "Complex III (Cytochrome bc1 complex)",
                "Complex IV (Cytochrome c oxidase)",
                "Complex V (ATP synthase)",
                "Cytochrome b5",
                "Cytochrome b5 reductase",
                "Cytochrome c",
                "Cytochrome c oxidase subunit I",
                "Cytochrome c oxidase subunit II",
                "Cytochrome c oxidase subunit III",
                "Cytochrome c oxidase subunit IV",
                "Cytochrome c oxidase subunit IX",
                "Cytochrome c oxidase subunit V",
                "Cytochrome c oxidase subunit VI",
                "Cytochrome c oxidase subunit VII",
                "Cytochrome c oxidase subunit VIII",
                "Cytochrome c oxidase subunit X",
                "Cytochrome c oxidase subunit XI",
                "Cytochrome c1",
                "Ferredoxin",
                "Ferredoxin reductase",
                "Ferredoxin-NADP+ reductase",
                "Ubiquinone (Coenzyme Q)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase (Complex I)",
            "NADH:ubiquinone oxidoreductase (Complex I)",
            "Succinate dehydrogenase (Complex II)",
            "Succinate:ubiquinone oxidoreductase (Complex II)",
            "Cytochrome bc1 complex (Complex III)",
            "Ubiquinol-cytochrome c reductase (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "Complex IV",
            "ATP synthase (Complex V)",
            "F0F1-ATPase (Complex V)",
            "NduA subunit (Complex I)",
            "SdhA subunit (Complex II)",
            "Qcr2 protein (Complex III)",
            "Cox1 subunit (Complex IV)",
            "AtpA subunit (Complex V)",
            "Iron-sulfur protein (Complex I/II)",
            "Ubiquinone (Q)",
            "Cytochrome c",
            "Rieske protein (Complex III)",
            "FAD (Complex II)",
            "Heme group (Complex III/IV)",
            "Copper center (Complex IV)",
            "Proton pump (Complex I/III/IV)",
            "Matrix side of inner mitochondrial membrane (location)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase (Complex I)",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Succinate dehydrogenase (Complex II)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Cytochrome bc1 complex (Complex III)",
                "Ubiquinol-cytochrome c reductase (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "Complex IV",
                "ATP synthase (Complex V)",
                "F0F1-ATPase (Complex V)",
                "NduA subunit (Complex I)",
                "SdhA subunit (Complex II)",
                "Qcr2 protein (Complex III)",
                "Cox1 subunit (Complex IV)",
                "AtpA subunit (Complex V)",
                "Iron-sulfur protein (Complex I/II)",
                "Ubiquinone (Q)",
                "Cytochrome c",
                "Rieske protein (Complex III)",
                "FAD (Complex II)",
                "Heme group (Complex III/IV)",
                "Copper center (Complex IV)",
                "Proton pump (Complex I/III/IV)",
                "Matrix side of inner mitochondrial membrane (location)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "AtpA subunit (Complex V)",
                "Complex IV",
                "Copper center (Complex IV)",
                "Cox1 subunit (Complex IV)",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome c",
                "Cytochrome c oxidase (Complex IV)",
                "F0F1-ATPase (Complex V)",
                "FAD (Complex II)",
                "Heme group (Complex III/IV)",
                "Iron-sulfur protein (Complex I/II)",
                "Matrix side of inner mitochondrial membrane (location)",
                "NADH dehydrogenase (Complex I)",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "NduA subunit (Complex I)",
                "Proton pump (Complex I/III/IV)",
                "Qcr2 protein (Complex III)",
                "Rieske protein (Complex III)",
                "SdhA subunit (Complex II)",
                "Succinate dehydrogenase (Complex II)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol-cytochrome c reductase (Complex III)",
                "Ubiquinone (Q)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH:ubiquinone oxidoreductase (Complex I)",
            "Succinate:ubiquinone oxidoreductase (Complex II)",
            "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "NADH dehydrogenase (NDH-1)",
            "Succinate dehydrogenase (SDH)",
            "Cytochrome bc1 complex",
            "Cytochrome c",
            "Cytochrome a3",
            "Cytochrome b",
            "Cytochrome c1",
            "Cytochrome a",
            "Cytochrome oxidase",
            "Coenzyme Q (CoQ)",
            "Ubiquinone",
            "Cytochrome b6f complex",
            "Plastoquinone",
            "Plastocyanin",
            "Ferredoxin",
            "Ferredoxin-NADP+ reductase",
            "Rieske iron-sulfur protein",
            "Iron-sulfur cluster",
            "Cytochrome c oxidase subunit I"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "NADH dehydrogenase (NDH-1)",
                "Succinate dehydrogenase (SDH)",
                "Cytochrome bc1 complex",
                "Cytochrome c",
                "Cytochrome a3",
                "Cytochrome b",
                "Cytochrome c1",
                "Cytochrome a",
                "Cytochrome oxidase",
                "Coenzyme Q (CoQ)",
                "Ubiquinone",
                "Cytochrome b6f complex",
                "Plastoquinone",
                "Plastocyanin",
                "Ferredoxin",
                "Ferredoxin-NADP+ reductase",
                "Rieske iron-sulfur protein",
                "Iron-sulfur cluster",
                "Cytochrome c oxidase subunit I"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "Coenzyme Q (CoQ)",
                "Cytochrome a",
                "Cytochrome a3",
                "Cytochrome b",
                "Cytochrome b6f complex",
                "Cytochrome bc1 complex",
                "Cytochrome c",
                "Cytochrome c oxidase (Complex IV)",
                "Cytochrome c oxidase subunit I",
                "Cytochrome c1",
                "Cytochrome oxidase",
                "Ferredoxin",
                "Ferredoxin-NADP+ reductase",
                "Iron-sulfur cluster",
                "NADH dehydrogenase (NDH-1)",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Plastocyanin",
                "Plastoquinone",
                "Rieske iron-sulfur protein",
                "Succinate dehydrogenase (SDH)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Ubiquinone"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH:ubiquinone oxidoreductase (Complex I)",
            "Succinate dehydrogenase (Complex II)",
            "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "NADH dehydrogenase (Complex I)",
            "Iron-sulfur protein (Complex I)",
            "Cytochrome b (Complex III)",
            "Cytochrome c1 (Complex III)",
            "Cytochrome c (soluble)",
            "Ubiquinone (Coenzyme Q)",
            "Aconitase (part of Complex II)",
            "Fumarate reductase (Complex II)",
            "Rieske iron-sulfur protein (Complex III)",
            "Adenine nucleotide translocase (ANT)",
            "ATP-binding cassette transporter (ABC)",
            "Cytochrome a (Complex IV)",
            "Cytochrome a3 (Complex IV)",
            "Cytosolic NADH dehydrogenase",
            "NADH-cytochrome b5 reductase",
            "Complex I-like protein (NDUFAF)",
            "Complex II-like protein (SDHAF)",
            "Complex III-like protein (UQCR)",
            "Complex IV-like protein (COX)",
            "NADH-ubiquinone oxidoreductase (NDUFS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Succinate dehydrogenase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "NADH dehydrogenase (Complex I)",
                "Iron-sulfur protein (Complex I)",
                "Cytochrome b (Complex III)",
                "Cytochrome c1 (Complex III)",
                "Cytochrome c (soluble)",
                "Ubiquinone (Coenzyme Q)",
                "Aconitase (part of Complex II)",
                "Fumarate reductase (Complex II)",
                "Rieske iron-sulfur protein (Complex III)",
                "Adenine nucleotide translocase (ANT)",
                "ATP-binding cassette transporter (ABC)",
                "Cytochrome a (Complex IV)",
                "Cytochrome a3 (Complex IV)",
                "Cytosolic NADH dehydrogenase",
                "NADH-cytochrome b5 reductase",
                "Complex I-like protein (NDUFAF)",
                "Complex II-like protein (SDHAF)",
                "Complex III-like protein (UQCR)",
                "Complex IV-like protein (COX)"
            ],
            "mismatches": [
                "NADH-ubiquinone oxidoreductase (NDUFS)"
            ],
            "true_referents": [
                "ATP synthase (Complex V)",
                "ATP-binding cassette transporter (ABC)",
                "Aconitase (part of Complex II)",
                "Adenine nucleotide translocase (ANT)",
                "Complex I-like protein (NDUFAF)",
                "Complex II-like protein (SDHAF)",
                "Complex III-like protein (UQCR)",
                "Complex IV-like protein (COX)",
                "Cytochrome a (Complex IV)",
                "Cytochrome a3 (Complex IV)",
                "Cytochrome b (Complex III)",
                "Cytochrome c (soluble)",
                "Cytochrome c oxidase (Complex IV)",
                "Cytochrome c1 (Complex III)",
                "Cytosolic NADH dehydrogenase",
                "Fumarate reductase (Complex II)",
                "Iron-sulfur protein (Complex I)",
                "NADH dehydrogenase (Complex I)",
                "NADH-cytochrome b5 reductase",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Rieske iron-sulfur protein (Complex III)",
                "Succinate dehydrogenase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Ubiquinone (Coenzyme Q)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH:ubiquinone oxidoreductase (Complex I)",
            "Succinate:ubiquinone oxidoreductase (Complex II)",
            "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "NDUFA1 (Complex I subunit)",
            "NDUFB9 (Complex I subunit)",
            "SDHA (Complex II subunit)",
            "SDHB (Complex II subunit)",
            "UQCRC1 (Complex III subunit)",
            "UQCRC2 (Complex III subunit)",
            "COX1 (Complex IV subunit)",
            "COX2 (Complex IV subunit)",
            "ATP5A1 (Complex V subunit)",
            "ATP5B (Complex V subunit)",
            "NDUFS1 (Complex I subunit)",
            "NDUFS2 (Complex I subunit)",
            "SDHC (Complex II subunit)",
            "SDHD (Complex II subunit)",
            "UQCRFS1 (Complex III subunit)",
            "UQCRH (Complex III subunit)",
            "COX3 (Complex IV subunit)",
            "COX4I1 (Complex IV subunit)",
            "ATP5C1 (Complex V subunit)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "NDUFA1 (Complex I subunit)",
                "NDUFB9 (Complex I subunit)",
                "SDHA (Complex II subunit)",
                "SDHB (Complex II subunit)",
                "UQCRC1 (Complex III subunit)",
                "UQCRC2 (Complex III subunit)",
                "COX1 (Complex IV subunit)",
                "COX2 (Complex IV subunit)",
                "ATP5A1 (Complex V subunit)",
                "ATP5B (Complex V subunit)",
                "NDUFS1 (Complex I subunit)",
                "NDUFS2 (Complex I subunit)",
                "SDHC (Complex II subunit)",
                "SDHD (Complex II subunit)",
                "UQCRFS1 (Complex III subunit)",
                "UQCRH (Complex III subunit)",
                "COX3 (Complex IV subunit)",
                "COX4I1 (Complex IV subunit)",
                "ATP5C1 (Complex V subunit)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "ATP5A1 (Complex V subunit)",
                "ATP5B (Complex V subunit)",
                "ATP5C1 (Complex V subunit)",
                "COX1 (Complex IV subunit)",
                "COX2 (Complex IV subunit)",
                "COX3 (Complex IV subunit)",
                "COX4I1 (Complex IV subunit)",
                "Cytochrome c oxidase (Complex IV)",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "NDUFA1 (Complex I subunit)",
                "NDUFB9 (Complex I subunit)",
                "NDUFS1 (Complex I subunit)",
                "NDUFS2 (Complex I subunit)",
                "SDHA (Complex II subunit)",
                "SDHB (Complex II subunit)",
                "SDHC (Complex II subunit)",
                "SDHD (Complex II subunit)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "UQCRC1 (Complex III subunit)",
                "UQCRC2 (Complex III subunit)",
                "UQCRFS1 (Complex III subunit)",
                "UQCRH (Complex III subunit)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase (Complex I)",
            "Succinate dehydrogenase (Complex II)",
            "Cytochrome bc1 complex (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "Iron-sulfur protein (ISP)",
            "Ubiquinone (Coenzyme Q)",
            "Cytochrome b",
            "Cytochrome c",
            "Cytochrome a",
            "Cytochrome a3",
            "Ferredoxin",
            "NADH-ubiquinone oxidoreductase",
            "Succinate-ubiquinone oxidoreductase",
            "Ubiquinol-cytochrome c oxidoreductase",
            "Cytochrome c oxidase subunit I",
            "Cytochrome c oxidase subunit II",
            "Cytochrome c oxidase subunit III",
            "ATP synthase subunit alpha",
            "ATP synthase subunit beta",
            "ATP synthase subunit gamma",
            "ATP synthase subunit delta",
            "ATP synthase subunit epsilon",
            "ATP synthase subunit c"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase (Complex I)",
                "Succinate dehydrogenase (Complex II)",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "Iron-sulfur protein (ISP)",
                "Ubiquinone (Coenzyme Q)",
                "Cytochrome b",
                "Cytochrome c",
                "Cytochrome a",
                "Cytochrome a3",
                "Ferredoxin",
                "NADH-ubiquinone oxidoreductase",
                "Succinate-ubiquinone oxidoreductase",
                "Ubiquinol-cytochrome c oxidoreductase",
                "Cytochrome c oxidase subunit I",
                "Cytochrome c oxidase subunit II",
                "Cytochrome c oxidase subunit III",
                "ATP synthase subunit alpha",
                "ATP synthase subunit beta",
                "ATP synthase subunit gamma",
                "ATP synthase subunit delta",
                "ATP synthase subunit epsilon",
                "ATP synthase subunit c"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "ATP synthase subunit alpha",
                "ATP synthase subunit beta",
                "ATP synthase subunit c",
                "ATP synthase subunit delta",
                "ATP synthase subunit epsilon",
                "ATP synthase subunit gamma",
                "Cytochrome a",
                "Cytochrome a3",
                "Cytochrome b",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome c",
                "Cytochrome c oxidase (Complex IV)",
                "Cytochrome c oxidase subunit I",
                "Cytochrome c oxidase subunit II",
                "Cytochrome c oxidase subunit III",
                "Ferredoxin",
                "Iron-sulfur protein (ISP)",
                "NADH dehydrogenase (Complex I)",
                "NADH-ubiquinone oxidoreductase",
                "Succinate dehydrogenase (Complex II)",
                "Succinate-ubiquinone oxidoreductase",
                "Ubiquinol-cytochrome c oxidoreductase",
                "Ubiquinone (Coenzyme Q)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH:ubiquinone oxidoreductase (Complex I)",
            "Succinate:ubiquinone oxidoreductase (Complex II)",
            "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "Alternative NADH dehydrogenase",
            "Alternative oxidase",
            "Plastocyanin",
            "Plastoquinone",
            "Ferredoxin-NADP+ reductase",
            "Cytochrome bc1 complex",
            "Electron transferring flavoprotein",
            "Rieske iron-sulfur protein",
            "Cytochrome b",
            "Cytochrome c1",
            "COX7A",
            "COX8",
            "ATP5A",
            "ATP5B",
            "ATP5C",
            "Cyt c",
            "Cyt b",
            "ND1",
            "SDHA"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "Alternative NADH dehydrogenase",
                "Alternative oxidase",
                "Plastocyanin",
                "Plastoquinone",
                "Ferredoxin-NADP+ reductase",
                "Cytochrome bc1 complex",
                "Electron transferring flavoprotein",
                "Rieske iron-sulfur protein",
                "Cytochrome b",
                "Cytochrome c1",
                "COX7A",
                "COX8",
                "ATP5A",
                "ATP5B",
                "ATP5C",
                "Cyt c",
                "Cyt b",
                "ND1",
                "SDHA"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "ATP5A",
                "ATP5B",
                "ATP5C",
                "Alternative NADH dehydrogenase",
                "Alternative oxidase",
                "COX7A",
                "COX8",
                "Cyt b",
                "Cyt c",
                "Cytochrome b",
                "Cytochrome bc1 complex",
                "Cytochrome c oxidase (Complex IV)",
                "Cytochrome c1",
                "Electron transferring flavoprotein",
                "Ferredoxin-NADP+ reductase",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "ND1",
                "Plastocyanin",
                "Plastoquinone",
                "Rieske iron-sulfur protein",
                "SDHA",
                "Succinate:ubiquinone oxidoreductase (Complex II)",
                "Ubiquinol:cytochrome c oxidoreductase (Complex III)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH:ubiquinone oxidoreductase (Complex I)",
            "Succinate dehydrogenase (Complex II)",
            "Coenzyme Q - cytochrome c reductase (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "Cytochrome b6f complex",
            "Plastoquinol\u2014plastocyanin reductase",
            "NADH dehydrogenase (NDH) complex",
            "Formate dehydrogenase-N",
            "Nitrate reductase",
            "Sulfite oxidase",
            "Glycerol-3-phosphate dehydrogenase",
            "Electron-transferring-flavoprotein dehydrogenase (ETF-QO)",
            "Proline dehydrogenase",
            "Dihydroorotate dehydrogenase",
            "Alternative oxidase (AOX)",
            "Cytochrome bd oxidase",
            "Cytochrome bo3 oxidase",
            "Fumarate reductase",
            "NADH:menaquinone oxidoreductase",
            "Rhodoquinol-cytochrome c reductase",
            "Plastocyanin-ferredoxin oxidoreductase",
            "Sulfide:quinone oxidoreductase",
            "Chloroplast NDH-like complex"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Succinate dehydrogenase (Complex II)",
                "Coenzyme Q - cytochrome c reductase (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "Cytochrome b6f complex",
                "Plastoquinol\u2014plastocyanin reductase",
                "NADH dehydrogenase (NDH) complex",
                "Formate dehydrogenase-N",
                "Nitrate reductase",
                "Sulfite oxidase",
                "Glycerol-3-phosphate dehydrogenase",
                "Electron-transferring-flavoprotein dehydrogenase (ETF-QO)",
                "Proline dehydrogenase",
                "Dihydroorotate dehydrogenase",
                "Alternative oxidase (AOX)",
                "Cytochrome bd oxidase",
                "Cytochrome bo3 oxidase",
                "Fumarate reductase",
                "NADH:menaquinone oxidoreductase",
                "Rhodoquinol-cytochrome c reductase",
                "Plastocyanin-ferredoxin oxidoreductase",
                "Sulfide:quinone oxidoreductase",
                "Chloroplast NDH-like complex"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "Alternative oxidase (AOX)",
                "Chloroplast NDH-like complex",
                "Coenzyme Q - cytochrome c reductase (Complex III)",
                "Cytochrome b6f complex",
                "Cytochrome bd oxidase",
                "Cytochrome bo3 oxidase",
                "Cytochrome c oxidase (Complex IV)",
                "Dihydroorotate dehydrogenase",
                "Electron-transferring-flavoprotein dehydrogenase (ETF-QO)",
                "Formate dehydrogenase-N",
                "Fumarate reductase",
                "Glycerol-3-phosphate dehydrogenase",
                "NADH dehydrogenase (NDH) complex",
                "NADH:menaquinone oxidoreductase",
                "NADH:ubiquinone oxidoreductase (Complex I)",
                "Nitrate reductase",
                "Plastocyanin-ferredoxin oxidoreductase",
                "Plastoquinol\u2014plastocyanin reductase",
                "Proline dehydrogenase",
                "Rhodoquinol-cytochrome c reductase",
                "Succinate dehydrogenase (Complex II)",
                "Sulfide:quinone oxidoreductase",
                "Sulfite oxidase"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase (Complex I)",
            "Succinate dehydrogenase (Complex II)",
            "Coenzyme Q10 (CoQ10)",
            "Cytochrome bc1 complex (Complex III)",
            "Cytochrome c",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "Electron transfer flavoprotein (ETF)",
            "Electron transfer flavoprotein-ubiquinone oxidoreductase (ETF-QO)",
            "Glycerol-3-phosphate dehydrogenase (GPDH)",
            "Proline dehydrogenase (PRODH)",
            "Choline dehydrogenase (CHDH)",
            "Dihydroorotate dehydrogenase (DHODH)",
            "Electron-transferring-flavoprotein dehydrogenase (ETFDH)",
            "Sulfide:quinone oxidoreductase (SQOR)",
            "NADH-cytochrome b5 reductase (CYB5R)",
            "Ferredoxin-NADP+ reductase (FNR)",
            "Ubiquinone-binding protein (QP-C)",
            "Cytochrome b561 (CYB561)",
            "Succinate dehydrogenase assembly factor 2 (SDHAF2)",
            "NADH dehydrogenase [ubiquinone] 1 alpha subcomplex assembly factor 2 (NDUFAF2)",
            "Cytochrome c oxidase assembly factor 3 (COA3)",
            "Cytochrome c oxidase assembly factor 6 (COA6)",
            "Cytochrome c oxidase copper chaperone (COX17)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase (Complex I)",
                "Succinate dehydrogenase (Complex II)",
                "Coenzyme Q10 (CoQ10)",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome c",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "Electron transfer flavoprotein (ETF)",
                "Electron transfer flavoprotein-ubiquinone oxidoreductase (ETF-QO)",
                "Glycerol-3-phosphate dehydrogenase (GPDH)",
                "Proline dehydrogenase (PRODH)",
                "Choline dehydrogenase (CHDH)",
                "Dihydroorotate dehydrogenase (DHODH)",
                "Electron-transferring-flavoprotein dehydrogenase (ETFDH)",
                "Sulfide:quinone oxidoreductase (SQOR)",
                "NADH-cytochrome b5 reductase (CYB5R)",
                "Ferredoxin-NADP+ reductase (FNR)",
                "Ubiquinone-binding protein (QP-C)",
                "Cytochrome b561 (CYB561)",
                "Succinate dehydrogenase assembly factor 2 (SDHAF2)",
                "NADH dehydrogenase [ubiquinone] 1 alpha subcomplex assembly factor 2 (NDUFAF2)",
                "Cytochrome c oxidase assembly factor 3 (COA3)",
                "Cytochrome c oxidase assembly factor 6 (COA6)",
                "Cytochrome c oxidase copper chaperone (COX17)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "Choline dehydrogenase (CHDH)",
                "Coenzyme Q10 (CoQ10)",
                "Cytochrome b561 (CYB561)",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome c",
                "Cytochrome c oxidase (Complex IV)",
                "Cytochrome c oxidase assembly factor 3 (COA3)",
                "Cytochrome c oxidase assembly factor 6 (COA6)",
                "Cytochrome c oxidase copper chaperone (COX17)",
                "Dihydroorotate dehydrogenase (DHODH)",
                "Electron transfer flavoprotein (ETF)",
                "Electron transfer flavoprotein-ubiquinone oxidoreductase (ETF-QO)",
                "Electron-transferring-flavoprotein dehydrogenase (ETFDH)",
                "Ferredoxin-NADP+ reductase (FNR)",
                "Glycerol-3-phosphate dehydrogenase (GPDH)",
                "NADH dehydrogenase (Complex I)",
                "NADH dehydrogenase [ubiquinone] 1 alpha subcomplex assembly factor 2 (NDUFAF2)",
                "NADH-cytochrome b5 reductase (CYB5R)",
                "Proline dehydrogenase (PRODH)",
                "Succinate dehydrogenase (Complex II)",
                "Succinate dehydrogenase assembly factor 2 (SDHAF2)",
                "Sulfide:quinone oxidoreductase (SQOR)",
                "Ubiquinone-binding protein (QP-C)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase (Complex I)",
            "Succinate dehydrogenase (Complex II)",
            "Cytochrome bc1 complex (Complex III)",
            "Cytochrome c oxidase (Complex IV)",
            "ATP synthase (Complex V)",
            "Ubiquinol-cytochrome c reductase (bc1 complex)",
            "NADH-ubiquinone oxidoreductase (Complex I)",
            "Succinate-ubiquinone reductase (Complex II)",
            "Cytochrome c reductase (bc1 complex)",
            "F0F1 ATP synthase (Complex V)",
            "Cytochrome b6f complex (in plants and cyanobacteria)",
            "Alternative oxidase (in plants and some fungi)",
            "Rieske iron-sulfur protein (in Complex III)",
            "Cytochrome c1 (in Complex III)",
            "Cytochrome b (in Complex III)",
            "Cytochrome a (in Complex IV)",
            "Cytochrome a3 (in Complex IV)",
            "Copper-binding subunits (in Complex IV)",
            "Ubiquinone (Coenzyme Q)",
            "Cytochrome c (mobile electron carrier)",
            "Proton-translocating NADH dehydrogenase (in bacteria)",
            "Cytochrome bd oxidase (in bacteria)",
            "Alternative NADH dehydrogenase (in bacteria)",
            "Rhodoquinol-cytochrome c reductase (in anoxygenic photosynthetic bacteria)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase (Complex I)",
                "Succinate dehydrogenase (Complex II)",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome c oxidase (Complex IV)",
                "ATP synthase (Complex V)",
                "Ubiquinol-cytochrome c reductase (bc1 complex)",
                "NADH-ubiquinone oxidoreductase (Complex I)",
                "Succinate-ubiquinone reductase (Complex II)",
                "Cytochrome c reductase (bc1 complex)",
                "F0F1 ATP synthase (Complex V)",
                "Cytochrome b6f complex (in plants and cyanobacteria)",
                "Alternative oxidase (in plants and some fungi)",
                "Rieske iron-sulfur protein (in Complex III)",
                "Cytochrome c1 (in Complex III)",
                "Cytochrome b (in Complex III)",
                "Cytochrome a (in Complex IV)",
                "Cytochrome a3 (in Complex IV)",
                "Copper-binding subunits (in Complex IV)",
                "Ubiquinone (Coenzyme Q)",
                "Cytochrome c (mobile electron carrier)",
                "Proton-translocating NADH dehydrogenase (in bacteria)",
                "Cytochrome bd oxidase (in bacteria)",
                "Alternative NADH dehydrogenase (in bacteria)",
                "Rhodoquinol-cytochrome c reductase (in anoxygenic photosynthetic bacteria)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP synthase (Complex V)",
                "Alternative NADH dehydrogenase (in bacteria)",
                "Alternative oxidase (in plants and some fungi)",
                "Copper-binding subunits (in Complex IV)",
                "Cytochrome a (in Complex IV)",
                "Cytochrome a3 (in Complex IV)",
                "Cytochrome b (in Complex III)",
                "Cytochrome b6f complex (in plants and cyanobacteria)",
                "Cytochrome bc1 complex (Complex III)",
                "Cytochrome bd oxidase (in bacteria)",
                "Cytochrome c (mobile electron carrier)",
                "Cytochrome c oxidase (Complex IV)",
                "Cytochrome c reductase (bc1 complex)",
                "Cytochrome c1 (in Complex III)",
                "F0F1 ATP synthase (Complex V)",
                "NADH dehydrogenase (Complex I)",
                "NADH-ubiquinone oxidoreductase (Complex I)",
                "Proton-translocating NADH dehydrogenase (in bacteria)",
                "Rhodoquinol-cytochrome c reductase (in anoxygenic photosynthetic bacteria)",
                "Rieske iron-sulfur protein (in Complex III)",
                "Succinate dehydrogenase (Complex II)",
                "Succinate-ubiquinone reductase (Complex II)",
                "Ubiquinol-cytochrome c reductase (bc1 complex)",
                "Ubiquinone (Coenzyme Q)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH:Ubiquinone Oxidoreductase (Complex I) (CI)",
            "Succinate Dehydrogenase (Complex II) (CII)",
            "Ubiquinol-Cytochrome c Oxidoreductase (Complex III) (CIII)",
            "Cytochrome c Oxidase (Complex IV) (CIV)",
            "ATP Synthase (Complex V) (CV)",
            "Ferredoxin-NADP+ Reductase (FNR)",
            "Hydrogenase (H2ase)",
            "Formate Dehydrogenase (FDH)",
            "Glycerol-3-Phosphate Dehydrogenase (G3PDH)",
            "Methylenetetrahydrofolate Reductase (MTHFR)",
            "Nicotinamide Nucleotide Transhydrogenase (NNT)",
            "Dihydrolipoyl Dehydrogenase (DLDH)",
            "Pyruvate Dehydrogenase (PDH)",
            "Alpha-Ketoglutarate Dehydrogenase (KGDH)",
            "Branched-Chain Alpha-Keto Acid Dehydrogenase (BCKDH)",
            "Glutaryl-CoA Dehydrogenase (GCD)",
            "Acyl-CoA Dehydrogenase (ACAD)",
            "Electron Transfer Flavoprotein (ETF)",
            "Electron Transfer Flavoprotein-Ubiquinone Oxidoreductase (ETF-QO)",
            "Choline Dehydrogenase (CHDH)",
            "Sarcosine Dehydrogenase (SARDH)",
            "Dimethylglycine Dehydrogenase (DMGDH)",
            "Betaine-Homocysteine S-Methyltransferase (BHMT)",
            "Methylmalonyl CoA Mutase (MUT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH:Ubiquinone Oxidoreductase (Complex I) (CI)",
                "Succinate Dehydrogenase (Complex II) (CII)",
                "Ubiquinol-Cytochrome c Oxidoreductase (Complex III) (CIII)",
                "Cytochrome c Oxidase (Complex IV) (CIV)",
                "ATP Synthase (Complex V) (CV)",
                "Ferredoxin-NADP+ Reductase (FNR)",
                "Hydrogenase (H2ase)",
                "Formate Dehydrogenase (FDH)",
                "Glycerol-3-Phosphate Dehydrogenase (G3PDH)",
                "Methylenetetrahydrofolate Reductase (MTHFR)",
                "Nicotinamide Nucleotide Transhydrogenase (NNT)",
                "Dihydrolipoyl Dehydrogenase (DLDH)",
                "Pyruvate Dehydrogenase (PDH)",
                "Alpha-Ketoglutarate Dehydrogenase (KGDH)",
                "Branched-Chain Alpha-Keto Acid Dehydrogenase (BCKDH)",
                "Glutaryl-CoA Dehydrogenase (GCD)",
                "Acyl-CoA Dehydrogenase (ACAD)",
                "Electron Transfer Flavoprotein (ETF)",
                "Electron Transfer Flavoprotein-Ubiquinone Oxidoreductase (ETF-QO)",
                "Choline Dehydrogenase (CHDH)",
                "Sarcosine Dehydrogenase (SARDH)",
                "Dimethylglycine Dehydrogenase (DMGDH)",
                "Betaine-Homocysteine S-Methyltransferase (BHMT)",
                "Methylmalonyl CoA Mutase (MUT)"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP Synthase (Complex V) (CV)",
                "Acyl-CoA Dehydrogenase (ACAD)",
                "Alpha-Ketoglutarate Dehydrogenase (KGDH)",
                "Betaine-Homocysteine S-Methyltransferase (BHMT)",
                "Branched-Chain Alpha-Keto Acid Dehydrogenase (BCKDH)",
                "Choline Dehydrogenase (CHDH)",
                "Cytochrome c Oxidase (Complex IV) (CIV)",
                "Dihydrolipoyl Dehydrogenase (DLDH)",
                "Dimethylglycine Dehydrogenase (DMGDH)",
                "Electron Transfer Flavoprotein (ETF)",
                "Electron Transfer Flavoprotein-Ubiquinone Oxidoreductase (ETF-QO)",
                "Ferredoxin-NADP+ Reductase (FNR)",
                "Formate Dehydrogenase (FDH)",
                "Glutaryl-CoA Dehydrogenase (GCD)",
                "Glycerol-3-Phosphate Dehydrogenase (G3PDH)",
                "Hydrogenase (H2ase)",
                "Methylenetetrahydrofolate Reductase (MTHFR)",
                "Methylmalonyl CoA Mutase (MUT)",
                "NADH:Ubiquinone Oxidoreductase (Complex I) (CI)",
                "Nicotinamide Nucleotide Transhydrogenase (NNT)",
                "Pyruvate Dehydrogenase (PDH)",
                "Sarcosine Dehydrogenase (SARDH)",
                "Succinate Dehydrogenase (Complex II) (CII)",
                "Ubiquinol-Cytochrome c Oxidoreductase (Complex III) (CIII)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 1 (NDUFA1)",
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 2 (NDUFA2)",
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 3 (NDUFA3)",
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 4 (NDUFA4)",
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 5 (NDUFA5)",
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 6 (NDUFA6)",
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 7 (NDUFA7)",
            "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 8 (NDUFA8)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 1 (NDUFB1)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 2 (NDUFB2)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 3 (NDUFB3)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 4 (NDUFB4)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 5 (NDUFB5)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 6 (NDUFB6)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 7 (NDUFB7)",
            "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 8 (NDUFB8)",
            "Cytochrome b-c1 complex subunit 1 (UQCRB)",
            "Cytochrome b-c1 complex subunit 2 (UQCRB2)",
            "Cytochrome b-c1 complex subunit 3 (UQCRB3)",
            "Cytochrome b-c1 complex subunit 4 (UQCRB4)",
            "Cytochrome c1 (CYC1)",
            "Cytochrome c oxidase subunit 1 (COX1)",
            "Cytochrome c oxidase subunit 2 (COX2)",
            "Cytochrome c oxidase subunit 3 (COX3)",
            "Cytochrome c oxidase subunit 4 (COX4)",
            "Cytochrome c oxidase subunit 5 (COX5)",
            "Cytochrome c oxidase subunit 6 (COX6)",
            "Cytochrome c oxidase subunit 7 (COX7)",
            "Cytochrome c oxidase subunit 8 (COX8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 1 (NDUFA1)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 2 (NDUFA2)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 3 (NDUFA3)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 4 (NDUFA4)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 5 (NDUFA5)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 6 (NDUFA6)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 7 (NDUFA7)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 8 (NDUFA8)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 1 (NDUFB1)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 2 (NDUFB2)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 3 (NDUFB3)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 4 (NDUFB4)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 5 (NDUFB5)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 6 (NDUFB6)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 7 (NDUFB7)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 8 (NDUFB8)",
                "Cytochrome b-c1 complex subunit 1 (UQCRB)",
                "Cytochrome b-c1 complex subunit 2 (UQCRB2)",
                "Cytochrome b-c1 complex subunit 3 (UQCRB3)",
                "Cytochrome b-c1 complex subunit 4 (UQCRB4)",
                "Cytochrome c1 (CYC1)",
                "Cytochrome c oxidase subunit 1 (COX1)",
                "Cytochrome c oxidase subunit 2 (COX2)",
                "Cytochrome c oxidase subunit 3 (COX3)"
            ],
            "mismatches": [
                "Cytochrome c oxidase subunit 4 (COX4)",
                "Cytochrome c oxidase subunit 5 (COX5)",
                "Cytochrome c oxidase subunit 6 (COX6)",
                "Cytochrome c oxidase subunit 7 (COX7)",
                "Cytochrome c oxidase subunit 8 (COX8)"
            ],
            "true_referents": [
                "Cytochrome b-c1 complex subunit 1 (UQCRB)",
                "Cytochrome b-c1 complex subunit 2 (UQCRB2)",
                "Cytochrome b-c1 complex subunit 3 (UQCRB3)",
                "Cytochrome b-c1 complex subunit 4 (UQCRB4)",
                "Cytochrome c oxidase subunit 1 (COX1)",
                "Cytochrome c oxidase subunit 2 (COX2)",
                "Cytochrome c oxidase subunit 3 (COX3)",
                "Cytochrome c1 (CYC1)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 1 (NDUFA1)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 2 (NDUFA2)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 3 (NDUFA3)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 4 (NDUFA4)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 5 (NDUFA5)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 6 (NDUFA6)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 7 (NDUFA7)",
                "NADH dehydrogenase (ubiquinone) 1 alpha subcomplex 8 (NDUFA8)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 1 (NDUFB1)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 2 (NDUFB2)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 3 (NDUFB3)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 4 (NDUFB4)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 5 (NDUFB5)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 6 (NDUFB6)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 7 (NDUFB7)",
                "NADH dehydrogenase (ubiquinone) 1 beta subcomplex 8 (NDUFB8)"
            ],
            "TP": 24,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase (ubiquinone) (Complex I) (NDUFAF1)",
            " succinate dehydrogenase (ubiquinone) (Complex II) (SDHA)",
            "cytochrome b-c1 complex (Complex III) (CYB5A)",
            "cytochrome c oxidase (Complex IV) (COX1)",
            "flavoplavodoxin (Complex I) (NADH dehydrogenase flavoprotein 1) (NDUFV1)",
            "iron-sulfur protein (Complex I) (NADH dehydrogenase iron-sulfur protein 3) (NDUFS3)",
            "cytochrome b (Complex III) (CYB5B)",
            "Rieske iron-sulfur protein (Complex III) (UQCRFS1)",
            "cytochrome c1 (Complex III) (CYC1)",
            "cytochrome c oxidase subunit I (Complex IV) (COX1)",
            "cytochrome c oxidase subunit II (Complex IV) (COX2)",
            "cytochrome c oxidase subunit III (Complex IV) (COX3)",
            "ATP synthase F0 complex (ATP5F1)",
            "ATP synthase F1 complex (ATP5A1)",
            "coenzyme Q - cytochrome c reductase (Complex III) (UQCRQ)",
            "cytochrome c oxidase subunit IV (Complex IV) (COX4)",
            "cytochrome c oxidase subunit V (Complex IV) (COX5A)",
            "cytochrome c oxidase subunit VI (Complex IV) (COX6A)",
            "cytochrome c oxidase subunit VII (Complex IV) (COX7A)",
            "cytochrome c oxidase subunit VIII (Complex IV) (COX8)",
            "ubiquinol-cytochrome c reductase binding protein (Complex III) (UQCRB)",
            "cytochrome c oxidase subunit 7A1 (Complex IV) (COX7A1)",
            "cytochrome c oxidase subunit 7A2 (Complex IV) (COX7A2)",
            "cytochrome c oxidase subunit 7B (Complex IV) (COX7B)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase (ubiquinone) (Complex I) (NDUFAF1)",
                " succinate dehydrogenase (ubiquinone) (Complex II) (SDHA)",
                "cytochrome b-c1 complex (Complex III) (CYB5A)",
                "cytochrome c oxidase (Complex IV) (COX1)",
                "flavoplavodoxin (Complex I) (NADH dehydrogenase flavoprotein 1) (NDUFV1)",
                "iron-sulfur protein (Complex I) (NADH dehydrogenase iron-sulfur protein 3) (NDUFS3)",
                "cytochrome b (Complex III) (CYB5B)",
                "Rieske iron-sulfur protein (Complex III) (UQCRFS1)",
                "cytochrome c1 (Complex III) (CYC1)",
                "cytochrome c oxidase subunit I (Complex IV) (COX1)",
                "cytochrome c oxidase subunit II (Complex IV) (COX2)",
                "cytochrome c oxidase subunit III (Complex IV) (COX3)",
                "ATP synthase F0 complex (ATP5F1)",
                "ATP synthase F1 complex (ATP5A1)",
                "coenzyme Q - cytochrome c reductase (Complex III) (UQCRQ)",
                "cytochrome c oxidase subunit IV (Complex IV) (COX4)",
                "cytochrome c oxidase subunit V (Complex IV) (COX5A)",
                "cytochrome c oxidase subunit VI (Complex IV) (COX6A)",
                "cytochrome c oxidase subunit VIII (Complex IV) (COX8)",
                "ubiquinol-cytochrome c reductase binding protein (Complex III) (UQCRB)",
                "cytochrome c oxidase subunit 7A1 (Complex IV) (COX7A1)",
                "cytochrome c oxidase subunit 7A2 (Complex IV) (COX7A2)",
                "cytochrome c oxidase subunit 7B (Complex IV) (COX7B)"
            ],
            "mismatches": [],
            "true_referents": [
                " succinate dehydrogenase (ubiquinone) (Complex II) (SDHA)",
                "ATP synthase F0 complex (ATP5F1)",
                "ATP synthase F1 complex (ATP5A1)",
                "NADH dehydrogenase (ubiquinone) (Complex I) (NDUFAF1)",
                "Rieske iron-sulfur protein (Complex III) (UQCRFS1)",
                "coenzyme Q - cytochrome c reductase (Complex III) (UQCRQ)",
                "cytochrome b (Complex III) (CYB5B)",
                "cytochrome b-c1 complex (Complex III) (CYB5A)",
                "cytochrome c oxidase (Complex IV) (COX1)",
                "cytochrome c oxidase subunit 7A1 (Complex IV) (COX7A1)",
                "cytochrome c oxidase subunit 7A2 (Complex IV) (COX7A2)",
                "cytochrome c oxidase subunit 7B (Complex IV) (COX7B)",
                "cytochrome c oxidase subunit I (Complex IV) (COX1)",
                "cytochrome c oxidase subunit II (Complex IV) (COX2)",
                "cytochrome c oxidase subunit III (Complex IV) (COX3)",
                "cytochrome c oxidase subunit IV (Complex IV) (COX4)",
                "cytochrome c oxidase subunit V (Complex IV) (COX5A)",
                "cytochrome c oxidase subunit VI (Complex IV) (COX6A)",
                "cytochrome c oxidase subunit VII (Complex IV) (COX7A)",
                "cytochrome c oxidase subunit VIII (Complex IV) (COX8)",
                "cytochrome c1 (Complex III) (CYC1)",
                "flavoplavodoxin (Complex I) (NADH dehydrogenase flavoprotein 1) (NDUFV1)",
                "iron-sulfur protein (Complex I) (NADH dehydrogenase iron-sulfur protein 3) (NDUFS3)",
                "ubiquinol-cytochrome c reductase binding protein (Complex III) (UQCRB)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "electron transport chain protein complex",
        "domain": "biology",
        "response": [
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 1 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 2 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 3 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 4 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 5 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 6 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 7 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 8 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 9 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 10 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 11 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 12 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 13 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 14 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 15 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 16 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 17 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 18 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 19 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 20 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 21 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 22 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 23 [EC 1.6.5.3]",
            "NADH dehydrogenase [ubiquinone] iron-sulfur protein 24 [EC 1.6.5.3]"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 1 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 2 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 3 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 4 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 5 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 6 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 7 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 8 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 9 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 10 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 11 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 12 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 13 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 14 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 15 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 16 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 17 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 18 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 19 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 20 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 21 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 22 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 23 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 24 [EC 1.6.5.3]"
            ],
            "mismatches": [],
            "true_referents": [
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 1 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 10 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 11 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 12 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 13 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 14 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 15 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 16 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 17 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 18 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 19 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 2 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 20 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 21 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 22 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 23 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 24 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 3 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 4 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 5 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 6 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 7 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 8 [EC 1.6.5.3]",
                "NADH dehydrogenase [ubiquinone] iron-sulfur protein 9 [EC 1.6.5.3]"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    }
]