[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Cytochrome c oxidase",
            "Carbonic anhydrase",
            "Nitrogenase",
            "Superoxide dismutase (SOD)",
            "Lactate dehydrogenase",
            "Ferritin",
            "Iron regulatory protein",
            "Zinc-finger protein",
            "Copper-zinc superoxide dismutase (CuZnSOD)",
            "Catalase",
            "Manganese superoxide dismutase (MnSOD)",
            "Iron-sulfur protein",
            "Metallothionein",
            "Alcohol dehydrogenase",
            "Cytochrome c",
            "Cytochrome P450",
            "Ribonuclease A",
            "Transferrin",
            "Metal-dependent proteases",
            "Iron-containing aconitase",
            "Nickel-containing hydrogenase",
            "Iron-containing superoxide dismutase",
            "Magnesium-containing carbonic anhydrase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Cytochrome c oxidase",
                "Carbonic anhydrase",
                "Nitrogenase",
                "Superoxide dismutase (SOD)",
                "Lactate dehydrogenase",
                "Ferritin",
                "Iron regulatory protein",
                "Zinc-finger protein",
                "Copper-zinc superoxide dismutase (CuZnSOD)",
                "Catalase",
                "Manganese superoxide dismutase (MnSOD)",
                "Iron-sulfur protein",
                "Metallothionein",
                "Alcohol dehydrogenase",
                "Cytochrome c",
                "Cytochrome P450",
                "Ribonuclease A",
                "Transferrin",
                "Metal-dependent proteases",
                "Iron-containing aconitase",
                "Nickel-containing hydrogenase",
                "Iron-containing superoxide dismutase",
                "Magnesium-containing carbonic anhydrase"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase",
                "Carbonic anhydrase",
                "Catalase",
                "Copper-zinc superoxide dismutase (CuZnSOD)",
                "Cytochrome P450",
                "Cytochrome c",
                "Cytochrome c oxidase",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Iron regulatory protein",
                "Iron-containing aconitase",
                "Iron-containing superoxide dismutase",
                "Iron-sulfur protein",
                "Lactate dehydrogenase",
                "Magnesium-containing carbonic anhydrase",
                "Manganese superoxide dismutase (MnSOD)",
                "Metal-dependent proteases",
                "Metallothionein",
                "Nickel-containing hydrogenase",
                "Nitrogenase",
                "Ribonuclease A",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Zinc-finger protein"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c",
            "Cytochrome P450",
            "Superoxide dismutase (SOD)",
            "Catalase",
            "Carboxypeptidase A",
            "Carbonic anhydrase",
            "Zinc finger proteins",
            "Ferroportin",
            "Transferrin",
            "Ferritin",
            "Carbonic anhydrase II (CA II)",
            "Alcohol dehydrogenase",
            "Carboxypeptidase G2",
            "Matrix metalloproteinase-9 (MMP-9)",
            "Neprilysin",
            "Angiotensin-converting enzyme (ACE)",
            "Hephaestin",
            "Manganese superoxide dismutase (MnSOD)",
            "Metallothionein",
            "Hypoxia-inducible factor-prolyl hydroxylase (HIF-PHD)",
            "NADPH oxidase",
            "Carbonic anhydrase IX (CA IX)",
            "Matrix metalloproteinase-2 (MMP-2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c",
                "Cytochrome P450",
                "Superoxide dismutase (SOD)",
                "Catalase",
                "Carboxypeptidase A",
                "Carbonic anhydrase",
                "Zinc finger proteins",
                "Ferroportin",
                "Transferrin",
                "Ferritin",
                "Carbonic anhydrase II (CA II)",
                "Alcohol dehydrogenase",
                "Carboxypeptidase G2",
                "Matrix metalloproteinase-9 (MMP-9)",
                "Neprilysin",
                "Angiotensin-converting enzyme (ACE)",
                "Hephaestin",
                "Manganese superoxide dismutase (MnSOD)",
                "Metallothionein",
                "Hypoxia-inducible factor-prolyl hydroxylase (HIF-PHD)",
                "NADPH oxidase",
                "Carbonic anhydrase IX (CA IX)"
            ],
            "mismatches": [
                "Matrix metalloproteinase-2 (MMP-2)"
            ],
            "true_referents": [
                "Alcohol dehydrogenase",
                "Angiotensin-converting enzyme (ACE)",
                "Carbonic anhydrase",
                "Carbonic anhydrase II (CA II)",
                "Carbonic anhydrase IX (CA IX)",
                "Carboxypeptidase A",
                "Carboxypeptidase G2",
                "Catalase",
                "Cytochrome P450",
                "Cytochrome c",
                "Ferritin",
                "Ferroportin",
                "Hemoglobin (Hb)",
                "Hephaestin",
                "Hypoxia-inducible factor-prolyl hydroxylase (HIF-PHD)",
                "Manganese superoxide dismutase (MnSOD)",
                "Matrix metalloproteinase-9 (MMP-9)",
                "Metallothionein",
                "Myoglobin (Mb)",
                "NADPH oxidase",
                "Neprilysin",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Zinc finger proteins"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c",
            "Carbonic anhydrase (CA)",
            "Superoxide dismutase (SOD)",
            "Catalase (CAT)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Alcohol dehydrogenase (ADH)",
            "Xanthine oxidase",
            "Nitric oxide synthase (NOS)",
            "Ferredoxin",
            "Rubredoxin",
            "Heme oxygenase",
            "Copper-zinc superoxide dismutase (CuZnSOD)",
            "Manganese superoxide dismutase (MnSOD)",
            "Copper-containing amine oxidase",
            "Metallothionein",
            "Copper transporter 1 (CTR1)",
            "Copper transporter 2 (CTR2)",
            "Zinc transporter 1 (ZIP1)",
            "Iron regulatory protein 1 (IRP1)",
            "Magnetosome-associated protein (MamA)",
            "Copper chaperone for superoxide dismutase (CCS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c",
                "Carbonic anhydrase (CA)",
                "Superoxide dismutase (SOD)",
                "Catalase (CAT)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Alcohol dehydrogenase (ADH)",
                "Xanthine oxidase",
                "Nitric oxide synthase (NOS)",
                "Ferredoxin",
                "Rubredoxin",
                "Heme oxygenase",
                "Copper-zinc superoxide dismutase (CuZnSOD)",
                "Manganese superoxide dismutase (MnSOD)",
                "Copper-containing amine oxidase",
                "Metallothionein",
                "Copper transporter 1 (CTR1)",
                "Copper transporter 2 (CTR2)",
                "Zinc transporter 1 (ZIP1)",
                "Iron regulatory protein 1 (IRP1)",
                "Magnetosome-associated protein (MamA)"
            ],
            "mismatches": [
                "Copper chaperone for superoxide dismutase (CCS)"
            ],
            "true_referents": [
                "Alcohol dehydrogenase (ADH)",
                "Carbonic anhydrase (CA)",
                "Catalase (CAT)",
                "Ceruloplasmin",
                "Copper transporter 1 (CTR1)",
                "Copper transporter 2 (CTR2)",
                "Copper-containing amine oxidase",
                "Copper-zinc superoxide dismutase (CuZnSOD)",
                "Cytochrome c",
                "Ferredoxin",
                "Ferritin",
                "Heme oxygenase",
                "Hemoglobin (Hb)",
                "Iron regulatory protein 1 (IRP1)",
                "Magnetosome-associated protein (MamA)",
                "Manganese superoxide dismutase (MnSOD)",
                "Metallothionein",
                "Myoglobin (Mb)",
                "Nitric oxide synthase (NOS)",
                "Rubredoxin",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Xanthine oxidase",
                "Zinc transporter 1 (ZIP1)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Catalase",
            "Superoxide Dismutase (SOD)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Carbonic Anhydrase (CA)",
            "Nitrogenase",
            "Ribonucleotide Reductase (RNR)",
            "Zinc Finger Proteins",
            "Matrix Metalloproteinases (MMPs)",
            "Heme Oxygenase",
            "Plastocyanin",
            "Azurin",
            "Hemerythrin",
            "Vanadium Haloperoxidases",
            "Molybdenum Cofactor Containing Enzymes (e.g., Nitrate Reductase)",
            "Sulfite Reductase",
            "Cobalamin-dependent Enzymes (e.g., Methionine Synthase)",
            "Blue Copper Proteins",
            "Iron-Sulfur Cluster Proteins (e.g., Ferredoxins)",
            "Metallothioneins"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Catalase",
                "Superoxide Dismutase (SOD)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Carbonic Anhydrase (CA)",
                "Nitrogenase",
                "Ribonucleotide Reductase (RNR)",
                "Zinc Finger Proteins",
                "Matrix Metalloproteinases (MMPs)",
                "Heme Oxygenase",
                "Plastocyanin",
                "Azurin",
                "Hemerythrin",
                "Vanadium Haloperoxidases",
                "Molybdenum Cofactor Containing Enzymes (e.g., Nitrate Reductase)",
                "Sulfite Reductase",
                "Cobalamin-dependent Enzymes (e.g., Methionine Synthase)",
                "Blue Copper Proteins",
                "Iron-Sulfur Cluster Proteins (e.g., Ferredoxins)",
                "Metallothioneins"
            ],
            "mismatches": [],
            "true_referents": [
                "Azurin",
                "Blue Copper Proteins",
                "Carbonic Anhydrase (CA)",
                "Catalase",
                "Ceruloplasmin",
                "Cobalamin-dependent Enzymes (e.g., Methionine Synthase)",
                "Cytochrome c (Cyt c)",
                "Ferritin",
                "Heme Oxygenase",
                "Hemerythrin",
                "Hemoglobin (Hb)",
                "Iron-Sulfur Cluster Proteins (e.g., Ferredoxins)",
                "Matrix Metalloproteinases (MMPs)",
                "Metallothioneins",
                "Molybdenum Cofactor Containing Enzymes (e.g., Nitrate Reductase)",
                "Myoglobin (Mb)",
                "Nitrogenase",
                "Plastocyanin",
                "Ribonucleotide Reductase (RNR)",
                "Sulfite Reductase",
                "Superoxide Dismutase (SOD)",
                "Transferrin",
                "Vanadium Haloperoxidases",
                "Zinc Finger Proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Cytochrome P450 (CYP)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Carbonic anhydrase (CA)",
            "Superoxide dismutase (SOD)",
            "Nitrogenase",
            "Alcohol dehydrogenase (ADH)",
            "Catalase",
            "Laccase",
            "Methane monooxygenase (MMO)",
            "Urease",
            "Azurin",
            "Plastocyanin",
            "Ribonucleotide reductase (RNR)",
            "Photosystem II (PSII)",
            "Xanthine oxidase",
            "Nitrate reductase",
            "Hydrogenase",
            "Cobalamin (Vitamin B12)",
            "Nitric oxide synthase (NOS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Cytochrome P450 (CYP)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Carbonic anhydrase (CA)",
                "Superoxide dismutase (SOD)",
                "Nitrogenase",
                "Alcohol dehydrogenase (ADH)",
                "Catalase",
                "Laccase",
                "Methane monooxygenase (MMO)",
                "Urease",
                "Azurin",
                "Plastocyanin",
                "Ribonucleotide reductase (RNR)",
                "Photosystem II (PSII)",
                "Xanthine oxidase",
                "Nitrate reductase",
                "Hydrogenase",
                "Cobalamin (Vitamin B12)",
                "Nitric oxide synthase (NOS)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase (ADH)",
                "Azurin",
                "Carbonic anhydrase (CA)",
                "Catalase",
                "Ceruloplasmin",
                "Cobalamin (Vitamin B12)",
                "Cytochrome P450 (CYP)",
                "Cytochrome c (Cyt c)",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Hydrogenase",
                "Laccase",
                "Methane monooxygenase (MMO)",
                "Myoglobin (Mb)",
                "Nitrate reductase",
                "Nitric oxide synthase (NOS)",
                "Nitrogenase",
                "Photosystem II (PSII)",
                "Plastocyanin",
                "Ribonucleotide reductase (RNR)",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Urease",
                "Xanthine oxidase"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (CYC)",
            "Carbonic anhydrase (CA)",
            "Ferritin (FT)",
            "Transferrin (TF)",
            "Superoxide dismutase (SOD)",
            "Nitric oxide synthase (NOS)",
            "Chlorophyll a (Chl a)",
            "Cytochrome P450 (CYP)",
            "Metallothionein (MT)",
            "Zinc finger proteins (ZFP)",
            "Laccase (LAC)",
            "Ceruloplasmin (CP)",
            "Alcohol dehydrogenase (ADH)",
            "Aconitase (ACO)",
            "Urease (URE)",
            "Ribonucleotide reductase (RNR)",
            "Dipeptidyl peptidase IV (DPP IV)",
            "Nitrate reductase (NR)",
            "Alkaline phosphatase (ALP)",
            "Glyoxalase I (GLO I)",
            "Catalase (CAT)",
            "Peroxidase (POD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (CYC)",
                "Carbonic anhydrase (CA)",
                "Ferritin (FT)",
                "Transferrin (TF)",
                "Superoxide dismutase (SOD)",
                "Nitric oxide synthase (NOS)",
                "Chlorophyll a (Chl a)",
                "Cytochrome P450 (CYP)",
                "Metallothionein (MT)",
                "Zinc finger proteins (ZFP)",
                "Laccase (LAC)",
                "Ceruloplasmin (CP)",
                "Alcohol dehydrogenase (ADH)",
                "Aconitase (ACO)",
                "Urease (URE)",
                "Ribonucleotide reductase (RNR)",
                "Dipeptidyl peptidase IV (DPP IV)",
                "Nitrate reductase (NR)",
                "Alkaline phosphatase (ALP)",
                "Glyoxalase I (GLO I)",
                "Catalase (CAT)",
                "Peroxidase (POD)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aconitase (ACO)",
                "Alcohol dehydrogenase (ADH)",
                "Alkaline phosphatase (ALP)",
                "Carbonic anhydrase (CA)",
                "Catalase (CAT)",
                "Ceruloplasmin (CP)",
                "Chlorophyll a (Chl a)",
                "Cytochrome P450 (CYP)",
                "Cytochrome c (CYC)",
                "Dipeptidyl peptidase IV (DPP IV)",
                "Ferritin (FT)",
                "Glyoxalase I (GLO I)",
                "Hemoglobin (Hb)",
                "Laccase (LAC)",
                "Metallothionein (MT)",
                "Myoglobin (Mb)",
                "Nitrate reductase (NR)",
                "Nitric oxide synthase (NOS)",
                "Peroxidase (POD)",
                "Ribonucleotide reductase (RNR)",
                "Superoxide dismutase (SOD)",
                "Transferrin (TF)",
                "Urease (URE)",
                "Zinc finger proteins (ZFP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochromes (Cyt)",
            "Ferritin (Ft)",
            "Transferrin (Tf)",
            "Lactoferrin (Lf)",
            "Ceruloplasmin (Cp)",
            "Plastocyanin (Pc)",
            "Superoxide Dismutase (SOD)",
            "Zinc Finger Proteins (ZFP)",
            "Calmodulin (CaM)",
            "Troponin C (TnC)",
            "Carbonic Anhydrase (CA)",
            "Alcohol Dehydrogenase (ADH)",
            "Carboxypeptidase (CP)",
            "Matrix Metalloproteinase (MMP)",
            "Angiotensin Converting Enzyme (ACE)",
            "Nitric Oxide Synthase (NOS)",
            "Methane Monooxygenase (MMO)",
            "Rubredoxin (Rd)",
            "Ferredoxin (Fd)",
            "Hydrogenase (H2ase)",
            "Photosystem II (PSII)",
            "Nitrogenase (N2ase)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochromes (Cyt)",
                "Ferritin (Ft)",
                "Transferrin (Tf)",
                "Lactoferrin (Lf)",
                "Ceruloplasmin (Cp)",
                "Plastocyanin (Pc)",
                "Superoxide Dismutase (SOD)",
                "Zinc Finger Proteins (ZFP)",
                "Calmodulin (CaM)",
                "Troponin C (TnC)",
                "Carbonic Anhydrase (CA)",
                "Alcohol Dehydrogenase (ADH)",
                "Carboxypeptidase (CP)",
                "Matrix Metalloproteinase (MMP)",
                "Angiotensin Converting Enzyme (ACE)",
                "Nitric Oxide Synthase (NOS)",
                "Methane Monooxygenase (MMO)",
                "Rubredoxin (Rd)",
                "Ferredoxin (Fd)",
                "Hydrogenase (H2ase)",
                "Photosystem II (PSII)",
                "Nitrogenase (N2ase)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol Dehydrogenase (ADH)",
                "Angiotensin Converting Enzyme (ACE)",
                "Calmodulin (CaM)",
                "Carbonic Anhydrase (CA)",
                "Carboxypeptidase (CP)",
                "Ceruloplasmin (Cp)",
                "Cytochromes (Cyt)",
                "Ferredoxin (Fd)",
                "Ferritin (Ft)",
                "Hemoglobin (Hb)",
                "Hydrogenase (H2ase)",
                "Lactoferrin (Lf)",
                "Matrix Metalloproteinase (MMP)",
                "Methane Monooxygenase (MMO)",
                "Myoglobin (Mb)",
                "Nitric Oxide Synthase (NOS)",
                "Nitrogenase (N2ase)",
                "Photosystem II (PSII)",
                "Plastocyanin (Pc)",
                "Rubredoxin (Rd)",
                "Superoxide Dismutase (SOD)",
                "Transferrin (Tf)",
                "Troponin C (TnC)",
                "Zinc Finger Proteins (ZFP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Catalase",
            "Superoxide dismutase (SOD)",
            "Carbonic anhydrase (CA)",
            "Metallothionein (MT)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Zinc finger protein",
            "Dioxygenase",
            "Nitric oxide synthase (NOS)",
            "Methionine aminopeptidase (MetAP)",
            "Superoxide reductase (SOR)",
            "Nickel-dependent hydrogenase",
            "Iron-sulfur protein",
            "Copper-zinc superoxide dismutase (Cu/Zn SOD)",
            "Mercury reductase",
            "Vanadium-dependent bromoperoxidase",
            "Manganese peroxidase",
            "Iron regulatory protein (IRP)",
            "Zinc-dependent alcohol dehydrogenase",
            "Cobalt-dependent nitrile hydratase",
            "Copper nitrite reductase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Catalase",
                "Superoxide dismutase (SOD)",
                "Carbonic anhydrase (CA)",
                "Metallothionein (MT)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Zinc finger protein",
                "Dioxygenase",
                "Nitric oxide synthase (NOS)",
                "Methionine aminopeptidase (MetAP)",
                "Superoxide reductase (SOR)",
                "Nickel-dependent hydrogenase",
                "Iron-sulfur protein",
                "Copper-zinc superoxide dismutase (Cu/Zn SOD)",
                "Mercury reductase",
                "Vanadium-dependent bromoperoxidase",
                "Manganese peroxidase",
                "Iron regulatory protein (IRP)",
                "Zinc-dependent alcohol dehydrogenase",
                "Cobalt-dependent nitrile hydratase"
            ],
            "mismatches": [
                "Copper nitrite reductase"
            ],
            "true_referents": [
                "Carbonic anhydrase (CA)",
                "Catalase",
                "Ceruloplasmin",
                "Cobalt-dependent nitrile hydratase",
                "Copper-zinc superoxide dismutase (Cu/Zn SOD)",
                "Cytochrome c (Cyt c)",
                "Dioxygenase",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Iron regulatory protein (IRP)",
                "Iron-sulfur protein",
                "Manganese peroxidase",
                "Mercury reductase",
                "Metallothionein (MT)",
                "Methionine aminopeptidase (MetAP)",
                "Myoglobin (Mb)",
                "Nickel-dependent hydrogenase",
                "Nitric oxide synthase (NOS)",
                "Superoxide dismutase (SOD)",
                "Superoxide reductase (SOR)",
                "Transferrin",
                "Vanadium-dependent bromoperoxidase",
                "Zinc finger protein",
                "Zinc-dependent alcohol dehydrogenase"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Ferritin (FER)",
            "Superoxide Dismutase (SOD)",
            "Carbonic Anhydrase (CA)",
            "Catalase (CAT)",
            "Nitrogenase",
            "Metallothionein",
            "Ribonucleotide Reductase",
            "Ferric Uptake Regulator (Fur)",
            "Aldehyde Dehydrogenase (ALDH)",
            "Matrix Metalloproteinase-9 (MMP-9)",
            "Copper-Zinc Superoxide Dismutase (Cu,Zn-SOD)",
            "Zinc Finger Protein 1 (ZFP1)",
            "Ceruloplasmin (CP)",
            "Plastocyanin",
            "Alcohol Dehydrogenase (ADH)",
            "Neuroglobin",
            "S100A1",
            "Alkaline Phosphatase (ALP)",
            "Nitric Oxide Synthase (NOS)",
            "Blue Copper Protein (Azurin)",
            "Phosphoprotein Phosphatase 2A (PP2A)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Ferritin (FER)",
                "Superoxide Dismutase (SOD)",
                "Carbonic Anhydrase (CA)",
                "Catalase (CAT)",
                "Nitrogenase",
                "Metallothionein",
                "Ribonucleotide Reductase",
                "Ferric Uptake Regulator (Fur)",
                "Aldehyde Dehydrogenase (ALDH)",
                "Matrix Metalloproteinase-9 (MMP-9)",
                "Copper-Zinc Superoxide Dismutase (Cu,Zn-SOD)",
                "Zinc Finger Protein 1 (ZFP1)",
                "Ceruloplasmin (CP)",
                "Plastocyanin",
                "Alcohol Dehydrogenase (ADH)",
                "Neuroglobin",
                "S100A1",
                "Alkaline Phosphatase (ALP)",
                "Nitric Oxide Synthase (NOS)",
                "Blue Copper Protein (Azurin)",
                "Phosphoprotein Phosphatase 2A (PP2A)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol Dehydrogenase (ADH)",
                "Aldehyde Dehydrogenase (ALDH)",
                "Alkaline Phosphatase (ALP)",
                "Blue Copper Protein (Azurin)",
                "Carbonic Anhydrase (CA)",
                "Catalase (CAT)",
                "Ceruloplasmin (CP)",
                "Copper-Zinc Superoxide Dismutase (Cu,Zn-SOD)",
                "Cytochrome c (Cyt c)",
                "Ferric Uptake Regulator (Fur)",
                "Ferritin (FER)",
                "Hemoglobin (Hb)",
                "Matrix Metalloproteinase-9 (MMP-9)",
                "Metallothionein",
                "Myoglobin (Mb)",
                "Neuroglobin",
                "Nitric Oxide Synthase (NOS)",
                "Nitrogenase",
                "Phosphoprotein Phosphatase 2A (PP2A)",
                "Plastocyanin",
                "Ribonucleotide Reductase",
                "S100A1",
                "Superoxide Dismutase (SOD)",
                "Zinc Finger Protein 1 (ZFP1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Carbonic anhydrase (CA)",
            "Superoxide dismutase (SOD)",
            "Catalase",
            "Peroxidase",
            "Cytochrome P450 (CYP)",
            "Nitrogenase",
            "Hydrogenase",
            "Plastocyanin",
            "Ferredoxin",
            "Rubredoxin",
            "Aconitase",
            "Alcohol dehydrogenase (ADH)",
            "Carboxypeptidase A",
            "Zinc finger proteins",
            "Metallothionein",
            "Xanthine oxidase",
            "Urease",
            "Tyrosinase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Carbonic anhydrase (CA)",
                "Superoxide dismutase (SOD)",
                "Catalase",
                "Peroxidase",
                "Cytochrome P450 (CYP)",
                "Nitrogenase",
                "Hydrogenase",
                "Plastocyanin",
                "Ferredoxin",
                "Rubredoxin",
                "Aconitase",
                "Alcohol dehydrogenase (ADH)",
                "Carboxypeptidase A",
                "Zinc finger proteins",
                "Metallothionein",
                "Xanthine oxidase",
                "Urease",
                "Tyrosinase"
            ],
            "mismatches": [],
            "true_referents": [
                "Aconitase",
                "Alcohol dehydrogenase (ADH)",
                "Carbonic anhydrase (CA)",
                "Carboxypeptidase A",
                "Catalase",
                "Ceruloplasmin",
                "Cytochrome P450 (CYP)",
                "Cytochrome c (Cyt c)",
                "Ferredoxin",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Hydrogenase",
                "Metallothionein",
                "Myoglobin (Mb)",
                "Nitrogenase",
                "Peroxidase",
                "Plastocyanin",
                "Rubredoxin",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Tyrosinase",
                "Urease",
                "Xanthine oxidase",
                "Zinc finger proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Cytochrome P450 (CYP)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Superoxide dismutase (SOD)",
            "Catalase",
            "Nitrogenase",
            "Hydrogenase",
            "Vitamin B12 (Cobalamin)",
            "Carbonic anhydrase (CA)",
            "Matrix metalloproteinases (MMPs)",
            "Alcohol dehydrogenase (ADH)",
            "Xanthine oxidase (XO)",
            "Aconitase",
            "Ferredoxin",
            "Rubredoxin",
            "Plastocyanin",
            "Azurin",
            "Calmodulin (CaM)",
            "Troponin C (TnC)",
            "Metallothionein (MT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Cytochrome P450 (CYP)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Superoxide dismutase (SOD)",
                "Catalase",
                "Nitrogenase",
                "Hydrogenase",
                "Vitamin B12 (Cobalamin)",
                "Carbonic anhydrase (CA)",
                "Matrix metalloproteinases (MMPs)",
                "Alcohol dehydrogenase (ADH)",
                "Xanthine oxidase (XO)",
                "Aconitase",
                "Ferredoxin",
                "Rubredoxin",
                "Plastocyanin",
                "Azurin",
                "Calmodulin (CaM)",
                "Troponin C (TnC)",
                "Metallothionein (MT)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aconitase",
                "Alcohol dehydrogenase (ADH)",
                "Azurin",
                "Calmodulin (CaM)",
                "Carbonic anhydrase (CA)",
                "Catalase",
                "Ceruloplasmin",
                "Cytochrome P450 (CYP)",
                "Cytochrome c (Cyt c)",
                "Ferredoxin",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Hydrogenase",
                "Matrix metalloproteinases (MMPs)",
                "Metallothionein (MT)",
                "Myoglobin (Mb)",
                "Nitrogenase",
                "Plastocyanin",
                "Rubredoxin",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Troponin C (TnC)",
                "Vitamin B12 (Cobalamin)",
                "Xanthine oxidase (XO)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Cytochrome c (Cyt c)",
            "Catalase (CAT)",
            "Ferritin (FTN)",
            "Transferrin (TF)",
            "Superoxide dismutase (SOD)",
            "Nitrogenase (Nif)",
            "Alcohol dehydrogenase (ADH)",
            "Laccase (Lac)",
            "Cytochrome P450 (CYP450)",
            "Carbonic anhydrase (CA)",
            "Metallothionein (MT)",
            "Ceruloplasmin (CP)",
            "Plastocyanin (Pc)",
            "Rubrerythrin (Rbr)",
            "Hemerythrin (Hr)",
            "Hydrogenase (H2ase)",
            "Methane monooxygenase (MMO)",
            "Urease (URE)",
            "Arginase (ARG)",
            "Nitric oxide synthase (NOS)",
            "Copper-zinc superoxide dismutase (Cu,Zn-SOD)",
            "Manganese superoxide dismutase (Mn-SOD)",
            "Iron-sulfur proteins (Fe-S)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Cytochrome c (Cyt c)",
                "Catalase (CAT)",
                "Ferritin (FTN)",
                "Transferrin (TF)",
                "Superoxide dismutase (SOD)",
                "Nitrogenase (Nif)",
                "Alcohol dehydrogenase (ADH)",
                "Laccase (Lac)",
                "Cytochrome P450 (CYP450)",
                "Carbonic anhydrase (CA)",
                "Metallothionein (MT)",
                "Ceruloplasmin (CP)",
                "Plastocyanin (Pc)",
                "Rubrerythrin (Rbr)",
                "Hemerythrin (Hr)",
                "Hydrogenase (H2ase)",
                "Methane monooxygenase (MMO)",
                "Urease (URE)",
                "Arginase (ARG)",
                "Nitric oxide synthase (NOS)",
                "Copper-zinc superoxide dismutase (Cu,Zn-SOD)",
                "Manganese superoxide dismutase (Mn-SOD)",
                "Iron-sulfur proteins (Fe-S)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase (ADH)",
                "Arginase (ARG)",
                "Carbonic anhydrase (CA)",
                "Catalase (CAT)",
                "Ceruloplasmin (CP)",
                "Copper-zinc superoxide dismutase (Cu,Zn-SOD)",
                "Cytochrome P450 (CYP450)",
                "Cytochrome c (Cyt c)",
                "Ferritin (FTN)",
                "Hemerythrin (Hr)",
                "Hemoglobin (Hb)",
                "Hydrogenase (H2ase)",
                "Iron-sulfur proteins (Fe-S)",
                "Laccase (Lac)",
                "Manganese superoxide dismutase (Mn-SOD)",
                "Metallothionein (MT)",
                "Methane monooxygenase (MMO)",
                "Nitric oxide synthase (NOS)",
                "Nitrogenase (Nif)",
                "Plastocyanin (Pc)",
                "Rubrerythrin (Rbr)",
                "Superoxide dismutase (SOD)",
                "Transferrin (TF)",
                "Urease (URE)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Superoxide dismutase (SOD)",
            "Carbonic anhydrase (CA)",
            "Carboxypeptidase A (CPA)",
            "Alcohol dehydrogenase (ADH)",
            "Catalase (CAT)",
            "Ferritin (FTN)",
            "Transferrin (TF)",
            "Ceruloplasmin (CP)",
            "Metallothionein (MT)",
            "Vitamin B12-dependent enzymes",
            "Nitrogenase (NifH)",
            "Hydrogenase (Hyd)",
            "Cytochrome P450 (CYP)",
            "Glutathione peroxidase (GPx)",
            "Thrombin (FIIa)",
            "Alkaline phosphatase (ALP)",
            "Xanthine oxidase (XO)",
            "Ribonucleotide reductase (RNR)",
            "Acetylcholinesterase (AChE)",
            "Arginase (ARG)",
            "Carnitine acyltransferase (CAT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Superoxide dismutase (SOD)",
                "Carbonic anhydrase (CA)",
                "Carboxypeptidase A (CPA)",
                "Alcohol dehydrogenase (ADH)",
                "Catalase (CAT)",
                "Ferritin (FTN)",
                "Transferrin (TF)",
                "Ceruloplasmin (CP)",
                "Metallothionein (MT)",
                "Vitamin B12-dependent enzymes",
                "Nitrogenase (NifH)",
                "Hydrogenase (Hyd)",
                "Cytochrome P450 (CYP)",
                "Glutathione peroxidase (GPx)",
                "Thrombin (FIIa)",
                "Alkaline phosphatase (ALP)",
                "Xanthine oxidase (XO)",
                "Ribonucleotide reductase (RNR)",
                "Acetylcholinesterase (AChE)",
                "Arginase (ARG)",
                "Carnitine acyltransferase (CAT)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylcholinesterase (AChE)",
                "Alcohol dehydrogenase (ADH)",
                "Alkaline phosphatase (ALP)",
                "Arginase (ARG)",
                "Carbonic anhydrase (CA)",
                "Carboxypeptidase A (CPA)",
                "Carnitine acyltransferase (CAT)",
                "Catalase (CAT)",
                "Ceruloplasmin (CP)",
                "Cytochrome P450 (CYP)",
                "Cytochrome c (Cyt c)",
                "Ferritin (FTN)",
                "Glutathione peroxidase (GPx)",
                "Hemoglobin (Hb)",
                "Hydrogenase (Hyd)",
                "Metallothionein (MT)",
                "Myoglobin (Mb)",
                "Nitrogenase (NifH)",
                "Ribonucleotide reductase (RNR)",
                "Superoxide dismutase (SOD)",
                "Thrombin (FIIa)",
                "Transferrin (TF)",
                "Vitamin B12-dependent enzymes",
                "Xanthine oxidase (XO)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Carbonic anhydrase (CA)",
            "Lactoferrin",
            "Thioredoxin",
            "Ferredoxin",
            "Copper-zinc superoxide dismutase (SOD1)",
            "Manganese superoxide dismutase (SOD2)",
            "Iron-containing superoxide dismutase (SOD3)",
            "Cytochrome c oxidase",
            "Cytochrome b5",
            "Cytochrome P450",
            "Lysyl oxidase",
            "Dopamine beta-hydroxylase",
            "Tyrosinase",
            "Phenylalanine hydroxylase",
            "Aconitase",
            "Isocitrate dehydrogenase",
            "Succinate dehydrogenase",
            "Fumarase",
            "Aldolase",
            "Enolase",
            "Pyruvate kinase",
            "Carbonic anhydrase II",
            "Carbonic anhydrase IV",
            "Carbonic anhydrase IX",
            "Carbonic anhydrase XII"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbonic anhydrase (CA)",
                "Lactoferrin",
                "Thioredoxin",
                "Ferredoxin",
                "Copper-zinc superoxide dismutase (SOD1)",
                "Manganese superoxide dismutase (SOD2)",
                "Iron-containing superoxide dismutase (SOD3)",
                "Cytochrome c oxidase",
                "Cytochrome b5",
                "Cytochrome P450",
                "Lysyl oxidase",
                "Dopamine beta-hydroxylase",
                "Tyrosinase",
                "Phenylalanine hydroxylase",
                "Aconitase",
                "Isocitrate dehydrogenase",
                "Succinate dehydrogenase",
                "Fumarase",
                "Aldolase",
                "Enolase",
                "Pyruvate kinase",
                "Carbonic anhydrase II",
                "Carbonic anhydrase IV",
                "Carbonic anhydrase IX"
            ],
            "mismatches": [
                "Carbonic anhydrase XII"
            ],
            "true_referents": [
                "Aconitase",
                "Aldolase",
                "Carbonic anhydrase (CA)",
                "Carbonic anhydrase II",
                "Carbonic anhydrase IV",
                "Carbonic anhydrase IX",
                "Copper-zinc superoxide dismutase (SOD1)",
                "Cytochrome P450",
                "Cytochrome b5",
                "Cytochrome c oxidase",
                "Dopamine beta-hydroxylase",
                "Enolase",
                "Ferredoxin",
                "Fumarase",
                "Iron-containing superoxide dismutase (SOD3)",
                "Isocitrate dehydrogenase",
                "Lactoferrin",
                "Lysyl oxidase",
                "Manganese superoxide dismutase (SOD2)",
                "Phenylalanine hydroxylase",
                "Pyruvate kinase",
                "Succinate dehydrogenase",
                "Thioredoxin",
                "Tyrosinase"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Albumin (ALB)",
            "Carboxypeptidase A (CPA)",
            "Cytochrome c oxidase (COX)",
            "Cytochrome P450 (CYP)",
            "Ferritin (FRT)",
            "Hemoglobin (Hb)",
            "Hemocyanin (Hmc)",
            "Lactoferrin (LTF)",
            "Metallothionein (MT)",
            "Myoglobin (Mb)",
            "Plastocyanin (PC)",
            "Superoxide dismutase (SOD)",
            "Tyrosinase (TYR)",
            "Urease (URE)",
            "Ceruloplasmin (CP)",
            "Transferrin (TF)",
            "Lipocalin (LCN)",
            "Carbonic anhydrase (CAH)",
            "Alcohol dehydrogenase (ADH)",
            " Catalase (CAT)",
            "Chlorophyll (Chl)",
            "Phospholipase A2 (PLA2)",
            "Xanthine oxidase (XO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Albumin (ALB)",
                "Carboxypeptidase A (CPA)",
                "Cytochrome c oxidase (COX)",
                "Cytochrome P450 (CYP)",
                "Ferritin (FRT)",
                "Hemoglobin (Hb)",
                "Hemocyanin (Hmc)",
                "Lactoferrin (LTF)",
                "Metallothionein (MT)",
                "Myoglobin (Mb)",
                "Plastocyanin (PC)",
                "Superoxide dismutase (SOD)",
                "Tyrosinase (TYR)",
                "Urease (URE)",
                "Ceruloplasmin (CP)",
                "Transferrin (TF)",
                "Lipocalin (LCN)",
                "Carbonic anhydrase (CAH)",
                "Alcohol dehydrogenase (ADH)",
                " Catalase (CAT)",
                "Chlorophyll (Chl)",
                "Phospholipase A2 (PLA2)",
                "Xanthine oxidase (XO)"
            ],
            "mismatches": [],
            "true_referents": [
                " Catalase (CAT)",
                "Albumin (ALB)",
                "Alcohol dehydrogenase (ADH)",
                "Carbonic anhydrase (CAH)",
                "Carboxypeptidase A (CPA)",
                "Ceruloplasmin (CP)",
                "Chlorophyll (Chl)",
                "Cytochrome P450 (CYP)",
                "Cytochrome c oxidase (COX)",
                "Ferritin (FRT)",
                "Hemocyanin (Hmc)",
                "Hemoglobin (Hb)",
                "Lactoferrin (LTF)",
                "Lipocalin (LCN)",
                "Metallothionein (MT)",
                "Myoglobin (Mb)",
                "Phospholipase A2 (PLA2)",
                "Plastocyanin (PC)",
                "Superoxide dismutase (SOD)",
                "Transferrin (TF)",
                "Tyrosinase (TYR)",
                "Urease (URE)",
                "Xanthine oxidase (XO)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]