[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Cytochrome c oxidase",
            "Carbonic anhydrase",
            "Nitrogenase",
            "Superoxide dismutase (SOD)",
            "Ferritin",
            "Lactoferrin",
            "Transferrin",
            "Metallothionein",
            "Copper-zinc superoxide dismutase",
            "Iron-sulfur protein",
            "Magnesium-dependent phosphatases",
            "Manganese superoxide dismutase",
            "Zinc finger proteins",
            "Catalase",
            "Alkaline phosphatase",
            "DNA polymerase",
            "DNA helicase",
            "DNA ligase",
            "RNA polymerase",
            "DNA gyrase",
            "Topoisomerase",
            "Sirtuin",
            "Proteasome"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Cytochrome c oxidase",
                "Carbonic anhydrase",
                "Nitrogenase",
                "Superoxide dismutase (SOD)",
                "Ferritin",
                "Lactoferrin",
                "Transferrin",
                "Metallothionein",
                "Copper-zinc superoxide dismutase",
                "Iron-sulfur protein",
                "Magnesium-dependent phosphatases",
                "Manganese superoxide dismutase",
                "Zinc finger proteins",
                "Catalase",
                "Alkaline phosphatase",
                "DNA polymerase",
                "DNA helicase",
                "DNA ligase",
                "RNA polymerase",
                "DNA gyrase",
                "Topoisomerase",
                "Sirtuin",
                "Proteasome"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaline phosphatase",
                "Carbonic anhydrase",
                "Catalase",
                "Copper-zinc superoxide dismutase",
                "Cytochrome c oxidase",
                "DNA gyrase",
                "DNA helicase",
                "DNA ligase",
                "DNA polymerase",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Iron-sulfur protein",
                "Lactoferrin",
                "Magnesium-dependent phosphatases",
                "Manganese superoxide dismutase",
                "Metallothionein",
                "Nitrogenase",
                "Proteasome",
                "RNA polymerase",
                "Sirtuin",
                "Superoxide dismutase (SOD)",
                "Topoisomerase",
                "Transferrin",
                "Zinc finger proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c",
            "Cytochrome P450",
            "Superoxide dismutase (SOD)",
            "Carbonic anhydrase",
            "Zinc finger proteins",
            "Catalase",
            "Ferritin",
            "Ribonucleotide reductase",
            "Nitrogenase",
            "Transferrin",
            "Lactoferrin",
            "Alcohol dehydrogenase",
            "Carboxypeptidase A",
            "Xanthine oxidase",
            "Methionine synthase",
            "Nitric oxide synthase",
            "Urease",
            "Alkaline phosphatase",
            "Prolyl hydroxylase",
            "Aconitase",
            "Cobalamin-dependent methionine synthase",
            "Peptidylglycine alpha-amidating monooxygenase",
            "Aromatase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c",
                "Cytochrome P450",
                "Superoxide dismutase (SOD)",
                "Carbonic anhydrase",
                "Zinc finger proteins",
                "Catalase",
                "Ferritin",
                "Ribonucleotide reductase",
                "Nitrogenase",
                "Transferrin",
                "Lactoferrin",
                "Alcohol dehydrogenase",
                "Carboxypeptidase A",
                "Xanthine oxidase",
                "Methionine synthase",
                "Nitric oxide synthase",
                "Urease",
                "Alkaline phosphatase",
                "Prolyl hydroxylase",
                "Aconitase",
                "Cobalamin-dependent methionine synthase",
                "Peptidylglycine alpha-amidating monooxygenase"
            ],
            "mismatches": [
                "Aromatase"
            ],
            "true_referents": [
                "Aconitase",
                "Alcohol dehydrogenase",
                "Alkaline phosphatase",
                "Carbonic anhydrase",
                "Carboxypeptidase A",
                "Catalase",
                "Cobalamin-dependent methionine synthase",
                "Cytochrome P450",
                "Cytochrome c",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Lactoferrin",
                "Methionine synthase",
                "Myoglobin (Mb)",
                "Nitric oxide synthase",
                "Nitrogenase",
                "Peptidylglycine alpha-amidating monooxygenase",
                "Prolyl hydroxylase",
                "Ribonucleotide reductase",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Urease",
                "Xanthine oxidase",
                "Zinc finger proteins"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c",
            "Carbonic Anhydrase",
            "Superoxide Dismutase",
            "Catalase",
            "Copper-Zinc Superoxide Dismutase (SOD1)",
            "Manganese Superoxide Dismutase (SOD2)",
            "Iron Superoxide Dismutase (SOD3)",
            "Ferritin",
            "Heme Oxygenase",
            "Nitric Oxide Synthase (NOS)",
            "Cytochrome P450",
            "Cytochrome b5",
            "Copper Amine Oxidase",
            "Zinc Finger Proteins",
            "Alcohol Dehydrogenase",
            "Xanthine Oxidase",
            "Tyrosinase",
            "Lactate Dehydrogenase",
            "Alkaline Phosphatase",
            "Monoamine Oxidase",
            "Nitrate Reductase",
            "Methyltransferase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c",
                "Carbonic Anhydrase",
                "Superoxide Dismutase",
                "Catalase",
                "Copper-Zinc Superoxide Dismutase (SOD1)",
                "Manganese Superoxide Dismutase (SOD2)",
                "Iron Superoxide Dismutase (SOD3)",
                "Ferritin",
                "Heme Oxygenase",
                "Nitric Oxide Synthase (NOS)",
                "Cytochrome P450",
                "Cytochrome b5",
                "Copper Amine Oxidase",
                "Zinc Finger Proteins",
                "Alcohol Dehydrogenase",
                "Xanthine Oxidase",
                "Tyrosinase",
                "Lactate Dehydrogenase",
                "Alkaline Phosphatase",
                "Monoamine Oxidase",
                "Nitrate Reductase",
                "Methyltransferase"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol Dehydrogenase",
                "Alkaline Phosphatase",
                "Carbonic Anhydrase",
                "Catalase",
                "Copper Amine Oxidase",
                "Copper-Zinc Superoxide Dismutase (SOD1)",
                "Cytochrome P450",
                "Cytochrome b5",
                "Cytochrome c",
                "Ferritin",
                "Heme Oxygenase",
                "Hemoglobin (Hb)",
                "Iron Superoxide Dismutase (SOD3)",
                "Lactate Dehydrogenase",
                "Manganese Superoxide Dismutase (SOD2)",
                "Methyltransferase",
                "Monoamine Oxidase",
                "Myoglobin (Mb)",
                "Nitrate Reductase",
                "Nitric Oxide Synthase (NOS)",
                "Superoxide Dismutase",
                "Tyrosinase",
                "Xanthine Oxidase",
                "Zinc Finger Proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Catalase",
            "Superoxide Dismutase (SOD)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Carbonic Anhydrase (CA)",
            "Nitrogenase",
            "Ribonucleotide Reductase (RNR)",
            "Zinc Finger Proteins",
            "Metallothioneins",
            "Plastocyanin",
            "Cytochrome c Oxidase (CcO)",
            "Heme Peroxidases",
            "Argonautes (AGO)",
            "Siderophores (bacterial)",
            "Purple Phototrophic Bacteria Reaction Centers",
            "Manganese Catalase",
            "Copper-Zinc Superoxide Dismutase",
            "Iron-Sulfur Cluster Proteins",
            "Vanadium Haloperoxidases",
            "Molybdenum Cofactor Containing Enzymes"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Catalase",
                "Superoxide Dismutase (SOD)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Carbonic Anhydrase (CA)",
                "Nitrogenase",
                "Ribonucleotide Reductase (RNR)",
                "Zinc Finger Proteins",
                "Metallothioneins",
                "Plastocyanin",
                "Cytochrome c Oxidase (CcO)",
                "Heme Peroxidases",
                "Argonautes (AGO)",
                "Siderophores (bacterial)",
                "Purple Phototrophic Bacteria Reaction Centers",
                "Manganese Catalase",
                "Copper-Zinc Superoxide Dismutase",
                "Iron-Sulfur Cluster Proteins",
                "Vanadium Haloperoxidases",
                "Molybdenum Cofactor Containing Enzymes"
            ],
            "mismatches": [],
            "true_referents": [
                "Argonautes (AGO)",
                "Carbonic Anhydrase (CA)",
                "Catalase",
                "Ceruloplasmin",
                "Copper-Zinc Superoxide Dismutase",
                "Cytochrome c (Cyt c)",
                "Cytochrome c Oxidase (CcO)",
                "Ferritin",
                "Heme Peroxidases",
                "Hemoglobin (Hb)",
                "Iron-Sulfur Cluster Proteins",
                "Manganese Catalase",
                "Metallothioneins",
                "Molybdenum Cofactor Containing Enzymes",
                "Myoglobin (Mb)",
                "Nitrogenase",
                "Plastocyanin",
                "Purple Phototrophic Bacteria Reaction Centers",
                "Ribonucleotide Reductase (RNR)",
                "Siderophores (bacterial)",
                "Superoxide Dismutase (SOD)",
                "Transferrin",
                "Vanadium Haloperoxidases",
                "Zinc Finger Proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Cytochrome c (Cyt c)",
            "Carbonic anhydrase (CA)",
            "Superoxide dismutase (SOD)",
            "Catalase",
            "Ferritin",
            "Myoglobin",
            "Nitrogenase",
            "Ribonucleotide reductase (RNR)",
            "Laccase",
            "Ceruloplasmin",
            "Azurin",
            "Plastocyanin",
            "Nitric oxide synthase (NOS)",
            "Peroxidase",
            "Alcohol dehydrogenase (ADH)",
            "Xanthine oxidase (XO)",
            "Methionine synthase",
            "Lactoferrin",
            "Transferrin",
            "Zinc finger protein",
            "Blue copper protein",
            "Glutathione peroxidase (GPx)",
            "Iron-sulfur protein"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Cytochrome c (Cyt c)",
                "Carbonic anhydrase (CA)",
                "Superoxide dismutase (SOD)",
                "Catalase",
                "Ferritin",
                "Myoglobin",
                "Nitrogenase",
                "Ribonucleotide reductase (RNR)",
                "Laccase",
                "Ceruloplasmin",
                "Azurin",
                "Plastocyanin",
                "Nitric oxide synthase (NOS)",
                "Peroxidase",
                "Alcohol dehydrogenase (ADH)",
                "Xanthine oxidase (XO)",
                "Methionine synthase",
                "Lactoferrin",
                "Transferrin",
                "Zinc finger protein",
                "Blue copper protein",
                "Glutathione peroxidase (GPx)",
                "Iron-sulfur protein"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase (ADH)",
                "Azurin",
                "Blue copper protein",
                "Carbonic anhydrase (CA)",
                "Catalase",
                "Ceruloplasmin",
                "Cytochrome c (Cyt c)",
                "Ferritin",
                "Glutathione peroxidase (GPx)",
                "Hemoglobin (Hb)",
                "Iron-sulfur protein",
                "Laccase",
                "Lactoferrin",
                "Methionine synthase",
                "Myoglobin",
                "Nitric oxide synthase (NOS)",
                "Nitrogenase",
                "Peroxidase",
                "Plastocyanin",
                "Ribonucleotide reductase (RNR)",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Xanthine oxidase (XO)",
                "Zinc finger protein"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Carbonic anhydrase (CA)",
            "Superoxide dismutase (SOD)",
            "Catalase (CAT)",
            "Ferritin (Ft)",
            "Transferrin (Tf)",
            "Metallothionein (MT)",
            "Nitric oxide synthase (NOS)",
            "Alcohol dehydrogenase (ADH)",
            "Aconitase (ACO)",
            "Phospholipase C (PLC)",
            "Arginase (ARG)",
            "Alkaline phosphatase (ALP)",
            "Glutamate dehydrogenase (GDH)",
            "Ribonucleotide reductase (RNR)",
            "Choline acetyltransferase (ChAT)",
            "Lactate dehydrogenase (LDH)",
            "Creatine kinase (CK)",
            "Dipeptidyl peptidase (DPP)",
            "Urease (URE)",
            "Cytochrome P450 (CYP)",
            "Nitric oxide reductase (NOR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Carbonic anhydrase (CA)",
                "Superoxide dismutase (SOD)",
                "Catalase (CAT)",
                "Ferritin (Ft)",
                "Transferrin (Tf)",
                "Metallothionein (MT)",
                "Nitric oxide synthase (NOS)",
                "Alcohol dehydrogenase (ADH)",
                "Aconitase (ACO)",
                "Phospholipase C (PLC)",
                "Arginase (ARG)",
                "Alkaline phosphatase (ALP)",
                "Glutamate dehydrogenase (GDH)",
                "Ribonucleotide reductase (RNR)",
                "Choline acetyltransferase (ChAT)",
                "Lactate dehydrogenase (LDH)",
                "Creatine kinase (CK)",
                "Dipeptidyl peptidase (DPP)",
                "Urease (URE)",
                "Cytochrome P450 (CYP)",
                "Nitric oxide reductase (NOR)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aconitase (ACO)",
                "Alcohol dehydrogenase (ADH)",
                "Alkaline phosphatase (ALP)",
                "Arginase (ARG)",
                "Carbonic anhydrase (CA)",
                "Catalase (CAT)",
                "Choline acetyltransferase (ChAT)",
                "Creatine kinase (CK)",
                "Cytochrome P450 (CYP)",
                "Cytochrome c (Cyt c)",
                "Dipeptidyl peptidase (DPP)",
                "Ferritin (Ft)",
                "Glutamate dehydrogenase (GDH)",
                "Hemoglobin (Hb)",
                "Lactate dehydrogenase (LDH)",
                "Metallothionein (MT)",
                "Myoglobin (Mb)",
                "Nitric oxide reductase (NOR)",
                "Nitric oxide synthase (NOS)",
                "Phospholipase C (PLC)",
                "Ribonucleotide reductase (RNR)",
                "Superoxide dismutase (SOD)",
                "Transferrin (Tf)",
                "Urease (URE)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Ferritin",
            "Transferrin",
            "Cytochrome c",
            "Metallothionein",
            "Superoxide dismutase (SOD)",
            "Zinc finger proteins",
            "Calmodulin",
            "Photosystem II protein complex",
            "Photosystem I protein complex",
            "Plastocyanin",
            "Rubisco",
            "Alcohol dehydrogenase",
            "Nitrogenase",
            "Carbonic anhydrase",
            "Cytochrome P450",
            "Lactase",
            "Collagen",
            "Insulin",
            "Leptin",
            "Melanin-concentrating hormone",
            "Ceruloplasmin",
            "Galactosidase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Ferritin",
                "Transferrin",
                "Cytochrome c",
                "Metallothionein",
                "Superoxide dismutase (SOD)",
                "Zinc finger proteins",
                "Calmodulin",
                "Photosystem II protein complex",
                "Photosystem I protein complex",
                "Plastocyanin",
                "Rubisco",
                "Alcohol dehydrogenase",
                "Nitrogenase",
                "Carbonic anhydrase",
                "Cytochrome P450",
                "Lactase",
                "Collagen",
                "Insulin",
                "Leptin",
                "Melanin-concentrating hormone",
                "Ceruloplasmin",
                "Galactosidase"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase",
                "Calmodulin",
                "Carbonic anhydrase",
                "Ceruloplasmin",
                "Collagen",
                "Cytochrome P450",
                "Cytochrome c",
                "Ferritin",
                "Galactosidase",
                "Hemoglobin (Hb)",
                "Insulin",
                "Lactase",
                "Leptin",
                "Melanin-concentrating hormone",
                "Metallothionein",
                "Myoglobin (Mb)",
                "Nitrogenase",
                "Photosystem I protein complex",
                "Photosystem II protein complex",
                "Plastocyanin",
                "Rubisco",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Zinc finger proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin",
            "Myoglobin",
            "Cytochrome c",
            "Ferritin",
            "Superoxide dismutase",
            "Carbonic anhydrase",
            "Metallothionein",
            "Ceruloplasmin",
            "Transferrin",
            "Lactoferrin",
            "Catalase",
            "Manganese superoxide dismutase",
            "Zinc finger protein",
            "Metalloproteinase",
            "Metallochaperone",
            "Metalloregulatory protein",
            "Metalloenzyme",
            "Metalloporphyrin",
            "Metalloregulator",
            "Metalloprotein",
            "Metalloregulatory transcription factor",
            "Metalloregulatory sensor kinase",
            "Metalloregulatory protein binding site",
            "Metalloregulatory protein binding motif",
            "Metalloregulatory protein binding domain"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin",
                "Myoglobin",
                "Cytochrome c",
                "Ferritin",
                "Superoxide dismutase",
                "Carbonic anhydrase",
                "Metallothionein",
                "Ceruloplasmin",
                "Transferrin",
                "Lactoferrin",
                "Catalase",
                "Manganese superoxide dismutase",
                "Zinc finger protein",
                "Metalloproteinase",
                "Metallochaperone",
                "Metalloregulatory protein",
                "Metalloenzyme",
                "Metalloporphyrin",
                "Metalloregulator",
                "Metalloprotein",
                "Metalloregulatory transcription factor",
                "Metalloregulatory sensor kinase",
                "Metalloregulatory protein binding site",
                "Metalloregulatory protein binding motif"
            ],
            "mismatches": [
                "Metalloregulatory protein binding domain"
            ],
            "true_referents": [
                "Carbonic anhydrase",
                "Catalase",
                "Ceruloplasmin",
                "Cytochrome c",
                "Ferritin",
                "Hemoglobin",
                "Lactoferrin",
                "Manganese superoxide dismutase",
                "Metallochaperone",
                "Metalloenzyme",
                "Metalloporphyrin",
                "Metalloprotein",
                "Metalloproteinase",
                "Metalloregulator",
                "Metalloregulatory protein",
                "Metalloregulatory protein binding motif",
                "Metalloregulatory protein binding site",
                "Metalloregulatory sensor kinase",
                "Metalloregulatory transcription factor",
                "Metallothionein",
                "Myoglobin",
                "Superoxide dismutase",
                "Transferrin",
                "Zinc finger protein"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c",
            "Ferritin",
            "Transferrin",
            "Superoxide Dismutase (SOD)",
            "Carbonic Anhydrase",
            "Nitrogenase",
            "Catalase",
            "Zinc Finger Protein",
            "Alcohol Dehydrogenase",
            "Glutamine Synthetase",
            "Metallothionein",
            "Ribulose-1,5-bisphosphate carboxylase/oxygenase (RuBisCO)",
            "DNA Polymerase",
            "ATP Synthase",
            "Photosystem II",
            "Plastocyanin",
            "Cytochrome P450",
            "Magnesium Chelatase",
            "Urease",
            "Copper Transporter Protein",
            "Iron Regulatory Protein",
            "Methionine Sulfoxide Reductase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c",
                "Ferritin",
                "Transferrin",
                "Superoxide Dismutase (SOD)",
                "Carbonic Anhydrase",
                "Nitrogenase",
                "Catalase",
                "Zinc Finger Protein",
                "Alcohol Dehydrogenase",
                "Glutamine Synthetase",
                "Metallothionein",
                "Ribulose-1,5-bisphosphate carboxylase/oxygenase (RuBisCO)",
                "DNA Polymerase",
                "ATP Synthase",
                "Photosystem II",
                "Plastocyanin",
                "Cytochrome P450",
                "Magnesium Chelatase",
                "Urease",
                "Copper Transporter Protein",
                "Iron Regulatory Protein",
                "Methionine Sulfoxide Reductase"
            ],
            "mismatches": [],
            "true_referents": [
                "ATP Synthase",
                "Alcohol Dehydrogenase",
                "Carbonic Anhydrase",
                "Catalase",
                "Copper Transporter Protein",
                "Cytochrome P450",
                "Cytochrome c",
                "DNA Polymerase",
                "Ferritin",
                "Glutamine Synthetase",
                "Hemoglobin (Hb)",
                "Iron Regulatory Protein",
                "Magnesium Chelatase",
                "Metallothionein",
                "Methionine Sulfoxide Reductase",
                "Myoglobin (Mb)",
                "Nitrogenase",
                "Photosystem II",
                "Plastocyanin",
                "Ribulose-1,5-bisphosphate carboxylase/oxygenase (RuBisCO)",
                "Superoxide Dismutase (SOD)",
                "Transferrin",
                "Urease",
                "Zinc Finger Protein"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Carbonic anhydrase",
            "Superoxide dismutase (SOD)",
            "Catalase",
            "Cytochrome P450",
            "Metallothionein",
            "Nitrogenase",
            "Alcohol dehydrogenase (ADH)",
            "Carboxypeptidase A",
            "Thermolysin",
            "Zinc finger proteins",
            "Ferredoxin",
            "Plastocyanin",
            "Azurin",
            "Rubredoxin",
            "Calmodulin",
            "Troponin C",
            "Lactoferrin",
            "Peroxidase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Carbonic anhydrase",
                "Superoxide dismutase (SOD)",
                "Catalase",
                "Cytochrome P450",
                "Metallothionein",
                "Nitrogenase",
                "Alcohol dehydrogenase (ADH)",
                "Carboxypeptidase A",
                "Thermolysin",
                "Zinc finger proteins",
                "Ferredoxin",
                "Plastocyanin",
                "Azurin",
                "Rubredoxin",
                "Calmodulin",
                "Troponin C",
                "Lactoferrin",
                "Peroxidase"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase (ADH)",
                "Azurin",
                "Calmodulin",
                "Carbonic anhydrase",
                "Carboxypeptidase A",
                "Catalase",
                "Ceruloplasmin",
                "Cytochrome P450",
                "Cytochrome c",
                "Ferredoxin",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Lactoferrin",
                "Metallothionein",
                "Myoglobin (Mb)",
                "Nitrogenase",
                "Peroxidase",
                "Plastocyanin",
                "Rubredoxin",
                "Superoxide dismutase (SOD)",
                "Thermolysin",
                "Transferrin",
                "Troponin C",
                "Zinc finger proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Ferritin",
            "Transferrin",
            "Ceruloplasmin",
            "Superoxide dismutase (SOD)",
            "Catalase",
            "Carbonic anhydrase (CA)",
            "Alcohol dehydrogenase (ADH)",
            "Alkaline phosphatase (ALP)",
            "Carboxypeptidase A (CPA)",
            "Thermolysin",
            "Nitrogenase",
            "Hydrogenase",
            "Vitamin B12-dependent enzymes",
            "Zinc finger proteins",
            "Metallothionein (MT)",
            "Calmodulin (CaM)",
            "Troponin C (TnC)",
            "Protein kinase C (PKC)",
            "Matrix metalloproteinases (MMPs)",
            "Cytochrome P450 enzymes (CYPs)",
            "Nitric oxide synthase (NOS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Ferritin",
                "Transferrin",
                "Ceruloplasmin",
                "Superoxide dismutase (SOD)",
                "Catalase",
                "Carbonic anhydrase (CA)",
                "Alcohol dehydrogenase (ADH)",
                "Alkaline phosphatase (ALP)",
                "Carboxypeptidase A (CPA)",
                "Thermolysin",
                "Nitrogenase",
                "Hydrogenase",
                "Vitamin B12-dependent enzymes",
                "Zinc finger proteins",
                "Metallothionein (MT)",
                "Calmodulin (CaM)",
                "Troponin C (TnC)",
                "Protein kinase C (PKC)",
                "Matrix metalloproteinases (MMPs)",
                "Cytochrome P450 enzymes (CYPs)",
                "Nitric oxide synthase (NOS)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase (ADH)",
                "Alkaline phosphatase (ALP)",
                "Calmodulin (CaM)",
                "Carbonic anhydrase (CA)",
                "Carboxypeptidase A (CPA)",
                "Catalase",
                "Ceruloplasmin",
                "Cytochrome P450 enzymes (CYPs)",
                "Cytochrome c (Cyt c)",
                "Ferritin",
                "Hemoglobin (Hb)",
                "Hydrogenase",
                "Matrix metalloproteinases (MMPs)",
                "Metallothionein (MT)",
                "Myoglobin (Mb)",
                "Nitric oxide synthase (NOS)",
                "Nitrogenase",
                "Protein kinase C (PKC)",
                "Superoxide dismutase (SOD)",
                "Thermolysin",
                "Transferrin",
                "Troponin C (TnC)",
                "Vitamin B12-dependent enzymes",
                "Zinc finger proteins"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Ferritin (FTH1)",
            "Transferrin (TF)",
            "Ceruloplasmin (CP)",
            "Lactoferrin (LTF)",
            "Cytochrome c (CYCS)",
            "Cytochrome c oxidase (COX)",
            "Catalase (CAT)",
            "Superoxide dismutase (SOD)",
            "Hemoglobin (HBB)",
            "Myoglobin (MB)",
            "Nitrogenase (NifH)",
            "Hydrogenase (HydA)",
            "Alcohol dehydrogenase (ADH)",
            "Ribonucleotide reductase (RNR)",
            "Cytochrome P450 (CYP)",
            "Methane monooxygenase (MMO)",
            "Urease (URE)",
            "Carbonic anhydrase (CA)",
            "Hemerythrin (HER)",
            "Plastocyanin (PC)",
            "Azurin (AZU)",
            "Rubredoxin (RD)",
            "Ferredoxin (FD)",
            "Metallothionein (MT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ferritin (FTH1)",
                "Transferrin (TF)",
                "Ceruloplasmin (CP)",
                "Lactoferrin (LTF)",
                "Cytochrome c (CYCS)",
                "Cytochrome c oxidase (COX)",
                "Catalase (CAT)",
                "Superoxide dismutase (SOD)",
                "Hemoglobin (HBB)",
                "Myoglobin (MB)",
                "Nitrogenase (NifH)",
                "Hydrogenase (HydA)",
                "Alcohol dehydrogenase (ADH)",
                "Ribonucleotide reductase (RNR)",
                "Cytochrome P450 (CYP)",
                "Methane monooxygenase (MMO)",
                "Urease (URE)",
                "Carbonic anhydrase (CA)",
                "Hemerythrin (HER)",
                "Plastocyanin (PC)",
                "Azurin (AZU)",
                "Rubredoxin (RD)",
                "Ferredoxin (FD)",
                "Metallothionein (MT)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase (ADH)",
                "Azurin (AZU)",
                "Carbonic anhydrase (CA)",
                "Catalase (CAT)",
                "Ceruloplasmin (CP)",
                "Cytochrome P450 (CYP)",
                "Cytochrome c (CYCS)",
                "Cytochrome c oxidase (COX)",
                "Ferredoxin (FD)",
                "Ferritin (FTH1)",
                "Hemerythrin (HER)",
                "Hemoglobin (HBB)",
                "Hydrogenase (HydA)",
                "Lactoferrin (LTF)",
                "Metallothionein (MT)",
                "Methane monooxygenase (MMO)",
                "Myoglobin (MB)",
                "Nitrogenase (NifH)",
                "Plastocyanin (PC)",
                "Ribonucleotide reductase (RNR)",
                "Rubredoxin (RD)",
                "Superoxide dismutase (SOD)",
                "Transferrin (TF)",
                "Urease (URE)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Hemoglobin (Hb)",
            "Myoglobin (Mb)",
            "Cytochrome c (Cyt c)",
            "Catalase",
            "Peroxidase",
            "Superoxide dismutase (SOD)",
            "Ferredoxin",
            "Nitrogenase",
            "Vitamin B12 (Cobalamin)",
            "Chlorophyll",
            "Heme oxygenase",
            "Nitric oxide synthase (NOS)",
            "Cytochrome P450",
            "Laccase",
            "Tyrosinase",
            "Ceruloplasmin",
            "Transferrin",
            "Ferritin",
            "Metallothionein",
            "Carbonic anhydrase",
            "Alcohol dehydrogenase",
            "Cytochrome c oxidase",
            "Xanthine oxidase",
            "Nitrite reductase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hemoglobin (Hb)",
                "Myoglobin (Mb)",
                "Cytochrome c (Cyt c)",
                "Catalase",
                "Peroxidase",
                "Superoxide dismutase (SOD)",
                "Ferredoxin",
                "Nitrogenase",
                "Vitamin B12 (Cobalamin)",
                "Chlorophyll",
                "Heme oxygenase",
                "Nitric oxide synthase (NOS)",
                "Cytochrome P450",
                "Laccase",
                "Tyrosinase",
                "Ceruloplasmin",
                "Transferrin",
                "Ferritin",
                "Metallothionein",
                "Carbonic anhydrase",
                "Alcohol dehydrogenase",
                "Cytochrome c oxidase",
                "Xanthine oxidase",
                "Nitrite reductase"
            ],
            "mismatches": [],
            "true_referents": [
                "Alcohol dehydrogenase",
                "Carbonic anhydrase",
                "Catalase",
                "Ceruloplasmin",
                "Chlorophyll",
                "Cytochrome P450",
                "Cytochrome c (Cyt c)",
                "Cytochrome c oxidase",
                "Ferredoxin",
                "Ferritin",
                "Heme oxygenase",
                "Hemoglobin (Hb)",
                "Laccase",
                "Metallothionein",
                "Myoglobin (Mb)",
                "Nitric oxide synthase (NOS)",
                "Nitrite reductase",
                "Nitrogenase",
                "Peroxidase",
                "Superoxide dismutase (SOD)",
                "Transferrin",
                "Tyrosinase",
                "Vitamin B12 (Cobalamin)",
                "Xanthine oxidase"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Alkaline Phosphatase (ALP)",
            "Carbonic Anhydrase",
            "Cytochrome C",
            "Cytochrome Oxidase",
            "Ferredoxin",
            "Ferritin",
            "Flavodoxin",
            "Lactate Dehydrogenase (LDH)",
            "Lysyl Oxidase",
            "Metallothionein",
            "Nitric Oxide Synthase (NOS)",
            "Pyruvate Oxidase",
            "Ribonuclease",
            "Ribozyme",
            "Superoxide Dismutase (SOD)",
            "Thioredoxin",
            "Xanthine Oxidase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alkaline Phosphatase (ALP)",
                "Carbonic Anhydrase",
                "Cytochrome C",
                "Cytochrome Oxidase",
                "Ferredoxin",
                "Ferritin",
                "Flavodoxin",
                "Lactate Dehydrogenase (LDH)",
                "Lysyl Oxidase",
                "Metallothionein",
                "Nitric Oxide Synthase (NOS)",
                "Pyruvate Oxidase",
                "Ribonuclease",
                "Ribozyme",
                "Superoxide Dismutase (SOD)",
                "Thioredoxin",
                "Xanthine Oxidase"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaline Phosphatase (ALP)",
                "Carbonic Anhydrase",
                "Cytochrome C",
                "Cytochrome Oxidase",
                "Ferredoxin",
                "Ferritin",
                "Flavodoxin",
                "Lactate Dehydrogenase (LDH)",
                "Lysyl Oxidase",
                "Metallothionein",
                "Nitric Oxide Synthase (NOS)",
                "Pyruvate Oxidase",
                "Ribonuclease",
                "Ribozyme",
                "Superoxide Dismutase (SOD)",
                "Thioredoxin",
                "Xanthine Oxidase"
            ],
            "TP": 17,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": [
            "Cytochrome c Oxidase (COX)",
            "Hemoglobin (Hb)",
            "Catalase",
            "Superoxide Dismutase (SOD)",
            "Ferritin",
            "Transferrin",
            "Lactoferrin",
            "Ceruloplasmin",
            "Tyrosinase",
            "Catechol oxidase",
            "Phospholipase A2",
            "Carbonic Anhydrase",
            "Peptidylglycine alpha-amidating monooxygenase",
            "Dopamine beta-hydroxylase",
            "Cytochrome P450",
            "Lipoprotein lipase",
            "Fatty acid desaturase",
            "Alcohol dehydrogenase",
            "Aconitase",
            "Isocitrate dehydrogenase",
            "Alpha-ketoglutarate dehydrogenase",
            "Pyruvate dehydrogenase",
            "Dihydroorotate dehydrogenase",
            "Fumarase"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cytochrome c Oxidase (COX)",
                "Hemoglobin (Hb)",
                "Catalase",
                "Superoxide Dismutase (SOD)",
                "Ferritin",
                "Transferrin",
                "Lactoferrin",
                "Ceruloplasmin",
                "Tyrosinase",
                "Catechol oxidase",
                "Phospholipase A2",
                "Carbonic Anhydrase",
                "Peptidylglycine alpha-amidating monooxygenase",
                "Dopamine beta-hydroxylase",
                "Cytochrome P450",
                "Lipoprotein lipase",
                "Fatty acid desaturase",
                "Alcohol dehydrogenase",
                "Aconitase",
                "Isocitrate dehydrogenase",
                "Alpha-ketoglutarate dehydrogenase",
                "Pyruvate dehydrogenase",
                "Dihydroorotate dehydrogenase",
                "Fumarase"
            ],
            "mismatches": [],
            "true_referents": [
                "Aconitase",
                "Alcohol dehydrogenase",
                "Alpha-ketoglutarate dehydrogenase",
                "Carbonic Anhydrase",
                "Catalase",
                "Catechol oxidase",
                "Ceruloplasmin",
                "Cytochrome P450",
                "Cytochrome c Oxidase (COX)",
                "Dihydroorotate dehydrogenase",
                "Dopamine beta-hydroxylase",
                "Fatty acid desaturase",
                "Ferritin",
                "Fumarase",
                "Hemoglobin (Hb)",
                "Isocitrate dehydrogenase",
                "Lactoferrin",
                "Lipoprotein lipase",
                "Peptidylglycine alpha-amidating monooxygenase",
                "Phospholipase A2",
                "Pyruvate dehydrogenase",
                "Superoxide Dismutase (SOD)",
                "Transferrin",
                "Tyrosinase"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "metalloprotein",
        "domain": "biology",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]