[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-3 (IL-3)",
            "Interleukin-4 (IL-4)",
            "Interleukin-5 (IL-5)",
            "Interleukin-6 (IL-6)",
            "Interleukin-7 (IL-7)",
            "Interleukin-8 (IL-8)",
            "Interleukin-9 (IL-9)",
            "Interleukin-10 (IL-10)",
            "Interleukin-11 (IL-11)",
            "Interleukin-12 (IL-12)",
            "Interleukin-13 (IL-13)",
            "Interleukin-15 (IL-15)",
            "Interleukin-17 (IL-17)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Interferon-gamma (IFN-\u03b3)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Macrophage Colony-Stimulating Factor (M-CSF)",
            "Erythropoietin (EPO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Interleukin-2 (IL-2)",
                "Interleukin-3 (IL-3)",
                "Interleukin-4 (IL-4)",
                "Interleukin-5 (IL-5)",
                "Interleukin-6 (IL-6)",
                "Interleukin-7 (IL-7)",
                "Interleukin-8 (IL-8)",
                "Interleukin-9 (IL-9)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-12 (IL-12)",
                "Interleukin-13 (IL-13)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interferon-gamma (IFN-\u03b3)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Erythropoietin (EPO)"
            ],
            "mismatches": [],
            "true_referents": [
                "Colony-Stimulating Factors",
                "Epoetin Alfa",
                "Erythropoietin",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon alpha-2",
                "Interferon beta-1a",
                "Interferon beta-1b",
                "Interferon-alpha",
                "Interferon-beta",
                "Interferon-gamma",
                "Interferons",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Macrophage Colony-Stimulating Factor",
                "Thrombopoietin",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8)",
            "Interleukin-10 (IL-10)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interleukin-23 (IL-23)",
            "Interleukin-27 (IL-27)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Interferon-gamma (IFN-\u03b3)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Tumor Necrosis Factor-beta (TNF-\u03b2)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Macrophage Colony-Stimulating Factor (M-CSF)",
            "Fibroblast Growth Factor (FGF)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Erythropoietin (EPO)",
            "Leukemia Inhibitory Factor (LIF)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "C-C Motif Chemokine Ligand 2 (CCL2)",
            "C-X-C Motif Chemokine Ligand 8 (CXCL8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1",
                "Interleukin-2",
                "Interleukin-4",
                "Interleukin-6",
                "Interleukin-8",
                "Interleukin-10",
                "Interleukin-12",
                "Interleukin-17",
                "Interleukin-23",
                "Interleukin-27",
                "Interferon-alpha",
                "Interferon-beta",
                "Interferon-gamma",
                "Tumor Necrosis Factor-alpha",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Granulocyte Colony-Stimulating Factor",
                "Macrophage Colony-Stimulating Factor",
                "Transforming Growth Factor beta",
                "Erythropoietin",
                "Leukemia Inhibitory Factor",
                "Monocyte Chemoattractant Proteins"
            ],
            "mismatches": [
                "Tumor Necrosis Factor-beta",
                "Fibroblast Growth Factor",
                "Vascular Endothelial Growth Factor",
                "C-C Motif Chemokine Ligand 2",
                "C-X-C Motif Chemokine Ligand 8"
            ],
            "true_referents": [
                "Chemokine CCL1",
                "Chemokine CXCL1",
                "Colony-Stimulating Factors",
                "Epoetin Alfa",
                "Erythropoietin",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon alpha-2",
                "Interferon beta-1a",
                "Interferon beta-1b",
                "Interferon-alpha",
                "Interferon-beta",
                "Interferon-gamma",
                "Interferons",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-27",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Leukemia Inhibitory Factor",
                "Leukocyte Migration-Inhibitory Factors",
                "Macrophage Colony-Stimulating Factor",
                "Monocyte Chemoattractant Proteins",
                "Thrombopoietin",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 21,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-3 (IL-3)",
            "Interleukin-4 (IL-4)",
            "Interleukin-5 (IL-5)",
            "Interleukin-6 (IL-6)",
            "Interleukin-7 (IL-7)",
            "Interleukin-8 (IL-8)",
            "Interleukin-9 (IL-9)",
            "Interleukin-10 (IL-10)",
            "Interleukin-11 (IL-11)",
            "Interleukin-12 (IL-12)",
            "Interleukin-13 (IL-13)",
            "Interleukin-14 (IL-14)",
            "Interleukin-15 (IL-15)",
            "Interleukin-16 (IL-16)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-21 (IL-21)",
            "Interleukin-22 (IL-22)",
            "Interleukin-23 (IL-23)",
            "Interleukin-24 (IL-24)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Interleukin-2 (IL-2)",
                "Interleukin-3 (IL-3)",
                "Interleukin-4 (IL-4)",
                "Interleukin-5 (IL-5)",
                "Interleukin-6 (IL-6)",
                "Interleukin-7 (IL-7)",
                "Interleukin-8 (IL-8)",
                "Interleukin-9 (IL-9)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-12 (IL-12)",
                "Interleukin-13 (IL-13)",
                "Interleukin-15 (IL-15)",
                "Interleukin-16 (IL-16)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)"
            ],
            "mismatches": [
                "Interleukin-14 (IL-14)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-21 (IL-21)",
                "Interleukin-24 (IL-24)"
            ],
            "true_referents": [
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-22",
                "Interleukin-23",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Tumor Necrosis Factor alpha (TNF-\u03b1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8) / CXCL8",
            "Interleukin-10 (IL-10)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17A (IL-17A)",
            "Interleukin-18 (IL-18)",
            "Interferon gamma (IFN-\u03b3)",
            "Interferon alpha (IFN-\u03b1)",
            "Interferon beta (IFN-\u03b2)",
            "Transforming Growth Factor beta (TGF-\u03b2)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Chemokine (C-C motif) ligand 2 (CCL2) / MCP-1",
            "Chemokine (C-X-C motif) ligand 1 (CXCL1) / GRO\u03b1",
            "Chemokine (C-X-C motif) ligand 10 (CXCL10) / IP-10",
            "B Lymphocyte Stimulator (BLyS / BAFF)",
            "April (A Proliferation-Inducing Ligand)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 alpha",
                "Tumor Necrosis Factor alpha",
                "Interleukin-2",
                "Interleukin-4",
                "Interleukin-6",
                "Interleukin-8",
                "Interleukin-10",
                "Interleukin-12",
                "Interleukin-18",
                "Interferon gamma",
                "Interferon alpha",
                "Interferon beta",
                "Transforming Growth Factor beta",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Granulocyte Colony-Stimulating Factor",
                "Chemokine CCL2",
                "Chemokine CXCL1",
                "Chemokine CXCL10",
                "B-Cell Activating Factor"
            ],
            "mismatches": [
                "Interleukin-1 beta",
                "Interleukin-17A",
                "Platelet-Derived Growth Factor",
                "Vascular Endothelial Growth Factor",
                "B Lymphocyte Stimulator",
                "April"
            ],
            "true_referents": [
                "4-1BB Ligand",
                "B-Cell Activating Factor",
                "Chemokine CCL1",
                "Chemokine CCL2",
                "Chemokine CCL8",
                "Chemokine CXCL1",
                "Chemokine CXCL10",
                "Chemokine CXCL11",
                "Chemokine CXCL2",
                "Colony-Stimulating Factors",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon alpha-2",
                "Interferon beta-1a",
                "Interferon beta-1b",
                "Interferon-alpha",
                "Interferon-beta",
                "Interferon-gamma",
                "Interferons",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-12",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-8",
                "Lymphotoxin-beta",
                "Macrophage Colony-Stimulating Factor",
                "RANK Ligand",
                "TNF-Related Apoptosis-Inducing Ligand",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 19,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-2 (IL-2)",
            "Interleukin-3 (IL-3)",
            "Interleukin-4 (IL-4)",
            "Interleukin-5 (IL-5)",
            "Interleukin-6 (IL-6)",
            "Interleukin-7 (IL-7)",
            "Interleukin-8 (IL-8)",
            "Interleukin-9 (IL-9)",
            "Interleukin-10 (IL-10)",
            "Interleukin-11 (IL-11)",
            "Interleukin-12 (IL-12)",
            "Interleukin-13 (IL-13)",
            "Interleukin-15 (IL-15)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-22 (IL-22)",
            "Interleukin-23 (IL-23)",
            "Tumor Necrosis Factor alpha (TNF-\u03b1)",
            "Interferon gamma (IFN-\u03b3)",
            "Transforming Growth Factor beta (TGF-\u03b2)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-22",
                "Interleukin-23",
                "Tumor Necrosis Factor-alpha",
                "Interferon-gamma",
                "Transforming Growth Factor beta",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Monocyte Chemoattractant Proteins"
            ],
            "mismatches": [
                "Interleukin-1 beta (IL-1\u03b2)"
            ],
            "true_referents": [
                "Chemokine CCL1",
                "Chemokine CXCL1",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon-alpha",
                "Interferon-gamma",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-22",
                "Interleukin-23",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Macrophage Colony-Stimulating Factor",
                "Monocyte Chemoattractant Proteins",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 23,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-6 (IL-6)",
            "Interleukin-10 (IL-10)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interferon-gamma (IFN-\u03b3)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Erythropoietin (EPO)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Chemokine (C-C motif) ligand 2 (CCL2)",
            "Chemokine (C-X-C motif) ligand 8 (CXCL8)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Interleukin-4 (IL-4)",
            "Interleukin-5 (IL-5)",
            "Interleukin-8 (IL-8)",
            "Interleukin-15 (IL-15)",
            "Interleukin-18 (IL-18)",
            "Interleukin-21 (IL-21)",
            "Fms-related Tyrosine Kinase 3 Ligand (Flt3L)",
            "Oncostatin M (OSM)",
            "Lymphotoxin-alpha (LT-\u03b1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Interleukin-2 (IL-2)",
                "Interleukin-6 (IL-6)",
                "Interleukin-10 (IL-10)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interferon-gamma (IFN-\u03b3)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Erythropoietin (EPO)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Chemokine (C-C motif) ligand 2 (CCL2)",
                "Oncostatin M (OSM)",
                "Interleukin-4 (IL-4)",
                "Interleukin-5 (IL-5)",
                "Interleukin-8 (IL-8)",
                "Interleukin-15 (IL-15)",
                "Interleukin-18 (IL-18)"
            ],
            "mismatches": [
                "Chemokine (C-X-C motif) ligand 8 (CXCL8)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Interleukin-21 (IL-21)",
                "Fms-related Tyrosine Kinase 3 Ligand (Flt3L)",
                "Lymphotoxin-alpha (LT-\u03b1)"
            ],
            "true_referents": [
                "Chemokine CCL1",
                "Chemokine CCL2",
                "Chemokine CCL8",
                "Chemokine CXCL1",
                "Chemokine CXCL2",
                "Chemokine CXCL9",
                "Epoetin Alfa",
                "Erythropoietin",
                "Fas Ligand Protein",
                "Filgrastim",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon-alpha",
                "Interferon-gamma",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Lenograstim",
                "Macrophage Colony-Stimulating Factor",
                "Macrophage Inflammatory Proteins",
                "Oncostatin M",
                "RANK Ligand",
                "Thrombopoietin",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor Ligand Superfamily Member 13",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 18,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin 1 (IL-1)",
            "Interleukin 2 (IL-2)",
            "Interleukin 3 (IL-3)",
            "Interleukin 4 (IL-4)",
            "Interleukin 5 (IL-5)",
            "Interleukin 6 (IL-6)",
            "Interleukin 7 (IL-7)",
            "Interleukin 8 (IL-8)",
            "Interleukin 9 (IL-9)",
            "Interleukin 10 (IL-10)",
            "Interleukin 11 (IL-11)",
            "Interleukin 12 (IL-12)",
            "Interleukin 13 (IL-13)",
            "Interleukin 14 (IL-14)",
            "Interleukin 15 (IL-15)",
            "Interleukin 16 (IL-16)",
            "Interferon gamma (IFN-\u03b3)",
            "Interferon alpha (IFN-\u03b1)",
            "Interferon beta (IFN-\u03b2)",
            "Tumor Necrosis Factor Alpha (TNF-\u03b1)",
            "Tumor Necrosis Factor Beta (TNF-\u03b2)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Macrophage Colony-Stimulating Factor (M-CSF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin 1 (IL-1)",
                "Interleukin 2 (IL-2)",
                "Interleukin 3 (IL-3)",
                "Interleukin 4 (IL-4)",
                "Interleukin 5 (IL-5)",
                "Interleukin 6 (IL-6)",
                "Interleukin 7 (IL-7)",
                "Interleukin 8 (IL-8)",
                "Interleukin 9 (IL-9)",
                "Interleukin 10 (IL-10)",
                "Interleukin 11 (IL-11)",
                "Interleukin 12 (IL-12)",
                "Interleukin 13 (IL-13)",
                "Interleukin 15 (IL-15)",
                "Interleukin 16 (IL-16)",
                "Interferon gamma (IFN-\u03b3)",
                "Interferon alpha (IFN-\u03b1)",
                "Interferon beta (IFN-\u03b2)",
                "Tumor Necrosis Factor Alpha (TNF-\u03b1)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Macrophage Colony-Stimulating Factor (M-CSF)"
            ],
            "mismatches": [
                "Interleukin 14 (IL-14)",
                "Tumor Necrosis Factor Beta (TNF-\u03b2)"
            ],
            "true_referents": [
                "Colony-Stimulating Factors",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon alpha-2",
                "Interferon beta-1a",
                "Interferon beta-1b",
                "Interferon-alpha",
                "Interferon-beta",
                "Interferon-gamma",
                "Interferons",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Macrophage Colony-Stimulating Factor",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-6 (IL-6)",
            "Tumor necrosis factor-alpha (TNF-alpha)",
            "Interferon-gamma (IFN-gamma)",
            "Interleukin-10 (IL-10)",
            "Transforming growth factor-beta (TGF-beta)",
            "Interleukin-4 (IL-4)",
            "Interleukin-17 (IL-17)",
            "Interleukin-12 (IL-12)",
            "Interleukin-8 (IL-8)",
            "Interleukin-18 (IL-18)",
            "Interleukin-13 (IL-13)",
            "Interleukin-5 (IL-5)",
            "Interleukin-23 (IL-23)",
            "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
            "Macrophage colony-stimulating factor (M-CSF)",
            "Interleukin-7 (IL-7)",
            "Interleukin-9 (IL-9)",
            "Interleukin-15 (IL-15)",
            "Interleukin-21 (IL-21)",
            "Interleukin-22 (IL-22)",
            "Interleukin-33 (IL-33)",
            "Interleukin-27 (IL-27)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Interleukin-2 (IL-2)",
                "Interleukin-6 (IL-6)",
                "Tumor necrosis factor-alpha (TNF-alpha)",
                "Interferon-gamma (IFN-gamma)",
                "Interleukin-10 (IL-10)",
                "Transforming growth factor-beta (TGF-beta)",
                "Interleukin-4 (IL-4)",
                "Interleukin-17 (IL-17)",
                "Interleukin-12 (IL-12)",
                "Interleukin-8 (IL-8)",
                "Interleukin-18 (IL-18)",
                "Interleukin-13 (IL-13)",
                "Interleukin-5 (IL-5)",
                "Interleukin-23 (IL-23)",
                "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
                "Macrophage colony-stimulating factor (M-CSF)",
                "Interleukin-7 (IL-7)",
                "Interleukin-9 (IL-9)",
                "Interleukin-15 (IL-15)",
                "Interleukin-22 (IL-22)",
                "Interleukin-33 (IL-33)",
                "Interleukin-27 (IL-27)"
            ],
            "mismatches": [
                "Interleukin-21 (IL-21)"
            ],
            "true_referents": [
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon-alpha",
                "Interferon-gamma",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-22",
                "Interleukin-23",
                "Interleukin-27",
                "Interleukin-3",
                "Interleukin-33",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Macrophage Colony-Stimulating Factor",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 23,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-6 (IL-6)",
            "Interleukin-10 (IL-10)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Interferon-gamma (IFN-\u03b3)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Tumor Necrosis Factor-beta (TNF-\u03b2)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Macrophage Colony-Stimulating Factor (M-CSF)",
            "Chemokine (C-C motif) Ligand 2 (CCL2)",
            "Chemokine (C-X-C motif) Ligand 8 (CXCL8)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Erythropoietin (EPO)",
            "Stem Cell Factor (SCF)",
            "Leukemia Inhibitory Factor (LIF)",
            "Oncostatin M (OSM)",
            "Growth Differentiation Factor 15 (GDF-15)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Interleukin-2 (IL-2)",
                "Interleukin-4 (IL-4)",
                "Interleukin-6 (IL-6)",
                "Interleukin-10 (IL-10)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interferon-gamma (IFN-\u03b3)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Erythropoietin (EPO)",
                "Stem Cell Factor (SCF)",
                "Leukemia Inhibitory Factor (LIF)",
                "Oncostatin M (OSM)",
                "Growth Differentiation Factor 15 (GDF-15)"
            ],
            "mismatches": [
                "Tumor Necrosis Factor-beta (TNF-\u03b2)",
                "Chemokine (C-C motif) Ligand 2 (CCL2)",
                "Chemokine (C-X-C motif) Ligand 8 (CXCL8)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Platelet-Derived Growth Factor (PDGF)"
            ],
            "true_referents": [
                "Chemokine CCL2",
                "Chemokine CCL8",
                "Chemokine CXCL1",
                "Chemokine CXCL2",
                "Chemokine CXCL9",
                "Colony-Stimulating Factors",
                "Epoetin Alfa",
                "Erythropoietin",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Growth Differentiation Factor 15",
                "Hematopoietic Cell Growth Factors",
                "Interferon alpha-2",
                "Interferon beta-1a",
                "Interferon beta-1b",
                "Interferon-alpha",
                "Interferon-beta",
                "Interferon-gamma",
                "Interferons",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-12",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Lenograstim",
                "Leukemia Inhibitory Factor",
                "Leukocyte Migration-Inhibitory Factors",
                "Macrophage Colony-Stimulating Factor",
                "Oncostatin M",
                "RANK Ligand",
                "Stem Cell Factor",
                "Thrombopoietin",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interferon-gamma (IFN-\u03b3)",
            "Interleukin-6 (IL-6)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Interleukin-10 (IL-10)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Interleukin-2 (IL-2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interleukin-8 (IL-8)",
            "Chemokine (C-C motif) Ligand 2 (CCL2)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Epidermal Growth Factor (EGF)",
            "Fibroblast Growth Factor (FGF)",
            "Leukemia Inhibitory Factor (LIF)",
            "Oncostatin M (OSM)",
            "Ciliary Neurotrophic Factor (CNTF)",
            "Erythropoietin (EPO)",
            "Thrombopoietin (TPO)",
            "Macrophage Colony-Stimulating Factor (M-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-6 (IL-6)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Interleukin-10 (IL-10)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interleukin-2 (IL-2)",
                "Interleukin-4 (IL-4)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-8 (IL-8)",
                "Leukemia Inhibitory Factor (LIF)",
                "Oncostatin M (OSM)",
                "Erythropoietin (EPO)",
                "Thrombopoietin (TPO)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)"
            ],
            "mismatches": [
                "Chemokine (C-C motif) Ligand 2 (CCL2)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Epidermal Growth Factor (EGF)",
                "Fibroblast Growth Factor (FGF)",
                "Ciliary Neurotrophic Factor (CNTF)"
            ],
            "true_referents": [
                "Chemokine CCL2",
                "Chemokine CXCL2",
                "Colony-Stimulating Factors",
                "Epoetin Alfa",
                "Erythropoietin",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Hepatocyte Growth Factor",
                "Interferon-alpha",
                "Interferon-gamma",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-12",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-8",
                "Lenograstim",
                "Leukemia Inhibitory Factor",
                "Leukocyte Migration-Inhibitory Factors",
                "Macrophage Colony-Stimulating Factor",
                "Oncostatin M",
                "Platelet Factor 4",
                "RANK Ligand",
                "Stem Cell Factor",
                "Thrombopoietin",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors",
                "beta-Thromboglobulin"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "interleukin-1 (IL-1)",
            "interleukin-2 (IL-2)",
            "interleukin-3 (IL-3)",
            "interleukin-4 (IL-4)",
            "interleukin-5 (IL-5)",
            "interleukin-6 (IL-6)",
            "interleukin-7 (IL-7)",
            "interleukin-8 (IL-8)",
            "interleukin-9 (IL-9)",
            "interleukin-10 (IL-10)",
            "interleukin-11 (IL-11)",
            "interleukin-12 (IL-12)",
            "interleukin-13 (IL-13)",
            "interleukin-15 (IL-15)",
            "interleukin-17 (IL-17)",
            "interleukin-18 (IL-18)",
            "tumor necrosis factor-alpha (TNF-\u03b1)",
            "tumor necrosis factor-beta (TNF-\u03b2)",
            "interferon-alpha (IFN-\u03b1)",
            "interferon-beta (IFN-\u03b2)",
            "interferon-gamma (IFN-\u03b3)",
            "granulocyte colony-stimulating factor (G-CSF)",
            "granulocyte-macrophage colony-stimulating factor (GM-CSF)",
            "transforming growth factor-beta (TGF-\u03b2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "interleukin-1 (IL-1)",
                "interleukin-2 (IL-2)",
                "interleukin-3 (IL-3)",
                "interleukin-4 (IL-4)",
                "interleukin-5 (IL-5)",
                "interleukin-6 (IL-6)",
                "interleukin-7 (IL-7)",
                "interleukin-8 (IL-8)",
                "interleukin-9 (IL-9)",
                "interleukin-10 (IL-10)",
                "interleukin-11 (IL-11)",
                "interleukin-12 (IL-12)",
                "interleukin-13 (IL-13)",
                "interleukin-15 (IL-15)",
                "interleukin-17 (IL-17)",
                "interleukin-18 (IL-18)",
                "tumor necrosis factor-alpha (TNF-\u03b1)",
                "interferon-alpha (IFN-\u03b1)",
                "interferon-beta (IFN-\u03b2)",
                "interferon-gamma (IFN-\u03b3)",
                "granulocyte colony-stimulating factor (G-CSF)",
                "granulocyte-macrophage colony-stimulating factor (GM-CSF)",
                "transforming growth factor-beta (TGF-\u03b2)"
            ],
            "mismatches": [
                "tumor necrosis factor-beta (TNF-\u03b2)"
            ],
            "true_referents": [
                "Colony-Stimulating Factors",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon alpha-2",
                "Interferon beta-1a",
                "Interferon beta-1b",
                "Interferon-alpha",
                "Interferon-beta",
                "Interferon-gamma",
                "Interferons",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-23",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Macrophage Colony-Stimulating Factor",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 23,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-2 (IL-2)",
            "Interleukin-3 (IL-3)",
            "Interleukin-4 (IL-4)",
            "Interleukin-5 (IL-5)",
            "Interleukin-6 (IL-6)",
            "Interleukin-7 (IL-7)",
            "Interleukin-8 (IL-8)",
            "Interleukin-9 (IL-9)",
            "Interleukin-10 (IL-10)",
            "Interleukin-11 (IL-11)",
            "Interleukin-12 (IL-12)",
            "Interleukin-13 (IL-13)",
            "Interleukin-14 (IL-14)",
            "Interleukin-15 (IL-15)",
            "Interleukin-16 (IL-16)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-21 (IL-21)",
            "Interleukin-22 (IL-22)",
            "Interleukin-23 (IL-23)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-2 (IL-2)",
                "Interleukin-3 (IL-3)",
                "Interleukin-4 (IL-4)",
                "Interleukin-5 (IL-5)",
                "Interleukin-6 (IL-6)",
                "Interleukin-7 (IL-7)",
                "Interleukin-8 (IL-8)",
                "Interleukin-9 (IL-9)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-12 (IL-12)",
                "Interleukin-13 (IL-13)",
                "Interleukin-15 (IL-15)",
                "Interleukin-16 (IL-16)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)"
            ],
            "mismatches": [
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-14 (IL-14)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-21 (IL-21)"
            ],
            "true_referents": [
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-22",
                "Interleukin-23",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Interleukin-2 (IL-2)",
            "Interleukin-3 (IL-3)",
            "Interleukin-4 (IL-4)",
            "Interleukin-5 (IL-5)",
            "Interleukin-6 (IL-6)",
            "Interleukin-7 (IL-7)",
            "Interleukin-8 (IL-8)",
            "Interleukin-9 (IL-9)",
            "Interleukin-10 (IL-10)",
            "Interleukin-11 (IL-11)",
            "Interleukin-12 (IL-12)",
            "Interferon-gamma (IFN-\u03b3)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Regulated on Activation, Normal T Expressed and Secreted (RANTES)",
            "Eotaxin",
            "Fibroblast Growth Factor (FGF)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Epidermal Growth Factor (EGF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Interleukin-2 (IL-2)",
                "Interleukin-3 (IL-3)",
                "Interleukin-4 (IL-4)",
                "Interleukin-5 (IL-5)",
                "Interleukin-6 (IL-6)",
                "Interleukin-7 (IL-7)",
                "Interleukin-8 (IL-8)",
                "Interleukin-9 (IL-9)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-12 (IL-12)",
                "Interferon-gamma (IFN-\u03b3)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)"
            ],
            "mismatches": [
                "Regulated on Activation, Normal T Expressed and Secreted (RANTES)",
                "Eotaxin",
                "Fibroblast Growth Factor (FGF)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Epidermal Growth Factor (EGF)"
            ],
            "true_referents": [
                "Chemokine CCL1",
                "Chemokine CXCL1",
                "Chemokine CXCL2",
                "Chemokines",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon-alpha",
                "Interferon-gamma",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Macrophage Colony-Stimulating Factor",
                "Macrophage Inflammatory Proteins",
                "Monocyte Chemoattractant Proteins",
                "Monokines",
                "Transforming Growth Factor beta",
                "Transforming Growth Factor beta1",
                "Transforming Growth Factor beta2",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-6 (IL-6)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Interferon-gamma (IFN-\u03b3)",
            "Interleukin-12 (IL-12)",
            "Tumor Necrosis Factor-beta (TNF-\u03b2)",
            "Interleukin-10 (IL-10)",
            "Interleukin-2 (IL-2)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Macrophage Colony-Stimulating Factor (M-CSF)",
            "Interleukin-8 (IL-8)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interleukin-15 (IL-15)",
            "Interleukin-23 (IL-23)",
            "Tumor Necrosis Factor-like Weak Inducer of Apoptosis (TWEAK)",
            "Interleukin-17 (IL-17)",
            "Interleukin-22 (IL-22)",
            "Interleukin-27 (IL-27)",
            "Interleukin-31 (IL-31)",
            "Interleukin-33 (IL-33)",
            "Eotaxin (CCL11)",
            "Rantes (CCL5)",
            "MIP-1 alpha (CCL3)",
            "MIP-1 beta (CCL4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-6 (IL-6)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-12 (IL-12)",
                "Interleukin-10 (IL-10)",
                "Interleukin-2 (IL-2)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Interleukin-8 (IL-8)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interleukin-15 (IL-15)",
                "Interleukin-23 (IL-23)",
                "Interleukin-17 (IL-17)",
                "Interleukin-22 (IL-22)",
                "Interleukin-27 (IL-27)",
                "Interleukin-33 (IL-33)",
                "Eotaxin (CCL11)",
                "Rantes (CCL5)"
            ],
            "mismatches": [
                "Tumor Necrosis Factor-beta (TNF-\u03b2)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Tumor Necrosis Factor-like Weak Inducer of Apoptosis (TWEAK)",
                "Interleukin-31 (IL-31)",
                "MIP-1 alpha (CCL3)",
                "MIP-1 beta (CCL4)"
            ],
            "true_referents": [
                "Chemokine CCL1",
                "Chemokine CCL11",
                "Chemokine CCL5",
                "Chemokine CXCL1",
                "Chemokine CXCL11",
                "Chemokine CXCL5",
                "Colony-Stimulating Factors",
                "Cytokine TWEAK",
                "Granulocyte Colony-Stimulating Factor",
                "Granulocyte-Macrophage Colony-Stimulating Factor",
                "Interferon alpha-2",
                "Interferon-alpha",
                "Interferon-gamma",
                "Interferons",
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-22",
                "Interleukin-23",
                "Interleukin-27",
                "Interleukin-3",
                "Interleukin-33",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Macrophage Colony-Stimulating Factor",
                "Monocyte Chemoattractant Proteins",
                "TNF-Related Apoptosis-Inducing Ligand",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 20,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "cytokine",
        "domain": "biology",
        "response": [
            "Interleukin 1 beta (IL-1\u03b2)",
            "Tumor necrosis factor-alpha (TNF-\u03b1)",
            "Interleukin 2 (IL-2)",
            "Interleukin 3 (IL-3)",
            "Interleukin 4 (IL-4)",
            "Interleukin 5 (IL-5)",
            "Interleukin 6 (IL-6)",
            "Interleukin 7 (IL-7)",
            "Interleukin 8 (IL-8)",
            "Interleukin 9 (IL-9)",
            "Interleukin 10 (IL-10)",
            "Interleukin 11 (IL-11)",
            "Interleukin 12 (IL-12)",
            "Interleukin 13 (IL-13)",
            "Interleukin 14 (IL-14)",
            "Interleukin 15 (IL-15)",
            "Interleukin 16 (IL-16)",
            "Interleukin 17 (IL-17)",
            "Interleukin 18 (IL-18)",
            "Interleukin 19 (IL-19)",
            "Interleukin 20 (IL-20)",
            "Interleukin 21 (IL-21)",
            "Interleukin 22 (IL-22)",
            "Interleukin 23 (IL-23)",
            "Interleukin 24 (IL-24)",
            "Interleukin 26 (IL-26)",
            "Interleukin 27 (IL-27)",
            "Interleukin 28 (IL-28)",
            "Interleukin 29 (IL-29)",
            "Interleukin 30 (IL-30)",
            "Interleukin 31 (IL-31)",
            "Interleukin 32 (IL-32)",
            "Interleukin 33 (IL-33)",
            "Granulocyte colony-stimulating factor (G-CSF)",
            "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
            "Macrophage colony-stimulating factor (M-CSF)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Interferon-gamma (IFN-\u03b3)",
            "Lymphotactin (XCL1)",
            "Fractalkine (CX3CL1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tumor necrosis factor-alpha (TNF-\u03b1)",
                "Interleukin 2 (IL-2)",
                "Interleukin 3 (IL-3)",
                "Interleukin 4 (IL-4)",
                "Interleukin 5 (IL-5)",
                "Interleukin 6 (IL-6)",
                "Interleukin 7 (IL-7)",
                "Interleukin 8 (IL-8)",
                "Interleukin 9 (IL-9)",
                "Interleukin 10 (IL-10)",
                "Interleukin 11 (IL-11)",
                "Interleukin 12 (IL-12)",
                "Interleukin 13 (IL-13)",
                "Interleukin 15 (IL-15)",
                "Interleukin 16 (IL-16)",
                "Interleukin 17 (IL-17)",
                "Interleukin 18 (IL-18)",
                "Interleukin 22 (IL-22)",
                "Interleukin 23 (IL-23)"
            ],
            "mismatches": [
                "Interleukin 1 beta (IL-1\u03b2)",
                "Interleukin 14 (IL-14)",
                "Interleukin 19 (IL-19)",
                "Interleukin 20 (IL-20)",
                "Interleukin 21 (IL-21)",
                "Interleukin 24 (IL-24)",
                "Interleukin 26 (IL-26)",
                "Interleukin 27 (IL-27)",
                "Interleukin 28 (IL-28)",
                "Interleukin 29 (IL-29)",
                "Interleukin 30 (IL-30)",
                "Interleukin 31 (IL-31)",
                "Interleukin 32 (IL-32)",
                "Interleukin 33 (IL-33)",
                "Granulocyte colony-stimulating factor (G-CSF)",
                "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
                "Macrophage colony-stimulating factor (M-CSF)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interferon-gamma (IFN-\u03b3)",
                "Lymphotactin (XCL1)",
                "Fractalkine (CX3CL1)"
            ],
            "true_referents": [
                "Interleukin-1",
                "Interleukin-10",
                "Interleukin-11",
                "Interleukin-12",
                "Interleukin-13",
                "Interleukin-15",
                "Interleukin-16",
                "Interleukin-17",
                "Interleukin-18",
                "Interleukin-1alpha",
                "Interleukin-2",
                "Interleukin-22",
                "Interleukin-23",
                "Interleukin-3",
                "Interleukin-4",
                "Interleukin-5",
                "Interleukin-6",
                "Interleukin-7",
                "Interleukin-8",
                "Interleukin-9",
                "Tumor Necrosis Factor-alpha",
                "Tumor Necrosis Factors"
            ],
            "TP": 19,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "cytokine",
        "domain": "biology",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]