[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-8 (IL-8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-15 (IL-15)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Fibroblast Growth Factor-2 (FGF-2)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Interferon-gamma (IFN-\u03b3)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Platelet-Derived Growth Factor (PDGF)",
            "RANTES (Regulated upon Activation, Normal T cell Expressed and Secreted)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Interleukin-10 (IL-10)",
            "Interleukin-13 (IL-13)",
            "Interleukin-33 (IL-33)",
            "Chemokine (C-X-C motif) ligand 1 (CXCL1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-8 (IL-8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Fibroblast Growth Factor-2 (FGF-2)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Platelet-Derived Growth Factor (PDGF)",
                "RANTES (Regulated upon Activation, Normal T cell Expressed and Secreted)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Interleukin-10 (IL-10)",
                "Interleukin-13 (IL-13)",
                "Interleukin-33 (IL-33)",
                "Chemokine (C-X-C motif) ligand 1 (CXCL1)"
            ],
            "mismatches": [],
            "true_referents": [
                "Chemokine (C-X-C motif) ligand 1 (CXCL1)",
                "Fibroblast Growth Factor-2 (FGF-2)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-10 (IL-10)",
                "Interleukin-12 (IL-12)",
                "Interleukin-13 (IL-13)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Platelet-Derived Growth Factor (PDGF)",
                "RANTES (Regulated upon Activation, Normal T cell Expressed and Secreted)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Interferon-gamma (IFN-\u03b3)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
            "Interleukin-8 (IL-8)",
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-15 (IL-15)",
            "Interleukin-16 (IL-16)",
            "Interleukin-21 (IL-21)",
            "Interleukin-22 (IL-22)",
            "Interleukin-27 (IL-27)",
            "CCL2 (MCP-1)",
            "CCL3 (MIP-1\u03b1)",
            "CCL5 (RANTES)",
            "CXCL1 (GRO-\u03b1)",
            "CXCL2 (GRO-\u03b2)",
            "CXCL8 (IL-8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interferon-gamma (IFN-\u03b3)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Interleukin-8 (IL-8)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-15 (IL-15)",
                "Interleukin-16 (IL-16)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-27 (IL-27)",
                "CCL2 (MCP-1)",
                "CCL3 (MIP-1\u03b1)",
                "CCL5 (RANTES)",
                "CXCL1 (GRO-\u03b1)",
                "CXCL2 (GRO-\u03b2)",
                "CXCL8 (IL-8)"
            ],
            "mismatches": [],
            "true_referents": [
                "CCL2 (MCP-1)",
                "CCL3 (MIP-1\u03b1)",
                "CCL5 (RANTES)",
                "CXCL1 (GRO-\u03b1)",
                "CXCL2 (GRO-\u03b2)",
                "CXCL8 (IL-8)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-16 (IL-16)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-27 (IL-27)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-8 (IL-8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Interferon-gamma (IFN-\u03b3)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Interleukin-27 (IL-27)",
            "Interleukin-33 (IL-33)",
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-21 (IL-21)",
            "Interleukin-22 (IL-22)",
            "Interleukin-25 (IL-25)",
            "Interleukin-31 (IL-31)",
            "Interleukin-32 (IL-32)",
            "Interleukin-35 (IL-35)",
            "Lymphotoxin-alpha (LT-\u03b1)",
            "Leukemia Inhibitory Factor (LIF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-8 (IL-8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interferon-gamma (IFN-\u03b3)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Interleukin-27 (IL-27)",
                "Interleukin-33 (IL-33)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-25 (IL-25)",
                "Interleukin-31 (IL-31)",
                "Interleukin-32 (IL-32)",
                "Interleukin-35 (IL-35)",
                "Lymphotoxin-alpha (LT-\u03b1)",
                "Leukemia Inhibitory Factor (LIF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-25 (IL-25)",
                "Interleukin-27 (IL-27)",
                "Interleukin-31 (IL-31)",
                "Interleukin-32 (IL-32)",
                "Interleukin-33 (IL-33)",
                "Interleukin-35 (IL-35)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Leukemia Inhibitory Factor (LIF)",
                "Lymphotoxin-alpha (LT-\u03b1)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-8 (IL-8/CXCL8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-15 (IL-15)",
            "Interleukin-17A (IL-17A)",
            "Interleukin-17F (IL-17F)",
            "Interleukin-18 (IL-18)",
            "Interleukin-21 (IL-21)",
            "Interleukin-22 (IL-22)",
            "Interleukin-23 (IL-23)",
            "Interferon-gamma (IFN-\u03b3)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Macrophage Colony-Stimulating Factor (M-CSF)",
            "Oncostatin M (OSM)",
            "Lymphotoxin-alpha (LT-\u03b1/TNF-\u03b2)",
            "Transforming Growth Factor beta (TGF-\u03b2) - *context dependent*",
            "High Mobility Group Box 1 (HMGB1)",
            "Prostaglandin E2 (PGE2)",
            "Leukotriene B4 (LTB4)",
            "Platelet Activating Factor (PAF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-8 (IL-8/CXCL8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17A (IL-17A)",
                "Interleukin-17F (IL-17F)",
                "Interleukin-18 (IL-18)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interferon-gamma (IFN-\u03b3)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Oncostatin M (OSM)",
                "Lymphotoxin-alpha (LT-\u03b1/TNF-\u03b2)",
                "Transforming Growth Factor beta (TGF-\u03b2) - *context dependent*",
                "High Mobility Group Box 1 (HMGB1)",
                "Prostaglandin E2 (PGE2)",
                "Leukotriene B4 (LTB4)",
                "Platelet Activating Factor (PAF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "High Mobility Group Box 1 (HMGB1)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17A (IL-17A)",
                "Interleukin-17F (IL-17F)",
                "Interleukin-18 (IL-18)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8/CXCL8)",
                "Leukotriene B4 (LTB4)",
                "Lymphotoxin-alpha (LT-\u03b1/TNF-\u03b2)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Oncostatin M (OSM)",
                "Platelet Activating Factor (PAF)",
                "Prostaglandin E2 (PGE2)",
                "Transforming Growth Factor beta (TGF-\u03b2) - *context dependent*",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Tumor Necrosis Factor alpha (TNF-\u03b1)",
            "Tumor Necrosis Factor beta (TNF-\u03b2)",
            "Interferon gamma (IFN-\u03b3)",
            "Macrophage Migration Inhibitory Factor (MIF)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Interleukin-15 (IL-15)",
            "Interleukin-22 (IL-22)",
            "Interleukin-1 receptor antagonist (IL-1Ra)",
            "Interleukin-31 (IL-31)",
            "Interleukin-33 (IL-33)",
            "Interleukin-36 (IL-36)",
            "Interleukin-37 (IL-37)",
            "Interleukin-38 (IL-38)",
            "Interleukin-39 (IL-39)",
            "Interleukin-41 (IL-41)",
            "Interleukin-42 (IL-42)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Tumor Necrosis Factor alpha (TNF-\u03b1)",
                "Tumor Necrosis Factor beta (TNF-\u03b2)",
                "Interferon gamma (IFN-\u03b3)",
                "Macrophage Migration Inhibitory Factor (MIF)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interleukin-15 (IL-15)",
                "Interleukin-22 (IL-22)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-31 (IL-31)",
                "Interleukin-33 (IL-33)",
                "Interleukin-36 (IL-36)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-39 (IL-39)",
                "Interleukin-41 (IL-41)",
                "Interleukin-42 (IL-42)"
            ],
            "mismatches": [],
            "true_referents": [
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-31 (IL-31)",
                "Interleukin-33 (IL-33)",
                "Interleukin-36 (IL-36)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-39 (IL-39)",
                "Interleukin-41 (IL-41)",
                "Interleukin-42 (IL-42)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Macrophage Migration Inhibitory Factor (MIF)",
                "Tumor Necrosis Factor alpha (TNF-\u03b1)",
                "Tumor Necrosis Factor beta (TNF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-8 (IL-8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-18 (IL-18)",
            "Interferon-gamma (IFN-\u03b3)",
            "Interleukin-17 (IL-17)",
            "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
            "Interleukin-23 (IL-23)",
            "Interleukin-33 (IL-33)",
            "Monocyte chemoattractant protein-1 (MCP-1)",
            "Macrophage inflammatory protein-1 alpha (MIP-1\u03b1)",
            "C-C motif chemokine ligand 2 (CCL2)",
            "C-C motif chemokine ligand 3 (CCL3)",
            "C-C motif chemokine ligand 5 (CCL5)",
            "Interleukin-15 (IL-15)",
            "Interleukin-21 (IL-21)",
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Vascular endothelial growth factor (VEGF)",
            "Chemokine (C-X-C motif) ligand 1 (CXCL1)",
            "Chemokine (C-X-C motif) ligand 2 (CXCL2)",
            "Tumor necrosis factor receptor 1 (TNFR1)",
            "Interleukin-4 (IL-4)",
            "Interleukin-2 (IL-2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-8 (IL-8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-18 (IL-18)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-17 (IL-17)",
                "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Monocyte chemoattractant protein-1 (MCP-1)",
                "Macrophage inflammatory protein-1 alpha (MIP-1\u03b1)",
                "C-C motif chemokine ligand 2 (CCL2)",
                "C-C motif chemokine ligand 3 (CCL3)",
                "C-C motif chemokine ligand 5 (CCL5)",
                "Interleukin-15 (IL-15)",
                "Interleukin-21 (IL-21)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Vascular endothelial growth factor (VEGF)",
                "Chemokine (C-X-C motif) ligand 1 (CXCL1)",
                "Chemokine (C-X-C motif) ligand 2 (CXCL2)",
                "Tumor necrosis factor receptor 1 (TNFR1)",
                "Interleukin-4 (IL-4)"
            ],
            "mismatches": [
                "Interleukin-2 (IL-2)"
            ],
            "true_referents": [
                "C-C motif chemokine ligand 2 (CCL2)",
                "C-C motif chemokine ligand 3 (CCL3)",
                "C-C motif chemokine ligand 5 (CCL5)",
                "Chemokine (C-X-C motif) ligand 1 (CXCL1)",
                "Chemokine (C-X-C motif) ligand 2 (CXCL2)",
                "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-21 (IL-21)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Interleukin-4 (IL-4)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Macrophage inflammatory protein-1 alpha (MIP-1\u03b1)",
                "Monocyte chemoattractant protein-1 (MCP-1)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Tumor necrosis factor receptor 1 (TNFR1)",
                "Vascular endothelial growth factor (VEGF)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin 1 (IL-1)",
            "Interleukin 2 (IL-2)",
            "Interleukin 6 (IL-6)",
            "Interleukin 8 (IL-8)",
            "Interleukin 12 (IL-12)",
            "Interleukin 17 (IL-17)",
            "Interleukin 18 (IL-18)",
            "Interleukin 23 (IL-23)",
            "Interleukin 32 (IL-32)",
            "Tumor Necrosis Factor alpha (TNF-\u03b1)",
            "Tumor Necrosis Factor beta (TNF-\u03b2)",
            "Interferon gamma (IFN-\u03b3)",
            "Interferon beta (IFN-\u03b2)",
            "Interferon lambda (IFN-\u03bb)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Granulocyte Colony-Stimulating Factor (G-CSF)",
            "Macrophage Colony-Stimulating Factor (M-CSF)",
            "Macrophage Inflammatory Protein 1 alpha (MIP-1\u03b1)",
            "Macrophage Inflammatory Protein 1 beta (MIP-1\u03b2)",
            "Monocyte Chemoattractant Protein 1 (MCP-1)",
            "Regulated on Activation, Normal T Cell Expressed and Secreted (RANTES)",
            "Leukemia Inhibitory Factor (LIF)",
            "Oncostatin M (OSM)",
            "Vascular Endothelial Growth Factor (VEGF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin 1 (IL-1)",
                "Interleukin 2 (IL-2)",
                "Interleukin 6 (IL-6)",
                "Interleukin 8 (IL-8)",
                "Interleukin 12 (IL-12)",
                "Interleukin 17 (IL-17)",
                "Interleukin 18 (IL-18)",
                "Interleukin 23 (IL-23)",
                "Interleukin 32 (IL-32)",
                "Tumor Necrosis Factor alpha (TNF-\u03b1)",
                "Tumor Necrosis Factor beta (TNF-\u03b2)",
                "Interferon gamma (IFN-\u03b3)",
                "Interferon beta (IFN-\u03b2)",
                "Interferon lambda (IFN-\u03bb)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Macrophage Inflammatory Protein 1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein 1 beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein 1 (MCP-1)",
                "Regulated on Activation, Normal T Cell Expressed and Secreted (RANTES)",
                "Leukemia Inhibitory Factor (LIF)",
                "Oncostatin M (OSM)",
                "Vascular Endothelial Growth Factor (VEGF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Granulocyte Colony-Stimulating Factor (G-CSF)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon beta (IFN-\u03b2)",
                "Interferon gamma (IFN-\u03b3)",
                "Interferon lambda (IFN-\u03bb)",
                "Interleukin 1 (IL-1)",
                "Interleukin 12 (IL-12)",
                "Interleukin 17 (IL-17)",
                "Interleukin 18 (IL-18)",
                "Interleukin 2 (IL-2)",
                "Interleukin 23 (IL-23)",
                "Interleukin 32 (IL-32)",
                "Interleukin 6 (IL-6)",
                "Interleukin 8 (IL-8)",
                "Leukemia Inhibitory Factor (LIF)",
                "Macrophage Colony-Stimulating Factor (M-CSF)",
                "Macrophage Inflammatory Protein 1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein 1 beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein 1 (MCP-1)",
                "Oncostatin M (OSM)",
                "Regulated on Activation, Normal T Cell Expressed and Secreted (RANTES)",
                "Tumor Necrosis Factor alpha (TNF-\u03b1)",
                "Tumor Necrosis Factor beta (TNF-\u03b2)",
                "Vascular Endothelial Growth Factor (VEGF)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Tumor Necrosis Factor-alpha (TNF-alpha)",
            "Interleukin-1 beta (IL-1beta)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Interleukin-33 (IL-33)",
            "Interferon-gamma (IFN-gamma)",
            "Interferon-alpha (IFN-alpha)",
            "Interferon-beta (IFN-beta)",
            "Tumor Necrosis Factor-beta (TNF-beta)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1alpha)",
            "Macrophage Inflammatory Protein-1 beta (MIP-1beta)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Transforming Growth Factor-beta (TGF-beta)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Nuclear Factor-kappa B (NF-kB)",
            "C-reactive protein (CRP)",
            "Prostaglandin E2 (PGE2)",
            "Leukotriene B4 (LTB4)",
            "Toll-like receptor 4 (TLR4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tumor Necrosis Factor-alpha (TNF-alpha)",
                "Interleukin-1 beta (IL-1beta)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Interferon-gamma (IFN-gamma)",
                "Interferon-alpha (IFN-alpha)",
                "Interferon-beta (IFN-beta)",
                "Tumor Necrosis Factor-beta (TNF-beta)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1alpha)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1beta)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Transforming Growth Factor-beta (TGF-beta)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Nuclear Factor-kappa B (NF-kB)",
                "C-reactive protein (CRP)",
                "Prostaglandin E2 (PGE2)",
                "Leukotriene B4 (LTB4)"
            ],
            "mismatches": [
                "Toll-like receptor 4 (TLR4)"
            ],
            "true_referents": [
                "C-reactive protein (CRP)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-alpha (IFN-alpha)",
                "Interferon-beta (IFN-beta)",
                "Interferon-gamma (IFN-gamma)",
                "Interleukin-1 beta (IL-1beta)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Leukotriene B4 (LTB4)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1alpha)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1beta)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Nuclear Factor-kappa B (NF-kB)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Prostaglandin E2 (PGE2)",
                "Transforming Growth Factor-beta (TGF-beta)",
                "Tumor Necrosis Factor-alpha (TNF-alpha)",
                "Tumor Necrosis Factor-beta (TNF-beta)",
                "Vascular Endothelial Growth Factor (VEGF)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interferon gamma (IFN-\u03b3)",
            "Interleukin-8 (IL-8)",
            "Interleukin-23 (IL-23)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Interleukin-18 (IL-18)",
            "Interleukin-33 (IL-33)",
            "Interleukin-2 (IL-2)",
            "Interleukin-15 (IL-15)",
            "Chemokine (C-C motif) ligand 2 (CCL2)",
            "Chemokine (C-C motif) ligand 5 (CCL5)",
            "Chemokine (C-C motif) ligand 20 (CCL20)",
            "Chemokine (C-X-C motif) ligand 10 (CXCL10)",
            "Interleukin-21 (IL-21)",
            "Interleukin-22 (IL-22)",
            "Interleukin-33 (IL-33)",
            "Interferon alpha (IFN-\u03b1)",
            "Interleukin-7 (IL-7)",
            "Interleukin-9 (IL-9)",
            "Interleukin-27 (IL-27)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interferon gamma (IFN-\u03b3)",
                "Interleukin-8 (IL-8)",
                "Interleukin-23 (IL-23)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interleukin-18 (IL-18)",
                "Interleukin-33 (IL-33)",
                "Interleukin-2 (IL-2)",
                "Interleukin-15 (IL-15)",
                "Chemokine (C-C motif) ligand 2 (CCL2)",
                "Chemokine (C-C motif) ligand 5 (CCL5)",
                "Chemokine (C-C motif) ligand 20 (CCL20)",
                "Chemokine (C-X-C motif) ligand 10 (CXCL10)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interferon alpha (IFN-\u03b1)",
                "Interleukin-7 (IL-7)",
                "Interleukin-9 (IL-9)",
                "Interleukin-27 (IL-27)"
            ],
            "mismatches": [],
            "true_referents": [
                "Chemokine (C-C motif) ligand 2 (CCL2)",
                "Chemokine (C-C motif) ligand 20 (CCL20)",
                "Chemokine (C-C motif) ligand 5 (CCL5)",
                "Chemokine (C-X-C motif) ligand 10 (CXCL10)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon alpha (IFN-\u03b1)",
                "Interferon gamma (IFN-\u03b3)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-2 (IL-2)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-27 (IL-27)",
                "Interleukin-33 (IL-33)",
                "Interleukin-6 (IL-6)",
                "Interleukin-7 (IL-7)",
                "Interleukin-8 (IL-8)",
                "Interleukin-9 (IL-9)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8)",
            "Interferon-gamma (IFN-\u03b3)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
            "RANTES (CCL5)",
            "Interleukin-2 (IL-2)",
            "Interleukin-15 (IL-15)",
            "Interleukin-32 (IL-32)",
            "Oncostatin M (OSM)",
            "Leukemia Inhibitory Factor (LIF)",
            "High Mobility Group Box 1 (HMGB1)",
            "C-X-C Motif Chemokine Ligand 10 (CXCL10)",
            "Interleukin-36 (IL-36)",
            "Thymic Stromal Lymphopoietin (TSLP)",
            "Interleukin-33 (IL-33)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "RANTES (CCL5)",
                "Interleukin-2 (IL-2)",
                "Interleukin-15 (IL-15)",
                "Interleukin-32 (IL-32)",
                "Oncostatin M (OSM)",
                "Leukemia Inhibitory Factor (LIF)",
                "High Mobility Group Box 1 (HMGB1)",
                "C-X-C Motif Chemokine Ligand 10 (CXCL10)",
                "Interleukin-36 (IL-36)",
                "Thymic Stromal Lymphopoietin (TSLP)",
                "Interleukin-33 (IL-33)"
            ],
            "mismatches": [],
            "true_referents": [
                "C-X-C Motif Chemokine Ligand 10 (CXCL10)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "High Mobility Group Box 1 (HMGB1)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-2 (IL-2)",
                "Interleukin-23 (IL-23)",
                "Interleukin-32 (IL-32)",
                "Interleukin-33 (IL-33)",
                "Interleukin-36 (IL-36)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Leukemia Inhibitory Factor (LIF)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Oncostatin M (OSM)",
                "RANTES (CCL5)",
                "Thymic Stromal Lymphopoietin (TSLP)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-15 (IL-15)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Tumor necrosis factor alpha (TNF-\u03b1)",
            "Tumor necrosis factor beta (TNF-\u03b2)",
            "Interferon alpha (IFN-\u03b1)",
            "Interferon beta (IFN-\u03b2)",
            "Interferon gamma (IFN-\u03b3)",
            "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
            "Macrophage inflammatory protein 1 alpha (MIP-1\u03b1)",
            "Macrophage inflammatory protein 1 beta (MIP-1\u03b2)",
            "Regulated upon activation, normal T cell expressed and secreted (RANTES)",
            "Monocyte chemoattractant protein 1 (MCP-1)",
            "Eotaxin",
            "Oncostatin M (OSM)",
            "Leukemia inhibitory factor (LIF)",
            "Cardiotrophin-1 (CT-1)",
            "Ciliary neurotrophic factor (CNTF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Tumor necrosis factor alpha (TNF-\u03b1)",
                "Tumor necrosis factor beta (TNF-\u03b2)",
                "Interferon alpha (IFN-\u03b1)",
                "Interferon beta (IFN-\u03b2)",
                "Interferon gamma (IFN-\u03b3)",
                "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
                "Macrophage inflammatory protein 1 alpha (MIP-1\u03b1)",
                "Macrophage inflammatory protein 1 beta (MIP-1\u03b2)",
                "Regulated upon activation, normal T cell expressed and secreted (RANTES)",
                "Monocyte chemoattractant protein 1 (MCP-1)",
                "Eotaxin",
                "Oncostatin M (OSM)",
                "Leukemia inhibitory factor (LIF)",
                "Cardiotrophin-1 (CT-1)",
                "Ciliary neurotrophic factor (CNTF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Cardiotrophin-1 (CT-1)",
                "Ciliary neurotrophic factor (CNTF)",
                "Eotaxin",
                "Granulocyte-macrophage colony-stimulating factor (GM-CSF)",
                "Interferon alpha (IFN-\u03b1)",
                "Interferon beta (IFN-\u03b2)",
                "Interferon gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-15 (IL-15)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Leukemia inhibitory factor (LIF)",
                "Macrophage inflammatory protein 1 alpha (MIP-1\u03b1)",
                "Macrophage inflammatory protein 1 beta (MIP-1\u03b2)",
                "Monocyte chemoattractant protein 1 (MCP-1)",
                "Oncostatin M (OSM)",
                "Regulated upon activation, normal T cell expressed and secreted (RANTES)",
                "Tumor necrosis factor alpha (TNF-\u03b1)",
                "Tumor necrosis factor beta (TNF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-8 (IL-8)",
            "Interleukin-17 (IL-17)",
            "Interleukin-18 (IL-18)",
            "Interleukin-12 (IL-12)",
            "Interleukin-23 (IL-23)",
            "Interferon-gamma (IFN-\u03b3)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Macrophage Migration Inhibitory Factor (MIF)",
            "High Mobility Group Box 1 (HMGB1)",
            "Interleukin-1 alpha (IL-1\u03b1)",
            "Interleukin-33 (IL-33)",
            "Interleukin-36 (IL-36)",
            "Oncostatin M (OSM)",
            "Leukemia Inhibitory Factor (LIF)",
            "Interleukin-11 (IL-11)",
            "Interleukin-32 (IL-32)",
            "Interleukin-37 (IL-37)",
            "Interleukin-27 (IL-27)",
            "Interleukin-35 (IL-35)",
            "Interleukin-7 (IL-7)",
            "Interleukin-9 (IL-9)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-8 (IL-8)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-12 (IL-12)",
                "Interleukin-23 (IL-23)",
                "Interferon-gamma (IFN-\u03b3)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Macrophage Migration Inhibitory Factor (MIF)",
                "High Mobility Group Box 1 (HMGB1)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-33 (IL-33)",
                "Interleukin-36 (IL-36)",
                "Oncostatin M (OSM)",
                "Leukemia Inhibitory Factor (LIF)",
                "Interleukin-11 (IL-11)",
                "Interleukin-32 (IL-32)",
                "Interleukin-37 (IL-37)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-7 (IL-7)",
                "Interleukin-9 (IL-9)"
            ],
            "mismatches": [],
            "true_referents": [
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "High Mobility Group Box 1 (HMGB1)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 alpha (IL-1\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-11 (IL-11)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-27 (IL-27)",
                "Interleukin-32 (IL-32)",
                "Interleukin-33 (IL-33)",
                "Interleukin-35 (IL-35)",
                "Interleukin-36 (IL-36)",
                "Interleukin-37 (IL-37)",
                "Interleukin-6 (IL-6)",
                "Interleukin-7 (IL-7)",
                "Interleukin-8 (IL-8)",
                "Interleukin-9 (IL-9)",
                "Leukemia Inhibitory Factor (LIF)",
                "Macrophage Migration Inhibitory Factor (MIF)",
                "Oncostatin M (OSM)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 (IL-1)",
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8)",
            "Interleukin-12 (IL-12)",
            "Interleukin-17 (IL-17)",
            "Interferon-gamma (IFN-\u03b3)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Regulated on Activation, Normal T Expressed and Secreted (RANTES)",
            "Eotaxin",
            "Interleukin-18 (IL-18)",
            "Interleukin-23 (IL-23)",
            "Interleukin-33 (IL-33)",
            "High Mobility Group Box 1 (HMGB1)",
            "Prostaglandin E2 (PGE2)",
            "Leukotriene B4 (LTB4)",
            "Platelet-Activating Factor (PAF)",
            "Thromboxane A2 (TXA2)",
            "Bradykinin",
            "Histamine",
            "Serotonin",
            "Substance P"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 (IL-1)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interferon-gamma (IFN-\u03b3)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Regulated on Activation, Normal T Expressed and Secreted (RANTES)",
                "Eotaxin",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "High Mobility Group Box 1 (HMGB1)",
                "Prostaglandin E2 (PGE2)",
                "Leukotriene B4 (LTB4)",
                "Platelet-Activating Factor (PAF)",
                "Thromboxane A2 (TXA2)",
                "Bradykinin",
                "Histamine",
                "Serotonin",
                "Substance P"
            ],
            "mismatches": [],
            "true_referents": [
                "Bradykinin",
                "Eotaxin",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "High Mobility Group Box 1 (HMGB1)",
                "Histamine",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 (IL-1)",
                "Interleukin-12 (IL-12)",
                "Interleukin-17 (IL-17)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Leukotriene B4 (LTB4)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Platelet-Activating Factor (PAF)",
                "Prostaglandin E2 (PGE2)",
                "Regulated on Activation, Normal T Expressed and Secreted (RANTES)",
                "Serotonin",
                "Substance P",
                "Thromboxane A2 (TXA2)",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
            "Interleukin-1 beta (IL-1\u03b2)",
            "Interleukin-6 (IL-6)",
            "Interleukin-8 (IL-8)",
            "Tumor Necrosis Factor-beta (TNF-\u03b2)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Interferon-gamma (IFN-\u03b3)",
            "Interleukin-12 (IL-12)",
            "Interleukin-18 (IL-18)",
            "Monocyte Chemoattractant Protein-1 (MCP-1)",
            "Interleukin-23 (IL-23)",
            "Interleukin-33 (IL-33)",
            "Interleukin-36 (IL-36)",
            "Interleukin-37 (IL-37)",
            "Interleukin-38 (IL-38)",
            "Interleukin-39 (IL-39)",
            "Interleukin-40 (IL-40)",
            "Interleukin-41 (IL-41)",
            "Granulocyte-Colony Stimulating Factor (G-CSF)",
            "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
            "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
            "Macrophage Inflammatory Protein-1 gamma (MIP-1\u03b3)",
            "Eotaxin",
            "RANTES"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Tumor Necrosis Factor-beta (TNF-\u03b2)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-12 (IL-12)",
                "Interleukin-18 (IL-18)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Interleukin-36 (IL-36)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-39 (IL-39)",
                "Interleukin-40 (IL-40)",
                "Interleukin-41 (IL-41)",
                "Granulocyte-Colony Stimulating Factor (G-CSF)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Macrophage Inflammatory Protein-1 gamma (MIP-1\u03b3)",
                "Eotaxin",
                "RANTES"
            ],
            "mismatches": [],
            "true_referents": [
                "Eotaxin",
                "Granulocyte-Colony Stimulating Factor (G-CSF)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interleukin-1 beta (IL-1\u03b2)",
                "Interleukin-12 (IL-12)",
                "Interleukin-18 (IL-18)",
                "Interleukin-23 (IL-23)",
                "Interleukin-33 (IL-33)",
                "Interleukin-36 (IL-36)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-39 (IL-39)",
                "Interleukin-40 (IL-40)",
                "Interleukin-41 (IL-41)",
                "Interleukin-6 (IL-6)",
                "Interleukin-8 (IL-8)",
                "Macrophage Inflammatory Protein-1 alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein-1 beta (MIP-1\u03b2)",
                "Macrophage Inflammatory Protein-1 gamma (MIP-1\u03b3)",
                "Monocyte Chemoattractant Protein-1 (MCP-1)",
                "RANTES",
                "Tumor Necrosis Factor-alpha (TNF-\u03b1)",
                "Tumor Necrosis Factor-beta (TNF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin 1 alpha (IL-1\u03b1)",
            "Interleukin 1 beta (IL-1\u03b2)",
            "Interleukin 2 (IL-2)",
            "Interleukin 6 (IL-6)",
            "Interleukin 8 (IL-8)",
            "Interleukin 11 (IL-11)",
            "Interleukin 12 (IL-12)",
            "Interleukin 15 (IL-15)",
            "Interleukin 17A (IL-17A)",
            "Interleukin 17F (IL-17F)",
            "Interleukin 21 (IL-21)",
            "Interleukin 22 (IL-22)",
            "Interleukin 23 (IL-23)",
            "Tumor Necrosis Factor Alpha (TNF-\u03b1)",
            "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
            "Macrophage Inflammatory Protein 1 Alpha (MIP-1\u03b1)",
            "Macrophage Inflammatory Protein 1 Beta (MIP-1\u03b2)",
            "Monocyte Chemoattractant Protein 1 (MCP-1)",
            "Interferon Gamma (IFN-\u03b3)",
            "Oncostatin M (OSM)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Epiregulin (EPR)",
            "Thrombopoietin (TPO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin 1 alpha (IL-1\u03b1)",
                "Interleukin 1 beta (IL-1\u03b2)",
                "Interleukin 2 (IL-2)",
                "Interleukin 6 (IL-6)",
                "Interleukin 8 (IL-8)",
                "Interleukin 11 (IL-11)",
                "Interleukin 12 (IL-12)",
                "Interleukin 15 (IL-15)",
                "Interleukin 17A (IL-17A)",
                "Interleukin 17F (IL-17F)",
                "Interleukin 21 (IL-21)",
                "Interleukin 22 (IL-22)",
                "Interleukin 23 (IL-23)",
                "Tumor Necrosis Factor Alpha (TNF-\u03b1)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Macrophage Inflammatory Protein 1 Alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein 1 Beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein 1 (MCP-1)",
                "Interferon Gamma (IFN-\u03b3)",
                "Oncostatin M (OSM)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Epiregulin (EPR)",
                "Thrombopoietin (TPO)"
            ],
            "mismatches": [],
            "true_referents": [
                "Epiregulin (EPR)",
                "Granulocyte-Macrophage Colony-Stimulating Factor (GM-CSF)",
                "Interferon Gamma (IFN-\u03b3)",
                "Interleukin 1 alpha (IL-1\u03b1)",
                "Interleukin 1 beta (IL-1\u03b2)",
                "Interleukin 11 (IL-11)",
                "Interleukin 12 (IL-12)",
                "Interleukin 15 (IL-15)",
                "Interleukin 17A (IL-17A)",
                "Interleukin 17F (IL-17F)",
                "Interleukin 2 (IL-2)",
                "Interleukin 21 (IL-21)",
                "Interleukin 22 (IL-22)",
                "Interleukin 23 (IL-23)",
                "Interleukin 6 (IL-6)",
                "Interleukin 8 (IL-8)",
                "Macrophage Inflammatory Protein 1 Alpha (MIP-1\u03b1)",
                "Macrophage Inflammatory Protein 1 Beta (MIP-1\u03b2)",
                "Monocyte Chemoattractant Protein 1 (MCP-1)",
                "Oncostatin M (OSM)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Thrombopoietin (TPO)",
                "Tumor Necrosis Factor Alpha (TNF-\u03b1)",
                "Vascular Endothelial Growth Factor (VEGF)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "pro-inflammatory cytokine",
        "domain": "biology",
        "response": "[\n        {\n            \"Referent\": \"Interleukin 1\" (IL-1),\n            \"Canonical name\": \"Interleukin-1\"\n        },\n        {\n            \"Referent\": \"Interleukin 6\" (IL-6),\n            \"Canonical name\": \"Interleukin-6\"\n        },\n        {\n            \"Referent\": \"Tumor necrosis factor-alpha\" (TNF-\u03b1),\n            \"Canonical name\": \"Tumor necrosis factor-alpha\"\n        },\n        {\n            \"Referent\": \"Interleukin 17\" (IL-17),\n            \"Canonical name\": \"Interleukin-17\"\n        },\n        {\n            \"Referent\": \"Interleukin 23\" (IL-23),\n            \"Canonical name\": \"Interleukin-23\"\n        },\n        {\n            \"Referent\": \"Interleukin 33\" (IL-33),\n            \"Canonical name\": \"Interleukin-33\"\n        },\n        {\n            \"Referent\": \"Interleukin 18\" (IL-18),\n            \"Canonical name\": \"Interleukin-18\"\n        },\n        {\n            \"Referent\": \"Interleukin 36\" (IL-36),\n            \"Canonical name\": \"Interleukin-36\"\n        },\n        {\n            \"Referent\": \"Interleukin 4\" (IL-4),\n            \"Canonical name\": \"Interleukin-4\"\n        },\n        {\n            \"Referent\": \"Interleukin 5\" (IL-5),\n            \"Canonical name\": \"Interleukin-5\"\n        },\n        {\n            \"Referent\": \"Interleukin 12\" (IL-12),\n            \"Canonical name\": \"Interleukin-12\"\n        },\n        {\n            \"Referent\": \"Interleukin 10\" (IL-10),\n            \"Canonical name\": \"Interleukin-10\"\n        },\n        {\n            \"Referent\": \"Interleukin 13\" (IL-13),\n            \"Canonical name\": \"Interleukin-13\"\n        },\n        {\n            \"Referent\": \"Interleukin 27\" (IL-27),\n            \"Canonical name\": \"Interleukin-27\"\n        },\n        {\n            \"Referent\": \"Interleukin 35\" (IL-35),\n            \"Canonical name\": \"Interleukin-35\"\n        },\n        {\n            \"Referent\": \"Interleukin 37\" (IL-37),\n            \"Canonical name\": \"Interleukin-37\"\n        },\n        {\n            \"Referent\": \"Interleukin 38\" (IL-38),\n            \"Canonical name\": \"Interleukin-38\"\n        },\n        {\n            \"Referent\": \"Interleukin 15\" (IL-15),\n            \"Canonical name\": \"Interleukin-15\"\n        },\n        {\n            \"Referent\": \"Interleukin 16\" (IL-16),\n            \"Canonical name\": \"Interleukin-16\"\n        },\n        {\n            \"Referent\": \"Interleukin 11\" (IL-11),\n            \"Canonical name\": \"Interleukin-11\"\n        },\n        {\n            \"Referent\": \"Interleukin 22\" (IL-22),\n            \"Canonical name\": \"Interleukin-22\"\n        },\n        {\n            \"Referent\": \"Interleukin 21\" (IL-21),\n            \"Canonical name\": \"Interleukin-21\"\n        },\n        {\n            \"Referent\": \"Interleukin 13\" (IL-13),\n            \"Canonical name\": \"Interleukin-13\"\n        },\n        {\n            \"Referent\": \"Interleukin 20\" (IL-20),\n            \"Canonical name\": \"Interleukin-20\"\n        },\n        {\n            \"Referent\": \"Interleukin 14\" (IL-14),\n            \"Canonical name\": \"Interleukin-14\"\n        },\n        {\n            \"Referent\": \"Interleukin 25\" (IL-25),\n            \"Canonical name\": \"Interleukin-25\"\n        },\n        {\n            \"Referent\": \"Interleukin 28\" (IL-28),\n            \"Canonical name\": \"Interleukin-28\"\n        },\n        {\n            \"Referent\": \"Interleukin 29\" (IL-29),\n            \"Canonical name\": \"Interleukin-29\"\n        }\n    ]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]