[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)",
            "Interleukin-1 receptor antagonist (IL-1Ra)",
            "Interleukin-18 binding protein (IL-18BP)",
            "Transforming growth factor-beta 1 (TGF-\u03b21)",
            "Transforming growth factor-beta 2 (TGF-\u03b22)",
            "Transforming growth factor-beta 3 (TGF-\u03b23)",
            "Interleukin-11 (IL-11)",
            "Interleukin-22 (IL-22)",
            "Interleukin-27 (IL-27)",
            "Interleukin-35 (IL-35)",
            "Interferon-gamma-induced protein 10 (IP-10)",
            "Monocyte chemoattractant protein-1 (MCP-1)",
            "Soluble TNF receptor type 1 (sTNF-R1)",
            "Soluble TNF receptor type 2 (sTNF-R2)",
            "Interleukin-1 decoy receptor (IL-1 decoy R)",
            "Interleukin-37 (IL-37)",
            "Interleukin-21 (IL-21)",
            "Interleukin-6 (IL-6)",
            "Interleukin-12 (IL-12)",
            "Interleukin-20 (IL-20)",
            "Interleukin-23 (IL-23)",
            "Interleukin-15 (IL-15)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-18 binding protein (IL-18BP)",
                "Transforming growth factor-beta 1 (TGF-\u03b21)",
                "Transforming growth factor-beta 2 (TGF-\u03b22)",
                "Transforming growth factor-beta 3 (TGF-\u03b23)",
                "Interleukin-11 (IL-11)",
                "Interleukin-22 (IL-22)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interferon-gamma-induced protein 10 (IP-10)",
                "Monocyte chemoattractant protein-1 (MCP-1)",
                "Soluble TNF receptor type 1 (sTNF-R1)",
                "Soluble TNF receptor type 2 (sTNF-R2)",
                "Interleukin-1 decoy receptor (IL-1 decoy R)",
                "Interleukin-37 (IL-37)",
                "Interleukin-21 (IL-21)",
                "Interleukin-6 (IL-6)",
                "Interleukin-12 (IL-12)",
                "Interleukin-20 (IL-20)",
                "Interleukin-23 (IL-23)",
                "Interleukin-15 (IL-15)"
            ],
            "mismatches": [],
            "true_referents": [
                "Interferon-gamma-induced protein 10 (IP-10)",
                "Interleukin-1 decoy receptor (IL-1 decoy R)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-12 (IL-12)",
                "Interleukin-13 (IL-13)",
                "Interleukin-15 (IL-15)",
                "Interleukin-18 binding protein (IL-18BP)",
                "Interleukin-20 (IL-20)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-4 (IL-4)",
                "Interleukin-6 (IL-6)",
                "Monocyte chemoattractant protein-1 (MCP-1)",
                "Soluble TNF receptor type 1 (sTNF-R1)",
                "Soluble TNF receptor type 2 (sTNF-R2)",
                "Transforming growth factor-beta 1 (TGF-\u03b21)",
                "Transforming growth factor-beta 2 (TGF-\u03b22)",
                "Transforming growth factor-beta 3 (TGF-\u03b23)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)",
            "Interleukin-1 receptor antagonist (IL-1Ra)",
            "Transforming growth factor-beta (TGF-\u03b2)",
            "Interleukin-11 (IL-11)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Resistin-like molecule alpha (RELM\u03b1)",
            "Interleukin-22 (IL-22)",
            "Interleukin-27 (IL-27)",
            "Interleukin-9 (IL-9)",
            "Macrophage migration inhibitory factor (MIF)",
            "Interleukin-23 (IL-23)",
            "Interleukin-2 (IL-2)",
            "Interleukin-6 (IL-6) - in certain contexts",
            "Interleukin-17A (IL-17A) - in certain contexts",
            "Interleukin-21 (IL-21)",
            "Interleukin-23 (IL-23) - in certain contexts",
            "Interleukin-17F (IL-17F) - in certain contexts",
            "Interleukin-17E (IL-17E, also known as IL-25)",
            "Interleukin-29 (IL-29)",
            "Interleukin-28A (IL-28A)",
            "Interleukin-28B (IL-28B)",
            "Interleukin-30 (IL-30)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Transforming growth factor-beta (TGF-\u03b2)",
                "Interleukin-11 (IL-11)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Resistin-like molecule alpha (RELM\u03b1)",
                "Interleukin-22 (IL-22)",
                "Interleukin-27 (IL-27)",
                "Interleukin-9 (IL-9)",
                "Macrophage migration inhibitory factor (MIF)",
                "Interleukin-23 (IL-23)",
                "Interleukin-2 (IL-2)",
                "Interleukin-6 (IL-6) - in certain contexts",
                "Interleukin-17A (IL-17A) - in certain contexts",
                "Interleukin-21 (IL-21)",
                "Interleukin-23 (IL-23) - in certain contexts",
                "Interleukin-17F (IL-17F) - in certain contexts",
                "Interleukin-17E (IL-17E, also known as IL-25)",
                "Interleukin-29 (IL-29)",
                "Interleukin-28A (IL-28A)",
                "Interleukin-28B (IL-28B)"
            ],
            "mismatches": [
                "Interleukin-30 (IL-30)"
            ],
            "true_referents": [
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-17A (IL-17A) - in certain contexts",
                "Interleukin-17E (IL-17E, also known as IL-25)",
                "Interleukin-17F (IL-17F) - in certain contexts",
                "Interleukin-2 (IL-2)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-23 (IL-23) - in certain contexts",
                "Interleukin-27 (IL-27)",
                "Interleukin-28A (IL-28A)",
                "Interleukin-28B (IL-28B)",
                "Interleukin-29 (IL-29)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-4 (IL-4)",
                "Interleukin-6 (IL-6) - in certain contexts",
                "Interleukin-9 (IL-9)",
                "Macrophage migration inhibitory factor (MIF)",
                "Resistin-like molecule alpha (RELM\u03b1)",
                "Transforming growth factor-beta (TGF-\u03b2)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Interleukin-1 receptor antagonist (IL-1Ra)",
            "Transforming growth factor-beta (TGF-\u03b2)",
            "Interleukin-11 (IL-11)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-22 (IL-22)",
            "Interleukin-24 (IL-24)",
            "Interleukin-27 (IL-27)",
            "Interleukin-33 (IL-33)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Interleukin-38 (IL-38)",
            "Interleukin-39 (IL-39)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Interferon-gamma (IFN-\u03b3)",
            "Interferon-omega (IFN-\u03c9)",
            "Interferon-lambda (IFN-\u03bb)",
            "Interferon-tau (IFN-\u03c4)",
            "Macrophage migration inhibitory factor (MIF)",
            "Vascular endothelial growth factor (VEGF)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Transforming growth factor-beta (TGF-\u03b2)",
                "Interleukin-11 (IL-11)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-27 (IL-27)",
                "Interleukin-33 (IL-33)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-39 (IL-39)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interferon-omega (IFN-\u03c9)",
                "Interferon-lambda (IFN-\u03bb)",
                "Interferon-tau (IFN-\u03c4)",
                "Macrophage migration inhibitory factor (MIF)",
                "Vascular endothelial growth factor (VEGF)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)"
            ],
            "mismatches": [],
            "true_referents": [
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interferon-gamma (IFN-\u03b3)",
                "Interferon-lambda (IFN-\u03bb)",
                "Interferon-omega (IFN-\u03c9)",
                "Interferon-tau (IFN-\u03c4)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-27 (IL-27)",
                "Interleukin-33 (IL-33)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-39 (IL-39)",
                "Interleukin-4 (IL-4)",
                "Macrophage migration inhibitory factor (MIF)",
                "Transforming growth factor-beta (TGF-\u03b2)",
                "Vascular endothelial growth factor (VEGF)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-1 Receptor Antagonist (IL-1Ra)",
            "Interleukin-10 (IL-10)",
            "Transforming Growth Factor beta 1 (TGF-\u03b21)",
            "Transforming Growth Factor beta 2 (TGF-\u03b22)",
            "Transforming Growth Factor beta 3 (TGF-\u03b23)",
            "Interleukin-35 (IL-35)",
            "Lipoxin A4 (LXA4)",
            "Resolvin E1 (RvE1)",
            "Protectin D1 (PD1)",
            "Maresin 1 (MaR1)",
            "Prostaglandin D2 (PGD2)",
            "Interleukin-12 (IL-12) - induces IFN-\u03b3 production which has anti-inflammatory effects in certain contexts",
            "Fibroblast Growth Factor 21 (FGF21)",
            "Heme Oxygenase-1 (HO-1) - induces IL-10 production",
            "Cortisol (hydrocortisone) - a glucocorticoid with broad anti-inflammatory effects",
            "Resolvin D1 (RvD1)",
            "Resolvin D2 (RvD2)",
            "Protectin DX (PDX)",
            "Maresin 2 (MaR2)",
            "Interleukin-27 (IL-27)",
            "Adenosine",
            "Nitric Oxide (NO) - in specific contexts",
            "Galectin-1 (Gal-1)",
            "Suppressor of Cytokine Signaling 3 (SOCS3) - induced by IL-10"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Transforming Growth Factor beta 1 (TGF-\u03b21)",
                "Transforming Growth Factor beta 2 (TGF-\u03b22)",
                "Transforming Growth Factor beta 3 (TGF-\u03b23)",
                "Interleukin-35 (IL-35)",
                "Lipoxin A4 (LXA4)",
                "Resolvin E1 (RvE1)",
                "Protectin D1 (PD1)",
                "Maresin 1 (MaR1)",
                "Prostaglandin D2 (PGD2)",
                "Interleukin-12 (IL-12) - induces IFN-\u03b3 production which has anti-inflammatory effects in certain contexts",
                "Fibroblast Growth Factor 21 (FGF21)",
                "Heme Oxygenase-1 (HO-1) - induces IL-10 production",
                "Cortisol (hydrocortisone) - a glucocorticoid with broad anti-inflammatory effects",
                "Resolvin D1 (RvD1)",
                "Resolvin D2 (RvD2)",
                "Protectin DX (PDX)",
                "Maresin 2 (MaR2)",
                "Interleukin-27 (IL-27)",
                "Adenosine",
                "Nitric Oxide (NO) - in specific contexts",
                "Galectin-1 (Gal-1)",
                "Suppressor of Cytokine Signaling 3 (SOCS3) - induced by IL-10"
            ],
            "mismatches": [],
            "true_referents": [
                "Adenosine",
                "Cortisol (hydrocortisone) - a glucocorticoid with broad anti-inflammatory effects",
                "Fibroblast Growth Factor 21 (FGF21)",
                "Galectin-1 (Gal-1)",
                "Heme Oxygenase-1 (HO-1) - induces IL-10 production",
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-12 (IL-12) - induces IFN-\u03b3 production which has anti-inflammatory effects in certain contexts",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Lipoxin A4 (LXA4)",
                "Maresin 1 (MaR1)",
                "Maresin 2 (MaR2)",
                "Nitric Oxide (NO) - in specific contexts",
                "Prostaglandin D2 (PGD2)",
                "Protectin D1 (PD1)",
                "Protectin DX (PDX)",
                "Resolvin D1 (RvD1)",
                "Resolvin D2 (RvD2)",
                "Resolvin E1 (RvE1)",
                "Suppressor of Cytokine Signaling 3 (SOCS3) - induced by IL-10",
                "Transforming Growth Factor beta 1 (TGF-\u03b21)",
                "Transforming Growth Factor beta 2 (TGF-\u03b22)",
                "Transforming Growth Factor beta 3 (TGF-\u03b23)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Transforming Growth Factor Beta (TGF-\u03b2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)",
            "Interleukin-1 Receptor Antagonist (IL-1Ra)",
            "Interleukin-11 (IL-11)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-22 (IL-22)",
            "Interleukin-24 (IL-24)",
            "Interleukin-27 (IL-27)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Interleukin-38 (IL-38)",
            "Interleukin-9 (IL-9)",
            "Interleukin-21 (IL-21)",
            "Interleukin-23 (IL-23)",
            "Interleukin-28A (IL-28A)",
            "Interleukin-28B (IL-28B)",
            "Interleukin-29 (IL-29)",
            "Interleukin-31 (IL-31)",
            "Interleukin-32 (IL-32)",
            "Interleukin-33 (IL-33)",
            "Interleukin-34 (IL-34)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Transforming Growth Factor Beta (TGF-\u03b2)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)",
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-11 (IL-11)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-9 (IL-9)",
                "Interleukin-21 (IL-21)",
                "Interleukin-23 (IL-23)",
                "Interleukin-28A (IL-28A)",
                "Interleukin-28B (IL-28B)",
                "Interleukin-29 (IL-29)",
                "Interleukin-31 (IL-31)",
                "Interleukin-32 (IL-32)",
                "Interleukin-33 (IL-33)",
                "Interleukin-34 (IL-34)"
            ],
            "mismatches": [],
            "true_referents": [
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-21 (IL-21)",
                "Interleukin-22 (IL-22)",
                "Interleukin-23 (IL-23)",
                "Interleukin-24 (IL-24)",
                "Interleukin-27 (IL-27)",
                "Interleukin-28A (IL-28A)",
                "Interleukin-28B (IL-28B)",
                "Interleukin-29 (IL-29)",
                "Interleukin-31 (IL-31)",
                "Interleukin-32 (IL-32)",
                "Interleukin-33 (IL-33)",
                "Interleukin-34 (IL-34)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-4 (IL-4)",
                "Interleukin-9 (IL-9)",
                "Transforming Growth Factor Beta (TGF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Transforming growth factor-beta (TGF-\u03b2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)",
            "Interleukin-1 receptor antagonist (IL-1ra)",
            "Interleukin-6 (IL-6)",
            "Interleukin-11 (IL-11)",
            "Interleukin-35 (IL-35)",
            "Interleukin-9 (IL-9)",
            "Interleukin-20 (IL-20)",
            "Interleukin-22 (IL-22)",
            "Interleukin-27 (IL-27)",
            "Interleukin-33 (IL-33)",
            "Interleukin-25 (IL-25)",
            "Macrophage inhibitory factor (MIF)",
            "Vascular endothelial growth factor (VEGF)",
            "Prostaglandin E2 (PGE2)",
            "Resistin (RETN)",
            "Adiponectin (ADIPOQ)",
            "Nerve growth factor (NGF)",
            "Hepatocyte growth factor (HGF)",
            "Platelet-derived growth factor (PDGF)",
            "Fibroblast growth factor (FGF)",
            "Bone morphogenetic protein (BMP)",
            "Insulin-like growth factor (IGF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Transforming growth factor-beta (TGF-\u03b2)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)",
                "Interleukin-1 receptor antagonist (IL-1ra)",
                "Interleukin-6 (IL-6)",
                "Interleukin-11 (IL-11)",
                "Interleukin-35 (IL-35)",
                "Interleukin-9 (IL-9)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-27 (IL-27)",
                "Interleukin-33 (IL-33)",
                "Interleukin-25 (IL-25)",
                "Macrophage inhibitory factor (MIF)",
                "Vascular endothelial growth factor (VEGF)",
                "Prostaglandin E2 (PGE2)",
                "Resistin (RETN)",
                "Adiponectin (ADIPOQ)",
                "Nerve growth factor (NGF)",
                "Hepatocyte growth factor (HGF)",
                "Platelet-derived growth factor (PDGF)",
                "Fibroblast growth factor (FGF)",
                "Bone morphogenetic protein (BMP)"
            ],
            "mismatches": [
                "Insulin-like growth factor (IGF)"
            ],
            "true_referents": [
                "Adiponectin (ADIPOQ)",
                "Bone morphogenetic protein (BMP)",
                "Fibroblast growth factor (FGF)",
                "Hepatocyte growth factor (HGF)",
                "Interleukin-1 receptor antagonist (IL-1ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-25 (IL-25)",
                "Interleukin-27 (IL-27)",
                "Interleukin-33 (IL-33)",
                "Interleukin-35 (IL-35)",
                "Interleukin-4 (IL-4)",
                "Interleukin-6 (IL-6)",
                "Interleukin-9 (IL-9)",
                "Macrophage inhibitory factor (MIF)",
                "Nerve growth factor (NGF)",
                "Platelet-derived growth factor (PDGF)",
                "Prostaglandin E2 (PGE2)",
                "Resistin (RETN)",
                "Transforming growth factor-beta (TGF-\u03b2)",
                "Vascular endothelial growth factor (VEGF)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin 4 (IL-4)",
            "Interleukin 10 (IL-10)",
            "Interleukin 11 (IL-11)",
            "Interleukin 13 (IL-13)",
            "Interleukin 1 Receptor Antagonist (IL-1Ra)",
            "Transforming Growth Factor Beta (TGF-\u03b2)",
            "Interleukin 35 (IL-35)",
            "Interleukin 37 (IL-37)",
            "Interleukin 38 (IL-38)",
            "Interferon Beta (IFN-\u03b2)",
            "Interleukin 19 (IL-19)",
            "Interleukin 20 (IL-20)",
            "Interleukin 22 (IL-22)",
            "Interleukin 24 (IL-24)",
            "Interleukin 26 (IL-26)",
            "Interleukin 27 (IL-27)",
            "Interleukin 28A (IL-28A)",
            "Interleukin 28B (IL-28B)",
            "Interleukin 29 (IL-29)",
            "Interleukin 33 (IL-33)",
            "Interleukin 36 Receptor Antagonist (IL-36Ra)",
            "Interleukin 1 Family, Member 10 (IL1F10)",
            "Interleukin 1 Family, Member 9 (IL1F9)",
            "Interleukin 1 Family, Member 8 (IL1F8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin 4 (IL-4)",
                "Interleukin 10 (IL-10)",
                "Interleukin 11 (IL-11)",
                "Interleukin 13 (IL-13)",
                "Interleukin 1 Receptor Antagonist (IL-1Ra)",
                "Transforming Growth Factor Beta (TGF-\u03b2)",
                "Interleukin 35 (IL-35)",
                "Interleukin 37 (IL-37)",
                "Interleukin 38 (IL-38)",
                "Interferon Beta (IFN-\u03b2)",
                "Interleukin 19 (IL-19)",
                "Interleukin 20 (IL-20)",
                "Interleukin 22 (IL-22)",
                "Interleukin 24 (IL-24)",
                "Interleukin 26 (IL-26)",
                "Interleukin 27 (IL-27)",
                "Interleukin 28A (IL-28A)",
                "Interleukin 28B (IL-28B)",
                "Interleukin 29 (IL-29)",
                "Interleukin 33 (IL-33)",
                "Interleukin 36 Receptor Antagonist (IL-36Ra)",
                "Interleukin 1 Family, Member 10 (IL1F10)",
                "Interleukin 1 Family, Member 9 (IL1F9)",
                "Interleukin 1 Family, Member 8 (IL1F8)"
            ],
            "mismatches": [],
            "true_referents": [
                "Interferon Beta (IFN-\u03b2)",
                "Interleukin 1 Family, Member 10 (IL1F10)",
                "Interleukin 1 Family, Member 8 (IL1F8)",
                "Interleukin 1 Family, Member 9 (IL1F9)",
                "Interleukin 1 Receptor Antagonist (IL-1Ra)",
                "Interleukin 10 (IL-10)",
                "Interleukin 11 (IL-11)",
                "Interleukin 13 (IL-13)",
                "Interleukin 19 (IL-19)",
                "Interleukin 20 (IL-20)",
                "Interleukin 22 (IL-22)",
                "Interleukin 24 (IL-24)",
                "Interleukin 26 (IL-26)",
                "Interleukin 27 (IL-27)",
                "Interleukin 28A (IL-28A)",
                "Interleukin 28B (IL-28B)",
                "Interleukin 29 (IL-29)",
                "Interleukin 33 (IL-33)",
                "Interleukin 35 (IL-35)",
                "Interleukin 36 Receptor Antagonist (IL-36Ra)",
                "Interleukin 37 (IL-37)",
                "Interleukin 38 (IL-38)",
                "Interleukin 4 (IL-4)",
                "Transforming Growth Factor Beta (TGF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Transforming growth factor beta (TGF-\u03b2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)",
            "Interleukin-1 receptor antagonist (IL-1ra)",
            "Interleukin-11 (IL-11)",
            "Interleukin-37 (IL-37)",
            "Interleukin-27 (IL-27)",
            "Interleukin-35 (IL-35)",
            "Interleukin-38 (IL-38)",
            "Interleukin-19 (IL-19)",
            "Interleukin-24 (IL-24)",
            "Interleukin-1 receptor type 2 (IL-1R2)",
            "Interleukin-1 receptor type 4 (IL-1R4)",
            "Interleukin-1 receptor type 7 (IL-1R7)",
            "Interleukin-1 receptor type 9 (IL-1R9)",
            "Interleukin-1 receptor type 10 (IL-1R10)",
            "Interleukin-1 receptor type 11 (IL-1R11)",
            "Interleukin-1 receptor type 13 (IL-1R13)",
            "Interleukin-1 receptor type 18 (IL-1R18)",
            "Interleukin-1 receptor type 19 (IL-1R19)",
            "Interleukin-1 receptor type 20 (IL-1R20)",
            "Interleukin-1 receptor type 21 (IL-1R21)",
            "Interleukin-1 receptor type 22 (IL-1R22)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Transforming growth factor beta (TGF-\u03b2)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)",
                "Interleukin-1 receptor antagonist (IL-1ra)",
                "Interleukin-11 (IL-11)",
                "Interleukin-37 (IL-37)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-38 (IL-38)",
                "Interleukin-19 (IL-19)",
                "Interleukin-24 (IL-24)",
                "Interleukin-1 receptor type 2 (IL-1R2)",
                "Interleukin-1 receptor type 4 (IL-1R4)",
                "Interleukin-1 receptor type 7 (IL-1R7)",
                "Interleukin-1 receptor type 9 (IL-1R9)",
                "Interleukin-1 receptor type 10 (IL-1R10)",
                "Interleukin-1 receptor type 11 (IL-1R11)",
                "Interleukin-1 receptor type 13 (IL-1R13)",
                "Interleukin-1 receptor type 18 (IL-1R18)",
                "Interleukin-1 receptor type 19 (IL-1R19)",
                "Interleukin-1 receptor type 20 (IL-1R20)",
                "Interleukin-1 receptor type 21 (IL-1R21)",
                "Interleukin-1 receptor type 22 (IL-1R22)"
            ],
            "mismatches": [],
            "true_referents": [
                "Interleukin-1 receptor antagonist (IL-1ra)",
                "Interleukin-1 receptor type 10 (IL-1R10)",
                "Interleukin-1 receptor type 11 (IL-1R11)",
                "Interleukin-1 receptor type 13 (IL-1R13)",
                "Interleukin-1 receptor type 18 (IL-1R18)",
                "Interleukin-1 receptor type 19 (IL-1R19)",
                "Interleukin-1 receptor type 2 (IL-1R2)",
                "Interleukin-1 receptor type 20 (IL-1R20)",
                "Interleukin-1 receptor type 21 (IL-1R21)",
                "Interleukin-1 receptor type 22 (IL-1R22)",
                "Interleukin-1 receptor type 4 (IL-1R4)",
                "Interleukin-1 receptor type 7 (IL-1R7)",
                "Interleukin-1 receptor type 9 (IL-1R9)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-24 (IL-24)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-4 (IL-4)",
                "Transforming growth factor beta (TGF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Interleukin-1 receptor antagonist (IL-1Ra)",
            "Interleukin-11 (IL-11)",
            "Interleukin-27 (IL-27)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Interleukin-38 (IL-38)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-22 (IL-22)",
            "Interleukin-24 (IL-24)",
            "Interleukin-26 (IL-26)",
            "Interferon-beta (IFN-\u03b2)",
            "Interleukin-28A (IFN-\u03bb2)",
            "Interleukin-28B (IFN-\u03bb3)",
            "Interleukin-29 (IFN-\u03bb1)",
            "Soluble TNF receptor 1 (sTNFR1)",
            "Soluble TNF receptor 2 (sTNFR2)",
            "Soluble IL-1 receptor type II (sIL-1R2)",
            "Soluble IL-6 receptor (sIL-6R)",
            "Soluble gp130"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-11 (IL-11)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-26 (IL-26)",
                "Interferon-beta (IFN-\u03b2)",
                "Interleukin-28A (IFN-\u03bb2)",
                "Interleukin-28B (IFN-\u03bb3)",
                "Interleukin-29 (IFN-\u03bb1)",
                "Soluble TNF receptor 1 (sTNFR1)",
                "Soluble TNF receptor 2 (sTNFR2)",
                "Soluble IL-1 receptor type II (sIL-1R2)",
                "Soluble IL-6 receptor (sIL-6R)",
                "Soluble gp130"
            ],
            "mismatches": [],
            "true_referents": [
                "Interferon-beta (IFN-\u03b2)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-26 (IL-26)",
                "Interleukin-27 (IL-27)",
                "Interleukin-28A (IFN-\u03bb2)",
                "Interleukin-28B (IFN-\u03bb3)",
                "Interleukin-29 (IFN-\u03bb1)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-4 (IL-4)",
                "Soluble IL-1 receptor type II (sIL-1R2)",
                "Soluble IL-6 receptor (sIL-6R)",
                "Soluble TNF receptor 1 (sTNFR1)",
                "Soluble TNF receptor 2 (sTNFR2)",
                "Soluble gp130",
                "Transforming Growth Factor-beta (TGF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Transforming growth factor-beta (TGF-\u03b2)",
            "Interleukin-4 (IL-4)",
            "Interleukin-13 (IL-13)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Interleukin-1 receptor antagonist (IL-1Ra)",
            "Interleukin-22 (IL-22)",
            "Interleukin-27 (IL-27)",
            "Interleukin-38 (IL-38)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Growth differentiation factor 15 (GDF15)",
            "Melanocortin (\u03b1-MSH)",
            "Annexin A1",
            "Lipoxin A4",
            "Resolvin D1",
            "Resolvin E1",
            "Protectin D1",
            "Maresin 1",
            "Galectin-1",
            "Vasoactive intestinal peptide (VIP)",
            "Thymosin beta-4",
            "Urocortin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Transforming growth factor-beta (TGF-\u03b2)",
                "Interleukin-4 (IL-4)",
                "Interleukin-13 (IL-13)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-22 (IL-22)",
                "Interleukin-27 (IL-27)",
                "Interleukin-38 (IL-38)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Growth differentiation factor 15 (GDF15)",
                "Melanocortin (\u03b1-MSH)",
                "Annexin A1",
                "Lipoxin A4",
                "Resolvin D1",
                "Resolvin E1",
                "Protectin D1",
                "Maresin 1",
                "Galectin-1",
                "Vasoactive intestinal peptide (VIP)",
                "Thymosin beta-4",
                "Urocortin"
            ],
            "mismatches": [],
            "true_referents": [
                "Annexin A1",
                "Galectin-1",
                "Growth differentiation factor 15 (GDF15)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interleukin-1 receptor antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-13 (IL-13)",
                "Interleukin-22 (IL-22)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-4 (IL-4)",
                "Lipoxin A4",
                "Maresin 1",
                "Melanocortin (\u03b1-MSH)",
                "Protectin D1",
                "Resolvin D1",
                "Resolvin E1",
                "Thymosin beta-4",
                "Transforming growth factor-beta (TGF-\u03b2)",
                "Urocortin",
                "Vasoactive intestinal peptide (VIP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-4 (IL-4)",
            "Interleukin-10 (IL-10)",
            "Interleukin-11 (IL-11)",
            "Interleukin-13 (IL-13)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-22 (IL-22)",
            "Interleukin-24 (IL-24)",
            "Interleukin-27 (IL-27)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Interleukin-38 (IL-38)",
            "Transforming growth factor-beta 1 (TGF-\u03b21)",
            "Transforming growth factor-beta 2 (TGF-\u03b22)",
            "Transforming growth factor-beta 3 (TGF-\u03b23)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Interleukin-1 receptor antagonist (IL-1RA)",
            "Soluble tumor necrosis factor receptor 1 (sTNFR1)",
            "Soluble tumor necrosis factor receptor 2 (sTNFR2)",
            "Soluble interleukin-6 receptor (sIL-6R)",
            "Soluble glycoprotein 130 (sgp130)",
            "Chemokine (C-C motif) ligand 18 (CCL18)",
            "Interleukin-25 (IL-25)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-4 (IL-4)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Transforming growth factor-beta 1 (TGF-\u03b21)",
                "Transforming growth factor-beta 2 (TGF-\u03b22)",
                "Transforming growth factor-beta 3 (TGF-\u03b23)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interleukin-1 receptor antagonist (IL-1RA)",
                "Soluble tumor necrosis factor receptor 1 (sTNFR1)",
                "Soluble tumor necrosis factor receptor 2 (sTNFR2)",
                "Soluble interleukin-6 receptor (sIL-6R)",
                "Soluble glycoprotein 130 (sgp130)",
                "Chemokine (C-C motif) ligand 18 (CCL18)",
                "Interleukin-25 (IL-25)"
            ],
            "mismatches": [],
            "true_referents": [
                "Chemokine (C-C motif) ligand 18 (CCL18)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interleukin-1 receptor antagonist (IL-1RA)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-25 (IL-25)",
                "Interleukin-27 (IL-27)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-4 (IL-4)",
                "Soluble glycoprotein 130 (sgp130)",
                "Soluble interleukin-6 receptor (sIL-6R)",
                "Soluble tumor necrosis factor receptor 1 (sTNFR1)",
                "Soluble tumor necrosis factor receptor 2 (sTNFR2)",
                "Transforming growth factor-beta 1 (TGF-\u03b21)",
                "Transforming growth factor-beta 2 (TGF-\u03b22)",
                "Transforming growth factor-beta 3 (TGF-\u03b23)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-10 (IL-10)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Interleukin-11 (IL-11)",
            "Interleukin-13 (IL-13)",
            "Interleukin-1 Receptor Antagonist (IL-1Ra)",
            "Interleukin-4 (IL-4)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-22 (IL-22)",
            "Interleukin-24 (IL-24)",
            "Interleukin-26 (IL-26)",
            "Interleukin-27 (IL-27)",
            "Interleukin-28A (IL-28A)",
            "Interleukin-28B (IL-28B)",
            "Interleukin-29 (IL-29)",
            "Interleukin-36Ra (IL-36Ra)",
            "Interleukin-38 (IL-38)",
            "Leukemia Inhibitory Factor (LIF)",
            "Adiponectin",
            "Lipoxin A4",
            "Resolvin E1",
            "Annexin A1"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10 (IL-10)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-4 (IL-4)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-26 (IL-26)",
                "Interleukin-27 (IL-27)",
                "Interleukin-28A (IL-28A)",
                "Interleukin-28B (IL-28B)",
                "Interleukin-29 (IL-29)",
                "Interleukin-36Ra (IL-36Ra)",
                "Interleukin-38 (IL-38)",
                "Leukemia Inhibitory Factor (LIF)",
                "Adiponectin",
                "Lipoxin A4",
                "Resolvin E1",
                "Annexin A1"
            ],
            "mismatches": [],
            "true_referents": [
                "Adiponectin",
                "Annexin A1",
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-26 (IL-26)",
                "Interleukin-27 (IL-27)",
                "Interleukin-28A (IL-28A)",
                "Interleukin-28B (IL-28B)",
                "Interleukin-29 (IL-29)",
                "Interleukin-35 (IL-35)",
                "Interleukin-36Ra (IL-36Ra)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-4 (IL-4)",
                "Leukemia Inhibitory Factor (LIF)",
                "Lipoxin A4",
                "Resolvin E1",
                "Transforming Growth Factor-beta (TGF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin-4 (IL-4)",
            "Interleukin-10 (IL-10)",
            "Interleukin-11 (IL-11)",
            "Interleukin-13 (IL-13)",
            "Transforming Growth Factor-beta (TGF-\u03b2)",
            "Interleukin-1 Receptor Antagonist (IL-1Ra)",
            "Interleukin-35 (IL-35)",
            "Interleukin-37 (IL-37)",
            "Interferon-alpha (IFN-\u03b1)",
            "Interferon-beta (IFN-\u03b2)",
            "Leukemia Inhibitory Factor (LIF)",
            "Oncostatin M (OSM)",
            "Ciliary Neurotrophic Factor (CNTF)",
            "Cardiotrophin-1 (CT-1)",
            "Interleukin-19 (IL-19)",
            "Interleukin-20 (IL-20)",
            "Interleukin-22 (IL-22)",
            "Interleukin-24 (IL-24)",
            "Interleukin-26 (IL-26)",
            "Interleukin-27 (IL-27)",
            "Interleukin-28 (IL-28)",
            "Interleukin-29 (IL-29)",
            "Interleukin-38 (IL-38)",
            "Interleukin-1 Receptor Type 2 (IL-1R2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-4 (IL-4)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Transforming Growth Factor-beta (TGF-\u03b2)",
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Leukemia Inhibitory Factor (LIF)",
                "Oncostatin M (OSM)",
                "Ciliary Neurotrophic Factor (CNTF)",
                "Cardiotrophin-1 (CT-1)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-26 (IL-26)",
                "Interleukin-27 (IL-27)",
                "Interleukin-28 (IL-28)",
                "Interleukin-29 (IL-29)",
                "Interleukin-38 (IL-38)",
                "Interleukin-1 Receptor Type 2 (IL-1R2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Cardiotrophin-1 (CT-1)",
                "Ciliary Neurotrophic Factor (CNTF)",
                "Interferon-alpha (IFN-\u03b1)",
                "Interferon-beta (IFN-\u03b2)",
                "Interleukin-1 Receptor Antagonist (IL-1Ra)",
                "Interleukin-1 Receptor Type 2 (IL-1R2)",
                "Interleukin-10 (IL-10)",
                "Interleukin-11 (IL-11)",
                "Interleukin-13 (IL-13)",
                "Interleukin-19 (IL-19)",
                "Interleukin-20 (IL-20)",
                "Interleukin-22 (IL-22)",
                "Interleukin-24 (IL-24)",
                "Interleukin-26 (IL-26)",
                "Interleukin-27 (IL-27)",
                "Interleukin-28 (IL-28)",
                "Interleukin-29 (IL-29)",
                "Interleukin-35 (IL-35)",
                "Interleukin-37 (IL-37)",
                "Interleukin-38 (IL-38)",
                "Interleukin-4 (IL-4)",
                "Leukemia Inhibitory Factor (LIF)",
                "Oncostatin M (OSM)",
                "Transforming Growth Factor-beta (TGF-\u03b2)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "IL-10 (Interleukin-10)",
            "IL-4 (Interleukin-4)",
            "IL-13 (Interleukin-13)",
            "IL-22 (Interleukin-22)",
            "TGF-\u03b2 (Transforming Growth Factor Beta)",
            "IL-1ra (Interleukin-1 Receptor Antagonist)",
            "IL-10R\u03b1 (Interleukin-10 Receptor Alpha)",
            "IL-10R\u03b2 (Interleukin-10 Receptor Beta)",
            "IL-4R\u03b1 (Interleukin-4 Receptor Alpha)",
            "IL-4R\u03b2 (Interleukin-4 Receptor Beta)",
            "IL-13R\u03b11 (Interleukin-13 Receptor Alpha 1)",
            "IL-13R\u03b12 (Interleukin-13 Receptor Alpha 2)",
            "IL-22R\u03b11 (Interleukin-22 Receptor Alpha 1)",
            "IL-22R\u03b12 (Interleukin-22 Receptor Alpha 2)",
            "IL-27 (Interleukin-27)",
            "IL-35 (Interleukin-35)",
            "IL-39 (Interleukin-39)",
            "IL-37 (Interleukin-37)",
            "IL-38 (Interleukin-38)",
            "IL-40 (Interleukin-40)",
            "IL-31 (Interleukin-31)",
            "IL-33 (Interleukin-33)",
            "IL-36\u03b3 (Interleukin-36 Gamma)",
            "IL-36\u03b2 (Interleukin-36 Beta)",
            "IL-36\u03b1 (Interleukin-36 Alpha)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "IL-10 (Interleukin-10)",
                "IL-4 (Interleukin-4)",
                "IL-13 (Interleukin-13)",
                "IL-22 (Interleukin-22)",
                "TGF-\u03b2 (Transforming Growth Factor Beta)",
                "IL-1ra (Interleukin-1 Receptor Antagonist)",
                "IL-10R\u03b1 (Interleukin-10 Receptor Alpha)",
                "IL-10R\u03b2 (Interleukin-10 Receptor Beta)",
                "IL-4R\u03b1 (Interleukin-4 Receptor Alpha)",
                "IL-4R\u03b2 (Interleukin-4 Receptor Beta)",
                "IL-13R\u03b11 (Interleukin-13 Receptor Alpha 1)",
                "IL-13R\u03b12 (Interleukin-13 Receptor Alpha 2)",
                "IL-22R\u03b11 (Interleukin-22 Receptor Alpha 1)",
                "IL-22R\u03b12 (Interleukin-22 Receptor Alpha 2)",
                "IL-27 (Interleukin-27)",
                "IL-35 (Interleukin-35)",
                "IL-39 (Interleukin-39)",
                "IL-37 (Interleukin-37)",
                "IL-38 (Interleukin-38)",
                "IL-31 (Interleukin-31)",
                "IL-33 (Interleukin-33)",
                "IL-36\u03b3 (Interleukin-36 Gamma)",
                "IL-36\u03b2 (Interleukin-36 Beta)"
            ],
            "mismatches": [
                "IL-40 (Interleukin-40)",
                "IL-36\u03b1 (Interleukin-36 Alpha)"
            ],
            "true_referents": [
                "IL-10 (Interleukin-10)",
                "IL-10R\u03b1 (Interleukin-10 Receptor Alpha)",
                "IL-10R\u03b2 (Interleukin-10 Receptor Beta)",
                "IL-13 (Interleukin-13)",
                "IL-13R\u03b11 (Interleukin-13 Receptor Alpha 1)",
                "IL-13R\u03b12 (Interleukin-13 Receptor Alpha 2)",
                "IL-1ra (Interleukin-1 Receptor Antagonist)",
                "IL-22 (Interleukin-22)",
                "IL-22R\u03b11 (Interleukin-22 Receptor Alpha 1)",
                "IL-22R\u03b12 (Interleukin-22 Receptor Alpha 2)",
                "IL-27 (Interleukin-27)",
                "IL-31 (Interleukin-31)",
                "IL-33 (Interleukin-33)",
                "IL-35 (Interleukin-35)",
                "IL-36\u03b2 (Interleukin-36 Beta)",
                "IL-36\u03b3 (Interleukin-36 Gamma)",
                "IL-37 (Interleukin-37)",
                "IL-38 (Interleukin-38)",
                "IL-39 (Interleukin-39)",
                "IL-4 (Interleukin-4)",
                "IL-40 (Interleukin-40)",
                "IL-4R\u03b1 (Interleukin-4 Receptor Alpha)",
                "IL-4R\u03b2 (Interleukin-4 Receptor Beta)",
                "TGF-\u03b2 (Transforming Growth Factor Beta)"
            ],
            "TP": 23,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            "Interleukin 10 (IL-10)",
            "Interleukin 35 (IL-35)",
            "Transforming Growth Factor Beta (TGF-\u03b2)",
            "Interleukin 22 (IL-22)",
            "Interleukin 37 (IL-37)",
            "Interleukin 19 (IL-19)",
            "Interleukin 24 (IL-24)",
            "Interleukin 26 (IL-26)",
            "Heart-type Fatty Acid-binding Protein (H-FABP)",
            "Interleukin 27 (IL-27)",
            "Soluble CD14 (sCD14)",
            "Interleukin 1 Receptor Antagonist (IL-1Ra)",
            "Interleukin 36 (IL-36Ra)",
            "Tumor Necrosis Factor-inducible Gene 6 Protein (TSG-6)",
            "Interleukin 38 (IL-38)",
            "Interleukin 40 (IL-40)",
            "Galectin-1",
            "Galectin-9",
            "Secreted Protein Acidic and Rich in Cysteine (SPARC)",
            "Chitinase-3-Like Protein 1 (CHI3L1)",
            "Placenta Growth Factor (PlGF)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Macrophage Migration Inhibitory Factor (MIF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin 10 (IL-10)",
                "Interleukin 35 (IL-35)",
                "Transforming Growth Factor Beta (TGF-\u03b2)",
                "Interleukin 22 (IL-22)",
                "Interleukin 37 (IL-37)",
                "Interleukin 19 (IL-19)",
                "Interleukin 24 (IL-24)",
                "Interleukin 26 (IL-26)",
                "Heart-type Fatty Acid-binding Protein (H-FABP)",
                "Interleukin 27 (IL-27)",
                "Soluble CD14 (sCD14)",
                "Interleukin 1 Receptor Antagonist (IL-1Ra)",
                "Interleukin 36 (IL-36Ra)",
                "Tumor Necrosis Factor-inducible Gene 6 Protein (TSG-6)",
                "Interleukin 38 (IL-38)",
                "Interleukin 40 (IL-40)",
                "Galectin-1",
                "Galectin-9",
                "Secreted Protein Acidic and Rich in Cysteine (SPARC)",
                "Chitinase-3-Like Protein 1 (CHI3L1)",
                "Placenta Growth Factor (PlGF)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Macrophage Migration Inhibitory Factor (MIF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Chitinase-3-Like Protein 1 (CHI3L1)",
                "Galectin-1",
                "Galectin-9",
                "Heart-type Fatty Acid-binding Protein (H-FABP)",
                "Interleukin 1 Receptor Antagonist (IL-1Ra)",
                "Interleukin 10 (IL-10)",
                "Interleukin 19 (IL-19)",
                "Interleukin 22 (IL-22)",
                "Interleukin 24 (IL-24)",
                "Interleukin 26 (IL-26)",
                "Interleukin 27 (IL-27)",
                "Interleukin 35 (IL-35)",
                "Interleukin 36 (IL-36Ra)",
                "Interleukin 37 (IL-37)",
                "Interleukin 38 (IL-38)",
                "Interleukin 40 (IL-40)",
                "Macrophage Migration Inhibitory Factor (MIF)",
                "Placenta Growth Factor (PlGF)",
                "Secreted Protein Acidic and Rich in Cysteine (SPARC)",
                "Soluble CD14 (sCD14)",
                "Transforming Growth Factor Beta (TGF-\u03b2)",
                "Tumor Necrosis Factor-inducible Gene 6 Protein (TSG-6)",
                "Vascular Endothelial Growth Factor (VEGF)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "anti-inflammatory cytokine",
        "domain": "biology",
        "response": [
            {
                "Referent": "Interleukin-10 (IL-10)",
                "Canonical Name": "Interleukin-10"
            },
            {
                "Referent": "Interleukin-4 (IL-4)",
                "Canonical Name": "Interleukin-4"
            },
            {
                "Referent": "Interleukin-13 (IL-13)",
                "Canonical Name": "Interleukin-13"
            },
            {
                "Referent": "Interleukin-35 (IL-35)",
                "Canonical Name": "Interleukin-35"
            },
            {
                "Referent": "Interleukin-11 (IL-11)",
                "Canonical Name": "Interleukin-11"
            },
            {
                "Referent": "Interleukin-37 (IL-37)",
                "Canonical Name": "Interleukin-37"
            },
            {
                "Referent": "Interleukin-27 (IL-27)",
                "Canonical Name": "Interleukin-27"
            },
            {
                "Referent": "Interleukin-21 (IL-21)",
                "Canonical Name": "Interleukin-21"
            },
            {
                "Referent": "Interleukin-22 (IL-22)",
                "Canonical Name": "Interleukin-22"
            },
            {
                "Referent": "Interleukin-28A (IL-28A)",
                "Canonical Name": "Interleukin-28A"
            },
            {
                "Referent": "Interleukin-28B (IL-28B)",
                "Canonical Name": "Interleukin-28B"
            },
            {
                "Referent": "Interleukin-29 (IL-29)",
                "Canonical Name": "Interleukin-29"
            },
            {
                "Referent": "Interleukin-38 (IL-38)",
                "Canonical Name": "Interleukin-38"
            },
            {
                "Referent": "Interleukin-31 (IL-31)",
                "Canonical Name": "Interleukin-31"
            },
            {
                "Referent": "Interleukin-34 (IL-34)",
                "Canonical Name": "Interleukin-34"
            },
            {
                "Referent": "Interleukin-33 (IL-33)",
                "Canonical Name": "Interleukin-33"
            },
            {
                "Referent": "Interleukin-32 (IL-32)",
                "Canonical Name": "Interleukin-32"
            },
            {
                "Referent": "Interleukin-12 (IL-12)",
                "Canonical Name": "Interleukin-12"
            },
            {
                "Referent": "Interleukin-17A (IL-17A)",
                "Canonical Name": "Interleukin-17A"
            },
            {
                "Referent": "Interleukin-17F (IL-17F)",
                "Canonical Name": "Interleukin-17F"
            },
            {
                "Referent": "Interleukin-23 (IL-23)",
                "Canonical Name": "Interleukin-23"
            },
            {
                "Referent": "Interleukin-18 (IL-18)",
                "Canonical Name": "Interleukin-18"
            },
            {
                "Referent": "Interleukin-20 (IL-20)",
                "Canonical Name": "Interleukin-20"
            },
            {
                "Referent": "Interleukin-19 (IL-19)",
                "Canonical Name": "Interleukin-19"
            },
            {
                "Referent": "Interleukin-25 (IL-25)",
                "Canonical Name": "Interleukin-25"
            },
            {
                "Referent": "Interleukin-1 (IL-1)",
                "Canonical Name": "Interleukin-1"
            },
            {
                "Referent": "Interleukin-1Ra (IL-1Ra)",
                "Canonical Name": "Interleukin-1Ra"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Interleukin-10",
                "Interleukin-4",
                "Interleukin-13",
                "Interleukin-35",
                "Interleukin-11",
                "Interleukin-37",
                "Interleukin-27",
                "Interleukin-21",
                "Interleukin-22",
                "Interleukin-28A",
                "Interleukin-28B",
                "Interleukin-29",
                "Interleukin-38",
                "Interleukin-31",
                "Interleukin-34",
                "Interleukin-33",
                "Interleukin-32",
                "Interleukin-12",
                "Interleukin-17A",
                "Interleukin-17F",
                "Interleukin-23",
                "Interleukin-18",
                "Interleukin-20",
                "Interleukin-19"
            ],
            "mismatches": [
                "Interleukin-25",
                "Interleukin-1",
                "Interleukin-1Ra"
            ],
            "true_referents": [
                "{\"Referent\": \"Interleukin-10 (IL-10)\", \"Canonical Name\": \"Interleukin-10\"}",
                "{\"Referent\": \"Interleukin-11 (IL-11)\", \"Canonical Name\": \"Interleukin-11\"}",
                "{\"Referent\": \"Interleukin-12 (IL-12)\", \"Canonical Name\": \"Interleukin-12\"}",
                "{\"Referent\": \"Interleukin-13 (IL-13)\", \"Canonical Name\": \"Interleukin-13\"}",
                "{\"Referent\": \"Interleukin-17A (IL-17A)\", \"Canonical Name\": \"Interleukin-17A\"}",
                "{\"Referent\": \"Interleukin-17F (IL-17F)\", \"Canonical Name\": \"Interleukin-17F\"}",
                "{\"Referent\": \"Interleukin-18 (IL-18)\", \"Canonical Name\": \"Interleukin-18\"}",
                "{\"Referent\": \"Interleukin-19 (IL-19)\", \"Canonical Name\": \"Interleukin-19\"}",
                "{\"Referent\": \"Interleukin-20 (IL-20)\", \"Canonical Name\": \"Interleukin-20\"}",
                "{\"Referent\": \"Interleukin-21 (IL-21)\", \"Canonical Name\": \"Interleukin-21\"}",
                "{\"Referent\": \"Interleukin-22 (IL-22)\", \"Canonical Name\": \"Interleukin-22\"}",
                "{\"Referent\": \"Interleukin-23 (IL-23)\", \"Canonical Name\": \"Interleukin-23\"}",
                "{\"Referent\": \"Interleukin-27 (IL-27)\", \"Canonical Name\": \"Interleukin-27\"}",
                "{\"Referent\": \"Interleukin-28A (IL-28A)\", \"Canonical Name\": \"Interleukin-28A\"}",
                "{\"Referent\": \"Interleukin-28B (IL-28B)\", \"Canonical Name\": \"Interleukin-28B\"}",
                "{\"Referent\": \"Interleukin-29 (IL-29)\", \"Canonical Name\": \"Interleukin-29\"}",
                "{\"Referent\": \"Interleukin-31 (IL-31)\", \"Canonical Name\": \"Interleukin-31\"}",
                "{\"Referent\": \"Interleukin-32 (IL-32)\", \"Canonical Name\": \"Interleukin-32\"}",
                "{\"Referent\": \"Interleukin-33 (IL-33)\", \"Canonical Name\": \"Interleukin-33\"}",
                "{\"Referent\": \"Interleukin-34 (IL-34)\", \"Canonical Name\": \"Interleukin-34\"}",
                "{\"Referent\": \"Interleukin-35 (IL-35)\", \"Canonical Name\": \"Interleukin-35\"}",
                "{\"Referent\": \"Interleukin-37 (IL-37)\", \"Canonical Name\": \"Interleukin-37\"}",
                "{\"Referent\": \"Interleukin-38 (IL-38)\", \"Canonical Name\": \"Interleukin-38\"}",
                "{\"Referent\": \"Interleukin-4 (IL-4)\", \"Canonical Name\": \"Interleukin-4\"}"
            ],
            "TP": 24,
            "FP": 3,
            "FN": 0
        }
    }
]