[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Cholecystokinin (CCK)",
            "Gastrin",
            "Secretin",
            "Motilin",
            "Gastric inhibitory polypeptide (GIP)",
            "Vasoactive intestinal peptide (VIP)",
            "Glucagon",
            "Somatostatin",
            "Orexin",
            "Peptide YY (PYY)",
            "Neurotensin",
            "Substance P",
            "Ghrelin",
            "Pancreatic polypeptide",
            "Cholera toxin (Note: While not a naturally occurring hormone, it mimics the effect of certain gastrointestinal hormones)",
            "Enteroglucagon",
            "Enterostatin",
            "Corticotropin-releasing hormone (CRH)",
            "Vasoactive intestinal peptide (VIP) - note different regulatory functions in context",
            "Calcitonin gene-related peptide (CGRP)",
            "Galanin",
            "Bombesin",
            "Neuropeptide Y (NPY)",
            "Enkephalin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cholecystokinin (CCK)",
                "Gastrin",
                "Secretin",
                "Motilin",
                "Gastric inhibitory polypeptide (GIP)",
                "Vasoactive intestinal peptide (VIP)",
                "Glucagon",
                "Somatostatin",
                "Orexin",
                "Peptide YY (PYY)",
                "Neurotensin",
                "Substance P",
                "Ghrelin",
                "Pancreatic polypeptide",
                "Cholera toxin (Note: While not a naturally occurring hormone, it mimics the effect of certain gastrointestinal hormones)",
                "Enteroglucagon",
                "Enterostatin",
                "Corticotropin-releasing hormone (CRH)",
                "Vasoactive intestinal peptide (VIP) - note different regulatory functions in context",
                "Calcitonin gene-related peptide (CGRP)",
                "Galanin",
                "Bombesin",
                "Neuropeptide Y (NPY)",
                "Enkephalin"
            ],
            "mismatches": [],
            "true_referents": [
                "Bombesin",
                "Calcitonin gene-related peptide (CGRP)",
                "Cholecystokinin (CCK)",
                "Cholera toxin (Note: While not a naturally occurring hormone, it mimics the effect of certain gastrointestinal hormones)",
                "Corticotropin-releasing hormone (CRH)",
                "Enkephalin",
                "Enteroglucagon",
                "Enterostatin",
                "Galanin",
                "Gastric inhibitory polypeptide (GIP)",
                "Gastrin",
                "Ghrelin",
                "Glucagon",
                "Motilin",
                "Neuropeptide Y (NPY)",
                "Neurotensin",
                "Orexin",
                "Pancreatic polypeptide",
                "Peptide YY (PYY)",
                "Secretin",
                "Somatostatin",
                "Substance P",
                "Vasoactive intestinal peptide (VIP)",
                "Vasoactive intestinal peptide (VIP) - note different regulatory functions in context"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (G-17)",
            "Cholecystokinin (CCK)",
            "Secretin",
            "Motilin",
            "Gastric Inhibitory Polypeptide (GIP)",
            "Glucagon-like peptide-1 (GLP-1)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Somatostatin",
            "Neurotensin",
            "Enteroglucagon",
            "Ghrelin",
            "Oxyntomodulin",
            "Peptide YY (PYY)",
            "Pancreatic Polypeptide (PP)",
            "Gastric inhibitory polypeptide (GIP)",
            "Glucagon-like peptide-2 (GLP-2)",
            "Guanylin",
            "Uroguanylin",
            "Neurotensin (NT)",
            "Enterostatin",
            "Ghrelin-acyl-form (G-acyl)",
            "Glucagon-like peptide-1 related peptide (GLP-1-36)",
            "Glucagon-like peptide-1 related peptide-36 (GLP-1-37)",
            "Oxyntomodulin (OXM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (G-17)",
                "Cholecystokinin (CCK)",
                "Secretin",
                "Motilin",
                "Gastric Inhibitory Polypeptide (GIP)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Vasoactive Intestinal Peptide (VIP)",
                "Somatostatin",
                "Neurotensin",
                "Enteroglucagon",
                "Ghrelin",
                "Oxyntomodulin",
                "Peptide YY (PYY)",
                "Pancreatic Polypeptide (PP)",
                "Gastric inhibitory polypeptide (GIP)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Guanylin",
                "Uroguanylin",
                "Neurotensin (NT)",
                "Enterostatin",
                "Ghrelin-acyl-form (G-acyl)",
                "Glucagon-like peptide-1 related peptide (GLP-1-36)",
                "Glucagon-like peptide-1 related peptide-36 (GLP-1-37)",
                "Oxyntomodulin (OXM)"
            ],
            "mismatches": [],
            "true_referents": [
                "Cholecystokinin (CCK)",
                "Enteroglucagon",
                "Enterostatin",
                "Gastric Inhibitory Polypeptide (GIP)",
                "Gastric inhibitory polypeptide (GIP)",
                "Gastrin (G-17)",
                "Ghrelin",
                "Ghrelin-acyl-form (G-acyl)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucagon-like peptide-1 related peptide (GLP-1-36)",
                "Glucagon-like peptide-1 related peptide-36 (GLP-1-37)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Guanylin",
                "Motilin",
                "Neurotensin",
                "Neurotensin (NT)",
                "Oxyntomodulin",
                "Oxyntomodulin (OXM)",
                "Pancreatic Polypeptide (PP)",
                "Peptide YY (PYY)",
                "Secretin",
                "Somatostatin",
                "Uroguanylin",
                "Vasoactive Intestinal Peptide (VIP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (G)",
            "Cholecystokinin (CCK)",
            "Secretin",
            "Gastric Inhibitory Peptide (GIP)",
            "Motilin",
            "Neurotensin",
            "Peptide YY (PYY)",
            "Glucagon-like peptide-1 (GLP-1)",
            "Glucagon-like peptide-2 (GLP-2)",
            "Oxyntomodulin",
            "Enteroglucagon",
            "Vasoactive intestinal peptide (VIP)",
            "Enteric glucagon",
            "Pancreatic polypeptide (PP)",
            "Somatostatin",
            "Ghrelin",
            "Neuropeptide Y (NPY)",
            "Substance P",
            "Neurokinin A",
            "Neurokinin B",
            "Histamine",
            "Serotonin (5-HT)",
            "Leptin",
            "Adenosine",
            "Calcitonin Gene-Related Peptide (CGRP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (G)",
                "Cholecystokinin (CCK)",
                "Secretin",
                "Gastric Inhibitory Peptide (GIP)",
                "Motilin",
                "Neurotensin",
                "Peptide YY (PYY)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Oxyntomodulin",
                "Enteroglucagon",
                "Vasoactive intestinal peptide (VIP)",
                "Enteric glucagon",
                "Pancreatic polypeptide (PP)",
                "Somatostatin",
                "Ghrelin",
                "Neuropeptide Y (NPY)",
                "Substance P",
                "Neurokinin A",
                "Neurokinin B",
                "Histamine",
                "Serotonin (5-HT)",
                "Leptin",
                "Adenosine"
            ],
            "mismatches": [
                "Calcitonin Gene-Related Peptide (CGRP)"
            ],
            "true_referents": [
                "Adenosine",
                "Cholecystokinin (CCK)",
                "Enteric glucagon",
                "Enteroglucagon",
                "Gastric Inhibitory Peptide (GIP)",
                "Gastrin (G)",
                "Ghrelin",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Histamine",
                "Leptin",
                "Motilin",
                "Neurokinin A",
                "Neurokinin B",
                "Neuropeptide Y (NPY)",
                "Neurotensin",
                "Oxyntomodulin",
                "Pancreatic polypeptide (PP)",
                "Peptide YY (PYY)",
                "Secretin",
                "Serotonin (5-HT)",
                "Somatostatin",
                "Substance P",
                "Vasoactive intestinal peptide (VIP)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)",
            "Secretin (SEC)",
            "Gastric Inhibitory Polypeptide (GIP)",
            "Motilin (MOT)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Enteroglucagon (EGC)",
            "Somatostatin (SST)",
            "Ghrelin (GHR)",
            "Peptide YY (PYY)",
            "Glucagon-like Peptide-1 (GLP-1)",
            "Oxyntomodulin (OXM)",
            "Pancreatic Polypeptide (PP)",
            "Neurotensin (NT)",
            "Bombesin (BN)",
            "Cholecystokinin-Releasing Peptide (CCRP)",
            "Urotensin II (UII)",
            "Gastrin-Releasing Peptide (GRP)",
            "Cerebrocupulin (CRP)",
            "Apelin (APLN)",
            "Gonadotropin-Releasing Hormone (GnRH) - in gut neurons",
            "Substance P (SP) - in gut neurons",
            "Calcitonin Gene-Related Peptide (CGRP) - in gut neurons",
            "Endothelin-1 (ET-1) - in gut neurons"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)",
                "Secretin (SEC)",
                "Gastric Inhibitory Polypeptide (GIP)",
                "Motilin (MOT)",
                "Vasoactive Intestinal Peptide (VIP)",
                "Enteroglucagon (EGC)",
                "Somatostatin (SST)",
                "Ghrelin (GHR)",
                "Peptide YY (PYY)",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Oxyntomodulin (OXM)",
                "Pancreatic Polypeptide (PP)",
                "Neurotensin (NT)",
                "Bombesin (BN)",
                "Cholecystokinin-Releasing Peptide (CCRP)",
                "Urotensin II (UII)",
                "Gastrin-Releasing Peptide (GRP)",
                "Cerebrocupulin (CRP)",
                "Apelin (APLN)",
                "Gonadotropin-Releasing Hormone (GnRH) - in gut neurons",
                "Substance P (SP) - in gut neurons",
                "Calcitonin Gene-Related Peptide (CGRP) - in gut neurons",
                "Endothelin-1 (ET-1) - in gut neurons"
            ],
            "mismatches": [],
            "true_referents": [
                "Apelin (APLN)",
                "Bombesin (BN)",
                "Calcitonin Gene-Related Peptide (CGRP) - in gut neurons",
                "Cerebrocupulin (CRP)",
                "Cholecystokinin (CCK)",
                "Cholecystokinin-Releasing Peptide (CCRP)",
                "Endothelin-1 (ET-1) - in gut neurons",
                "Enteroglucagon (EGC)",
                "Gastric Inhibitory Polypeptide (GIP)",
                "Gastrin (GAS)",
                "Gastrin-Releasing Peptide (GRP)",
                "Ghrelin (GHR)",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Gonadotropin-Releasing Hormone (GnRH) - in gut neurons",
                "Motilin (MOT)",
                "Neurotensin (NT)",
                "Oxyntomodulin (OXM)",
                "Pancreatic Polypeptide (PP)",
                "Peptide YY (PYY)",
                "Secretin (SEC)",
                "Somatostatin (SST)",
                "Substance P (SP) - in gut neurons",
                "Urotensin II (UII)",
                "Vasoactive Intestinal Peptide (VIP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)",
            "Secretin",
            "Motilin",
            "Ghrelin",
            "Gastric Inhibitory Polypeptide (GIP)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Peptide YY (PYY)",
            "Enteroglucagon",
            "Neurotensin",
            "Somatostatin",
            "Bombesin",
            "Substance P",
            "Pancreatic Polypeptide (PP)",
            "Enterostatin",
            "Oxyntomodulin",
            "Amylin",
            "Glucagon-like Peptide-1 (GLP-1)",
            "Glucagon-like Peptide-2 (GLP-2)",
            "Uroguanylin",
            "Guanylin",
            "Trefoil Factor Family Peptides (TFF)",
            "Histamine",
            "Serotonin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)",
                "Secretin",
                "Motilin",
                "Ghrelin",
                "Gastric Inhibitory Polypeptide (GIP)",
                "Vasoactive Intestinal Peptide (VIP)",
                "Peptide YY (PYY)",
                "Enteroglucagon",
                "Neurotensin",
                "Somatostatin",
                "Bombesin",
                "Substance P",
                "Pancreatic Polypeptide (PP)",
                "Enterostatin",
                "Oxyntomodulin",
                "Amylin",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Glucagon-like Peptide-2 (GLP-2)",
                "Uroguanylin",
                "Guanylin",
                "Trefoil Factor Family Peptides (TFF)",
                "Histamine",
                "Serotonin"
            ],
            "mismatches": [],
            "true_referents": [
                "Amylin",
                "Bombesin",
                "Cholecystokinin (CCK)",
                "Enteroglucagon",
                "Enterostatin",
                "Gastric Inhibitory Polypeptide (GIP)",
                "Gastrin (GAS)",
                "Ghrelin",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Glucagon-like Peptide-2 (GLP-2)",
                "Guanylin",
                "Histamine",
                "Motilin",
                "Neurotensin",
                "Oxyntomodulin",
                "Pancreatic Polypeptide (PP)",
                "Peptide YY (PYY)",
                "Secretin",
                "Serotonin",
                "Somatostatin",
                "Substance P",
                "Trefoil Factor Family Peptides (TFF)",
                "Uroguanylin",
                "Vasoactive Intestinal Peptide (VIP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)",
            "Secretin (SEC)",
            "Gastric Inhibitory Peptide (GIP)",
            "Motilin (MTL)",
            "Somatostatin (SST)",
            "Peptide YY (PYY)",
            "Neurotensin (NT)",
            "Enteroglucagon (EG)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Glucagon-like Peptide-1 (GLP-1)",
            "Glucagon-like Peptide-2 (GLP-2)",
            "Pancreatic Polypeptide (PP)",
            "Serotonin (5-HT)",
            "Calcitonin Gene-Related Peptide (CGRP)",
            "Insulin (INS)",
            "Amylin (AMY)",
            "Cholecystokinin-like peptide (CCK-LP)",
            "Neuropeptide Y (NPY)",
            "Histamine (HIS)",
            "Endothelin (ET)",
            "Atrial Natriuretic Peptide (ANP)",
            "Prostaglandin E2 (PGE2)",
            "Leptin (LEP)",
            "Adiponectin (ADIPOQ)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)",
                "Secretin (SEC)",
                "Gastric Inhibitory Peptide (GIP)",
                "Motilin (MTL)",
                "Somatostatin (SST)",
                "Peptide YY (PYY)",
                "Neurotensin (NT)",
                "Enteroglucagon (EG)",
                "Vasoactive Intestinal Peptide (VIP)",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Glucagon-like Peptide-2 (GLP-2)",
                "Pancreatic Polypeptide (PP)",
                "Serotonin (5-HT)",
                "Calcitonin Gene-Related Peptide (CGRP)",
                "Insulin (INS)",
                "Amylin (AMY)",
                "Cholecystokinin-like peptide (CCK-LP)",
                "Neuropeptide Y (NPY)",
                "Histamine (HIS)",
                "Endothelin (ET)",
                "Atrial Natriuretic Peptide (ANP)",
                "Prostaglandin E2 (PGE2)",
                "Leptin (LEP)"
            ],
            "mismatches": [
                "Adiponectin (ADIPOQ)"
            ],
            "true_referents": [
                "Amylin (AMY)",
                "Atrial Natriuretic Peptide (ANP)",
                "Calcitonin Gene-Related Peptide (CGRP)",
                "Cholecystokinin (CCK)",
                "Cholecystokinin-like peptide (CCK-LP)",
                "Endothelin (ET)",
                "Enteroglucagon (EG)",
                "Gastric Inhibitory Peptide (GIP)",
                "Gastrin (GAS)",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Glucagon-like Peptide-2 (GLP-2)",
                "Histamine (HIS)",
                "Insulin (INS)",
                "Leptin (LEP)",
                "Motilin (MTL)",
                "Neuropeptide Y (NPY)",
                "Neurotensin (NT)",
                "Pancreatic Polypeptide (PP)",
                "Peptide YY (PYY)",
                "Prostaglandin E2 (PGE2)",
                "Secretin (SEC)",
                "Serotonin (5-HT)",
                "Somatostatin (SST)",
                "Vasoactive Intestinal Peptide (VIP)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)",
            "Secretin (SEC)",
            "Gastric Inhibitory Peptide (GIP)",
            "Motilin (MOT)",
            "Neurotensin (NT)",
            "Peptide YY (PYY)",
            "Pancreatic Polypeptide (PP)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Somatostatin (SST)",
            "Glucagon-Like Peptide 1 (GLP-1)",
            "Glucagon-Like Peptide 2 (GLP-2)",
            "Oxyntomodulin (OXM)",
            "Enteroglucagon (EG)",
            "Glucose-dependent Insulinotropic Peptide (GIP)",
            "Ghrelin (GHRL)",
            "Proglucagon (PGC)",
            "Neuropeptide Y (NPY)",
            "Leptin (LEP)",
            "Insulin-like Growth Factor 1 (IGF-1)",
            "Insulin-like Growth Factor 2 (IGF-2)",
            "Serotonin (5-HT)",
            "Dopamine (DA)",
            "Endothelin-1 (ET-1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)",
                "Secretin (SEC)",
                "Gastric Inhibitory Peptide (GIP)",
                "Motilin (MOT)",
                "Neurotensin (NT)",
                "Peptide YY (PYY)",
                "Pancreatic Polypeptide (PP)",
                "Vasoactive Intestinal Peptide (VIP)",
                "Somatostatin (SST)",
                "Glucagon-Like Peptide 1 (GLP-1)",
                "Glucagon-Like Peptide 2 (GLP-2)",
                "Oxyntomodulin (OXM)",
                "Enteroglucagon (EG)",
                "Glucose-dependent Insulinotropic Peptide (GIP)",
                "Ghrelin (GHRL)",
                "Proglucagon (PGC)",
                "Neuropeptide Y (NPY)",
                "Leptin (LEP)",
                "Insulin-like Growth Factor 1 (IGF-1)",
                "Insulin-like Growth Factor 2 (IGF-2)",
                "Serotonin (5-HT)",
                "Dopamine (DA)",
                "Endothelin-1 (ET-1)"
            ],
            "mismatches": [],
            "true_referents": [
                "Cholecystokinin (CCK)",
                "Dopamine (DA)",
                "Endothelin-1 (ET-1)",
                "Enteroglucagon (EG)",
                "Gastric Inhibitory Peptide (GIP)",
                "Gastrin (GAS)",
                "Ghrelin (GHRL)",
                "Glucagon-Like Peptide 1 (GLP-1)",
                "Glucagon-Like Peptide 2 (GLP-2)",
                "Glucose-dependent Insulinotropic Peptide (GIP)",
                "Insulin-like Growth Factor 1 (IGF-1)",
                "Insulin-like Growth Factor 2 (IGF-2)",
                "Leptin (LEP)",
                "Motilin (MOT)",
                "Neuropeptide Y (NPY)",
                "Neurotensin (NT)",
                "Oxyntomodulin (OXM)",
                "Pancreatic Polypeptide (PP)",
                "Peptide YY (PYY)",
                "Proglucagon (PGC)",
                "Secretin (SEC)",
                "Serotonin (5-HT)",
                "Somatostatin (SST)",
                "Vasoactive Intestinal Peptide (VIP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "gastrin",
            "cholecystokinin",
            "secretin",
            "motilin",
            "ghrelin",
            "somatostatin",
            "neurotensin",
            "vasoactive intestinal peptide",
            "peptide YY",
            "enteroglucagon",
            "glucagon-like peptide 1",
            "glucagon-like peptide 2",
            "gastric inhibitory polypeptide",
            "bombesin",
            "calcitonin gene-related peptide",
            "substance P",
            "melanin-concentrating hormone",
            "insulin",
            "leptin",
            "adiponectin",
            "resistin",
            "ghrelin",
            "orexin",
            "neuropeptide Y"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "gastrin",
                "cholecystokinin",
                "secretin",
                "motilin",
                "ghrelin",
                "somatostatin",
                "neurotensin",
                "vasoactive intestinal peptide",
                "peptide YY",
                "enteroglucagon",
                "glucagon-like peptide 1",
                "glucagon-like peptide 2",
                "gastric inhibitory polypeptide",
                "bombesin",
                "calcitonin gene-related peptide",
                "substance P",
                "melanin-concentrating hormone",
                "insulin",
                "leptin",
                "adiponectin",
                "resistin",
                "orexin",
                "neuropeptide Y"
            ],
            "mismatches": [],
            "true_referents": [
                "adiponectin",
                "bombesin",
                "calcitonin gene-related peptide",
                "cholecystokinin",
                "enteroglucagon",
                "gastric inhibitory polypeptide",
                "gastrin",
                "ghrelin",
                "glucagon-like peptide 1",
                "glucagon-like peptide 2",
                "insulin",
                "leptin",
                "melanin-concentrating hormone",
                "motilin",
                "neuropeptide Y",
                "neurotensin",
                "orexin",
                "peptide YY",
                "resistin",
                "secretin",
                "somatostatin",
                "substance P",
                "vasoactive intestinal peptide"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Cholecystokinin (CCK)",
            "Secretin (ST)",
            "Gastrin",
            "Motilin",
            "Somatostatin (SST)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Glucose-dependent Insulinotropic Peptide (GIP)",
            "Glucagon-like Peptide-1 (GLP-1)",
            "Neurotensin",
            "Peptide YY (PYY)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cholecystokinin (CCK)",
                "Secretin (ST)",
                "Gastrin",
                "Motilin",
                "Somatostatin (SST)",
                "Vasoactive Intestinal Peptide (VIP)",
                "Glucose-dependent Insulinotropic Peptide (GIP)",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Neurotensin",
                "Peptide YY (PYY)"
            ],
            "mismatches": [],
            "true_referents": [
                "Cholecystokinin (CCK)",
                "Gastrin",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Glucose-dependent Insulinotropic Peptide (GIP)",
                "Motilin",
                "Neurotensin",
                "Peptide YY (PYY)",
                "Secretin (ST)",
                "Somatostatin (SST)",
                "Vasoactive Intestinal Peptide (VIP)"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)",
            "Secretin (SCT)",
            "Motilin (MLN)",
            "Ghrelin (GHRL)",
            "Glucagon-like peptide-1 (GLP-1)",
            "Glucose-dependent insulinotropic polypeptide (GIP)",
            "Peptide YY (PYY)",
            "Somatostatin (SST)",
            "Vasoactive intestinal peptide (VIP)",
            "Gastric inhibitory polypeptide (GIP)",
            "Neurotensin (NTS)",
            "Pancreatic polypeptide (PP)",
            "Oxyntomodulin (OXM)",
            "Glucagon-like peptide-2 (GLP-2)",
            "Gastrin-releasing peptide (GRP)",
            "Enteroglucagon",
            "Guanylin",
            "Uroguanylin",
            "Obestatin",
            "Xenin",
            "Apelin",
            "Neuropeptide Y (NPY)",
            "Substance P (SP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)",
                "Secretin (SCT)",
                "Motilin (MLN)",
                "Ghrelin (GHRL)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucose-dependent insulinotropic polypeptide (GIP)",
                "Peptide YY (PYY)",
                "Somatostatin (SST)",
                "Vasoactive intestinal peptide (VIP)",
                "Gastric inhibitory polypeptide (GIP)",
                "Neurotensin (NTS)",
                "Pancreatic polypeptide (PP)",
                "Oxyntomodulin (OXM)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Gastrin-releasing peptide (GRP)",
                "Enteroglucagon",
                "Guanylin",
                "Uroguanylin",
                "Obestatin",
                "Xenin",
                "Apelin",
                "Neuropeptide Y (NPY)",
                "Substance P (SP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Apelin",
                "Cholecystokinin (CCK)",
                "Enteroglucagon",
                "Gastric inhibitory polypeptide (GIP)",
                "Gastrin (GAS)",
                "Gastrin-releasing peptide (GRP)",
                "Ghrelin (GHRL)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Glucose-dependent insulinotropic polypeptide (GIP)",
                "Guanylin",
                "Motilin (MLN)",
                "Neuropeptide Y (NPY)",
                "Neurotensin (NTS)",
                "Obestatin",
                "Oxyntomodulin (OXM)",
                "Pancreatic polypeptide (PP)",
                "Peptide YY (PYY)",
                "Secretin (SCT)",
                "Somatostatin (SST)",
                "Substance P (SP)",
                "Uroguanylin",
                "Vasoactive intestinal peptide (VIP)",
                "Xenin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin",
            "Cholecystokinin (CCK)",
            "Secretin",
            "Glucose-dependent insulinotropic polypeptide (GIP)",
            "Vasoactive intestinal peptide (VIP)",
            "Motilin",
            "Somatostatin",
            "Pancreatic polypeptide (PP)",
            "Peptide YY (PYY)",
            "Glucagon-like peptide-1 (GLP-1)",
            "Glucagon-like peptide-2 (GLP-2)",
            "Oxyntomodulin",
            "Ghrelin",
            "Neurotensin",
            "Substance P",
            "Galanin",
            "Enteroglucagon",
            "Guanylin",
            "Uroguanylin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin",
                "Cholecystokinin (CCK)",
                "Secretin",
                "Glucose-dependent insulinotropic polypeptide (GIP)",
                "Vasoactive intestinal peptide (VIP)",
                "Motilin",
                "Somatostatin",
                "Pancreatic polypeptide (PP)",
                "Peptide YY (PYY)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Oxyntomodulin",
                "Ghrelin",
                "Neurotensin",
                "Substance P",
                "Galanin",
                "Enteroglucagon",
                "Guanylin",
                "Uroguanylin"
            ],
            "mismatches": [],
            "true_referents": [
                "Cholecystokinin (CCK)",
                "Enteroglucagon",
                "Galanin",
                "Gastrin",
                "Ghrelin",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucagon-like peptide-2 (GLP-2)",
                "Glucose-dependent insulinotropic polypeptide (GIP)",
                "Guanylin",
                "Motilin",
                "Neurotensin",
                "Oxyntomodulin",
                "Pancreatic polypeptide (PP)",
                "Peptide YY (PYY)",
                "Secretin",
                "Somatostatin",
                "Substance P",
                "Uroguanylin",
                "Vasoactive intestinal peptide (VIP)"
            ],
            "TP": 19,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (G)",
            "Cholecystokinin (CCK)",
            "Secretin (S)",
            "Glucose-dependent insulinotropic polypeptide (GIP)",
            "Motilin (M)",
            "Ghrelin (GHRL)",
            "Peptide YY (PYY)",
            "Pancreatic polypeptide (PP)",
            "Glucagon-like peptide-1 (GLP-1)",
            "Oxyntomodulin (OXM)",
            "Neurotensin (NT)",
            "Vasoactive intestinal peptide (VIP)",
            "Somatostatin (SST)",
            "Substance P (SP)",
            "Neuropeptide Y (NPY)",
            "Galanin (GAL)",
            "Calcitonin gene-related peptide (CGRP)",
            "Bombesin (BN)",
            "Endothelin (ET)",
            "Adrenomedullin (ADM)",
            "Uroguanylin (UGN)",
            "Guanylin (GN)",
            "Xenin (XEN)",
            "Apelin (APLN)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (G)",
                "Cholecystokinin (CCK)",
                "Secretin (S)",
                "Glucose-dependent insulinotropic polypeptide (GIP)",
                "Motilin (M)",
                "Ghrelin (GHRL)",
                "Peptide YY (PYY)",
                "Pancreatic polypeptide (PP)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Oxyntomodulin (OXM)",
                "Neurotensin (NT)",
                "Vasoactive intestinal peptide (VIP)",
                "Somatostatin (SST)",
                "Substance P (SP)",
                "Neuropeptide Y (NPY)",
                "Galanin (GAL)",
                "Calcitonin gene-related peptide (CGRP)",
                "Bombesin (BN)",
                "Endothelin (ET)",
                "Adrenomedullin (ADM)",
                "Uroguanylin (UGN)",
                "Guanylin (GN)",
                "Xenin (XEN)",
                "Apelin (APLN)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenomedullin (ADM)",
                "Apelin (APLN)",
                "Bombesin (BN)",
                "Calcitonin gene-related peptide (CGRP)",
                "Cholecystokinin (CCK)",
                "Endothelin (ET)",
                "Galanin (GAL)",
                "Gastrin (G)",
                "Ghrelin (GHRL)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Glucose-dependent insulinotropic polypeptide (GIP)",
                "Guanylin (GN)",
                "Motilin (M)",
                "Neuropeptide Y (NPY)",
                "Neurotensin (NT)",
                "Oxyntomodulin (OXM)",
                "Pancreatic polypeptide (PP)",
                "Peptide YY (PYY)",
                "Secretin (S)",
                "Somatostatin (SST)",
                "Substance P (SP)",
                "Uroguanylin (UGN)",
                "Vasoactive intestinal peptide (VIP)",
                "Xenin (XEN)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (G)",
            "Cholecystokinin (CCK)",
            "Secretin (S)",
            "Motilin (M)",
            "Somatostatin (SS)",
            "Ghrelin (G)",
            "Peptide YY (PYY)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Neurotensin (NT)",
            "Substance P (SP)",
            "Calcitonin Gene-Related Peptide (CGRP)",
            "Glucagon-Like Peptide-1 (GLP-1)",
            "Glucagon-Like Peptide-2 (GLP-2)",
            "Enteroglucagon (EG)",
            "Pancreatic Polypeptide (PP)",
            "Insulin-Like Growth Factor-1 (IGF-1)",
            "Insulin-Like Growth Factor-2 (IGF-2)",
            "Transforming Growth Factor-Beta (TGF-\u03b2)",
            "Epidermal Growth Factor (EGF)",
            "Fibroblast Growth Factor (FGF)",
            "Platelet-Derived Growth Factor (PDGF)",
            "Vascular Endothelial Growth Factor (VEGF)",
            "Nerve Growth Factor (NGF)",
            "Brain-Derived Neurotrophic Factor (BDNF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (G)",
                "Cholecystokinin (CCK)",
                "Secretin (S)",
                "Motilin (M)",
                "Somatostatin (SS)",
                "Ghrelin (G)",
                "Peptide YY (PYY)",
                "Vasoactive Intestinal Peptide (VIP)",
                "Neurotensin (NT)",
                "Substance P (SP)",
                "Calcitonin Gene-Related Peptide (CGRP)",
                "Glucagon-Like Peptide-1 (GLP-1)",
                "Glucagon-Like Peptide-2 (GLP-2)",
                "Enteroglucagon (EG)",
                "Pancreatic Polypeptide (PP)",
                "Insulin-Like Growth Factor-1 (IGF-1)",
                "Insulin-Like Growth Factor-2 (IGF-2)",
                "Transforming Growth Factor-Beta (TGF-\u03b2)",
                "Epidermal Growth Factor (EGF)",
                "Fibroblast Growth Factor (FGF)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Nerve Growth Factor (NGF)",
                "Brain-Derived Neurotrophic Factor (BDNF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Brain-Derived Neurotrophic Factor (BDNF)",
                "Calcitonin Gene-Related Peptide (CGRP)",
                "Cholecystokinin (CCK)",
                "Enteroglucagon (EG)",
                "Epidermal Growth Factor (EGF)",
                "Fibroblast Growth Factor (FGF)",
                "Gastrin (G)",
                "Ghrelin (G)",
                "Glucagon-Like Peptide-1 (GLP-1)",
                "Glucagon-Like Peptide-2 (GLP-2)",
                "Insulin-Like Growth Factor-1 (IGF-1)",
                "Insulin-Like Growth Factor-2 (IGF-2)",
                "Motilin (M)",
                "Nerve Growth Factor (NGF)",
                "Neurotensin (NT)",
                "Pancreatic Polypeptide (PP)",
                "Peptide YY (PYY)",
                "Platelet-Derived Growth Factor (PDGF)",
                "Secretin (S)",
                "Somatostatin (SS)",
                "Substance P (SP)",
                "Transforming Growth Factor-Beta (TGF-\u03b2)",
                "Vascular Endothelial Growth Factor (VEGF)",
                "Vasoactive Intestinal Peptide (VIP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Secretin (Sec)",
            "Cholecystokinin (CCK)",
            "Gastrin",
            "Motilin",
            "Vasoactive intestinal peptide (VIP)",
            "Gastric inhibitory peptide (GIP)",
            "Pancreatic polypeptide (PP)",
            "Bombesin",
            "Neuromedin B",
            "Motilin-like peptide",
            "Gastric-releasing peptide (GRP)",
            "Ghrelin",
            "Urocortin 3",
            "Urocortin 1",
            "Urocortin 2",
            "CGRP (Calcitonin gene-related peptide)",
            "Adrenomedullin",
            "Pituitary adenylate cyclase-activating polypeptide (PACAP)",
            "Vasoactive intestinal peptide (VIP)",
            "Peptide YY (PYY)",
            "Glucagon",
            "Insulin",
            "Somatostatin",
            "Pancreatic stone protein (PSP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Secretin (Sec)",
                "Cholecystokinin (CCK)",
                "Gastrin",
                "Motilin",
                "Vasoactive intestinal peptide (VIP)",
                "Gastric inhibitory peptide (GIP)",
                "Pancreatic polypeptide (PP)",
                "Bombesin",
                "Neuromedin B",
                "Motilin-like peptide",
                "Gastric-releasing peptide (GRP)",
                "Ghrelin",
                "Urocortin 3",
                "Urocortin 1",
                "Urocortin 2",
                "CGRP (Calcitonin gene-related peptide)",
                "Adrenomedullin",
                "Pituitary adenylate cyclase-activating polypeptide (PACAP)",
                "Peptide YY (PYY)",
                "Glucagon",
                "Insulin",
                "Somatostatin",
                "Pancreatic stone protein (PSP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenomedullin",
                "Bombesin",
                "CGRP (Calcitonin gene-related peptide)",
                "Cholecystokinin (CCK)",
                "Gastric inhibitory peptide (GIP)",
                "Gastric-releasing peptide (GRP)",
                "Gastrin",
                "Ghrelin",
                "Glucagon",
                "Insulin",
                "Motilin",
                "Motilin-like peptide",
                "Neuromedin B",
                "Pancreatic polypeptide (PP)",
                "Pancreatic stone protein (PSP)",
                "Peptide YY (PYY)",
                "Pituitary adenylate cyclase-activating polypeptide (PACAP)",
                "Secretin (Sec)",
                "Somatostatin",
                "Urocortin 1",
                "Urocortin 2",
                "Urocortin 3",
                "Vasoactive intestinal peptide (VIP)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)",
            "Secretin (SCT)",
            "Motilin (MOT)",
            "Gastric inhibitory peptide (GIP)",
            "Vasoactive intestinal peptide (VIP)",
            "Neurotensin (NT)",
            "Bombesin (BOM)",
            "Ghrelin (GHRL)",
            "Obestatin (OB)",
            "Pyroglutamylated RFamide peptide (QRFP)",
            "Enteroglucagon (OXT)",
            "Peptide YY (PYY)",
            "Pancreatic polypeptide (PP)",
            "Duodenal acid peptide (DUPA)",
            "Cholecystokinin-8 (CCK-8)",
            "Gastrin-releasing peptide (GRP)",
            "Gastrin-17 (G17)",
            "Motilin-associated peptide (MAP)",
            "Gastrointestinal hormone peptide (GHRP)",
            "Ghrelin/obestatin prepropeptide (GHRL-OXT)",
            "Neuromedin U (NMU)",
            "Rat gastrin-releasing peptide (rGRP)",
            "Helodermin (HLD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)",
                "Secretin (SCT)",
                "Motilin (MOT)",
                "Gastric inhibitory peptide (GIP)",
                "Vasoactive intestinal peptide (VIP)",
                "Neurotensin (NT)",
                "Bombesin (BOM)",
                "Ghrelin (GHRL)",
                "Obestatin (OB)",
                "Pyroglutamylated RFamide peptide (QRFP)",
                "Enteroglucagon (OXT)",
                "Peptide YY (PYY)",
                "Pancreatic polypeptide (PP)",
                "Duodenal acid peptide (DUPA)",
                "Cholecystokinin-8 (CCK-8)",
                "Gastrin-releasing peptide (GRP)",
                "Gastrin-17 (G17)",
                "Motilin-associated peptide (MAP)",
                "Gastrointestinal hormone peptide (GHRP)",
                "Ghrelin/obestatin prepropeptide (GHRL-OXT)",
                "Neuromedin U (NMU)",
                "Rat gastrin-releasing peptide (rGRP)",
                "Helodermin (HLD)"
            ],
            "mismatches": [],
            "true_referents": [
                "Bombesin (BOM)",
                "Cholecystokinin (CCK)",
                "Cholecystokinin-8 (CCK-8)",
                "Duodenal acid peptide (DUPA)",
                "Enteroglucagon (OXT)",
                "Gastric inhibitory peptide (GIP)",
                "Gastrin (GAS)",
                "Gastrin-17 (G17)",
                "Gastrin-releasing peptide (GRP)",
                "Gastrointestinal hormone peptide (GHRP)",
                "Ghrelin (GHRL)",
                "Ghrelin/obestatin prepropeptide (GHRL-OXT)",
                "Helodermin (HLD)",
                "Motilin (MOT)",
                "Motilin-associated peptide (MAP)",
                "Neuromedin U (NMU)",
                "Neurotensin (NT)",
                "Obestatin (OB)",
                "Pancreatic polypeptide (PP)",
                "Peptide YY (PYY)",
                "Pyroglutamylated RFamide peptide (QRFP)",
                "Rat gastrin-releasing peptide (rGRP)",
                "Secretin (SCT)",
                "Vasoactive intestinal peptide (VIP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "gastrointestinal hormone",
        "domain": "biology",
        "response": [
            {
                "name": "Gastric inhibitory peptide (GIP)",
                "abbreviation": "GIP"
            },
            {
                "name": "Secretin",
                "abbreviation": "SEC"
            },
            {
                "name": "Cholecystokinin (CCK)",
                "abbreviation": "CCK"
            },
            {
                "name": "Gastric acid",
                "abbreviation": "GA"
            },
            {
                "name": "Enterogastrone",
                "abbreviation": "EG"
            },
            {
                "name": "Motilin",
                "abbreviation": "MTL"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Gastric inhibitory peptide (GIP)",
                "Secretin",
                "Cholecystokinin (CCK)",
                "Gastric acid",
                "Enterogastrone",
                "Motilin"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"name\": \"Cholecystokinin (CCK)\", \"abbreviation\": \"CCK\"}",
                "{\"name\": \"Enterogastrone\", \"abbreviation\": \"EG\"}",
                "{\"name\": \"Gastric acid\", \"abbreviation\": \"GA\"}",
                "{\"name\": \"Gastric inhibitory peptide (GIP)\", \"abbreviation\": \"GIP\"}",
                "{\"name\": \"Motilin\", \"abbreviation\": \"MTL\"}",
                "{\"name\": \"Secretin\", \"abbreviation\": \"SEC\"}"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 0
        }
    }
]