[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Adrenocorticotropic hormone (ACTH)",
            "Angiotensin II",
            "Atrial natriuretic peptide (ANP)",
            "Beta-endorphin",
            "Calcitonin",
            "Cholecystokinin (CCK)",
            "Dopamine",
            "Erythropoietin (EPO)",
            "Follicle-stimulating hormone (FSH)",
            "Gastrin",
            "Ghrelin",
            "Glucagon",
            "Growth hormone (GH)",
            "Human chorionic gonadotropin (hCG)",
            "Human placental lactogen (HPL)",
            "Insulin",
            "Luteinizing hormone (LH)",
            "Melanocyte-stimulating hormone (MSH)",
            "Oxytocin",
            "Parathyroid hormone (PTH)",
            "Prolactin",
            "Somatostatin",
            "Thyroid-stimulating hormone (TSH)",
            "Vasopressin (Antidiuretic hormone, ADH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenocorticotropic hormone (ACTH)",
                "Angiotensin II",
                "Calcitonin",
                "Follicle-stimulating hormone (FSH)",
                "Ghrelin",
                "Glucagon",
                "Growth hormone (GH)",
                "Insulin",
                "Luteinizing hormone (LH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Oxytocin",
                "Parathyroid hormone (PTH)",
                "Prolactin",
                "Somatostatin"
            ],
            "mismatches": [
                "Atrial natriuretic peptide (ANP)",
                "Beta-endorphin",
                "Cholecystokinin (CCK)",
                "Dopamine",
                "Erythropoietin (EPO)",
                "Gastrin",
                "Human chorionic gonadotropin (hCG)",
                "Human placental lactogen (HPL)",
                "Thyroid-stimulating hormone (TSH)",
                "Vasopressin (Antidiuretic hormone, ADH)"
            ],
            "true_referents": [
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensins",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "Calcitonin",
                "Chorionic Gonadotropin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Human beta Subunit Chorionic Gonadotropin",
                "Insulin",
                "Insulins",
                "Leptin",
                "Long-Acting Insulin",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocyte-Stimulating Hormones",
                "Natriuretic Peptides",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Placental Hormones",
                "Placental Lactogen",
                "Procalcitonin",
                "Proglucagon",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Terlipressin",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "alpha-Endorphin",
                "beta Subunit Luteinizing Hormone",
                "beta-Endorphin",
                "beta-Lipotropin",
                "beta-MSH"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GLU)",
            "Growth Hormone (GH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Luteinizing Hormone (LH)",
            "Follicle-Stimulating Hormone (FSH)",
            "Prolactin (PRL)",
            "Oxytocin (OXT)",
            "Vasopressin (AVP)",
            "Endorphins (END)",
            "Gonadotropin-Releasing Hormone (GnRH)",
            "Somatostatin (SST)",
            "Melanocyte-Stimulating Hormone (MSH)",
            "Corticotropin-Releasing Hormone (CRH)",
            "Cholecystokinin (CCK)",
            "Gastrin (GAS)",
            "Secretin (SCT)",
            "Motilin (MTL)",
            "Ghrelin (GRL)",
            "Neuromedin U (NMU)",
            "Substance P (SP)",
            "Calcitonin (CT)",
            "Parathyroid Hormone (PTH)",
            "Thymosin (THYM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin (INS)",
                "Glucagon (GLU)",
                "Growth Hormone (GH)",
                "Adrenocorticotropic Hormone (ACTH)",
                "Luteinizing Hormone (LH)",
                "Follicle-Stimulating Hormone (FSH)",
                "Prolactin (PRL)",
                "Oxytocin (OXT)",
                "Vasopressin (AVP)",
                "Gonadotropin-Releasing Hormone (GnRH)",
                "Somatostatin (SST)",
                "Melanocyte-Stimulating Hormone (MSH)",
                "Corticotropin-Releasing Hormone (CRH)",
                "Gastrin (GAS)",
                "Secretin (SCT)",
                "Motilin (MTL)",
                "Ghrelin (GRL)",
                "Calcitonin (CT)",
                "Parathyroid Hormone (PTH)"
            ],
            "mismatches": [
                "Thyroid-Stimulating Hormone (TSH)",
                "Endorphins (END)",
                "Cholecystokinin (CCK)",
                "Neuromedin U (NMU)",
                "Substance P (SP)",
                "Thymosin (THYM)"
            ],
            "true_referents": [
                "Adrenocorticotropic Hormone",
                "Arginine Vasopressin",
                "Buserelin",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulins",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocyte-Stimulating Hormones",
                "Motilin",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Urofollitropin",
                "Urotensins",
                "Vasopressins",
                "alpha-Endorphin",
                "beta Subunit Luteinizing Hormone",
                "beta-Endorphin",
                "beta-MSH",
                "gamma-Endorphin"
            ],
            "TP": 19,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Adrenocorticotropic hormone (ACTH)",
            "Antidiuretic hormone (ADH)",
            "Angiotensin II",
            "Angiotensin I",
            "Angiotensin III",
            "Angiotensinogen",
            "Atrial natriuretic peptide (ANP)",
            "B-type natriuretic peptide (BNP)",
            "Brain natriuretic peptide (BNP)",
            "Calcitonin",
            "Cholecystokinin (CCK)",
            "Corticotropin-releasing hormone (CRH)",
            "Endothelin",
            "Gastrin",
            "Gastrin-releasing peptide (GRP)",
            "Ghrelin",
            "Glucagon",
            "Growth hormone-releasing hormone (GHRH)",
            "Insulin",
            "Leptin",
            "Melanocyte-stimulating hormone (MSH)",
            "Neurotensin",
            "Oxytocin",
            "Prolactin",
            "Thyrotropin-releasing hormone (TRH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenocorticotropic hormone (ACTH)",
                "Angiotensin II",
                "Angiotensin I",
                "Angiotensin III",
                "Brain natriuretic peptide (BNP)",
                "Calcitonin",
                "Corticotropin-releasing hormone (CRH)",
                "Ghrelin",
                "Glucagon",
                "Insulin",
                "Leptin",
                "Oxytocin",
                "Prolactin"
            ],
            "mismatches": [
                "Antidiuretic hormone (ADH)",
                "Angiotensinogen",
                "Atrial natriuretic peptide (ANP)",
                "Cholecystokinin (CCK)",
                "Endothelin",
                "Gastrin",
                "Gastrin-releasing peptide (GRP)",
                "Growth hormone-releasing hormone (GHRH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Neurotensin",
                "Thyrotropin-releasing hormone (TRH)"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensin III",
                "Angiotensins",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "C-Type Natriuretic Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulins",
                "Leptin",
                "Long-Acting Insulin",
                "Lypressin",
                "Melanocyte-Stimulating Hormones",
                "Natriuretic Peptides",
                "Oxytocin",
                "Parathyroid Hormone",
                "Procalcitonin",
                "Proglucagon",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Terlipressin",
                "Vasopressins",
                "beta-MSH"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Growth Hormone-Releasing Hormone (GHRH)",
            "Somatostatin (SST)",
            "Prolactin (PRL)",
            "Thyrotropin-Releasing Hormone (TRH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Luteinizing Hormone (LH)",
            "Follicle-Stimulating Hormone (FSH)",
            "Vasopressin (AVP)",
            "Oxytocin (OT)",
            "Cholecystokinin (CCK)",
            "Gastrin (GAS)",
            "Secretin (SEC)",
            "Motilin (MTL)",
            "Substance P (SP)",
            "Neuropeptide Y (NPY)",
            "Endorphins (END)",
            "Enkephalins (ENK)",
            "Dynorphins (DYN)",
            "Gonadotropin-Releasing Hormone (GnRH)",
            "Melanocyte-Stimulating Hormone (MSH)",
            "Atrial Natriuretic Peptide (ANP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Growth Hormone-Releasing Hormone",
                "Somatostatin",
                "Prolactin",
                "Thyrotropin-Releasing Hormone",
                "Adrenocorticotropic Hormone",
                "Luteinizing Hormone",
                "Follicle-Stimulating Hormone",
                "Vasopressin",
                "Oxytocin",
                "Gastrin",
                "Secretin",
                "Motilin",
                "Gonadotropin-Releasing Hormone",
                "Melanocyte-Stimulating Hormone"
            ],
            "mismatches": [
                "Thyroid-Stimulating Hormone",
                "Cholecystokinin",
                "Substance P",
                "Neuropeptide Y",
                "Endorphins",
                "Enkephalins",
                "Dynorphins",
                "Atrial Natriuretic Peptide"
            ],
            "true_referents": [
                "Adrenocorticotropic Hormone",
                "Arginine Vasopressin",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "Buserelin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocyte-Stimulating Hormones",
                "Motilin",
                "Natriuretic Peptides",
                "Oxytocin",
                "Pancreatic Polypeptide",
                "Peptide YY",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "alpha-Endorphin",
                "beta Subunit Luteinizing Hormone",
                "beta-Endorphin",
                "beta-MSH",
                "gamma-Endorphin"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Growth Hormone (GH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Thyroid Stimulating Hormone (TSH)",
            "Follicle Stimulating Hormone (FSH)",
            "Luteinizing Hormone (LH)",
            "Prolactin (PRL)",
            "Parathyroid Hormone (PTH)",
            "Calcitonin (CT)",
            "Oxytocin (OXT)",
            "Vasopressin (AVP)",
            "Ghrelin (GHRL)",
            "Leptin (LEP)",
            "Cholecystokinin (CCK)",
            "Gastrin (GAST)",
            "Secretin (SCT)",
            "Somatostatin (SST)",
            "Endothelin (ET)",
            "Erythropoietin (EPO)",
            "Angiotensin II (ANG II)",
            "Atrial Natriuretic Peptide (ANP)",
            "Melanocyte Stimulating Hormone (MSH)",
            "Hypothalamic Releasing Hormones"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Growth Hormone",
                "Adrenocorticotropic Hormone",
                "Follicle Stimulating Hormone",
                "Luteinizing Hormone",
                "Prolactin",
                "Parathyroid Hormone",
                "Calcitonin",
                "Oxytocin",
                "Vasopressin",
                "Ghrelin",
                "Leptin",
                "Gastrin",
                "Secretin",
                "Somatostatin",
                "Angiotensin II",
                "Melanocyte Stimulating Hormone"
            ],
            "mismatches": [
                "Thyroid Stimulating Hormone",
                "Cholecystokinin",
                "Endothelin",
                "Erythropoietin",
                "Atrial Natriuretic Peptide",
                "Hypothalamic Releasing Hormones"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensins",
                "Arginine Vasopressin",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Hypothalamic Hormones",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocyte-Stimulating Hormones",
                "Natriuretic Peptides",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Pituitary Hormone-Releasing Hormones",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "beta Subunit Luteinizing Hormone",
                "beta-Lipotropin",
                "beta-MSH"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Somatostatin (SST)",
            "Vasopressin (AVP)",
            "Oxytocin (OXT)",
            "Calcitonin (CALC)",
            "Gastrin (GAST)",
            "Cholecystokinin (CCK)",
            "Secretin (SCT)",
            "Neuropeptide Y (NPY)",
            "Adrenocorticotropic hormone (ACTH)",
            "Luteinizing hormone (LH)",
            "Follicle-stimulating hormone (FSH)",
            "Thyroid-stimulating hormone (TSH)",
            "Atrial natriuretic peptide (ANP)",
            "Brain natriuretic peptide (BNP)",
            "Prolactin (PRL)",
            "Growth hormone-releasing hormone (GHRH)",
            "Ghrelin (GHRL)",
            "Leptin (LEP)",
            "Peptide YY (PYY)",
            "Inhibin (INH)",
            "Neuropeptide FF (NPFF)",
            "Endothelin (EDN)",
            "Angiotensin II (AGT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin (INS)",
                "Glucagon (GCG)",
                "Somatostatin (SST)",
                "Vasopressin (AVP)",
                "Oxytocin (OXT)",
                "Calcitonin (CALC)",
                "Gastrin (GAST)",
                "Secretin (SCT)",
                "Adrenocorticotropic hormone (ACTH)",
                "Luteinizing hormone (LH)",
                "Follicle-stimulating hormone (FSH)",
                "Atrial natriuretic peptide (ANP)",
                "Brain natriuretic peptide (BNP)",
                "Prolactin (PRL)",
                "Growth hormone-releasing hormone (GHRH)",
                "Ghrelin (GHRL)",
                "Leptin (LEP)",
                "Peptide YY (PYY)",
                "Inhibin (INH)"
            ],
            "mismatches": [
                "Cholecystokinin (CCK)",
                "Neuropeptide Y (NPY)",
                "Thyroid-stimulating hormone (TSH)",
                "Neuropeptide FF (NPFF)",
                "Endothelin (EDN)",
                "Angiotensin II (AGT)"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensins",
                "Arginine Vasopressin",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "C-Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Inhibin-beta Subunits",
                "Inhibins",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Natriuretic Peptides",
                "Oxytocin",
                "Parathyroid Hormone",
                "Peptide PHI",
                "Peptide YY",
                "Pro-Opiomelanocortin",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "beta Subunit Luteinizing Hormone"
            ],
            "TP": 19,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Growth Hormone (GH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Follicle-Stimulating Hormone (FSH)",
            "Luteinizing Hormone (LH)",
            "Prolactin (PRL)",
            "Human Chorionic Gonadotropin (hCG)",
            "Antidiuretic Hormone (ADH)",
            "Oxytocin (OXT)",
            "Gastrin (GAST)",
            "Secretin (SCT)",
            "Cholecystokinin (CCK)",
            "Ghrelin (GHRL)",
            "Leptin (LEP)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Somatostatin (SST)",
            "Parathyroid Hormone (PTH)",
            "Calcitonin (CALCA)",
            "Thymosin Alpha 1 (TMSA1)",
            "Thymopoietin (TMPO)",
            "Angiotensin II (ANGII)",
            "Endothelins (EDN)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Growth Hormone",
                "Adrenocorticotropic Hormone",
                "Follicle-Stimulating Hormone",
                "Luteinizing Hormone",
                "Prolactin",
                "Oxytocin",
                "Ghrelin",
                "Leptin",
                "Vasoactive Intestinal Peptide",
                "Somatostatin",
                "Parathyroid Hormone",
                "Calcitonin",
                "Angiotensin II"
            ],
            "mismatches": [
                "Thyroid-Stimulating Hormone",
                "Human Chorionic Gonadotropin",
                "Antidiuretic Hormone",
                "Gastrin",
                "Secretin",
                "Cholecystokinin",
                "Thymosin Alpha 1",
                "Thymopoietin",
                "Endothelins"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensin III",
                "Angiotensins",
                "Brain Natriuretic Peptide",
                "Calcitonin",
                "Chorionic Gonadotropin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Human beta Subunit Chorionic Gonadotropin",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Terlipressin",
                "Thymalfasin",
                "Thymosin",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasoactive Intestinal Peptide",
                "Vasopressins",
                "alpha-MSH",
                "beta Subunit Luteinizing Hormone"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin",
            "Glucagon",
            "Growth hormone (GH)",
            "Adrenocorticotropic hormone (ACTH)",
            "Thyroid-stimulating hormone (TSH)",
            "Follicle-stimulating hormone (FSH)",
            "Luteinizing hormone (LH)",
            "Prolactin",
            "Oxytocin",
            "Vasopressin",
            "Calcitonin",
            "Parathyroid hormone (PTH)",
            "Corticotropin-releasing hormone (CRH)",
            "Gonadotropin-releasing hormone (GnRH)",
            "Thyrotropin-releasing hormone (TRH)",
            "Gastrin",
            "Cholecystokinin (CCK)",
            "Secretin",
            "Motilin",
            "Ghrelin",
            "Leptin",
            "Melanocyte-stimulating hormone (MSH)",
            "Beta-endorphin",
            "Alpha-melanocyte-stimulating hormone (\u03b1-MSH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Growth hormone (GH)",
                "Adrenocorticotropic hormone (ACTH)",
                "Follicle-stimulating hormone (FSH)",
                "Luteinizing hormone (LH)",
                "Prolactin",
                "Oxytocin",
                "Vasopressin",
                "Calcitonin",
                "Parathyroid hormone (PTH)",
                "Corticotropin-releasing hormone (CRH)",
                "Gonadotropin-releasing hormone (GnRH)",
                "Thyrotropin-releasing hormone (TRH)",
                "Secretin",
                "Motilin",
                "Ghrelin",
                "Leptin",
                "Melanocyte-stimulating hormone (MSH)",
                "Beta-endorphin"
            ],
            "mismatches": [
                "Thyroid-stimulating hormone (TSH)",
                "Gastrin",
                "Cholecystokinin (CCK)",
                "Alpha-melanocyte-stimulating hormone (\u03b1-MSH)"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Buserelin",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulins",
                "Leptin",
                "Long-Acting Insulin",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocortins",
                "Melanocyte-Stimulating Hormones",
                "Motilin",
                "Nafarelin",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Procalcitonin",
                "Proglucagon",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Terlipressin",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "alpha-Endorphin",
                "beta Subunit Luteinizing Hormone",
                "beta-Endorphin",
                "beta-MSH"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Growth Hormone (GH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Thyroid Stimulating Hormone (TSH)",
            "Luteinizing Hormone (LH)",
            "Follicle Stimulating Hormone (FSH)",
            "Prolactin (PRL)",
            "Oxytocin (OXT)",
            "Vasopressin (AVP)",
            "Erythropoietin (EPO)",
            "Calcitonin (CT)",
            "Parathyroid Hormone (PTH)",
            "Gastrin (GAST)",
            "Cholecystokinin (CCK)",
            "Secretin (SCT)",
            "Somatostatin (SST)",
            "Angiotensin II (ANGII)",
            "Atrial Natriuretic Peptide (ANP)",
            "Beta-Endorphin (END)",
            "Proopiomelanocortin (POMC)",
            "Neuropeptide Y (NPY)",
            "Substance P (SUBP)",
            "Ghrelin (GHRL)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Growth Hormone",
                "Adrenocorticotropic Hormone",
                "Luteinizing Hormone",
                "Follicle Stimulating Hormone",
                "Prolactin",
                "Oxytocin",
                "Vasopressin",
                "Calcitonin",
                "Parathyroid Hormone",
                "Secretin",
                "Somatostatin",
                "Angiotensin II",
                "Ghrelin",
                "beta-Endorphin",
                "Pro-Opiomelanocortin"
            ],
            "mismatches": [
                "Thyroid Stimulating Hormone",
                "Erythropoietin",
                "Gastrin",
                "Cholecystokinin",
                "Atrial Natriuretic Peptide",
                "Neuropeptide Y",
                "Substance P"
            ],
            "true_referents": [
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensin III",
                "Arginine Vasopressin",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocortins",
                "Natriuretic Peptides",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Peptide YY",
                "Pro-Opiomelanocortin",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "alpha-Endorphin",
                "beta Subunit Luteinizing Hormone",
                "beta-Endorphin",
                "beta-Lipotropin",
                "gamma-Endorphin"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Growth hormone (GH)",
            "Adrenocorticotropic hormone (ACTH)",
            "Oxytocin (OXT)",
            "Vasopressin (AVP)",
            "Calcitonin (CALCA)",
            "Parathyroid hormone (PTH)",
            "Thyroid-stimulating hormone (TSH)",
            "Follicle-stimulating hormone (FSH)",
            "Luteinizing hormone (LH)",
            "Prolactin (PRL)",
            "Melanocyte-stimulating hormone (MSH)",
            "Gastrin (GAST)",
            "Cholecystokinin (CCK)",
            "Secretin (SCT)",
            "Ghrelin (GHRL)",
            "Leptin (LEP)",
            "Somatostatin (SST)",
            "Corticotropin-releasing hormone (CRH)",
            "Gonadotropin-releasing hormone (GnRH)",
            "Thyrotropin-releasing hormone (TRH)",
            "Atrial natriuretic peptide (NPPA)",
            "Brain natriuretic peptide (NPPB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Growth Hormone",
                "Adrenocorticotropic Hormone",
                "Oxytocin",
                "Vasopressins",
                "Calcitonin",
                "Parathyroid Hormone",
                "Follicle Stimulating Hormone",
                "Luteinizing Hormone",
                "Prolactin",
                "Melanocyte-Stimulating Hormones",
                "Gastrins",
                "Secretin",
                "Ghrelin",
                "Leptin",
                "Somatostatin",
                "Corticotropin-Releasing Hormone",
                "Gonadotropin-Releasing Hormone",
                "Thyrotropin-Releasing Hormone",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide"
            ],
            "mismatches": [
                "Thyroid-stimulating hormone (TSH)",
                "Cholecystokinin (CCK)"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Arginine Vasopressin",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "C-Type Natriuretic Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocyte-Stimulating Hormones",
                "Natriuretic Peptides",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "beta Subunit Luteinizing Hormone",
                "beta-MSH"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Somatostatin (SST)",
            "Growth Hormone-Releasing Hormone (GHRH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Follicle-Stimulating Hormone (FSH)",
            "Luteinizing Hormone (LH)",
            "Prolactin (PRL)",
            "Oxytocin (OXT)",
            "Vasopressin (AVP)",
            "Calcitonin (CALCA)",
            "Parathyroid Hormone (PTH)",
            "Atrial Natriuretic Peptide (ANP)",
            "Brain Natriuretic Peptide (BNP)",
            "Gastrin (GAST)",
            "Cholecystokinin (CCK)",
            "Secretin (SCT)",
            "Motilin (MLN)",
            "Ghrelin (GHRL)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Glucagon-like Peptide-1 (GLP-1)",
            "Pancreatic Polypeptide (PPY)",
            "Neuropeptide Y (NPY)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Somatostatin",
                "Growth Hormone-Releasing Hormone",
                "Adrenocorticotropic Hormone",
                "Follicle-Stimulating Hormone",
                "Luteinizing Hormone",
                "Prolactin",
                "Oxytocin",
                "Vasopressin",
                "Calcitonin",
                "Parathyroid Hormone",
                "Secretin",
                "Motilin",
                "Ghrelin",
                "Vasoactive Intestinal Peptide",
                "Pancreatic Polypeptide"
            ],
            "mismatches": [
                "Thyroid-Stimulating Hormone (TSH)",
                "Atrial Natriuretic Peptide (ANP)",
                "Brain Natriuretic Peptide (BNP)",
                "Gastrin (GAST)",
                "Cholecystokinin (CCK)",
                "Glucagon-like Peptide-1 (GLP-1)",
                "Neuropeptide Y (NPY)"
            ],
            "true_referents": [
                "Adrenocorticotropic Hormone",
                "Arginine Vasopressin",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Islet Amyloid Polypeptide",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Motilin",
                "Nafarelin",
                "Natriuretic Peptides",
                "Oxytocin",
                "Pancreatic Polypeptide",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Peptide YY",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasoactive Intestinal Peptide",
                "Vasopressins",
                "beta Subunit Luteinizing Hormone"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Somatostatin (SST)",
            "Growth Hormone-Releasing Hormone (GHRH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Follicle-Stimulating Hormone (FSH)",
            "Luteinizing Hormone (LH)",
            "Prolactin (PRL)",
            "Oxytocin (OXT)",
            "Antidiuretic Hormone (ADH)",
            "Angiotensin II (AGT)",
            "Bradykinin (BRD)",
            "Calcitonin (CALCA)",
            "Gastrin (GAST)",
            "Cholecystokinin (CCK)",
            "Secretin (SCT)",
            "Vasoactive Intestinal Peptide (VIP)",
            "Substance P (TAC1)",
            "Neuropeptide Y (NPY)",
            "Ghrelin (GHRL)",
            "Leptin (LEP)",
            "Parathyroid Hormone (PTH)",
            "Gonadotropin-Releasing Hormone (GnRH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Glucagon",
                "Somatostatin",
                "Growth Hormone-Releasing Hormone",
                "Adrenocorticotropic Hormone",
                "Follicle Stimulating Hormone",
                "Luteinizing Hormone",
                "Prolactin",
                "Oxytocin",
                "Angiotensin II",
                "Calcitonin",
                "Secretin",
                "Vasoactive Intestinal Peptide",
                "Ghrelin",
                "Leptin",
                "Parathyroid Hormone",
                "Gonadotropin-Releasing Hormone"
            ],
            "mismatches": [
                "Thyroid-Stimulating Hormone (TSH)",
                "Antidiuretic Hormone (ADH)",
                "Bradykinin (BRD)",
                "Gastrin (GAST)",
                "Cholecystokinin (CCK)",
                "Substance P (TAC1)",
                "Neuropeptide Y (NPY)"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensins",
                "Bombesin",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulin Glargine",
                "Insulins",
                "Leptin",
                "Luteinizing Hormone",
                "Lypressin",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Peptide YY",
                "Procalcitonin",
                "Proglucagon",
                "Proinsulin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Terlipressin",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasoactive Intestinal Peptide",
                "Vasopressins",
                "beta Subunit Luteinizing Hormone"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Adrenocorticotropic Hormone (ACTH)",
            "Antidiuretic Hormone (ADH)",
            "Calcitonin",
            "Cholecystokinin (CCK)",
            "Corticotropin-Releasing Hormone (CRH)",
            "Erythropoietin (EPO)",
            "Follicle-Stimulating Hormone (FSH)",
            "Gastrin",
            "Ghrelin",
            "Glucagon",
            "Gonadotropin-Releasing Hormone (GnRH)",
            "Growth Hormone (GH)",
            "Insulin",
            "Leptin",
            "Luteinizing Hormone (LH)",
            "Melatonin",
            "Oxytocin",
            "Parathyroid Hormone (PTH)",
            "Prolactin (PRL)",
            "Secretin",
            "Somatostatin",
            "Thyrotropin-Releasing Hormone (TRH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Vasopressin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenocorticotropic Hormone",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Follicle-Stimulating Hormone",
                "Gastrin",
                "Ghrelin",
                "Glucagon",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Insulin",
                "Leptin",
                "Luteinizing Hormone",
                "Oxytocin",
                "Parathyroid Hormone",
                "Prolactin",
                "Secretin",
                "Somatostatin",
                "Thyrotropin-Releasing Hormone",
                "Vasopressin"
            ],
            "mismatches": [
                "Antidiuretic Hormone (ADH)",
                "Cholecystokinin (CCK)",
                "Erythropoietin (EPO)",
                "Melatonin",
                "Thyroid-Stimulating Hormone (TSH)"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastric Inhibitory Polypeptide",
                "Gastrins",
                "Ghrelin",
                "Glucagon",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulins",
                "Leptin",
                "Long-Acting Insulin",
                "Luteinizing Hormone",
                "Lypressin",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Procalcitonin",
                "Proglucagon",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Terlipressin",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "beta Subunit Luteinizing Hormone",
                "beta-Lipotropin"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Adrenocorticotropic hormone (ACTH)",
            "Angiotensin-converting enzyme (ACE) inhibitor",
            "Antidiuretic hormone (ADH)",
            "Calcitonin",
            "Calcitonin gene-related peptide (CGRP)",
            "Cholecystokinin (CCK)",
            "Chorionic gonadotropin (hCG)",
            "Corticotropin-releasing factor (CRF)",
            "Follicle-stimulating hormone (FSH)",
            "Growth hormone-releasing hormone (GHRH)",
            "Growth hormone (GH)",
            "Insulin-like growth factor 1 (IGF-1)",
            "Insulin",
            "Luteinizing hormone (LH)",
            "Melanocyte-stimulating hormone (MSH)",
            "Melanotropin",
            "Oxytocin",
            "Parathyroid hormone (PTH)",
            "Prolactin",
            "Somatostatin",
            "Somatotropin",
            "Thyrotropin-releasing hormone (TRH)",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenocorticotropic hormone (ACTH)",
                "Calcitonin",
                "Chorionic gonadotropin (hCG)",
                "Corticotropin-releasing factor (CRF)",
                "Follicle-stimulating hormone (FSH)",
                "Growth hormone-releasing hormone (GHRH)",
                "Growth hormone (GH)",
                "Insulin",
                "Luteinizing hormone (LH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Oxytocin",
                "Parathyroid hormone (PTH)",
                "Prolactin",
                "Somatostatin",
                "Thyrotropin-releasing hormone (TRH)"
            ],
            "mismatches": [
                "Angiotensin-converting enzyme (ACE) inhibitor",
                "Antidiuretic hormone (ADH)",
                "Calcitonin gene-related peptide (CGRP)",
                "Cholecystokinin (CCK)",
                "Insulin-like growth factor 1 (IGF-1)",
                "Melanotropin",
                "Somatotropin",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)"
            ],
            "true_referents": [
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensins",
                "Calcitonin",
                "Chorionic Gonadotropin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Follicle Stimulating Hormone",
                "Gastrins",
                "Ghrelin",
                "Gonadotropin-Releasing Hormone",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Human beta Subunit Chorionic Gonadotropin",
                "Insulin",
                "Insulins",
                "Long-Acting Insulin",
                "Luteinizing Hormone",
                "Lypressin",
                "Melanocortins",
                "Melanocyte-Stimulating Hormones",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Procalcitonin",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Terlipressin",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Vasopressins",
                "beta Subunit Luteinizing Hormone",
                "beta-MSH"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            "Adiponectin (ADIPOQ)",
            "Adrenocorticotropic hormone (ACTH)",
            "Aldosterone",
            "Angiotensinogen",
            "Antidiuretic hormone (ADH)",
            "Atrial natriuretic peptide (ANP)",
            "Calcitonin",
            "Corticotropin-releasing hormone (CRH)",
            "Galectin-13",
            "Ghrelin",
            "Growth hormone-releasing hormone (GHRH)",
            "Insulin",
            "Leptin",
            "Motilin",
            "Natriuretic peptides (NPs)",
            "Neuromedin U (NMU)",
            "Neuropeptide Y (NPY)",
            "Oxytocin",
            "Parathyroid hormone (PTH)",
            "Peptide YY (PYY)",
            "Relaxin",
            "Secretin",
            "Somatostatin",
            "Thyroid-stimulating hormone (TSH)",
            "Vasoactive intestinal peptide (VIP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adiponectin",
                "Adrenocorticotropic hormone",
                "Atrial natriuretic peptide",
                "Calcitonin",
                "Corticotropin-releasing hormone",
                "Ghrelin",
                "Growth hormone-releasing hormone",
                "Insulin",
                "Leptin",
                "Motilin",
                "Natriuretic peptides",
                "Oxytocin",
                "Parathyroid hormone",
                "Peptide YY",
                "Relaxin",
                "Secretin",
                "Somatostatin"
            ],
            "mismatches": [
                "Aldosterone",
                "Angiotensinogen",
                "Antidiuretic hormone",
                "Galectin-13",
                "Neuromedin U",
                "Neuropeptide Y",
                "Thyroid-stimulating hormone",
                "Vasoactive intestinal peptide"
            ],
            "true_referents": [
                "Adipokines",
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensins",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "Buserelin",
                "C-Peptide",
                "C-Type Natriuretic Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Cosyntropin",
                "Ghrelin",
                "Granulins",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Growth Hormone",
                "Insulin",
                "Insulins",
                "Leptin",
                "Long-Acting Insulin",
                "Lypressin",
                "Motilin",
                "Nafarelin",
                "Natriuretic Peptides",
                "Oxytocin",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Peptide PHI",
                "Peptide YY",
                "Procalcitonin",
                "Progranulins",
                "Relaxin",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Terlipressin",
                "Thyrotropin",
                "Thyrotropin Alfa",
                "Thyrotropin-Releasing Hormone",
                "Urofollitropin",
                "Urotensins",
                "Vasopressins"
            ],
            "TP": 17,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "peptide hormone",
        "domain": "biology",
        "response": [
            {
                "Referent": "Adrenocorticotropic hormone (ACTH)",
                "Canonical name": "(ACTH)"
            },
            {
                "Referent": "Angiotensin II (Ang II)",
                "Canonical name": "(Ang II)"
            },
            {
                "Referent": "Atrial natriuretic factor (ANF)",
                "Canonical name": "(ANF)"
            },
            {
                "Referent": "Calcitonin (CT)",
                "Canonical name": "(CT)"
            },
            {
                "Referent": "Corticotropin-releasing hormone (CRH)",
                "Canonical name": "(CRH)"
            },
            {
                "Referent": "Erythropoietin (EPO)",
                "Canonical name": "(EPO)"
            },
            {
                "Referent": "Follitropin (FSH)",
                "Canonical name": "(FSH)"
            },
            {
                "Referent": "Follicle-stimulating hormone (FSH)",
                "Canonical name": "(FSH)"
            },
            {
                "Referent": "Glucagon (GC)",
                "Canonical name": "(GC)"
            },
            {
                "Referent": "Growth hormone (GH)",
                "Canonical name": "(GH)"
            },
            {
                "Referent": "Growth hormone-releasing hormone (GHRH)",
                "Canonical name": "(GHRH)"
            },
            {
                "Referent": "Insulin (INS)",
                "Canonical name": "(INS)"
            },
            {
                "Referent": "Insulin-like growth factor 1 (IGF-1)",
                "Canonical name": "(IGF-1)"
            },
            {
                "Referent": "Insulin-like growth factor 2 (IGF-2)",
                "Canonical name": "(IGF-2)"
            },
            {
                "Referent": "Insulin-like growth factor 3 (IGF-3)",
                "Canonical name": "(IGF-3)"
            },
            {
                "Referent": "Luteinizing hormone (LH)",
                "Canonical name": "(LH)"
            },
            {
                "Referent": "Melanocyte-stimulating hormone (MSH)",
                "Canonical name": "(MSH)"
            },
            {
                "Referent": "Parathyroid hormone (PTH)",
                "Canonical name": "(PTH)"
            },
            {
                "Referent": "Prolactin (PRL)",
                "Canonical name": "(PRL)"
            },
            {
                "Referent": "Proopiomelanocortin (POMC)",
                "Canonical name": "(POMC)"
            },
            {
                "Referent": "Prostaglandin F2\u03b1 (PGF2\u03b1)",
                "Canonical name": "(PGF2\u03b1)"
            },
            {
                "Referent": "Renin (REN)",
                "Canonical name": "(REN)"
            },
            {
                "Referent": "Somatostatin (SST)",
                "Canonical name": "(SST)"
            },
            {
                "Referent": "Testosterone (TES)",
                "Canonical name": "(TES)"
            },
            {
                "Referent": "Thyrotropin-releasing hormone (TRH)",
                "Canonical name": "(TRH)"
            },
            {
                "Referent": "Thyroid-stimulating hormone (TSH)",
                "Canonical name": "(TSH)"
            },
            {
                "Referent": "Thyroxine (T4)",
                "Canonical name": "(T4)"
            },
            {
                "Referent": "Vasopressin (VP)",
                "Canonical name": "(VP)"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenocorticotropic Hormone",
                "Angiotensin II",
                "Atrial Natriuretic Factor",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Follicle Stimulating Hormone",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Insulin",
                "Luteinizing Hormone",
                "Melanocyte-Stimulating Hormones",
                "Parathyroid Hormone",
                "Prolactin",
                "Pro-Opiomelanocortin",
                "Somatostatin",
                "Thyrotropin"
            ],
            "mismatches": [
                "Erythropoietin",
                "Insulin-like growth factor 1",
                "Insulin-like growth factor 2",
                "Insulin-like growth factor 3",
                "Prostaglandin F2\u03b1",
                "Renin",
                "Testosterone",
                "Thyrotropin-releasing hormone",
                "Thyroid-stimulating hormone",
                "Thyroxine",
                "Vasopressin"
            ],
            "true_referents": [
                "Adiponectin",
                "Adrenocorticotropic Hormone",
                "Angiotensin I",
                "Angiotensin II",
                "Angiotensin III",
                "Angiotensins",
                "Atrial Natriuretic Factor",
                "Brain Natriuretic Peptide",
                "Calcitonin",
                "Corticotropin-Releasing Hormone",
                "Follicle Stimulating Hormone",
                "Glucagon",
                "Growth Hormone",
                "Growth Hormone-Releasing Hormone",
                "Human Follicle Stimulating Hormone",
                "Human Growth Hormone",
                "Human Regular Insulin",
                "Insulin",
                "Insulins",
                "Leuprolide",
                "Luteinizing Hormone",
                "Melanocortins",
                "Melanocyte-Stimulating Hormones",
                "Natriuretic Peptides",
                "Parathyroid Hormone",
                "Parathyroid Hormone-Related Protein",
                "Pro-Opiomelanocortin",
                "Procalcitonin",
                "Proglucagon",
                "Prolactin",
                "Prolactin-Releasing Hormone",
                "Secretin",
                "Somatostatin",
                "Somatostatin-28",
                "Thyrotropin",
                "beta Subunit Follicle Stimulating Hormone",
                "beta Subunit Luteinizing Hormone",
                "beta-Endorphin",
                "beta-Lipotropin",
                "gamma-Endorphin"
            ],
            "TP": 17,
            "FP": 11,
            "FN": 0
        }
    }
]