[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Adrenaline (EPI)",
            "Dopamine",
            "Serotonin",
            "Melatonin",
            "Histamine",
            "Leptin",
            "Gastrin",
            "Gastrin-releasing peptide",
            "Somatostatin",
            "Vasoactive intestinal peptide",
            "Neuropeptide Y",
            "Bradykinin",
            "Angiotensin II",
            "Calcitonin gene-related peptide",
            "Neurotensin",
            "Substance P",
            "Enkephalin",
            "Dynorphin",
            "Endorphin",
            "Prolactin-releasing peptide",
            "Orexin",
            "Cholecystokinin",
            "Galanin",
            "Neuropeptide FF"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenaline (EPI)",
                "Dopamine",
                "Serotonin",
                "Melatonin",
                "Histamine",
                "Leptin",
                "Gastrin",
                "Gastrin-releasing peptide",
                "Somatostatin",
                "Vasoactive intestinal peptide",
                "Neuropeptide Y",
                "Bradykinin",
                "Angiotensin II",
                "Calcitonin gene-related peptide",
                "Neurotensin",
                "Substance P",
                "Enkephalin",
                "Dynorphin",
                "Endorphin",
                "Prolactin-releasing peptide",
                "Orexin",
                "Cholecystokinin",
                "Galanin",
                "Neuropeptide FF"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenaline (EPI)",
                "Angiotensin II",
                "Bradykinin",
                "Calcitonin gene-related peptide",
                "Cholecystokinin",
                "Dopamine",
                "Dynorphin",
                "Endorphin",
                "Enkephalin",
                "Galanin",
                "Gastrin",
                "Gastrin-releasing peptide",
                "Histamine",
                "Leptin",
                "Melatonin",
                "Neuropeptide FF",
                "Neuropeptide Y",
                "Neurotensin",
                "Orexin",
                "Prolactin-releasing peptide",
                "Serotonin",
                "Somatostatin",
                "Substance P",
                "Vasoactive intestinal peptide"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Adrenaline (Epinephrine)",
            "Noradrenaline (Norepinephrine)",
            "Dopamine",
            "Serotonin",
            "Melatonin",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Calcitonin",
            "Corticotropin-releasing hormone (CRH)",
            "Oxytocin",
            "Vasopressin (Antidiuretic Hormone, ADH)",
            "Glycine",
            "Gamma-aminobutyric acid (GABA)",
            "Histamine",
            "Glutamate",
            "Aspartate",
            "Endorphins",
            "Enkephalins",
            "Dynorphins",
            "Growth Hormone-Releasing Hormone (GHRH)",
            "Somatostatin",
            "Prolactin-Releasing Hormone (PRH)",
            "Somatostatin",
            "Gastrin-Releasing Peptide (GRP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenaline (Epinephrine)",
                "Noradrenaline (Norepinephrine)",
                "Dopamine",
                "Serotonin",
                "Melatonin",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Calcitonin",
                "Corticotropin-releasing hormone (CRH)",
                "Oxytocin",
                "Vasopressin (Antidiuretic Hormone, ADH)",
                "Glycine",
                "Gamma-aminobutyric acid (GABA)",
                "Histamine",
                "Glutamate",
                "Aspartate",
                "Endorphins",
                "Enkephalins",
                "Dynorphins",
                "Growth Hormone-Releasing Hormone (GHRH)",
                "Somatostatin",
                "Prolactin-Releasing Hormone (PRH)",
                "Gastrin-Releasing Peptide (GRP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenaline (Epinephrine)",
                "Aspartate",
                "Calcitonin",
                "Corticotropin-releasing hormone (CRH)",
                "Dopamine",
                "Dynorphins",
                "Endorphins",
                "Enkephalins",
                "Gamma-aminobutyric acid (GABA)",
                "Gastrin-Releasing Peptide (GRP)",
                "Glutamate",
                "Glycine",
                "Growth Hormone-Releasing Hormone (GHRH)",
                "Histamine",
                "Melatonin",
                "Noradrenaline (Norepinephrine)",
                "Oxytocin",
                "Prolactin-Releasing Hormone (PRH)",
                "Serotonin",
                "Somatostatin",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Vasopressin (Antidiuretic Hormone, ADH)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Adrenaline (Epinephrine)",
            "Noradrenaline (Norepinephrine)",
            "Dopamine",
            "Serotonin (5-HT)",
            "Histamine",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Melatonin",
            "Oxytocin",
            "Vasopressin",
            "Prostaglandins",
            "Leukotrienes",
            "Thromboxanes",
            "Melatonin",
            "GABA (Gamma-Aminobutyric Acid)",
            "Glutamate",
            "Acetylcholine",
            "Norepinephrine",
            "Tryptophan",
            "Tyrosine",
            "Phenylalanine",
            "Catecholamines",
            "Indoleamines",
            "Taurine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenaline (Epinephrine)",
                "Noradrenaline (Norepinephrine)",
                "Dopamine",
                "Serotonin (5-HT)",
                "Histamine",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Melatonin",
                "Oxytocin",
                "Vasopressin",
                "Prostaglandins",
                "Leukotrienes",
                "Thromboxanes",
                "GABA (Gamma-Aminobutyric Acid)",
                "Glutamate",
                "Acetylcholine",
                "Norepinephrine",
                "Tryptophan",
                "Tyrosine",
                "Phenylalanine",
                "Catecholamines",
                "Indoleamines",
                "Taurine"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylcholine",
                "Adrenaline (Epinephrine)",
                "Catecholamines",
                "Dopamine",
                "GABA (Gamma-Aminobutyric Acid)",
                "Glutamate",
                "Histamine",
                "Indoleamines",
                "Leukotrienes",
                "Melatonin",
                "Noradrenaline (Norepinephrine)",
                "Norepinephrine",
                "Oxytocin",
                "Phenylalanine",
                "Prostaglandins",
                "Serotonin (5-HT)",
                "Taurine",
                "Thromboxanes",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Tryptophan",
                "Tyrosine",
                "Vasopressin"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Epinephrine (EPI)",
            "Norepinephrine (NE)",
            "Dopamine (DA)",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Serotonin (5-HT)",
            "Melatonin (MEL)",
            "Histamine (HIS)",
            "Cortisol (COR)",
            "Aldosterone (ALD)",
            "Progesterone (PROG)",
            "Testosterone (TEST)",
            "Estrogen (EST)",
            "Glucagon (GLUC)",
            "Insulin (INS)",
            "Growth Hormone-Releasing Hormone (GHRH)",
            "Somatostatin (SST)",
            "Prolactin (PRL)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Antidiuretic Hormone (ADH)",
            "Oxytocin (OT)",
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Epinephrine (EPI)",
                "Norepinephrine (NE)",
                "Dopamine (DA)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Serotonin (5-HT)",
                "Melatonin (MEL)",
                "Histamine (HIS)",
                "Cortisol (COR)",
                "Aldosterone (ALD)",
                "Progesterone (PROG)",
                "Testosterone (TEST)",
                "Estrogen (EST)",
                "Glucagon (GLUC)",
                "Insulin (INS)",
                "Growth Hormone-Releasing Hormone (GHRH)",
                "Somatostatin (SST)",
                "Prolactin (PRL)",
                "Adrenocorticotropic Hormone (ACTH)",
                "Thyroid-Stimulating Hormone (TSH)",
                "Antidiuretic Hormone (ADH)",
                "Oxytocin (OT)",
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenocorticotropic Hormone (ACTH)",
                "Aldosterone (ALD)",
                "Antidiuretic Hormone (ADH)",
                "Cholecystokinin (CCK)",
                "Cortisol (COR)",
                "Dopamine (DA)",
                "Epinephrine (EPI)",
                "Estrogen (EST)",
                "Gastrin (GAS)",
                "Glucagon (GLUC)",
                "Growth Hormone-Releasing Hormone (GHRH)",
                "Histamine (HIS)",
                "Insulin (INS)",
                "Melatonin (MEL)",
                "Norepinephrine (NE)",
                "Oxytocin (OT)",
                "Progesterone (PROG)",
                "Prolactin (PRL)",
                "Serotonin (5-HT)",
                "Somatostatin (SST)",
                "Testosterone (TEST)",
                "Thyroid-Stimulating Hormone (TSH)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Epinephrine (Adrenaline)",
            "Norepinephrine (Noradrenaline)",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Melatonin",
            "Serotonin",
            "Dopamine",
            "Histamine",
            "Thyrotropin-Releasing Hormone (TRH)",
            "L-DOPA",
            "Gamma-Aminobutyric Acid (GABA)",
            "Tryptamine",
            "Indole-3-acetic acid (IAA)",
            "Octopamine",
            "Phenethylamine",
            "Diiodothyronine (T2)",
            "5-Hydroxytryptophan (5-HTP)",
            "3-Iodothyronamine (T1AM)",
            "Phenylethylamine (PEA)",
            "Tyramine",
            "Diiodotyrosine (DIT)",
            "Monoiodotyrosine (MIT)",
            "Ornithine",
            "Spermidine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Epinephrine (Adrenaline)",
                "Norepinephrine (Noradrenaline)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Melatonin",
                "Serotonin",
                "Dopamine",
                "Histamine",
                "Thyrotropin-Releasing Hormone (TRH)",
                "L-DOPA",
                "Gamma-Aminobutyric Acid (GABA)",
                "Tryptamine",
                "Indole-3-acetic acid (IAA)",
                "Octopamine",
                "Phenethylamine",
                "Diiodothyronine (T2)",
                "5-Hydroxytryptophan (5-HTP)",
                "3-Iodothyronamine (T1AM)",
                "Phenylethylamine (PEA)",
                "Tyramine",
                "Diiodotyrosine (DIT)",
                "Monoiodotyrosine (MIT)",
                "Ornithine",
                "Spermidine"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Iodothyronamine (T1AM)",
                "5-Hydroxytryptophan (5-HTP)",
                "Diiodothyronine (T2)",
                "Diiodotyrosine (DIT)",
                "Dopamine",
                "Epinephrine (Adrenaline)",
                "Gamma-Aminobutyric Acid (GABA)",
                "Histamine",
                "Indole-3-acetic acid (IAA)",
                "L-DOPA",
                "Melatonin",
                "Monoiodotyrosine (MIT)",
                "Norepinephrine (Noradrenaline)",
                "Octopamine",
                "Ornithine",
                "Phenethylamine",
                "Phenylethylamine (PEA)",
                "Serotonin",
                "Spermidine",
                "Thyrotropin-Releasing Hormone (TRH)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Tryptamine",
                "Tyramine"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Epinephrine (EPI)",
            "Norepinephrine (NE)",
            "Thyroid Hormone (TH)",
            "Serotonin (5-HT)",
            "Melatonin (MEL)",
            "Histamine (HIS)",
            "Dopamine (DA)",
            "Prolactin (PRL)",
            "Adrenaline (ADR)",
            "Tryptophan (TRP)",
            "Tyrosine (TYR)",
            "Glutamate (GLU)",
            "Glycine (GLY)",
            "Cortisol (CORT)",
            "Insulin (INS)",
            "Vasopressin (AVP)",
            "Oxytocin (OXT)",
            "Somatostatin (SST)",
            "Calcitonin (CT)",
            "Angiotensin II (ANG II)",
            "Gastrin (GAS)",
            "Cholecystokinin (CCK)",
            "Neuropeptide Y (NPY)",
            "Substance P (SP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Epinephrine (EPI)",
                "Norepinephrine (NE)",
                "Thyroid Hormone (TH)",
                "Serotonin (5-HT)",
                "Melatonin (MEL)",
                "Histamine (HIS)",
                "Dopamine (DA)",
                "Prolactin (PRL)",
                "Adrenaline (ADR)",
                "Tryptophan (TRP)",
                "Tyrosine (TYR)",
                "Glutamate (GLU)",
                "Glycine (GLY)",
                "Cortisol (CORT)",
                "Insulin (INS)",
                "Vasopressin (AVP)",
                "Oxytocin (OXT)",
                "Somatostatin (SST)",
                "Calcitonin (CT)",
                "Angiotensin II (ANG II)",
                "Gastrin (GAS)",
                "Cholecystokinin (CCK)",
                "Neuropeptide Y (NPY)",
                "Substance P (SP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenaline (ADR)",
                "Angiotensin II (ANG II)",
                "Calcitonin (CT)",
                "Cholecystokinin (CCK)",
                "Cortisol (CORT)",
                "Dopamine (DA)",
                "Epinephrine (EPI)",
                "Gastrin (GAS)",
                "Glutamate (GLU)",
                "Glycine (GLY)",
                "Histamine (HIS)",
                "Insulin (INS)",
                "Melatonin (MEL)",
                "Neuropeptide Y (NPY)",
                "Norepinephrine (NE)",
                "Oxytocin (OXT)",
                "Prolactin (PRL)",
                "Serotonin (5-HT)",
                "Somatostatin (SST)",
                "Substance P (SP)",
                "Thyroid Hormone (TH)",
                "Tryptophan (TRP)",
                "Tyrosine (TYR)",
                "Vasopressin (AVP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Epinephrine (Adrenaline)",
            "Norepinephrine (Noradrenaline)",
            "Dopamine",
            "Serotonin",
            "Melatonin",
            "Histamine",
            "Glutamate",
            "GABA (Gamma-aminobutyric acid)",
            "Aspartate",
            "Glycine",
            "D-aspartic acid",
            "N-methyl-D-aspartate (NMDA)",
            "Phenethylamine",
            "Tyramine",
            "Tryptamine",
            "Serotonin (5-HT)",
            "Melanocyte-stimulating hormone (MSH)",
            "Adrenocorticotropic hormone (ACTH)",
            "Thyrotropin-releasing hormone (TRH)",
            "Gonadotropin-releasing hormone (GnRH)",
            "Growth hormone-releasing hormone (GHRH)",
            "Corticotropin-releasing hormone (CRH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Epinephrine (Adrenaline)",
                "Norepinephrine (Noradrenaline)",
                "Dopamine",
                "Serotonin",
                "Melatonin",
                "Histamine",
                "Glutamate",
                "GABA (Gamma-aminobutyric acid)",
                "Aspartate",
                "Glycine",
                "D-aspartic acid",
                "N-methyl-D-aspartate (NMDA)",
                "Phenethylamine",
                "Tyramine",
                "Tryptamine",
                "Serotonin (5-HT)",
                "Melanocyte-stimulating hormone (MSH)",
                "Adrenocorticotropic hormone (ACTH)",
                "Thyrotropin-releasing hormone (TRH)",
                "Gonadotropin-releasing hormone (GnRH)",
                "Growth hormone-releasing hormone (GHRH)",
                "Corticotropin-releasing hormone (CRH)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenocorticotropic hormone (ACTH)",
                "Aspartate",
                "Corticotropin-releasing hormone (CRH)",
                "D-aspartic acid",
                "Dopamine",
                "Epinephrine (Adrenaline)",
                "GABA (Gamma-aminobutyric acid)",
                "Glutamate",
                "Glycine",
                "Gonadotropin-releasing hormone (GnRH)",
                "Growth hormone-releasing hormone (GHRH)",
                "Histamine",
                "Melanocyte-stimulating hormone (MSH)",
                "Melatonin",
                "N-methyl-D-aspartate (NMDA)",
                "Norepinephrine (Noradrenaline)",
                "Phenethylamine",
                "Serotonin",
                "Serotonin (5-HT)",
                "Thyrotropin-releasing hormone (TRH)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Tryptamine",
                "Tyramine"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Insulin",
            "Epinephrine",
            "Oxytocin",
            "Thyroxine",
            "Melatonin",
            "Growth hormone",
            "Glucagon",
            "Adrenaline",
            "Serotonin",
            "Dopamine",
            "Norepinephrine",
            "Calcitonin",
            "Parathyroid hormone",
            "Vasopressin",
            "Angiotensin",
            "Leptin",
            "Gastrin",
            "Cholecystokinin",
            "Erythropoietin",
            "Cortisol",
            "Progesterone",
            "Estrogen",
            "Testosterone",
            "Thyroid-stimulating hormone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin",
                "Epinephrine",
                "Oxytocin",
                "Thyroxine",
                "Melatonin",
                "Growth hormone",
                "Glucagon",
                "Adrenaline",
                "Serotonin",
                "Dopamine",
                "Norepinephrine",
                "Calcitonin",
                "Parathyroid hormone",
                "Vasopressin",
                "Angiotensin",
                "Leptin",
                "Gastrin",
                "Cholecystokinin",
                "Erythropoietin",
                "Cortisol",
                "Progesterone",
                "Estrogen",
                "Testosterone",
                "Thyroid-stimulating hormone"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenaline",
                "Angiotensin",
                "Calcitonin",
                "Cholecystokinin",
                "Cortisol",
                "Dopamine",
                "Epinephrine",
                "Erythropoietin",
                "Estrogen",
                "Gastrin",
                "Glucagon",
                "Growth hormone",
                "Insulin",
                "Leptin",
                "Melatonin",
                "Norepinephrine",
                "Oxytocin",
                "Parathyroid hormone",
                "Progesterone",
                "Serotonin",
                "Testosterone",
                "Thyroid-stimulating hormone",
                "Thyroxine",
                "Vasopressin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Growth hormone (GH)",
            "Prolactin (PRL)",
            "Oxytocin (OT)",
            "Vasopressin (ADH)",
            "Adrenocorticotropic hormone (ACTH)",
            "Luteinizing hormone (LH)",
            "Follicle-stimulating hormone (FSH)",
            "Erythropoietin (EPO)",
            "Parathyroid hormone (PTH)",
            "Somatostatin (SST)",
            "Epinephrine (EPI)",
            "Norepinephrine (NOR)",
            "Dopamine (DA)",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Melatonin (MEL)",
            "Serotonin (5-HT)",
            "Histamine (HIS)",
            "Calcitonin (CT)",
            "Beta-endorphin (END)",
            "Neuropeptide Y (NPY)",
            "Leptin (LEP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin (INS)",
                "Glucagon (GCG)",
                "Growth hormone (GH)",
                "Prolactin (PRL)",
                "Oxytocin (OT)",
                "Vasopressin (ADH)",
                "Adrenocorticotropic hormone (ACTH)",
                "Luteinizing hormone (LH)",
                "Follicle-stimulating hormone (FSH)",
                "Erythropoietin (EPO)",
                "Parathyroid hormone (PTH)",
                "Somatostatin (SST)",
                "Epinephrine (EPI)",
                "Norepinephrine (NOR)",
                "Dopamine (DA)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Melatonin (MEL)",
                "Serotonin (5-HT)",
                "Histamine (HIS)",
                "Calcitonin (CT)",
                "Beta-endorphin (END)",
                "Neuropeptide Y (NPY)",
                "Leptin (LEP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenocorticotropic hormone (ACTH)",
                "Beta-endorphin (END)",
                "Calcitonin (CT)",
                "Dopamine (DA)",
                "Epinephrine (EPI)",
                "Erythropoietin (EPO)",
                "Follicle-stimulating hormone (FSH)",
                "Glucagon (GCG)",
                "Growth hormone (GH)",
                "Histamine (HIS)",
                "Insulin (INS)",
                "Leptin (LEP)",
                "Luteinizing hormone (LH)",
                "Melatonin (MEL)",
                "Neuropeptide Y (NPY)",
                "Norepinephrine (NOR)",
                "Oxytocin (OT)",
                "Parathyroid hormone (PTH)",
                "Prolactin (PRL)",
                "Serotonin (5-HT)",
                "Somatostatin (SST)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Vasopressin (ADH)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Epinephrine (Adrenaline)",
            "Norepinephrine (Noradrenaline)",
            "Melatonin",
            "Serotonin",
            "Dopamine",
            "Histamine",
            "Tyramine",
            "Tryptamine",
            "3,5-Diiodothyronine (T2)",
            "Reverse T3 (rT3)",
            "Thyrotropin-releasing hormone (TRH)",
            "Oxytocin",
            "Vasopressin (ADH)",
            "Melanocyte-stimulating hormone (MSH)",
            "Thyroid-stimulating hormone (TSH)",
            "Adrenocorticotropic hormone (ACTH)",
            "Prolactin",
            "Growth hormone (GH)",
            "Gastrin",
            "Secretin",
            "Cholecystokinin (CCK)",
            "Calcitonin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Epinephrine (Adrenaline)",
                "Norepinephrine (Noradrenaline)",
                "Melatonin",
                "Serotonin",
                "Dopamine",
                "Histamine",
                "Tyramine",
                "Tryptamine",
                "3,5-Diiodothyronine (T2)",
                "Reverse T3 (rT3)",
                "Thyrotropin-releasing hormone (TRH)",
                "Oxytocin",
                "Vasopressin (ADH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Thyroid-stimulating hormone (TSH)",
                "Adrenocorticotropic hormone (ACTH)",
                "Prolactin",
                "Growth hormone (GH)",
                "Gastrin",
                "Secretin",
                "Cholecystokinin (CCK)",
                "Calcitonin"
            ],
            "mismatches": [],
            "true_referents": [
                "3,5-Diiodothyronine (T2)",
                "Adrenocorticotropic hormone (ACTH)",
                "Calcitonin",
                "Cholecystokinin (CCK)",
                "Dopamine",
                "Epinephrine (Adrenaline)",
                "Gastrin",
                "Growth hormone (GH)",
                "Histamine",
                "Melanocyte-stimulating hormone (MSH)",
                "Melatonin",
                "Norepinephrine (Noradrenaline)",
                "Oxytocin",
                "Prolactin",
                "Reverse T3 (rT3)",
                "Secretin",
                "Serotonin",
                "Thyroid-stimulating hormone (TSH)",
                "Thyrotropin-releasing hormone (TRH)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Tryptamine",
                "Tyramine",
                "Vasopressin (ADH)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Epinephrine (EPI)",
            "Norepinephrine (NE)",
            "Dopamine (DA)",
            "Serotonin (5-HT)",
            "Melatonin",
            "Histamine",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Calcitonin",
            "Gastrin",
            "Cholecystokinin (CCK)",
            "Secretin",
            "Ghrelin",
            "Leptin",
            "Insulin",
            "Glucagon",
            "Cortisol",
            "Aldosterone",
            "Testosterone",
            "Estradiol (E2)",
            "Progesterone",
            "Oxytocin",
            "Vasopressin (ADH)",
            "Thyroid-stimulating hormone (TSH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Epinephrine (EPI)",
                "Norepinephrine (NE)",
                "Dopamine (DA)",
                "Serotonin (5-HT)",
                "Melatonin",
                "Histamine",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Calcitonin",
                "Gastrin",
                "Cholecystokinin (CCK)",
                "Secretin",
                "Ghrelin",
                "Leptin",
                "Insulin",
                "Glucagon",
                "Cortisol",
                "Aldosterone",
                "Testosterone",
                "Estradiol (E2)",
                "Progesterone",
                "Oxytocin",
                "Vasopressin (ADH)",
                "Thyroid-stimulating hormone (TSH)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldosterone",
                "Calcitonin",
                "Cholecystokinin (CCK)",
                "Cortisol",
                "Dopamine (DA)",
                "Epinephrine (EPI)",
                "Estradiol (E2)",
                "Gastrin",
                "Ghrelin",
                "Glucagon",
                "Histamine",
                "Insulin",
                "Leptin",
                "Melatonin",
                "Norepinephrine (NE)",
                "Oxytocin",
                "Progesterone",
                "Secretin",
                "Serotonin (5-HT)",
                "Testosterone",
                "Thyroid-stimulating hormone (TSH)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Vasopressin (ADH)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Melatonin (MLT)",
            "Epinephrine (EPI)",
            "Norepinephrine (NOR)",
            "Dopamine (DA)",
            "Serotonin (5-HT)",
            "Histamine (HIS)",
            "Thyroid hormones (T3, T4)",
            "Catecholamines",
            "Tryptamine",
            "Tyramine",
            "Phenethylamine",
            "Octopamine",
            "Thyroxine (T4)",
            "Triiodothyronine (T3)",
            "Reverse triiodothyronine (rT3)",
            "Thyroglobulin",
            "Calcitonin",
            "Parathyroid hormone (PTH)",
            "Angiotensin II",
            "Bradykinin",
            "Carnosine",
            "Gamma-Aminobutyric acid (GABA)",
            "Glutamate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Melatonin (MLT)",
                "Epinephrine (EPI)",
                "Norepinephrine (NOR)",
                "Dopamine (DA)",
                "Serotonin (5-HT)",
                "Histamine (HIS)",
                "Thyroid hormones (T3, T4)",
                "Catecholamines",
                "Tryptamine",
                "Tyramine",
                "Phenethylamine",
                "Octopamine",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Reverse triiodothyronine (rT3)",
                "Thyroglobulin",
                "Calcitonin",
                "Parathyroid hormone (PTH)",
                "Angiotensin II",
                "Bradykinin",
                "Carnosine",
                "Gamma-Aminobutyric acid (GABA)",
                "Glutamate"
            ],
            "mismatches": [],
            "true_referents": [
                "Angiotensin II",
                "Bradykinin",
                "Calcitonin",
                "Carnosine",
                "Catecholamines",
                "Dopamine (DA)",
                "Epinephrine (EPI)",
                "Gamma-Aminobutyric acid (GABA)",
                "Glutamate",
                "Histamine (HIS)",
                "Melatonin (MLT)",
                "Norepinephrine (NOR)",
                "Octopamine",
                "Parathyroid hormone (PTH)",
                "Phenethylamine",
                "Reverse triiodothyronine (rT3)",
                "Serotonin (5-HT)",
                "Thyroglobulin",
                "Thyroid hormones (T3, T4)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Tryptamine",
                "Tyramine"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Adrenaline (A)",
            "Dopamine (D)",
            "Epinephrine (E)",
            "Histamine (H)",
            "Melatonin (M)",
            "Norepinephrine (N)",
            "Serotonin (S)",
            "Thyroxine (T)",
            "Triiodothyronine (T3)",
            "Insulin (I)",
            "Glucagon (G)",
            "Cortisol (C)",
            "Estrogen (E2)",
            "Progesterone (P)",
            "Testosterone (T)",
            "Oxytocin (O)",
            "Vasopressin (V)",
            "Parathyroid Hormone (PTH)",
            "Calcitonin (CT)",
            "Prolactin (PRL)",
            "Growth Hormone (GH)",
            "Antidiuretic Hormone (ADH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Follicle-Stimulating Hormone (FSH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenaline (A)",
                "Dopamine (D)",
                "Epinephrine (E)",
                "Histamine (H)",
                "Melatonin (M)",
                "Norepinephrine (N)",
                "Serotonin (S)",
                "Thyroxine (T)",
                "Triiodothyronine (T3)",
                "Insulin (I)",
                "Glucagon (G)",
                "Cortisol (C)",
                "Estrogen (E2)",
                "Progesterone (P)",
                "Testosterone (T)",
                "Oxytocin (O)",
                "Vasopressin (V)",
                "Parathyroid Hormone (PTH)",
                "Calcitonin (CT)",
                "Prolactin (PRL)",
                "Growth Hormone (GH)",
                "Antidiuretic Hormone (ADH)",
                "Thyroid-Stimulating Hormone (TSH)",
                "Follicle-Stimulating Hormone (FSH)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenaline (A)",
                "Antidiuretic Hormone (ADH)",
                "Calcitonin (CT)",
                "Cortisol (C)",
                "Dopamine (D)",
                "Epinephrine (E)",
                "Estrogen (E2)",
                "Follicle-Stimulating Hormone (FSH)",
                "Glucagon (G)",
                "Growth Hormone (GH)",
                "Histamine (H)",
                "Insulin (I)",
                "Melatonin (M)",
                "Norepinephrine (N)",
                "Oxytocin (O)",
                "Parathyroid Hormone (PTH)",
                "Progesterone (P)",
                "Prolactin (PRL)",
                "Serotonin (S)",
                "Testosterone (T)",
                "Thyroid-Stimulating Hormone (TSH)",
                "Thyroxine (T)",
                "Triiodothyronine (T3)",
                "Vasopressin (V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Aromatic amino acid decarboxylase (AADC) is a hormone",
            "Tyrosine is derived from an amino acid",
            "Dopamine is a hormone",
            "Tyramine is derived from an amino acid",
            "Epinephrine is a hormone",
            "Norepinephrine is a hormone",
            "Serotonin is a hormone",
            "Histamine is a hormone",
            "Melatonin is a hormone",
            "Tryptophan is derived from an amino acid",
            "Catecholamine is a hormone",
            "Adrenaline is a hormone",
            "Noradrenaline is a hormone",
            "Phenylethylamine is derived from an amino acid",
            "Octopamine is a hormone",
            "Dopamine receptor agonist is a hormone",
            "Serotonin receptor agonist is a hormone",
            "Histamine receptor agonist is a hormone",
            "Melatonin receptor agonist is a hormone",
            "Tyrosine hydroxylase is a hormone",
            "Tyrosine transaminase is a hormone",
            "Tyrosine kinase is a hormone",
            "Tyrosine phosphatase is a hormone",
            "Tyrosine sulfotransferase is a hormone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aromatic amino acid decarboxylase (AADC) is a hormone",
                "Tyrosine is derived from an amino acid",
                "Dopamine is a hormone",
                "Tyramine is derived from an amino acid",
                "Epinephrine is a hormone",
                "Norepinephrine is a hormone",
                "Serotonin is a hormone",
                "Histamine is a hormone",
                "Melatonin is a hormone",
                "Tryptophan is derived from an amino acid",
                "Catecholamine is a hormone",
                "Adrenaline is a hormone",
                "Noradrenaline is a hormone",
                "Phenylethylamine is derived from an amino acid",
                "Octopamine is a hormone",
                "Dopamine receptor agonist is a hormone",
                "Serotonin receptor agonist is a hormone",
                "Histamine receptor agonist is a hormone",
                "Melatonin receptor agonist is a hormone",
                "Tyrosine hydroxylase is a hormone",
                "Tyrosine transaminase is a hormone",
                "Tyrosine kinase is a hormone",
                "Tyrosine phosphatase is a hormone",
                "Tyrosine sulfotransferase is a hormone"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenaline is a hormone",
                "Aromatic amino acid decarboxylase (AADC) is a hormone",
                "Catecholamine is a hormone",
                "Dopamine is a hormone",
                "Dopamine receptor agonist is a hormone",
                "Epinephrine is a hormone",
                "Histamine is a hormone",
                "Histamine receptor agonist is a hormone",
                "Melatonin is a hormone",
                "Melatonin receptor agonist is a hormone",
                "Noradrenaline is a hormone",
                "Norepinephrine is a hormone",
                "Octopamine is a hormone",
                "Phenylethylamine is derived from an amino acid",
                "Serotonin is a hormone",
                "Serotonin receptor agonist is a hormone",
                "Tryptophan is derived from an amino acid",
                "Tyramine is derived from an amino acid",
                "Tyrosine hydroxylase is a hormone",
                "Tyrosine is derived from an amino acid",
                "Tyrosine kinase is a hormone",
                "Tyrosine phosphatase is a hormone",
                "Tyrosine sulfotransferase is a hormone",
                "Tyrosine transaminase is a hormone"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            "Adrenocorticotropic hormone (ACTH)",
            "Thyroid-stimulating hormone (TSH)",
            "Triiodothyronine (T3)",
            "Thyroxine (T4)",
            "Melatonin",
            "Epinephrine (Adrenaline)",
            "Norepinephrine (Noradrenaline)",
            "Dopamine",
            "Serotonin (5-Hydroxytryptamine)",
            "Melanocyte-stimulating hormone (MSH)",
            "Oxytocin",
            "Vasopressin (Antidiuretic hormone)",
            "Glucagon-like peptide-1 (GLP-1)",
            "Gastrin",
            "Cholecystokinin (CCK)",
            "Secretin",
            "Motilin",
            "Ghrelin",
            "Adiponectin",
            "Leptin",
            "Neuromedin N",
            "Neuromedin U",
            "Kisspeptin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adrenocorticotropic hormone (ACTH)",
                "Thyroid-stimulating hormone (TSH)",
                "Triiodothyronine (T3)",
                "Thyroxine (T4)",
                "Melatonin",
                "Epinephrine (Adrenaline)",
                "Norepinephrine (Noradrenaline)",
                "Dopamine",
                "Serotonin (5-Hydroxytryptamine)",
                "Melanocyte-stimulating hormone (MSH)",
                "Oxytocin",
                "Vasopressin (Antidiuretic hormone)",
                "Glucagon-like peptide-1 (GLP-1)",
                "Gastrin",
                "Cholecystokinin (CCK)",
                "Secretin",
                "Motilin",
                "Ghrelin",
                "Adiponectin",
                "Leptin",
                "Neuromedin N",
                "Neuromedin U",
                "Kisspeptin"
            ],
            "mismatches": [],
            "true_referents": [
                "Adiponectin",
                "Adrenocorticotropic hormone (ACTH)",
                "Cholecystokinin (CCK)",
                "Dopamine",
                "Epinephrine (Adrenaline)",
                "Gastrin",
                "Ghrelin",
                "Glucagon-like peptide-1 (GLP-1)",
                "Kisspeptin",
                "Leptin",
                "Melanocyte-stimulating hormone (MSH)",
                "Melatonin",
                "Motilin",
                "Neuromedin N",
                "Neuromedin U",
                "Norepinephrine (Noradrenaline)",
                "Oxytocin",
                "Secretin",
                "Serotonin (5-Hydroxytryptamine)",
                "Thyroid-stimulating hormone (TSH)",
                "Thyroxine (T4)",
                "Triiodothyronine (T3)",
                "Vasopressin (Antidiuretic hormone)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "amino acid-derived hormone",
        "domain": "biology",
        "response": [
            {
                "referent": "insulin",
                "canonical name": "insulin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "glucagon",
                "canonical name": "glucagon",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "epinephrine",
                "canonical name": "epinephrine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "thyroid hormone",
                "canonical name": "thyroid hormone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "corticosteroid",
                "canonical name": "corticosteroid",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "estrogen",
                "canonical name": "estrogen",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "progesterone",
                "canonical name": "progesterone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "testosterone",
                "canonical name": "testosterone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "estriol",
                "canonical name": "estriol",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "hydrocortisone",
                "canonical name": "hydrocortisone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "thyroxine",
                "canonical name": "thyroxine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "dopamine",
                "canonical name": "dopamine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "serotonin",
                "canonical name": "serotonin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "histamine",
                "canonical name": "histamine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "oxytocin",
                "canonical name": "oxytocin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "vasopressin",
                "canonical name": "vasopressin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "growth hormone",
                "canonical name": "growth hormone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "insulin",
                "canonical name": "insulin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "glucagon",
                "canonical name": "glucagon",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "epinephrine",
                "canonical name": "epinephrine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "thyroid hormone",
                "canonical name": "thyroid hormone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "corticosteroid",
                "canonical name": "corticosteroid",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "estrogen",
                "canonical name": "estrogen",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "progesterone",
                "canonical name": "progesterone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "testosterone",
                "canonical name": "testosterone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "estriol",
                "canonical name": "estriol",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "hydrocortisone",
                "canonical name": "hydrocortisone",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "thyroxine",
                "canonical name": "thyroxine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "dopamine",
                "canonical name": "dopamine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "serotonin",
                "canonical name": "serotonin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "histamine",
                "canonical name": "histamine",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "oxytocin",
                "canonical name": "oxytocin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "vasopressin",
                "canonical name": "vasopressin",
                "description": "is derived from an amino acid"
            },
            {
                "referent": "growth hormone",
                "canonical name": "growth hormone",
                "description": "is derived from an amino acid"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "insulin",
                "glucagon",
                "epinephrine",
                "thyroid hormone",
                "corticosteroid",
                "estrogen",
                "progesterone",
                "testosterone",
                "estriol",
                "hydrocortisone",
                "thyroxine",
                "dopamine",
                "serotonin",
                "histamine",
                "oxytocin",
                "vasopressin",
                "growth hormone"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"referent\": \"corticosteroid\", \"canonical name\": \"corticosteroid\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"dopamine\", \"canonical name\": \"dopamine\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"epinephrine\", \"canonical name\": \"epinephrine\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"estriol\", \"canonical name\": \"estriol\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"estrogen\", \"canonical name\": \"estrogen\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"glucagon\", \"canonical name\": \"glucagon\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"growth hormone\", \"canonical name\": \"growth hormone\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"histamine\", \"canonical name\": \"histamine\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"hydrocortisone\", \"canonical name\": \"hydrocortisone\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"insulin\", \"canonical name\": \"insulin\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"oxytocin\", \"canonical name\": \"oxytocin\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"progesterone\", \"canonical name\": \"progesterone\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"serotonin\", \"canonical name\": \"serotonin\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"testosterone\", \"canonical name\": \"testosterone\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"thyroid hormone\", \"canonical name\": \"thyroid hormone\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"thyroxine\", \"canonical name\": \"thyroxine\", \"description\": \"is derived from an amino acid\"}",
                "{\"referent\": \"vasopressin\", \"canonical name\": \"vasopressin\", \"description\": \"is derived from an amino acid\"}"
            ],
            "TP": 17,
            "FP": 0,
            "FN": 0
        }
    }
]