[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Pentapeptide (P5)",
            "Acetyl-Leu-Leu-Gly-Leu-Leu-AMC (LLGLL-AMC)",
            "Leu-Leu-Gly-Leu-Leu (LLGLL)",
            "Val-Leu-Gly-Leu-Leu (VLGLL)",
            "Leu-Val-Gly-Leu-Leu (LVGLL)",
            "Leu-Leu-Gly-Val-Leu (LLGVL)",
            "Leu-Leu-Gly-Leu-Val (LLGLV)",
            "Leu-Leu-Val-Gly-Leu (LLVGL)",
            "Leu-Leu-Leu-Gly-Leu (LLLGL)",
            "Leu-Leu-Leu-Leu-Gly (LLLLG)",
            "Leu-Leu-Leu-Gly-Val (LLLGV)",
            "Leu-Leu-Leu-Leu-Leu (LLLLL)",
            "Val-Leu-Leu-Gly-Leu (VLLGL)",
            "Leu-Val-Leu-Gly-Leu (LVLLG)",
            "Leu-Leu-Val-Leu-Leu (LLVLL)",
            "Leu-Leu-Leu-Val-Leu (LLLVL)",
            "Leu-Leu-Leu-Leu-Val (LLLLV)",
            "Val-Leu-Leu-Leu-Leu (VLLLL)",
            "Leu-Val-Leu-Leu-Leu (LVLLL)",
            "Leu-Leu-Leu-Leu-Leu (LLLLL)",
            "Val-Leu-Leu-Leu-Val (VLLLV)",
            "Leu-Val-Leu-Leu-Val (LVLLV)",
            "Leu-Leu-Leu-Leu-Val (LLLVV)",
            "Val-Leu-Leu-Leu-Leu (VLLLL)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Pentapeptide (P5)",
                "Acetyl-Leu-Leu-Gly-Leu-Leu-AMC (LLGLL-AMC)",
                "Leu-Leu-Gly-Leu-Leu (LLGLL)",
                "Val-Leu-Gly-Leu-Leu (VLGLL)",
                "Leu-Val-Gly-Leu-Leu (LVGLL)",
                "Leu-Leu-Gly-Val-Leu (LLGVL)",
                "Leu-Leu-Gly-Leu-Val (LLGLV)",
                "Leu-Leu-Val-Gly-Leu (LLVGL)",
                "Leu-Leu-Leu-Gly-Leu (LLLGL)",
                "Leu-Leu-Leu-Leu-Gly (LLLLG)",
                "Leu-Leu-Leu-Gly-Val (LLLGV)",
                "Leu-Leu-Leu-Leu-Leu (LLLLL)",
                "Val-Leu-Leu-Gly-Leu (VLLGL)",
                "Leu-Val-Leu-Gly-Leu (LVLLG)",
                "Leu-Leu-Val-Leu-Leu (LLVLL)",
                "Leu-Leu-Leu-Val-Leu (LLLVL)",
                "Leu-Leu-Leu-Leu-Val (LLLLV)",
                "Val-Leu-Leu-Leu-Leu (VLLLL)",
                "Val-Leu-Leu-Leu-Val (VLLLV)",
                "Leu-Val-Leu-Leu-Val (LVLLV)",
                "Leu-Leu-Leu-Leu-Val (LLLVV)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetyl-Leu-Leu-Gly-Leu-Leu-AMC (LLGLL-AMC)",
                "Leu-Leu-Gly-Leu-Leu (LLGLL)",
                "Leu-Leu-Gly-Leu-Val (LLGLV)",
                "Leu-Leu-Gly-Val-Leu (LLGVL)",
                "Leu-Leu-Leu-Gly-Leu (LLLGL)",
                "Leu-Leu-Leu-Gly-Val (LLLGV)",
                "Leu-Leu-Leu-Leu-Gly (LLLLG)",
                "Leu-Leu-Leu-Leu-Leu (LLLLL)",
                "Leu-Leu-Leu-Leu-Val (LLLLV)",
                "Leu-Leu-Leu-Leu-Val (LLLVV)",
                "Leu-Leu-Leu-Val-Leu (LLLVL)",
                "Leu-Leu-Val-Gly-Leu (LLVGL)",
                "Leu-Leu-Val-Leu-Leu (LLVLL)",
                "Leu-Val-Gly-Leu-Leu (LVGLL)",
                "Leu-Val-Leu-Gly-Leu (LVLLG)",
                "Leu-Val-Leu-Leu-Leu (LVLLL)",
                "Leu-Val-Leu-Leu-Val (LVLLV)",
                "Pentapeptide (P5)",
                "Val-Leu-Gly-Leu-Leu (VLGLL)",
                "Val-Leu-Leu-Gly-Leu (VLLGL)",
                "Val-Leu-Leu-Leu-Leu (VLLLL)",
                "Val-Leu-Leu-Leu-Val (VLLLV)"
            ],
            "TP": 21,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Glycylglycylglycylglycine (GGGG)",
            "Glycylglycylglycylalanine (GGGA)",
            "Glycylglycylglycylleucine (GGGL)",
            "Glycylglycylglycylvaline (GGGV)",
            "Glycylglycylglycylisoleucine (GGGI)",
            "Glycylglycylglycylphenylalanine (GGGP)",
            "Glycylglycylglycyltryptophan (GGGT)",
            "Glycylglycylglycylmethionine (GGGM)",
            "Glycylglycylglycylproline (GGGP)",
            "Glycylglycylglycylaspartic acid (GGGD)",
            "Glycylglycylglycylglutamic acid (GGGE)",
            "Glycylglycylglycylserine (GGGS)",
            "Glycylglycylglycylthreonine (GGGT)",
            "Glycylglycylglycylcysteine (GGGC)",
            "Glycylglycylglycyltyrosine (GGGY)",
            "Glycylglycylglycylhistidine (GGGH)",
            "Glycylglycylglycylarginine (GGGR)",
            "Glycylglycylglycyllysine (GGGK)",
            "Glycylglycylglycylasparagine (GGGN)",
            "Glycylglycylglycylglutamine (GGGQ)",
            "Alanylglycylglycylglycine (AGGG)",
            "Leucylglycylglycylglycine (LGFG)",
            "Valylglycylglycylglycine (VGFG)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycylglycylglycylglycine (GGGG)",
                "Glycylglycylglycylalanine (GGGA)",
                "Glycylglycylglycylleucine (GGGL)",
                "Glycylglycylglycylvaline (GGGV)",
                "Glycylglycylglycylisoleucine (GGGI)",
                "Glycylglycylglycylphenylalanine (GGGP)",
                "Glycylglycylglycyltryptophan (GGGT)",
                "Glycylglycylglycylmethionine (GGGM)",
                "Glycylglycylglycylproline (GGGP)",
                "Glycylglycylglycylaspartic acid (GGGD)",
                "Glycylglycylglycylglutamic acid (GGGE)",
                "Glycylglycylglycylserine (GGGS)",
                "Glycylglycylglycylthreonine (GGGT)",
                "Glycylglycylglycylcysteine (GGGC)",
                "Glycylglycylglycyltyrosine (GGGY)",
                "Glycylglycylglycylhistidine (GGGH)",
                "Glycylglycylglycylarginine (GGGR)",
                "Glycylglycylglycyllysine (GGGK)",
                "Glycylglycylglycylasparagine (GGGN)",
                "Glycylglycylglycylglutamine (GGGQ)",
                "Alanylglycylglycylglycine (AGGG)",
                "Leucylglycylglycylglycine (LGFG)",
                "Valylglycylglycylglycine (VGFG)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanylglycylglycylglycine (AGGG)",
                "Glycylglycylglycylalanine (GGGA)",
                "Glycylglycylglycylarginine (GGGR)",
                "Glycylglycylglycylasparagine (GGGN)",
                "Glycylglycylglycylaspartic acid (GGGD)",
                "Glycylglycylglycylcysteine (GGGC)",
                "Glycylglycylglycylglutamic acid (GGGE)",
                "Glycylglycylglycylglutamine (GGGQ)",
                "Glycylglycylglycylglycine (GGGG)",
                "Glycylglycylglycylhistidine (GGGH)",
                "Glycylglycylglycylisoleucine (GGGI)",
                "Glycylglycylglycylleucine (GGGL)",
                "Glycylglycylglycyllysine (GGGK)",
                "Glycylglycylglycylmethionine (GGGM)",
                "Glycylglycylglycylphenylalanine (GGGP)",
                "Glycylglycylglycylproline (GGGP)",
                "Glycylglycylglycylserine (GGGS)",
                "Glycylglycylglycylthreonine (GGGT)",
                "Glycylglycylglycyltryptophan (GGGT)",
                "Glycylglycylglycyltyrosine (GGGY)",
                "Glycylglycylglycylvaline (GGGV)",
                "Leucylglycylglycylglycine (LGFG)",
                "Valylglycylglycylglycine (VGFG)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Glycylglycylglycylglycylglycine (G5)",
            "Alanylalanylalanylalanylalanine (A5)",
            "Valylvalylvalylvalylvaline (V5)",
            "Leucylleucylleucylleucylleucine (L5)",
            "Isoleucylisoleucylisoleucylisoleucylisoleucine (I5)",
            "Serylserylserylserylserine (S5)",
            "Threonylthreonylthreonylthreonylthreonine (T5)",
            "Cysteinylcysteinylcysteinylcysteinylcysteine (C5)",
            "Methionylmethionylmethionylmethionylmethionine (M5)",
            "Prolylprolylprolylprolylproline (P5)",
            "Phenylalanylphenylalanylphenylalanylphenylalanylphenylalanine (F5)",
            "Tyrosyltyrosyltyrosyltyrosyltyrosine (Y5)",
            "Tryptophyltryptophyltryptophyltryptophyltryptophan (W5)",
            "Aspartylaspartylaspartylaspartylaspartate (D5)",
            "Glutamylglutamylglutamylglutamylglutamate (E5)",
            "Asparaginylasparaginylasparaginylasparaginylasparagine (N5)",
            "Glutaminylglutaminylglutaminylglutaminylglutamine (Q5)",
            "Arginylarginylarginylarginylarginine (R5)",
            "Lysyllysyllysyllysyllysine (K5)",
            "Histidylhistidylhistidylhistidylhistidine (H5)",
            "Glutathionylglutathionylglutathionylglutathionylglutathione (GSH5)",
            "Oxytocin",
            "Vasopressin",
            "Angiotensin II"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glycylglycylglycylglycylglycine (G5)",
                "Alanylalanylalanylalanylalanine (A5)",
                "Valylvalylvalylvalylvaline (V5)",
                "Leucylleucylleucylleucylleucine (L5)",
                "Isoleucylisoleucylisoleucylisoleucylisoleucine (I5)",
                "Serylserylserylserylserine (S5)",
                "Threonylthreonylthreonylthreonylthreonine (T5)",
                "Cysteinylcysteinylcysteinylcysteinylcysteine (C5)",
                "Methionylmethionylmethionylmethionylmethionine (M5)",
                "Prolylprolylprolylprolylproline (P5)",
                "Phenylalanylphenylalanylphenylalanylphenylalanylphenylalanine (F5)",
                "Tyrosyltyrosyltyrosyltyrosyltyrosine (Y5)",
                "Tryptophyltryptophyltryptophyltryptophyltryptophan (W5)",
                "Aspartylaspartylaspartylaspartylaspartate (D5)",
                "Glutamylglutamylglutamylglutamylglutamate (E5)",
                "Asparaginylasparaginylasparaginylasparaginylasparagine (N5)",
                "Glutaminylglutaminylglutaminylglutaminylglutamine (Q5)",
                "Arginylarginylarginylarginylarginine (R5)",
                "Lysyllysyllysyllysyllysine (K5)",
                "Histidylhistidylhistidylhistidylhistidine (H5)",
                "Glutathionylglutathionylglutathionylglutathionylglutathione (GSH5)",
                "Oxytocin",
                "Vasopressin",
                "Angiotensin II"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanylalanylalanylalanylalanine (A5)",
                "Angiotensin II",
                "Arginylarginylarginylarginylarginine (R5)",
                "Asparaginylasparaginylasparaginylasparaginylasparagine (N5)",
                "Aspartylaspartylaspartylaspartylaspartate (D5)",
                "Cysteinylcysteinylcysteinylcysteinylcysteine (C5)",
                "Glutaminylglutaminylglutaminylglutaminylglutamine (Q5)",
                "Glutamylglutamylglutamylglutamylglutamate (E5)",
                "Glutathionylglutathionylglutathionylglutathionylglutathione (GSH5)",
                "Glycylglycylglycylglycylglycine (G5)",
                "Histidylhistidylhistidylhistidylhistidine (H5)",
                "Isoleucylisoleucylisoleucylisoleucylisoleucine (I5)",
                "Leucylleucylleucylleucylleucine (L5)",
                "Lysyllysyllysyllysyllysine (K5)",
                "Methionylmethionylmethionylmethionylmethionine (M5)",
                "Oxytocin",
                "Phenylalanylphenylalanylphenylalanylphenylalanylphenylalanine (F5)",
                "Prolylprolylprolylprolylproline (P5)",
                "Serylserylserylserylserine (S5)",
                "Threonylthreonylthreonylthreonylthreonine (T5)",
                "Tryptophyltryptophyltryptophyltryptophyltryptophan (W5)",
                "Tyrosyltyrosyltyrosyltyrosyltyrosine (Y5)",
                "Valylvalylvalylvalylvaline (V5)",
                "Vasopressin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Pentapeptide Gly-Gly-Gly-Gly-Gly (Gly5)",
            "Angiotensin II (Ang II)",
            "Luteinizing hormone-releasing hormone (LHRH)",
            "Bradykinin",
            "Somatostatin",
            "Oxytocin",
            "Vasopressin (ADH)",
            "Melanocyte-stimulating hormone (MSH)",
            "Gonadotropin-releasing hormone (GnRH)",
            "Cholecystokinin-8 (CCK-8)",
            "Gastrin-5 (Gastrin-5)",
            "Neurotensin",
            "Substance P",
            "Endorphin beta",
            "Dermorphin",
            "Dynorphin A",
            "Leuenkephalin",
            "Met-enkephalin",
            "Bombesin",
            "Motilin",
            "Ghrelin",
            "Seladin-1",
            "Apidaecin 1b",
            "Defensin-1"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Pentapeptide Gly-Gly-Gly-Gly-Gly (Gly5)",
                "Angiotensin II (Ang II)",
                "Luteinizing hormone-releasing hormone (LHRH)",
                "Bradykinin",
                "Somatostatin",
                "Oxytocin",
                "Vasopressin (ADH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Gonadotropin-releasing hormone (GnRH)",
                "Cholecystokinin-8 (CCK-8)",
                "Gastrin-5 (Gastrin-5)",
                "Neurotensin",
                "Substance P",
                "Endorphin beta",
                "Dermorphin",
                "Dynorphin A",
                "Leuenkephalin",
                "Met-enkephalin",
                "Bombesin",
                "Motilin",
                "Ghrelin",
                "Seladin-1",
                "Apidaecin 1b",
                "Defensin-1"
            ],
            "mismatches": [],
            "true_referents": [
                "Angiotensin II (Ang II)",
                "Apidaecin 1b",
                "Bombesin",
                "Bradykinin",
                "Cholecystokinin-8 (CCK-8)",
                "Defensin-1",
                "Dermorphin",
                "Dynorphin A",
                "Endorphin beta",
                "Gastrin-5 (Gastrin-5)",
                "Ghrelin",
                "Gonadotropin-releasing hormone (GnRH)",
                "Leuenkephalin",
                "Luteinizing hormone-releasing hormone (LHRH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Met-enkephalin",
                "Motilin",
                "Neurotensin",
                "Oxytocin",
                "Pentapeptide Gly-Gly-Gly-Gly-Gly (Gly5)",
                "Seladin-1",
                "Somatostatin",
                "Substance P",
                "Vasopressin (ADH)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Oxytocin (OT)",
            "Vasopressin (AVP)",
            "Enkephalin (ENK)",
            "Angiotensin I (ANGI)",
            "Bradykinin (BK)",
            "Met-enkephalin (MENK)",
            "Leu-enkephalin (LENK)",
            "Neurotensin (NT)",
            "Pentapeptide B (PPB)",
            "Pentagastrin (PG)",
            "Thymopentin (TP5)",
            "Dermorphin (DMP)",
            "Deltorphin (DLT)",
            "Corticotropin-releasing factor (CRF)",
            "Substance P (SP)",
            "Gonadotropin-releasing hormone (GnRH)",
            "Somatostatin (SST)",
            "Insulin-like growth factor (IGF)",
            "Adrenocorticotropic hormone (ACTH)",
            "Endothelin (ET)",
            "Neuropeptide Y (NPY)",
            "Calcitonin gene-related peptide (CGRP)",
            "Vasoactive intestinal peptide (VIP)",
            "Gastrin-releasing peptide (GRP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Oxytocin (OT)",
                "Vasopressin (AVP)",
                "Enkephalin (ENK)",
                "Angiotensin I (ANGI)",
                "Bradykinin (BK)",
                "Met-enkephalin (MENK)",
                "Leu-enkephalin (LENK)",
                "Neurotensin (NT)",
                "Pentapeptide B (PPB)",
                "Pentagastrin (PG)",
                "Thymopentin (TP5)",
                "Dermorphin (DMP)",
                "Deltorphin (DLT)",
                "Corticotropin-releasing factor (CRF)",
                "Substance P (SP)",
                "Gonadotropin-releasing hormone (GnRH)",
                "Somatostatin (SST)",
                "Insulin-like growth factor (IGF)",
                "Adrenocorticotropic hormone (ACTH)",
                "Endothelin (ET)",
                "Neuropeptide Y (NPY)",
                "Calcitonin gene-related peptide (CGRP)",
                "Vasoactive intestinal peptide (VIP)",
                "Gastrin-releasing peptide (GRP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenocorticotropic hormone (ACTH)",
                "Angiotensin I (ANGI)",
                "Bradykinin (BK)",
                "Calcitonin gene-related peptide (CGRP)",
                "Corticotropin-releasing factor (CRF)",
                "Deltorphin (DLT)",
                "Dermorphin (DMP)",
                "Endothelin (ET)",
                "Enkephalin (ENK)",
                "Gastrin-releasing peptide (GRP)",
                "Gonadotropin-releasing hormone (GnRH)",
                "Insulin-like growth factor (IGF)",
                "Leu-enkephalin (LENK)",
                "Met-enkephalin (MENK)",
                "Neuropeptide Y (NPY)",
                "Neurotensin (NT)",
                "Oxytocin (OT)",
                "Pentagastrin (PG)",
                "Pentapeptide B (PPB)",
                "Somatostatin (SST)",
                "Substance P (SP)",
                "Thymopentin (TP5)",
                "Vasoactive intestinal peptide (VIP)",
                "Vasopressin (AVP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Insulin (INS)",
            "Glucagon (GLU)",
            "Oxytocin (OXY)",
            "Vasopressin (AVP)",
            "Somatostatin (SST)",
            "Neuropeptide Y (NPY)",
            "Cholecystokinin (CCK)",
            "Substance P (SP)",
            "Calcitonin (CALC)",
            "Bradykinin (BK)",
            "Angiotensin II (ANG II)",
            "Enkephalins (ENK)",
            "Dynorphins (DYN)",
            "Ghrelin (GHRL)",
            "Corticotropin-releasing hormone (CRH)",
            "Adrenocorticotropic hormone (ACTH)",
            "Thyrotropin-releasing hormone (TRH)",
            "Luteinizing hormone-releasing hormone (LHRH)",
            "Melanocyte-stimulating hormone (MSH)",
            "Atrial natriuretic peptide (ANP)",
            "Neuropeptide FF (NPFF)",
            "Growth hormone-releasing hormone (GHRH)",
            "Prolactin-inhibiting hormone (PIH)",
            "Pancakine (PC)",
            "Neuropeptide B (NPB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Insulin (INS)",
                "Glucagon (GLU)",
                "Oxytocin (OXY)",
                "Vasopressin (AVP)",
                "Somatostatin (SST)",
                "Neuropeptide Y (NPY)",
                "Cholecystokinin (CCK)",
                "Substance P (SP)",
                "Calcitonin (CALC)",
                "Bradykinin (BK)",
                "Angiotensin II (ANG II)",
                "Enkephalins (ENK)",
                "Dynorphins (DYN)",
                "Ghrelin (GHRL)",
                "Corticotropin-releasing hormone (CRH)",
                "Adrenocorticotropic hormone (ACTH)",
                "Thyrotropin-releasing hormone (TRH)",
                "Luteinizing hormone-releasing hormone (LHRH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Atrial natriuretic peptide (ANP)",
                "Neuropeptide FF (NPFF)",
                "Growth hormone-releasing hormone (GHRH)",
                "Prolactin-inhibiting hormone (PIH)",
                "Pancakine (PC)"
            ],
            "mismatches": [
                "Neuropeptide B (NPB)"
            ],
            "true_referents": [
                "Adrenocorticotropic hormone (ACTH)",
                "Angiotensin II (ANG II)",
                "Atrial natriuretic peptide (ANP)",
                "Bradykinin (BK)",
                "Calcitonin (CALC)",
                "Cholecystokinin (CCK)",
                "Corticotropin-releasing hormone (CRH)",
                "Dynorphins (DYN)",
                "Enkephalins (ENK)",
                "Ghrelin (GHRL)",
                "Glucagon (GLU)",
                "Growth hormone-releasing hormone (GHRH)",
                "Insulin (INS)",
                "Luteinizing hormone-releasing hormone (LHRH)",
                "Melanocyte-stimulating hormone (MSH)",
                "Neuropeptide FF (NPFF)",
                "Neuropeptide Y (NPY)",
                "Oxytocin (OXY)",
                "Pancakine (PC)",
                "Prolactin-inhibiting hormone (PIH)",
                "Somatostatin (SST)",
                "Substance P (SP)",
                "Thyrotropin-releasing hormone (TRH)",
                "Vasopressin (AVP)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "1. Pentaglycine (Penta-G)",
            "2. Pentalanine (Penta-A)",
            "3. Pentavaline (Penta-V)",
            "4. Pentacysteine (Penta-C)",
            "5. Pentaproline (Penta-P)",
            "6. Pentahistidine (Penta-H)",
            "7. Penta-Arginine (Penta-R)",
            "8. Penta-Asparagine (Penta-N)",
            "9. Penta-Aspartic Acid (Penta-D)",
            "10. Penta-Glutamine (Penta-Q)",
            "11. Penta-Glutamic Acid (Penta-E)",
            "12. Penta-Glycine (Penta-G)",
            "13. Penta-Isoleucine (Penta-I)",
            "14. Penta-Leucine (Penta-L)",
            "15. Penta-Lysine (Penta-K)",
            "16. Penta-Methionine (Penta-M)",
            "17. Penta-Phenylalanine (Penta-F)",
            "18. Penta-Serine (Penta-S)",
            "19. Penta-Threonine (Penta-T)",
            "20. Penta-Tryptophan (Penta-W)",
            "21. Penta-Tyrosine (Penta-Y)",
            "22. Penta-Valine (Penta-V)",
            "23. Penta-Aspartic Acid (Penta-D)",
            "24. Penta-Glutamic Acid (Penta-E)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "1. Pentaglycine (Penta-G)",
                "2. Pentalanine (Penta-A)",
                "3. Pentavaline (Penta-V)",
                "4. Pentacysteine (Penta-C)",
                "5. Pentaproline (Penta-P)",
                "6. Pentahistidine (Penta-H)",
                "7. Penta-Arginine (Penta-R)",
                "8. Penta-Asparagine (Penta-N)",
                "9. Penta-Aspartic Acid (Penta-D)",
                "10. Penta-Glutamine (Penta-Q)",
                "11. Penta-Glutamic Acid (Penta-E)",
                "12. Penta-Glycine (Penta-G)",
                "13. Penta-Isoleucine (Penta-I)",
                "14. Penta-Leucine (Penta-L)",
                "15. Penta-Lysine (Penta-K)",
                "16. Penta-Methionine (Penta-M)",
                "17. Penta-Phenylalanine (Penta-F)",
                "18. Penta-Serine (Penta-S)",
                "19. Penta-Threonine (Penta-T)",
                "20. Penta-Tryptophan (Penta-W)",
                "21. Penta-Tyrosine (Penta-Y)",
                "22. Penta-Valine (Penta-V)",
                "23. Penta-Aspartic Acid (Penta-D)",
                "24. Penta-Glutamic Acid (Penta-E)"
            ],
            "mismatches": [],
            "true_referents": [
                "1. Pentaglycine (Penta-G)",
                "10. Penta-Glutamine (Penta-Q)",
                "11. Penta-Glutamic Acid (Penta-E)",
                "12. Penta-Glycine (Penta-G)",
                "13. Penta-Isoleucine (Penta-I)",
                "14. Penta-Leucine (Penta-L)",
                "15. Penta-Lysine (Penta-K)",
                "16. Penta-Methionine (Penta-M)",
                "17. Penta-Phenylalanine (Penta-F)",
                "18. Penta-Serine (Penta-S)",
                "19. Penta-Threonine (Penta-T)",
                "2. Pentalanine (Penta-A)",
                "20. Penta-Tryptophan (Penta-W)",
                "21. Penta-Tyrosine (Penta-Y)",
                "22. Penta-Valine (Penta-V)",
                "23. Penta-Aspartic Acid (Penta-D)",
                "24. Penta-Glutamic Acid (Penta-E)",
                "3. Pentavaline (Penta-V)",
                "4. Pentacysteine (Penta-C)",
                "5. Pentaproline (Penta-P)",
                "6. Pentahistidine (Penta-H)",
                "7. Penta-Arginine (Penta-R)",
                "8. Penta-Asparagine (Penta-N)",
                "9. Penta-Aspartic Acid (Penta-D)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Peptide (Pep)",
            "Protein (Prot)",
            "Oligopeptide (Olip)",
            "Polypeptide (Polip)",
            "Tripeptide (Trip)",
            "Tetrapeptide (Tetp)",
            "Hexapeptide (Hexp)",
            "Heptapeptide (Heptp)",
            "Octapeptide (Octp)",
            "Nonapeptide (Nonp)",
            "Decapeptide (Decp)",
            "Undecapeptide (Undecp)",
            "Dodecapeptide (Dodecp)",
            "Cyclic peptide (Cycp)",
            "Linear peptide (Linp)",
            "Dipeptide (Dip)",
            "Amino acid (AA)",
            "Peptide bond (Pepb)",
            "Polypeptide chain (Polipc)",
            "Protein subunit (Prots)",
            "Antimicrobial peptide (AMP)",
            "Signal peptide (Sigp)",
            "Neuropeptide (Neup)",
            "Glycopeptide (Glyp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Peptide (Pep)",
                "Protein (Prot)",
                "Oligopeptide (Olip)",
                "Polypeptide (Polip)",
                "Tripeptide (Trip)",
                "Tetrapeptide (Tetp)",
                "Hexapeptide (Hexp)",
                "Heptapeptide (Heptp)",
                "Octapeptide (Octp)",
                "Nonapeptide (Nonp)",
                "Decapeptide (Decp)",
                "Undecapeptide (Undecp)",
                "Dodecapeptide (Dodecp)",
                "Cyclic peptide (Cycp)",
                "Linear peptide (Linp)",
                "Dipeptide (Dip)",
                "Amino acid (AA)",
                "Peptide bond (Pepb)",
                "Polypeptide chain (Polipc)",
                "Protein subunit (Prots)",
                "Antimicrobial peptide (AMP)",
                "Signal peptide (Sigp)",
                "Neuropeptide (Neup)",
                "Glycopeptide (Glyp)"
            ],
            "mismatches": [],
            "true_referents": [
                "Amino acid (AA)",
                "Antimicrobial peptide (AMP)",
                "Cyclic peptide (Cycp)",
                "Decapeptide (Decp)",
                "Dipeptide (Dip)",
                "Dodecapeptide (Dodecp)",
                "Glycopeptide (Glyp)",
                "Heptapeptide (Heptp)",
                "Hexapeptide (Hexp)",
                "Linear peptide (Linp)",
                "Neuropeptide (Neup)",
                "Nonapeptide (Nonp)",
                "Octapeptide (Octp)",
                "Oligopeptide (Olip)",
                "Peptide (Pep)",
                "Peptide bond (Pepb)",
                "Polypeptide (Polip)",
                "Polypeptide chain (Polipc)",
                "Protein (Prot)",
                "Protein subunit (Prots)",
                "Signal peptide (Sigp)",
                "Tetrapeptide (Tetp)",
                "Tripeptide (Trip)",
                "Undecapeptide (Undecp)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Leu-enkephalin (LE)",
            "Met-enkephalin (ME)",
            "Thymopentin",
            "Syn-Ake (Pentapeptide-18)",
            "Argireline (Acetyl Hexapeptide-8)",
            "Pentapeptide-3",
            "Pentapeptide-17",
            "Pentapeptide-1",
            "Pentapeptide-5",
            "Pentapeptide-10",
            "Pentapeptide-6",
            "Pentapeptide-2",
            "Pentapeptide-9",
            "Pentapeptide-7",
            "Pentapeptide-4",
            "Pentapeptide-11",
            "Pentapeptide-12",
            "Pentapeptide-13",
            "Pentapeptide-14",
            "Pentapeptide-15",
            "Pentapeptide-16",
            "Pentapeptide-19",
            "Pentamidine",
            "Glutathione penta",
            "Carboxypentapeptide"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Leu-enkephalin (LE)",
                "Met-enkephalin (ME)",
                "Thymopentin",
                "Syn-Ake (Pentapeptide-18)",
                "Argireline (Acetyl Hexapeptide-8)",
                "Pentapeptide-3",
                "Pentapeptide-17",
                "Pentapeptide-1",
                "Pentapeptide-5",
                "Pentapeptide-10",
                "Pentapeptide-6",
                "Pentapeptide-2",
                "Pentapeptide-9",
                "Pentapeptide-7",
                "Pentapeptide-4",
                "Pentapeptide-11",
                "Pentapeptide-12",
                "Pentapeptide-13",
                "Pentapeptide-14",
                "Pentapeptide-15",
                "Pentapeptide-16",
                "Pentapeptide-19",
                "Pentamidine",
                "Glutathione penta"
            ],
            "mismatches": [
                "Carboxypentapeptide"
            ],
            "true_referents": [
                "Argireline (Acetyl Hexapeptide-8)",
                "Glutathione penta",
                "Leu-enkephalin (LE)",
                "Met-enkephalin (ME)",
                "Pentamidine",
                "Pentapeptide-1",
                "Pentapeptide-10",
                "Pentapeptide-11",
                "Pentapeptide-12",
                "Pentapeptide-13",
                "Pentapeptide-14",
                "Pentapeptide-15",
                "Pentapeptide-16",
                "Pentapeptide-17",
                "Pentapeptide-19",
                "Pentapeptide-2",
                "Pentapeptide-3",
                "Pentapeptide-4",
                "Pentapeptide-5",
                "Pentapeptide-6",
                "Pentapeptide-7",
                "Pentapeptide-9",
                "Syn-Ake (Pentapeptide-18)",
                "Thymopentin"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Enkephalin (ENK)",
            "Thymopentin (TP-5)",
            "Pentadactylin",
            "Opiorphin",
            "Kyotorphin",
            "Argireline (acetyl hexapeptide-3)",
            "Melanocyte-inhibiting factor (MIF-1)",
            "Proctolin",
            "Thymopoietin",
            "Enterostatin",
            "Urotensin II",
            "Bradykinin (1-5)",
            "Angiotensin (1-5)",
            "Substance P (1-5)",
            "Neuropeptide Y (1-5)",
            "Cholecystokinin (CCK-5)",
            "Thymopentin analog (TP-4)",
            "Leu-enkephalin",
            "Met-enkephalin",
            "Deltorphin I",
            "Dermorphin",
            "Endomorphin-1",
            "Endomorphin-2",
            "Casomorphin"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Enkephalin (ENK)",
                "Thymopentin (TP-5)",
                "Pentadactylin",
                "Opiorphin",
                "Kyotorphin",
                "Argireline (acetyl hexapeptide-3)",
                "Melanocyte-inhibiting factor (MIF-1)",
                "Proctolin",
                "Thymopoietin",
                "Enterostatin",
                "Urotensin II",
                "Bradykinin (1-5)",
                "Angiotensin (1-5)",
                "Substance P (1-5)",
                "Neuropeptide Y (1-5)",
                "Cholecystokinin (CCK-5)",
                "Thymopentin analog (TP-4)",
                "Leu-enkephalin",
                "Met-enkephalin",
                "Deltorphin I",
                "Dermorphin",
                "Endomorphin-1",
                "Endomorphin-2",
                "Casomorphin"
            ],
            "mismatches": [],
            "true_referents": [
                "Angiotensin (1-5)",
                "Argireline (acetyl hexapeptide-3)",
                "Bradykinin (1-5)",
                "Casomorphin",
                "Cholecystokinin (CCK-5)",
                "Deltorphin I",
                "Dermorphin",
                "Endomorphin-1",
                "Endomorphin-2",
                "Enkephalin (ENK)",
                "Enterostatin",
                "Kyotorphin",
                "Leu-enkephalin",
                "Melanocyte-inhibiting factor (MIF-1)",
                "Met-enkephalin",
                "Neuropeptide Y (1-5)",
                "Opiorphin",
                "Pentadactylin",
                "Proctolin",
                "Substance P (1-5)",
                "Thymopentin (TP-5)",
                "Thymopentin analog (TP-4)",
                "Thymopoietin",
                "Urotensin II"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Methionine enkephalin (Met-enkephalin)",
            "Leucine enkephalin (Leu-enkephalin)",
            "Thyrotropin-releasing hormone (TRH)",
            "Opioid growth factor (OGF)",
            "Endomorphin-1",
            "Endomorphin-2",
            "Vasoactive intestinal peptide fragment VIP(6-10)",
            "Kemptide",
            "Proctolin",
            "Phyllokinin",
            "Leucokinin I",
            "Leucopyrokinin",
            "Bradykinin fragment 1-5",
            "Angiotensin I fragment 1-5",
            "Angiotensin II fragment 1-5",
            "Substance P fragment 1-5",
            "Neurokinin A fragment 1-5",
            "Neurokinin B fragment 1-5",
            "Oxytocin fragment 1-5",
            "Vasopressin fragment 1-5",
            "Somatostatin fragment 1-5",
            "Neurotensin fragment 1-5",
            "Dynorphin A fragment 1-5",
            "Cholecystokinin fragment 26-30 (CCK-5)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methionine enkephalin (Met-enkephalin)",
                "Leucine enkephalin (Leu-enkephalin)",
                "Thyrotropin-releasing hormone (TRH)",
                "Opioid growth factor (OGF)",
                "Endomorphin-1",
                "Endomorphin-2",
                "Vasoactive intestinal peptide fragment VIP(6-10)",
                "Kemptide",
                "Proctolin",
                "Phyllokinin",
                "Leucokinin I",
                "Leucopyrokinin",
                "Bradykinin fragment 1-5",
                "Angiotensin I fragment 1-5",
                "Angiotensin II fragment 1-5",
                "Substance P fragment 1-5",
                "Neurokinin A fragment 1-5",
                "Neurokinin B fragment 1-5",
                "Oxytocin fragment 1-5",
                "Vasopressin fragment 1-5",
                "Somatostatin fragment 1-5",
                "Neurotensin fragment 1-5",
                "Dynorphin A fragment 1-5",
                "Cholecystokinin fragment 26-30 (CCK-5)"
            ],
            "mismatches": [],
            "true_referents": [
                "Angiotensin I fragment 1-5",
                "Angiotensin II fragment 1-5",
                "Bradykinin fragment 1-5",
                "Cholecystokinin fragment 26-30 (CCK-5)",
                "Dynorphin A fragment 1-5",
                "Endomorphin-1",
                "Endomorphin-2",
                "Kemptide",
                "Leucine enkephalin (Leu-enkephalin)",
                "Leucokinin I",
                "Leucopyrokinin",
                "Methionine enkephalin (Met-enkephalin)",
                "Neurokinin A fragment 1-5",
                "Neurokinin B fragment 1-5",
                "Neurotensin fragment 1-5",
                "Opioid growth factor (OGF)",
                "Oxytocin fragment 1-5",
                "Phyllokinin",
                "Proctolin",
                "Somatostatin fragment 1-5",
                "Substance P fragment 1-5",
                "Thyrotropin-releasing hormone (TRH)",
                "Vasoactive intestinal peptide fragment VIP(6-10)",
                "Vasopressin fragment 1-5"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Leu-enkephalin (YGGFL)",
            "Met-enkephalin (YGGFM)",
            "Kyotorphin (YGGFM)",
            "Dermorphin (YAFFXXRI)",
            "Endomorphin-1 (YPWF)",
            "Endomorphin-2 (YPFF)",
            "Deltorphin I (YAFXXI)",
            "Deltorphin II (YAFXXI)",
            "Casomorphin (YPFPGPI)",
            "alpha-Neoendorphin (YGGFLRKYP)",
            "beta-Neoendorphin (YGGFLRKYPK)",
            "Dynorphin A (YGGFLRRIRPKLK)",
            "Dynorphin B (YGGFLRRQFKVVT)",
            "Rimorphin (YGGFLRKYPK)",
            "Leumorphin (YGGFLRKYP)",
            "Endomorphin-1 analog (YPWFNH2)",
            "Endomorphin-2 analog (YPFFNH2)",
            "Kyotorphin analog (YGGFMNH2)",
            "Leu-enkephalin analog (YGGFLNH2)",
            "Met-enkephalin analog (YGGFMNH2)",
            "Dermorphin analog (YAFFXXRINH2)",
            "Deltorphin I analog (YAFXXINH2)",
            "Deltorphin II analog (YAFXXINH2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Leu-enkephalin (YGGFL)",
                "Met-enkephalin (YGGFM)",
                "Kyotorphin (YGGFM)",
                "Dermorphin (YAFFXXRI)",
                "Endomorphin-1 (YPWF)",
                "Endomorphin-2 (YPFF)",
                "Deltorphin I (YAFXXI)",
                "Deltorphin II (YAFXXI)",
                "Casomorphin (YPFPGPI)",
                "alpha-Neoendorphin (YGGFLRKYP)",
                "beta-Neoendorphin (YGGFLRKYPK)",
                "Dynorphin A (YGGFLRRIRPKLK)",
                "Dynorphin B (YGGFLRRQFKVVT)",
                "Rimorphin (YGGFLRKYPK)",
                "Leumorphin (YGGFLRKYP)",
                "Endomorphin-1 analog (YPWFNH2)",
                "Endomorphin-2 analog (YPFFNH2)",
                "Kyotorphin analog (YGGFMNH2)",
                "Leu-enkephalin analog (YGGFLNH2)",
                "Met-enkephalin analog (YGGFMNH2)",
                "Dermorphin analog (YAFFXXRINH2)",
                "Deltorphin I analog (YAFXXINH2)",
                "Deltorphin II analog (YAFXXINH2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Casomorphin (YPFPGPI)",
                "Deltorphin I (YAFXXI)",
                "Deltorphin I analog (YAFXXINH2)",
                "Deltorphin II (YAFXXI)",
                "Deltorphin II analog (YAFXXINH2)",
                "Dermorphin (YAFFXXRI)",
                "Dermorphin analog (YAFFXXRINH2)",
                "Dynorphin A (YGGFLRRIRPKLK)",
                "Dynorphin B (YGGFLRRQFKVVT)",
                "Endomorphin-1 (YPWF)",
                "Endomorphin-1 analog (YPWFNH2)",
                "Endomorphin-2 (YPFF)",
                "Endomorphin-2 analog (YPFFNH2)",
                "Kyotorphin (YGGFM)",
                "Kyotorphin analog (YGGFMNH2)",
                "Leu-enkephalin (YGGFL)",
                "Leu-enkephalin analog (YGGFLNH2)",
                "Leumorphin (YGGFLRKYP)",
                "Met-enkephalin (YGGFM)",
                "Met-enkephalin analog (YGGFMNH2)",
                "Rimorphin (YGGFLRKYPK)",
                "alpha-Neoendorphin (YGGFLRKYP)",
                "beta-Neoendorphin (YGGFLRKYPK)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Glutathione (GSH)",
            "Insulin (INS)",
            "Glucagon (GCG)",
            "Oxytocin (OXT)",
            "Vasopressin (AVP)",
            "Angiotensin II (ANG2)",
            "Bradykinin (BK)",
            "Substance P (SP)",
            "Somatostatin (SRIF)",
            "Enkephalin (ENK)",
            "Endorphin (END)",
            "Calcitonin (CT)",
            "Parathyroid Hormone (PTH)",
            "Thyroid-Stimulating Hormone (TSH)",
            "Follicle-Stimulating Hormone (FSH)",
            "Luteinizing Hormone (LH)",
            "Adrenocorticotropic Hormone (ACTH)",
            "Growth Hormone (GH)",
            "Prolactin (PRL)",
            "Melanocyte-Stimulating Hormone (MSH)",
            "Antidiuretic Hormone (ADH)",
            "Thyrotropin-Releasing Hormone (TRH)",
            "Gonadotropin-Releasing Hormone (GnRH)",
            "Corticotropin-Releasing Hormone (CRH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glutathione (GSH)",
                "Insulin (INS)",
                "Glucagon (GCG)",
                "Oxytocin (OXT)",
                "Vasopressin (AVP)",
                "Angiotensin II (ANG2)",
                "Bradykinin (BK)",
                "Substance P (SP)",
                "Somatostatin (SRIF)",
                "Enkephalin (ENK)",
                "Endorphin (END)",
                "Calcitonin (CT)",
                "Parathyroid Hormone (PTH)",
                "Thyroid-Stimulating Hormone (TSH)",
                "Follicle-Stimulating Hormone (FSH)",
                "Luteinizing Hormone (LH)",
                "Adrenocorticotropic Hormone (ACTH)",
                "Growth Hormone (GH)",
                "Prolactin (PRL)",
                "Melanocyte-Stimulating Hormone (MSH)",
                "Antidiuretic Hormone (ADH)",
                "Thyrotropin-Releasing Hormone (TRH)",
                "Gonadotropin-Releasing Hormone (GnRH)",
                "Corticotropin-Releasing Hormone (CRH)"
            ],
            "mismatches": [],
            "true_referents": [
                "Adrenocorticotropic Hormone (ACTH)",
                "Angiotensin II (ANG2)",
                "Antidiuretic Hormone (ADH)",
                "Bradykinin (BK)",
                "Calcitonin (CT)",
                "Corticotropin-Releasing Hormone (CRH)",
                "Endorphin (END)",
                "Enkephalin (ENK)",
                "Follicle-Stimulating Hormone (FSH)",
                "Glucagon (GCG)",
                "Glutathione (GSH)",
                "Gonadotropin-Releasing Hormone (GnRH)",
                "Growth Hormone (GH)",
                "Insulin (INS)",
                "Luteinizing Hormone (LH)",
                "Melanocyte-Stimulating Hormone (MSH)",
                "Oxytocin (OXT)",
                "Parathyroid Hormone (PTH)",
                "Prolactin (PRL)",
                "Somatostatin (SRIF)",
                "Substance P (SP)",
                "Thyroid-Stimulating Hormone (TSH)",
                "Thyrotropin-Releasing Hormone (TRH)",
                "Vasopressin (AVP)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Peptide hormone (Insulin)",
            "Peptide hormone (Ghrelin)",
            "Peptide hormone (Oxytocin)",
            "Peptide hormone (Vasopressin)",
            "Peptide hormone (Adrenocorticotropic hormone)",
            "Peptide hormone (Thyrotropin-releasing hormone)",
            "Peptide hormone (Growth hormone-releasing hormone)",
            "Peptide hormone (Growth hormone-inhibiting hormone)",
            "Peptide hormone (Melanocyte-stimulating hormone)",
            "Peptide hormone (Endorphin)",
            "Peptide hormone (Enkephalin)",
            "Peptide hormone (Somatostatin)",
            "Peptide hormone (Cholecystokinin)",
            "Peptide hormone (Gastrin-releasing peptide)",
            "Peptide hormone (Bombesin)",
            "Peptide hormone (Calcitonin)",
            "Peptide hormone (Parathyroid hormone)",
            "Peptide hormone (Adiponectin)",
            "Peptide hormone (Leptin)",
            "Peptide hormone (Insulin-like growth factor 1)",
            "Peptide hormone (Insulin-like growth factor 2)",
            "Peptide hormone (Fibroblast growth factor)",
            "Peptide hormone (Erythropoietin)",
            "Peptide hormone (Interleukin-1 beta)",
            "Peptide hormone (Interleukin-2)",
            "Peptide hormone (Interleukin-6)",
            "Peptide hormone (Tumor necrosis factor-alpha)",
            "Peptide hormone (Vasopressin-related peptide)",
            "Peptide hormone (Growth hormone-releasing peptide)",
            "Peptide hormone (Growth hormone-inhibiting peptide)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Peptide hormone (Insulin)",
                "Peptide hormone (Ghrelin)",
                "Peptide hormone (Oxytocin)",
                "Peptide hormone (Vasopressin)",
                "Peptide hormone (Adrenocorticotropic hormone)",
                "Peptide hormone (Thyrotropin-releasing hormone)",
                "Peptide hormone (Growth hormone-releasing hormone)",
                "Peptide hormone (Growth hormone-inhibiting hormone)",
                "Peptide hormone (Melanocyte-stimulating hormone)",
                "Peptide hormone (Endorphin)",
                "Peptide hormone (Enkephalin)",
                "Peptide hormone (Somatostatin)",
                "Peptide hormone (Cholecystokinin)",
                "Peptide hormone (Gastrin-releasing peptide)",
                "Peptide hormone (Bombesin)",
                "Peptide hormone (Calcitonin)",
                "Peptide hormone (Parathyroid hormone)",
                "Peptide hormone (Adiponectin)",
                "Peptide hormone (Leptin)",
                "Peptide hormone (Insulin-like growth factor 1)",
                "Peptide hormone (Insulin-like growth factor 2)",
                "Peptide hormone (Fibroblast growth factor)",
                "Peptide hormone (Erythropoietin)",
                "Peptide hormone (Interleukin-1 beta)"
            ],
            "mismatches": [
                "Peptide hormone (Interleukin-2)",
                "Peptide hormone (Interleukin-6)",
                "Peptide hormone (Tumor necrosis factor-alpha)",
                "Peptide hormone (Vasopressin-related peptide)",
                "Peptide hormone (Growth hormone-releasing peptide)",
                "Peptide hormone (Growth hormone-inhibiting peptide)"
            ],
            "true_referents": [
                "Peptide hormone (Adiponectin)",
                "Peptide hormone (Adrenocorticotropic hormone)",
                "Peptide hormone (Bombesin)",
                "Peptide hormone (Calcitonin)",
                "Peptide hormone (Cholecystokinin)",
                "Peptide hormone (Endorphin)",
                "Peptide hormone (Enkephalin)",
                "Peptide hormone (Erythropoietin)",
                "Peptide hormone (Fibroblast growth factor)",
                "Peptide hormone (Gastrin-releasing peptide)",
                "Peptide hormone (Ghrelin)",
                "Peptide hormone (Growth hormone-inhibiting hormone)",
                "Peptide hormone (Growth hormone-releasing hormone)",
                "Peptide hormone (Insulin)",
                "Peptide hormone (Insulin-like growth factor 1)",
                "Peptide hormone (Insulin-like growth factor 2)",
                "Peptide hormone (Interleukin-1 beta)",
                "Peptide hormone (Leptin)",
                "Peptide hormone (Melanocyte-stimulating hormone)",
                "Peptide hormone (Oxytocin)",
                "Peptide hormone (Parathyroid hormone)",
                "Peptide hormone (Somatostatin)",
                "Peptide hormone (Thyrotropin-releasing hormone)",
                "Peptide hormone (Vasopressin)"
            ],
            "TP": 24,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": [
            "Pentagastrin (Gastrin I)",
            "Pentadecapeptide (DPDPE)",
            "Enkephalin (Met-Enk)",
            "Leuenkephalin (Leu-Enk)",
            "Tyr-MIF-1",
            "Pentachlorphenol",
            "Pentachlorphenol (PCP)",
            "Glutathione",
            "Gly-His-Lys",
            "Lys-Gly-Asn",
            "Phe-Met-Arg-Phe-NH2",
            "Yatakamide A",
            "Yatakamide B",
            "Yatakamide C",
            "Yatakamide D",
            "Yatakamide E",
            "Yatakamide F",
            "Yatakamide G",
            "Yatakamide H",
            "Yatakamide I",
            "Yatakamide J",
            "Yatakamide K",
            "Yatakamide L"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Pentagastrin (Gastrin I)",
                "Pentadecapeptide (DPDPE)",
                "Enkephalin (Met-Enk)",
                "Leuenkephalin (Leu-Enk)",
                "Tyr-MIF-1",
                "Pentachlorphenol",
                "Pentachlorphenol (PCP)",
                "Glutathione",
                "Gly-His-Lys",
                "Lys-Gly-Asn",
                "Phe-Met-Arg-Phe-NH2",
                "Yatakamide A",
                "Yatakamide B",
                "Yatakamide C",
                "Yatakamide D",
                "Yatakamide E",
                "Yatakamide F",
                "Yatakamide G",
                "Yatakamide H",
                "Yatakamide I",
                "Yatakamide J",
                "Yatakamide K",
                "Yatakamide L"
            ],
            "mismatches": [],
            "true_referents": [
                "Enkephalin (Met-Enk)",
                "Glutathione",
                "Gly-His-Lys",
                "Leuenkephalin (Leu-Enk)",
                "Lys-Gly-Asn",
                "Pentachlorphenol",
                "Pentachlorphenol (PCP)",
                "Pentadecapeptide (DPDPE)",
                "Pentagastrin (Gastrin I)",
                "Phe-Met-Arg-Phe-NH2",
                "Tyr-MIF-1",
                "Yatakamide A",
                "Yatakamide B",
                "Yatakamide C",
                "Yatakamide D",
                "Yatakamide E",
                "Yatakamide F",
                "Yatakamide G",
                "Yatakamide H",
                "Yatakamide I",
                "Yatakamide J",
                "Yatakamide K",
                "Yatakamide L"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "pentapeptide",
        "domain": "chemistry",
        "response": "[\n        {\n            \"Referent\": \"insulin\" (ins),\n            \"Canonical name\": \"insulin\"\n        },\n        {\n            \"Referent\": \"oxytocin\" (oxytocin),\n            \"Canonical name\": \"oxytocin\"\n        },\n        {\n            \"Referent\": \"calcitonin\" (calcitonin),\n            \"Canonical name\": \"calcitonin\"\n        },\n        {\n            \"Referent\": \"parathyroid hormone\" (parathyroid hormone),\n            \"Canonical name\": \"parathyroid hormone\"\n        },\n        {\n            \"Referent\": \"thyroxine\" (thyroxine),\n            \"Canonical name\": \"thyroxine\"\n        },\n        {\n            \"Referent\": \"vitamin D3\" (vitamin D3),\n            \"Canonical name\": \"vitamin D3\"\n        },\n        {\n            \"Referent\": \"vitamin B12\" (vitamin B12),\n            \"Canonical name\": \"vitamin B12\"\n        },\n        {\n            \"Referent\": \"vitamin A\" (vitamin A),\n            \"Canonical name\": \"vitamin A\"\n        },\n        {\n            \"Referent\": \"vitamin K\" (vitamin K),\n            \"Canonical name\": \"vitamin K\"\n        },\n        {\n            \"Referent\": \"vitamin E\" (vitamin E),\n            \"Canonical name\": \"vitamin E\"\n        },\n        {\n            \"Referent\": \"vitamin C\" (vitamin C),\n            \"Canonical name\": \"vitamin C\"\n        },\n        {\n            \"Referent\": \"biotin\" (biotin),\n            \"Canonical name\": \"biotin\"\n        },\n        {\n            \"Referent\": \"folic acid\" (folic acid),\n            \"Canonical name\": \"folic acid\"\n        },\n        {\n            \"Referent\": \"riboflavin\" (riboflavin),\n            \"Canonical name\": \"riboflavin\"\n        },\n        {\n            \"Referent\": \"pantothenic acid\" (pantothenic acid),\n            \"Canonical name\": \"pantothenic acid\"\n        },\n        {\n            \"Referent\": \"pyridoxine\" (pyridoxine),\n            \"Canonical name\": \"pyridoxine\"\n        },\n        {\n            \"Referent\": \"thiamine\" (thiamine),\n            \"Canonical name\": \"thiamine\"\n        },\n        {\n            \"Referent\": \"cobalamin\" (cobalamin),\n            \"Canonical name\": \"cobalamin\"\n        },\n        {\n            \"Referent\": \"niacin\" (niacin),\n            \"Canonical name\": \"niacin\"\n        },\n        {\n            \"Referent\": \"biotin\" (biotin),\n            \"Canonical name\": \"biotin\"\n        },\n        {\n            \"Referent\": \"folic acid\" (folic acid),\n            \"Canonical name\": \"folic acid\"\n        },\n        {\n            \"Referent\": \"riboflavin\" (riboflavin),\n            \"Canonical name\": \"riboflavin\"\n        },\n        {\n            \"Referent\": \"pantothenic acid\" (pantothenic acid),\n            \"Canonical name\": \"pantothenic acid\"\n        },\n        {\n            \"Referent\": \"pyridoxine\" (pyridoxine),\n            \"Canonical name\": \"pyridoxine\"\n        },\n        {\n            \"Referent\": \"thiamine\" (thiamine),\n            \"Canonical name\": \"thiamine\"\n        },\n        {\n            \"Referent\": \"cobalamin\" (cobalamin),\n            \"Canonical name\": \"cobalamin\"\n        },\n        {\n            \"Referent\": \"niacin\" (niacin),\n            \"Canonical name\": \"niacin\"\n        },\n        {\n            \"Referent\": \"biotin\" (biotin),\n            \"Canonical name\": \"biotin\"\n        },\n        {\n            \"Referent\": \"folic acid\" (folic acid),\n            \"Canonical name\": \"folic acid\"\n        },\n        {\n            \"Referent\": \"riboflavin\" (riboflavin),\n            \"Canonical name\": \"riboflavin\"\n        },\n        {\n            \"Referent\": \"pantothenic acid\" (pantothenic acid),\n            \"Canonical name\": \"pantothenic acid\"\n        },\n        {\n            \"Referent\": \"pyridoxine\" (pyridoxine),\n            \"Canonical name\": \"pyridoxine\"\n        },\n        {\n            \"Referent\": \"thiamine\" (thiamine),\n            \"Canonical name\": \"thiamine\"\n        },\n        {\n            \"Referent\": \"cobalamin\" (cobalamin),\n            \"Canonical name\": \"cobalamin\"\n        },\n        {\n            \"Referent\": \"niacin\" (niacin),\n            \"Canonical name\": \"niacin\"\n        }\n    ]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]