[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Androstenedione (A-dione)",
            "Androsterone",
            "Corticosterone",
            "Dehydroepiandrosterone (DHEA)",
            "Estradiol (E2)",
            "Estriol",
            "Estrone",
            "Progesterone",
            "Testosterone",
            "Allopregnanolone",
            "Epinephrine",
            "Norepinephrine",
            "Cortisol (Hydrocortisone)",
            "Dexamethasone",
            "Prednisone",
            "11-Deoxycorticosterone",
            "Aldosterone",
            "Epiandrosterone",
            "Epiestriol",
            "Epiestriol-3-sulfate",
            "Estriol-3-sulfate",
            "Estriol-16-glycoside",
            "Estriol-16-sulfate",
            "Estrone-3-sulfate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Corticosterone",
                "Progesterone",
                "Dexamethasone",
                "Prednisone",
                "11-Deoxycorticosterone",
                "Aldosterone"
            ],
            "mismatches": [
                "Androstenedione (A-dione)",
                "Androsterone",
                "Dehydroepiandrosterone (DHEA)",
                "Estradiol (E2)",
                "Estriol",
                "Estrone",
                "Testosterone",
                "Allopregnanolone",
                "Epinephrine",
                "Norepinephrine",
                "Cortisol (Hydrocortisone)",
                "Epiandrosterone",
                "Epiestriol",
                "Epiestriol-3-sulfate",
                "Estriol-3-sulfate",
                "Estriol-16-glycoside",
                "Estriol-16-sulfate",
                "Estrone-3-sulfate"
            ],
            "true_referents": [
                "(5alpha)-3-oxocholestan-26-oic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "22,23-epoxy-5beta-campestan-3beta-ol",
                "24-Nor-5beta-cholane-3alpha,7alpha,12alpha-triol",
                "3a,20b-Pregnanediol",
                "3alpha,11beta,17alpha-Trihydroxy-5beta-pregnan-20-one",
                "3alpha,17alpha-Dihydroxy-5beta-pregnane-11,20-dione",
                "5alpha-dihydrodeoxycorticosterone",
                "5alpha-pregnane-3,20-dione",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Dexamethasone acetate",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "boldenone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "dexamethasone",
                "dexamethasone dipropionate",
                "estetrol",
                "gestonorone",
                "mineralocorticoid",
                "norethandrolone",
                "norgesterone",
                "oxandrolone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol (CORT)",
            "Aldosterone (ALDO)",
            "Testosterone (TST)",
            "Estradiol (E2)",
            "Progesterone (P4)",
            "Dehydroepiandrosterone (DHEA)",
            "Dihydrotestosterone (DHT)",
            "Androstenedione (A4)",
            "Epiandrosterone (EPIA)",
            "Pregnenolone (PREG)",
            "Corticosterone (CORTC)",
            "11-deoxycorticosterone (11-DOC)",
            "17\u03b1-hydroxyprogesterone (17-OHP)",
            "17\u03b2-estradiol (E2)",
            "Testosterone (T)",
            "Estrone (E1)",
            "Estriol (E3)",
            "Vitamin D3 (Cholecalciferol, D3)",
            "Cholesterol (CHOL)",
            "5\u03b1-Dihydrotestosterone (5\u03b1-DHT)",
            "11-Ketotestosterone (11-KT)",
            "Androstenediol (ANDIOL)",
            "Deoxycorticosterone (DOC)",
            "Cortisone (CORTONE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cortisol (CORT)",
                "Aldosterone (ALDO)",
                "Progesterone (P4)",
                "Corticosterone (CORTC)",
                "11-deoxycorticosterone (11-DOC)",
                "Cortisone (CORTONE)",
                "Deoxycorticosterone (DOC)"
            ],
            "mismatches": [
                "Testosterone (TST)",
                "Estradiol (E2)",
                "Dehydroepiandrosterone (DHEA)",
                "Dihydrotestosterone (DHT)",
                "Androstenedione (A4)",
                "Epiandrosterone (EPIA)",
                "Pregnenolone (PREG)",
                "17\u03b1-hydroxyprogesterone (17-OHP)",
                "17\u03b2-estradiol (E2)",
                "Testosterone (T)",
                "Estrone (E1)",
                "Estriol (E3)",
                "Vitamin D3 (Cholecalciferol, D3)",
                "Cholesterol (CHOL)",
                "5\u03b1-Dihydrotestosterone (5\u03b1-DHT)",
                "11-Ketotestosterone (11-KT)",
                "Androstenediol (ANDIOL)"
            ],
            "true_referents": [
                "11-Ketoprogesterone",
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "15alpha-Hydroxyprogesterone",
                "15beta-hydroxycyproterone acetate",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "1alpha,2beta,25-trihydroxy vitamin D3",
                "24-Nor-5beta-chol-22-ene-3alpha,12alpha-diol",
                "24-Nor-5beta-chol-22-ene-3alpha,6alpha-diol",
                "24-Nor-5beta-chol-22-ene-3alpha,7alpha,12alpha-triol",
                "4-Chloroprogesterone",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "6beta,11alpha-Dihydroxyprogesterone",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Deoxycorticosterone acetate",
                "Ethyltestosterone",
                "Pregn-5-ene-3beta,20alpha-diol",
                "aldosterone",
                "anabolic androgenic steroid",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "estetrol",
                "mineralocorticoid",
                "norgesterone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol (Hydrocortisone)",
            "Aldosterone",
            "Testosterone",
            "Estradiol",
            "Progesterone",
            "Dihydrotestosterone (DHT)",
            "Corticosterone",
            "Estriol",
            "Dehydroepiandrosterone (DHEA)",
            "Cortisone",
            "Prednisone",
            "Prednisolone",
            "Methylprednisolone",
            "Estrone",
            "Androstenedione",
            "17\u03b1-Hydroxyprogesterone",
            "17\u03b1-Hydroxypregnenolone",
            "17\u03b1-Hydroxyprogesterone",
            "11-Deoxycortisol",
            "11-Deoxycorticosterone",
            "17\u03b1-Hydroxypregnenolone",
            "11-Deoxycortisone",
            "11-Dehydrocorticosterone",
            "11-Deoxycortisol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cortisol (Hydrocortisone)",
                "Aldosterone",
                "Progesterone",
                "Corticosterone",
                "Cortisone",
                "Prednisone",
                "Prednisolone",
                "11-Deoxycortisol",
                "11-Deoxycorticosterone",
                "11-Deoxycortisone"
            ],
            "mismatches": [
                "Testosterone",
                "Estradiol",
                "Dihydrotestosterone (DHT)",
                "Estriol",
                "Dehydroepiandrosterone (DHEA)",
                "Methylprednisolone",
                "Estrone",
                "Androstenedione",
                "17\u03b1-Hydroxyprogesterone",
                "17\u03b1-Hydroxypregnenolone",
                "11-Dehydrocorticosterone"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "11-deoxycortisol",
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "21-Deoxycortisone",
                "3alpha,17alpha-dihydroxy-5beta-pregnan-20-one",
                "5alpha-dihydrodeoxycorticosterone",
                "Delta(1)-dihydrotestosterone",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "Prednisolone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "boldenone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "dexamethasone",
                "methylprednisolone acetate",
                "mineralocorticoid",
                "norethandrolone",
                "norgesterone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 10,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Testosterone (T)",
            "Estradiol (E2)",
            "Progesterone (P4)",
            "Cortisol (F)",
            "Aldosterone (Aldo)",
            "Dehydroepiandrosterone (DHEA)",
            "Androstenedione (A4)",
            "Estriol (E3)",
            "Estrone (E1)",
            "Dihydrotestosterone (DHT)",
            "Prednisone",
            "Prednisolone",
            "Methylprednisolone",
            "Triamcinolone",
            "Betamethasone",
            "Hydrocortisone",
            "Spironolactone",
            "Mifepristone",
            "Fludrocortisone",
            "Drospirenone",
            "Norethindrone",
            "Levonorgestrel",
            "Medroxyprogesterone acetate (MPA)",
            "Danazol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Progesterone (P4)",
                "Cortisol (F)",
                "Aldosterone (Aldo)",
                "Prednisone",
                "Prednisolone",
                "Triamcinolone",
                "Betamethasone",
                "Fludrocortisone"
            ],
            "mismatches": [
                "Testosterone (T)",
                "Estradiol (E2)",
                "Dehydroepiandrosterone (DHEA)",
                "Androstenedione (A4)",
                "Estriol (E3)",
                "Estrone (E1)",
                "Dihydrotestosterone (DHT)",
                "Methylprednisolone",
                "Hydrocortisone",
                "Spironolactone",
                "Mifepristone",
                "Drospirenone",
                "Norethindrone",
                "Levonorgestrel",
                "Medroxyprogesterone acetate (MPA)",
                "Danazol"
            ],
            "true_referents": [
                "1-beta-hydroxymedroxyprogesterone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "4-Chloroprogesterone",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "6alpha-Chloro-17-acetoxyprogesterone",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "Prednisolone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "betamethasone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "dexamethasone",
                "dexamethasone dipropionate",
                "estetrol",
                "fludrocortisone",
                "fludrocortisone acetate",
                "flumethasone",
                "hydrocortisone aceponate",
                "methylprednisolone acetate",
                "mineralocorticoid",
                "norethandrolone",
                "norgesterone",
                "norvinisterone",
                "oxandrolone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "stanozolol",
                "trenbolone",
                "triamcinolone",
                "triamcinolone acetonide"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol (F)",
            "Aldosterone (Aldo)",
            "Testosterone (T)",
            "Estradiol (E2)",
            "Progesterone (P4)",
            "Dihydrotestosterone (DHT)",
            "Estrone (E1)",
            "Estriol (E3)",
            "Dehydroepiandrosterone (DHEA)",
            "Androstenedione (A4)",
            "Corticosterone (B)",
            "11-Deoxycortisol (S)",
            "11-Deoxycorticosterone (DOC)",
            "17-Hydroxyprogesterone (17-OHP)",
            "Androsterone",
            "Pregnenolone",
            "17\u03b1-Hydroxypregnenolone",
            "Dihydroepiandrosterone sulfate (DHEA-S)",
            "Tetrahydrocortisol (THF)",
            "Tetrahydrocortisone (THE)",
            "Tetrahydrodeoxycorticosterone (THDOC)",
            "5\u03b1-Dihydroprogesterone",
            "5-Androstenediol",
            "3\u03b2-Androstanediol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "cortisol",
                "aldosterone",
                "corticosterone",
                "11-deoxycorticosterone",
                "11-deoxycortisol",
                "progesterone",
                "tetrahydrocortisol"
            ],
            "mismatches": [
                "testosterone",
                "estradiol",
                "dihydrotestosterone",
                "estrone",
                "estriol",
                "dehydroepiandrosterone",
                "androstenedione",
                "androsterone",
                "pregnenolone",
                "17\u03b1-hydroxypregnenolone",
                "dihydroepiandrosterone sulfate",
                "tetrahydrocortisone",
                "tetrahydrodeoxycorticosterone",
                "5\u03b1-dihydroprogesterone",
                "5-androstenediol",
                "3\u03b2-androstanediol"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "11-deoxycortisol",
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "21-Deoxycortisone",
                "3a,20b-Pregnanediol",
                "3alpha,17alpha-dihydroxy-5beta-pregnan-20-one",
                "3alpha,6alpha-Dihydroxy-5beta-pregnan-20-one",
                "4-Chloroprogesterone",
                "5a-Tetrahydrocorticosterone",
                "5a-Tetrahydrocortisol",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "6beta,11alpha-Dihydroxyprogesterone",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Deoxycorticosterone acetate",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "estetrol",
                "fludrocortisone",
                "mineralocorticoid",
                "nandrolone",
                "norgesterone",
                "oxandrolone",
                "progesterone",
                "tetrahydrocortisol",
                "trenbolone"
            ],
            "TP": 7,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Testosterone (T)",
            "Estradiol (E2)",
            "Progesterone (P4)",
            "Cortisol (C)",
            "Aldosterone (A)",
            "Dihydrotestosterone (DHT)",
            "Dehydroepiandrosterone (DHEA)",
            "Androstenedione (AD)",
            "Estrone (E1)",
            "Hydrocortisone (HC)",
            "Norethisterone (NET)",
            "Methyltestosterone (MT)",
            "Fludrocortisone (FLU)",
            "Prednisolone (PRED)",
            "Budesonide (BUD)",
            "Trenbolone (TREN)",
            "Tamoxifen (TAM)",
            "Anastrozole (ANA)",
            "Letrozole (LET)",
            "Clomiphene (CLO)",
            "Raloxifene (RAL)",
            "Finasteride (FIN)",
            "Spironolactone (SPI)",
            "Oxandrolone (OXA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Progesterone (P4)",
                "Cortisol (C)",
                "Aldosterone (A)",
                "Fludrocortisone (FLU)",
                "Prednisolone (PRED)",
                "Budesonide (BUD)",
                "Trenbolone (TREN)",
                "Oxandrolone (OXA)"
            ],
            "mismatches": [
                "Testosterone (T)",
                "Estradiol (E2)",
                "Dihydrotestosterone (DHT)",
                "Dehydroepiandrosterone (DHEA)",
                "Androstenedione (AD)",
                "Estrone (E1)",
                "Hydrocortisone (HC)",
                "Norethisterone (NET)",
                "Methyltestosterone (MT)",
                "Tamoxifen (TAM)",
                "Anastrozole (ANA)",
                "Letrozole (LET)",
                "Clomiphene (CLO)",
                "Raloxifene (RAL)",
                "Finasteride (FIN)",
                "Spironolactone (SPI)"
            ],
            "true_referents": [
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "4-Chloroprogesterone",
                "5alpha-dihydrodeoxycorticosterone",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Delta(6)-prednisolone",
                "Ethyltestosterone",
                "Hydrocortisone cypionate",
                "aldosterone",
                "anabolic androgenic steroid",
                "betamethasone",
                "boldenone",
                "budesonide",
                "clobetasol",
                "clobetasone",
                "clocortolone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "fludrocortisone",
                "fludrocortisone acetate",
                "flumethasone",
                "hydrocortisone aceponate",
                "hydrocortisone caproate",
                "mineralocorticoid",
                "nandrolone",
                "norethandrolone",
                "norgesterone",
                "norvinisterone",
                "oxandrolone",
                "oxymetholone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol (COR)",
            "Testosterone (T)",
            "Estradiol (E2)",
            "Progesterone (P4)",
            "Aldosterone (ALD)",
            "Estriol (E3)",
            "Estrone (E1)",
            "Corticosterone (CORT)",
            "Dehydroepiandrosterone (DHEA)",
            "Androstenedione (A4)",
            "17\u03b1-hydroxyprogesterone (17-OHP)",
            "11-Deoxycortisol (S)",
            "Prednisolone (PRED)",
            "Prednisone (PRD)",
            "Dexamethasone (DXM)",
            "Betamethasone (BET)",
            "Triamcinolone (TRI)",
            "Fludrocortisone (FLU)",
            "Hydrocortisone (HC)",
            "Beclomethasone (BECL)",
            "Methylprednisolone (MP)",
            "Budesonide (BUD)",
            "Ciclesonide (CIC)",
            "Fluticasone (FLU)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cortisol (COR)",
                "Progesterone (P4)",
                "Aldosterone (ALD)",
                "Corticosterone (CORT)",
                "11-Deoxycortisol (S)",
                "Prednisolone (PRED)",
                "Prednisone (PRD)",
                "Dexamethasone (DXM)",
                "Betamethasone (BET)",
                "Triamcinolone (TRI)",
                "Fludrocortisone (FLU)",
                "Hydrocortisone (HC)",
                "Beclomethasone (BECL)",
                "Budesonide (BUD)"
            ],
            "mismatches": [
                "Testosterone (T)",
                "Estradiol (E2)",
                "Estriol (E3)",
                "Estrone (E1)",
                "Dehydroepiandrosterone (DHEA)",
                "Androstenedione (A4)",
                "17\u03b1-hydroxyprogesterone (17-OHP)",
                "Methylprednisolone (MP)",
                "Ciclesonide (CIC)",
                "Fluticasone (FLU)"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycortisol",
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "21-Deoxycortisone",
                "4-Chloroprogesterone",
                "5alpha-dihydrodeoxycorticosterone",
                "5alpha-pregnane-3,20-dione",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Delta(6)-prednisolone",
                "Delta(6)-prednisone",
                "Dexamethasone 21-(4-Pyridinecarboxylate)",
                "Dexamethasone acetate",
                "Ethyltestosterone",
                "Hydrocortisone cypionate",
                "aldosterone",
                "anabolic androgenic steroid",
                "beclomethasone",
                "beclomethasone dipropionate",
                "betamethasone",
                "betamethasone butyrate propionate",
                "budesonide",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "dexamethasone",
                "dexamethasone dipropionate",
                "estetrol",
                "fludrocortisone",
                "fludrocortisone acetate",
                "flumethasone",
                "fluocinolone acetonide",
                "hydrocortisone aceponate",
                "hydrocortisone caproate",
                "methylprednisolone aceponate",
                "methylprednisolone acetate",
                "mineralocorticoid",
                "norgesterone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "trenbolone",
                "triamcinolone",
                "triamcinolone acetonide"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol",
            "Testosterone",
            "Estradiol",
            "Progesterone",
            "Aldosterone",
            "Dexamethasone",
            "Prednisone",
            "Androstenedione",
            "Dihydrotestosterone",
            "Dehydroepiandrosterone",
            "Corticosterone",
            "Estrone",
            "Fludrocortisone",
            "Methylprednisolone",
            "Medroxyprogesterone",
            "17\u03b1-Hydroxyprogesterone",
            "17\u03b2-Estradiol",
            "17\u03b1-Estradiol",
            "17\u03b2-Dihydroequilin",
            "17\u03b1-Dihydroequilin",
            "17\u03b1-Dihydroequilenin",
            "17\u03b2-Dihydroequilenin",
            "17\u03b2-Estradiol 3-sulfate",
            "17\u03b1-Estradiol 3-sulfate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cortisol",
                "Progesterone",
                "Aldosterone",
                "Dexamethasone",
                "Prednisone",
                "Corticosterone",
                "Fludrocortisone"
            ],
            "mismatches": [
                "Testosterone",
                "Estradiol",
                "Androstenedione",
                "Dihydrotestosterone",
                "Dehydroepiandrosterone",
                "Estrone",
                "Methylprednisolone",
                "Medroxyprogesterone",
                "17\u03b1-Hydroxyprogesterone",
                "17\u03b2-Estradiol",
                "17\u03b1-Estradiol",
                "17\u03b2-Dihydroequilin",
                "17\u03b1-Dihydroequilin",
                "17\u03b1-Dihydroequilenin",
                "17\u03b2-Dihydroequilenin",
                "17\u03b2-Estradiol 3-sulfate",
                "17\u03b1-Estradiol 3-sulfate"
            ],
            "true_referents": [
                "1-beta-hydroxymedroxyprogesterone",
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "17-Hydroxy-5alpha,17alpha-pregn-1-en-3-one",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "3a,20b-Pregnanediol",
                "3alpha,17alpha-Dihydroxy-5beta-pregnane-11,20-dione",
                "3alpha,17alpha-dihydroxy-5beta-pregnan-20-one",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "Delta(1)-dihydrotestosterone",
                "Dexamethasone acetate",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "boldenone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "dexamethasone",
                "dexamethasone dipropionate",
                "fludrocortisone",
                "fludrocortisone acetate",
                "flumethasone",
                "methylprednisolone acetate",
                "mineralocorticoid",
                "norgesterone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol (Hydrocortisone)",
            "Aldosterone",
            "Testosterone",
            "Estradiol (E2)",
            "Progesterone",
            "Estrone",
            "Estriol",
            "Dehydroepiandrosterone (DHEA)",
            "Dihydrotestosterone (DHT)",
            "Prednisone",
            "Prednisolone",
            "Methylprednisolone",
            "Budesonide",
            "Fluticasone",
            "Androstenedione",
            "Estranediol",
            "Etonogestrel",
            "Nandrolone",
            "Oxandrolone",
            "Tibolone",
            "Androsterone",
            "Etiocholanolone",
            "Corticosterone",
            "17-Hydroxyprogesterone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aldosterone",
                "Progesterone",
                "Prednisone",
                "Prednisolone",
                "Budesonide",
                "Nandrolone",
                "Oxandrolone",
                "Corticosterone"
            ],
            "mismatches": [
                "Cortisol (Hydrocortisone)",
                "Testosterone",
                "Estradiol (E2)",
                "Estrone",
                "Estriol",
                "Dehydroepiandrosterone (DHEA)",
                "Dihydrotestosterone (DHT)",
                "Methylprednisolone",
                "Fluticasone",
                "Androstenedione",
                "Estranediol",
                "Etonogestrel",
                "Tibolone",
                "Androsterone",
                "Etiocholanolone",
                "17-Hydroxyprogesterone"
            ],
            "true_referents": [
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "17beta-hydroxyandrostan-1,3-dione",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "Alphaxolone",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "Prednisolone acetate",
                "Pregnane-3,3-diol",
                "aldosterone",
                "anabolic androgenic steroid",
                "betamethasone",
                "boldenone",
                "budesonide",
                "clocortolone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "dexamethasone",
                "estetrol",
                "fludrocortisone",
                "flumethasone",
                "medrogestone",
                "metholone",
                "methylprednisolone acetate",
                "mineralocorticoid",
                "nandrolone",
                "nomegestrol",
                "norethandrolone",
                "norgesterone",
                "oxandrolone",
                "oxymetholone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "trenbolone",
                "triamcinolone"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol (CORT)",
            "Testosterone (T)",
            "Estradiol (E2)",
            "Progesterone (P4)",
            "Aldosterone (ALDO)",
            "Dehydroepiandrosterone (DHEA)",
            "Androstenedione (A4)",
            "Corticosterone (CORT)",
            "Estrone (E1)",
            "Estriol (E3)",
            "11-Deoxycorticosterone (DOC)",
            "17\u03b1-Hydroxyprogesterone (17-OHP)",
            "Dihydrotestosterone (DHT)",
            "Pregnenolone",
            "17\u03b1-Hydroxypregnenolone",
            "Androsterone",
            "11-Deoxycortisol",
            "Cortisone",
            "Dexamethasone (DEX)",
            "Prednisolone",
            "Hydrocortisone",
            "Cholecalciferol (Vitamin D3)",
            "Ergocalciferol (Vitamin D2)",
            "Calcitriol (1,25-dihydroxyvitamin D3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cortisol (CORT)",
                "Aldosterone (ALDO)",
                "Corticosterone (CORT)",
                "11-Deoxycorticosterone (DOC)",
                "11-Deoxycortisol",
                "Cortisone",
                "Dexamethasone (DEX)",
                "Prednisolone"
            ],
            "mismatches": [
                "Testosterone (T)",
                "Estradiol (E2)",
                "Progesterone (P4)",
                "Dehydroepiandrosterone (DHEA)",
                "Androstenedione (A4)",
                "Estrone (E1)",
                "Estriol (E3)",
                "17\u03b1-Hydroxyprogesterone (17-OHP)",
                "Dihydrotestosterone (DHT)",
                "Pregnenolone",
                "17\u03b1-Hydroxypregnenolone",
                "Androsterone",
                "Hydrocortisone",
                "Cholecalciferol (Vitamin D3)",
                "Ergocalciferol (Vitamin D2)",
                "Calcitriol (1,25-dihydroxyvitamin D3)"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "11-deoxycortisol",
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "1alpha,2beta,25-trihydroxy vitamin D3",
                "21-Deoxycortisone",
                "24-Nor-5beta-chol-22-ene-3alpha,7alpha,12alpha-triol",
                "3alpha,17alpha-Dihydroxy-5beta-pregnane-11,20-dione",
                "3alpha,17alpha-dihydroxy-5beta-pregnan-20-one",
                "4-Chloroprogesterone",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Deoxycorticosterone acetate",
                "Dexamethasone acetate",
                "Ethyltestosterone",
                "Prednisolone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "dexamethasone",
                "dexamethasone dipropionate",
                "estetrol",
                "fludrocortisone",
                "hydrocortisone aceponate",
                "mineralocorticoid",
                "nandrolone",
                "norgesterone",
                "oxandrolone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Testosterone",
            "Estradiol (E2)",
            "Progesterone",
            "Cortisol",
            "Aldosterone",
            "Dehydroepiandrosterone (DHEA)",
            "Androstenedione",
            "Corticosterone",
            "11-Deoxycorticosterone",
            "11-Deoxycortisol",
            "Estrone (E1)",
            "Estriol (E3)",
            "Dihydrotestosterone (DHT)",
            "Pregnenolone",
            "17\u03b1-Hydroxypregnenolone",
            "17\u03b1-Hydroxyprogesterone",
            "Dehydroepiandrosterone sulfate (DHEA-S)",
            "Androstenediol",
            "Androsterone",
            "Epiandrosterone",
            "Allopregnanolone",
            "Tetrahydrocorticosterone",
            "Tetrahydrocortisol",
            "Tetrahydrocortisone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Progesterone",
                "Cortisol",
                "Aldosterone",
                "Corticosterone",
                "11-Deoxycorticosterone",
                "11-Deoxycortisol",
                "Tetrahydrocorticosterone",
                "Tetrahydrocortisol"
            ],
            "mismatches": [
                "Testosterone",
                "Estradiol (E2)",
                "Dehydroepiandrosterone (DHEA)",
                "Androstenedione",
                "Estrone (E1)",
                "Estriol (E3)",
                "Dihydrotestosterone (DHT)",
                "Pregnenolone",
                "17\u03b1-Hydroxypregnenolone",
                "17\u03b1-Hydroxyprogesterone",
                "Dehydroepiandrosterone sulfate (DHEA-S)",
                "Androstenediol",
                "Androsterone",
                "Epiandrosterone",
                "Allopregnanolone",
                "Tetrahydrocortisone"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "11-deoxycortisol",
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "21-Deoxycortisone",
                "3alpha,17alpha-dihydroxy-5beta-pregnan-20-one",
                "5a-Tetrahydrocorticosterone",
                "5a-Tetrahydrocortisol",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "estetrol",
                "fludrocortisone",
                "mineralocorticoid",
                "nandrolone",
                "norgesterone",
                "oxandrolone",
                "progesterone",
                "tetrahydrocortisol",
                "trenbolone"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Cortisol (C21H30O5)",
            "Aldosterone (C21H28O5)",
            "Testosterone (C19H28O2)",
            "Estradiol (C18H24O2)",
            "Progesterone (C21H30O2)",
            "Dehydroepiandrosterone (DHEA, C19H28O2)",
            "Androstenedione (C19H26O2)",
            "Pregnenolone (C21H32O2)",
            "Corticosterone (C21H30O4)",
            "Estrone (C18H22O2)",
            "Dihydrotestosterone (DHT, C19H30O2)",
            "Estriol (C18H24O3)",
            "Dexamethasone (C22H29FO5)",
            "Prednisone (C21H26O5)",
            "Hydrocortisone (C21H30O5)",
            "Fludrocortisone (C21H29FO6)",
            "Betamethasone (C22H29FO6)",
            "Methylprednisolone (C22H30O5)",
            "Triamcinolone (C21H27FO6)",
            "Prednisolone (C21H28O5)",
            "Danazol (C22H27NO2)",
            "Stanozolol (C21H32N2O)",
            "Oxandrolone (C19H30O3)",
            "Nandrolone (C18H26O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aldosterone (C21H28O5)",
                "Progesterone (C21H30O2)",
                "Corticosterone (C21H30O4)",
                "Dexamethasone (C22H29FO5)",
                "Prednisone (C21H26O5)",
                "Fludrocortisone (C21H29FO6)",
                "Betamethasone (C22H29FO6)",
                "Triamcinolone (C21H27FO6)",
                "Prednisolone (C21H28O5)",
                "Stanozolol (C21H32N2O)",
                "Oxandrolone (C19H30O3)",
                "Nandrolone (C18H26O2)"
            ],
            "mismatches": [
                "Cortisol (C21H30O5)",
                "Testosterone (C19H28O2)",
                "Estradiol (C18H24O2)",
                "Dehydroepiandrosterone (DHEA, C19H28O2)",
                "Androstenedione (C19H26O2)",
                "Pregnenolone (C21H32O2)",
                "Estrone (C18H22O2)",
                "Dihydrotestosterone (DHT, C19H30O2)",
                "Estriol (C18H24O3)",
                "Hydrocortisone (C21H30O5)",
                "Methylprednisolone (C22H30O5)",
                "Danazol (C22H27NO2)"
            ],
            "true_referents": [
                "(3a,5b,6a)- 17-(acetyloxy)-3-hydroxy-6-methyl- Pregnan-20-one",
                "11beta,21-Dihydroxypregn-4-ene-3,20-dione 21-acetate",
                "16-a-Hydroxypregnenolone",
                "16alpha,17-Dihydroxypregn-4-ene-3,20-dione cyclic acetal with 2-furyl methyl ketone",
                "16alpha,17-Isopropylidenedioxy-6alpha-methylprogesterone",
                "17,20,21-trihydroxypregn-1-ene-3,11-dione",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "18-hydroxypregna-1,4,20-trien-3-one",
                "18-oxocortisol",
                "21-Acetyloxy-17-hydroxypregna-1,4-diene-3,20-dione",
                "21-Deoxycortisone",
                "3alpha,6alpha-Dihydroxy-5beta-pregnan-20-one",
                "3beta,14beta-dihydroxy-5beta-pregnane-20-one",
                "5a-Tetrahydrocortisol",
                "5alpha-dihydrodeoxycorticosterone",
                "6alpha-Chloro-17-acetoxyprogesterone",
                "9alpha-Fluoro-6alpha-methylprednisolone 21-acetate",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Delta(6)-prednisolone",
                "Delta(6)-prednisone",
                "Dexamethasone 21-(4-Pyridinecarboxylate)",
                "Dexamethasone acetate",
                "Ethyltestosterone",
                "Prednisolone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "betamethasone",
                "corticosteroid hormone",
                "corticosterone",
                "dexamethasone",
                "fludrocortisone",
                "fludrocortisone acetate",
                "flumethasone",
                "methylprednisolone acetate",
                "nandrolone",
                "norethandrolone",
                "oxandrolone",
                "prednisolone",
                "prednisone",
                "progesterone",
                "stanozolol",
                "tetrahydrocortisol",
                "trenbolone",
                "triamcinolone",
                "triamcinolone acetonide"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Testosterone (T)",
            "Estradiol (E2)",
            "Progesterone (P4)",
            "Cortisol (CORT)",
            "Aldosterone (ALDO)",
            "Dehydroepiandrosterone (DHEA)",
            "Androstenedione (A4)",
            "Dihydrotestosterone (DHT)",
            "Estriol (E3)",
            "Estrone (E1)",
            "Corticosterone (B)",
            "11-Deoxycortisol (S)",
            "Pregnenolone (PREG)",
            "17\u03b1-Hydroxypregnenolone (17-OHPREG)",
            "17\u03b1-Hydroxyprogesterone (17-OHP4)",
            "11-Deoxycorticosterone (DOC)",
            "Cholecalciferol (Vitamin D3)",
            "Ergocalciferol (Vitamin D2)",
            "Calcitriol (1,25(OH)2D3)",
            "Calcidiol (25(OH)D3)",
            "Androstanediol (3\u03b1-Diol)",
            "Androstanedione (3\u03b2-Diol)",
            "Androsterone (ADT)",
            "Etiocholanolone (ETIO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Progesterone (P4)",
                "Cortisol (CORT)",
                "Aldosterone (ALDO)",
                "Corticosterone (B)",
                "11-Deoxycortisol (S)",
                "11-Deoxycorticosterone (DOC)"
            ],
            "mismatches": [
                "Testosterone (T)",
                "Estradiol (E2)",
                "Dehydroepiandrosterone (DHEA)",
                "Androstenedione (A4)",
                "Dihydrotestosterone (DHT)",
                "Estriol (E3)",
                "Estrone (E1)",
                "Pregnenolone (PREG)",
                "17\u03b1-Hydroxypregnenolone (17-OHPREG)",
                "17\u03b1-Hydroxyprogesterone (17-OHP4)",
                "Cholecalciferol (Vitamin D3)",
                "Ergocalciferol (Vitamin D2)",
                "Calcitriol (1,25(OH)2D3)",
                "Calcidiol (25(OH)D3)",
                "Androstanediol (3\u03b1-Diol)",
                "Androstanedione (3\u03b2-Diol)",
                "Androsterone (ADT)",
                "Etiocholanolone (ETIO)"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "11-deoxycortisol",
                "15alpha-Hydroxyprogesterone",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17-Hydroxy-19-nor-17alpha-pregn-5(10)-en-3-one",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "1alpha,2beta,25-trihydroxy vitamin D3",
                "21-Deoxycortisone",
                "24-Nor-5beta-chol-22-ene-3alpha,12alpha-diol",
                "24-Nor-5beta-chol-22-ene-3alpha,7alpha,12alpha-triol",
                "3alpha,17alpha-Dihydroxy-5beta-pregnane-11,20-dione",
                "3beta-Hydroxypregn-5-ene",
                "4-Chloroprogesterone",
                "5alpha-Pregnane-3alpha,20alpha-diol",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "Alphaxolone",
                "C21-steroid hormone",
                "Delta(1)-dihydrotestosterone",
                "Deoxycorticosterone acetate",
                "Ethyltestosterone",
                "Pregn-5-ene-3beta,20alpha-diol",
                "Pregnane-3,3-diol",
                "aldosterone",
                "anabolic androgenic steroid",
                "clocortolone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "estetrol",
                "mineralocorticoid",
                "norgesterone",
                "oxandrolone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Androstenedione (AD)",
            "Androstenediol",
            "Androstenol",
            "Androstenone",
            "Aldosterone (ALDO)",
            "Allopregnanolone",
            "Cholesterol",
            "Cortisol (CORT)",
            "Cortisone",
            "Dehydroepiandrosterone (DHEA)",
            "Estradiol (E2)",
            "Estrone (E1)",
            "Estriol",
            "Estrogen",
            "Progesterone (P4)",
            "Pregnenolone",
            "Testosterone (T)",
            "11-Deoxycortisol",
            "17-Hydroxyprogesterone",
            "17-Hydroxypregnenolone",
            "Allobetahyroxysterol",
            "Betamethasone",
            "Cholecalciferol",
            "Cortisol acetate",
            "Dehydroepiandrosterone sulfate",
            "Estrone sulfate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aldosterone (ALDO)",
                "Cortisol (CORT)",
                "Cortisone",
                "Betamethasone",
                "11-Deoxycortisol",
                "Progesterone (P4)"
            ],
            "mismatches": [
                "Androstenedione (AD)",
                "Androstenediol",
                "Androstenol",
                "Androstenone",
                "Allopregnanolone",
                "Cholesterol",
                "Dehydroepiandrosterone (DHEA)",
                "Estradiol (E2)",
                "Estrone (E1)",
                "Estriol",
                "Estrogen",
                "Pregnenolone",
                "Testosterone (T)",
                "17-Hydroxyprogesterone",
                "17-Hydroxypregnenolone",
                "Allobetahyroxysterol",
                "Cholecalciferol",
                "Cortisol acetate",
                "Dehydroepiandrosterone sulfate",
                "Estrone sulfate"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycortisol",
                "11alpha-Hydroxy-5beta-pregnane-3,20-dione",
                "15alpha-Hydroxyprogesterone",
                "15beta-hydroxycyproterone acetate",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "1alpha,2beta,25-trihydroxy vitamin D3",
                "21-Deoxycortisone",
                "24-Nor-5beta-chol-22-ene-3alpha,12alpha-diol",
                "24-Nor-5beta-chol-22-ene-3alpha,6alpha-diol",
                "24-Nor-5beta-chol-22-ene-3alpha,7alpha,12alpha-triol",
                "4-Chloroprogesterone",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "C21-steroid hormone",
                "Cortisone acetate",
                "Delta(1)-dihydrotestosterone",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "aldosterone",
                "anabolic androgenic steroid",
                "betamethasone",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "dexamethasone",
                "flumethasone",
                "mineralocorticoid",
                "nandrolone",
                "norethandrolone",
                "norgesterone",
                "progesterone",
                "trenbolone"
            ],
            "TP": 6,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": [
            "Androstenedione (ASD)",
            "Androsterone (ADT)",
            "Aldosterone (ALD)",
            "Cortisol (CRT)",
            "Cortisone (CSN)",
            "Dihydrotestosterone (DHT)",
            "Estriol (E3)",
            "Estrone (E1)",
            "17\u03b2-Estradiol (E2)",
            "Estriol 16\u03b1-glucuronide (E3G)",
            "Estriol 3-glucuronide (E33G)",
            "17\u03b1-Hydroxyprogesterone (17\u03b1-OHP)",
            "11-Deoxycorticosterone (11-DOCS)",
            "11-Deoxycortisol (11-DOCSL)",
            "Dehydroepiandrosterone (DHEA)",
            "Dehydroepiandrosterone sulfate (DHEAS)",
            "Pregnenolone (P5)",
            "Pregnenolone sulfate (P5S)",
            "Progesterone (P4)",
            "17\u03b1-Hydroxypregnenolone (17\u03b1-OHP5)",
            "Testosterone (T)",
            "Triamcinolone (TRL)",
            "Triamcinolone acetonide (TRLA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aldosterone (ALD)",
                "Cortisol (CRT)",
                "Cortisone (CSN)",
                "11-Deoxycorticosterone (11-DOCS)",
                "11-Deoxycortisol (11-DOCSL)",
                "Progesterone (P4)",
                "Triamcinolone (TRL)",
                "Triamcinolone acetonide (TRLA)"
            ],
            "mismatches": [
                "Androstenedione (ASD)",
                "Androsterone (ADT)",
                "Dihydrotestosterone (DHT)",
                "Estriol (E3)",
                "Estrone (E1)",
                "17\u03b2-Estradiol (E2)",
                "Estriol 16\u03b1-glucuronide (E3G)",
                "Estriol 3-glucuronide (E33G)",
                "17\u03b1-Hydroxyprogesterone (17\u03b1-OHP)",
                "Dehydroepiandrosterone (DHEA)",
                "Dehydroepiandrosterone sulfate (DHEAS)",
                "Pregnenolone (P5)",
                "Pregnenolone sulfate (P5S)",
                "17\u03b1-Hydroxypregnenolone (17\u03b1-OHP5)",
                "Testosterone (T)"
            ],
            "true_referents": [
                "11-deoxycorticosterone",
                "11-deoxycorticosterone pivalate",
                "11-deoxycortisol",
                "15alpha-Hydroxyprogesterone",
                "15beta-hydroxycyproterone acetate",
                "16-Hydroxyprogesterone",
                "16-a-Hydroxypregnenolone",
                "17beta-hydroxy-17-methylestra-4,9,11-trien-3-one",
                "17beta-hydroxyandrostan-1,3-dione",
                "21-Deoxycortisone",
                "3alpha,17alpha-dihydroxy-5beta-pregnan-20-one",
                "3beta-Hydroxypregn-5-ene",
                "4-Chloroprogesterone",
                "5alpha-dihydrodeoxycorticosterone",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol",
                "5beta-Pregnane-3alpha,17alpha,20alpha-triol-11-one",
                "C21-steroid hormone",
                "Cortisone acetate",
                "Delta(1)-dihydrotestosterone",
                "Deoxycorticosterone acetate",
                "Ethyltestosterone",
                "Hydroxyprogesterone acetate",
                "Pregn-5-ene-3beta,20alpha-diol",
                "Pregn-5-ene-3beta,20beta-diol",
                "Triamcinolone hexacetonide",
                "aldosterone",
                "anabolic androgenic steroid",
                "corticosteroid hormone",
                "corticosterone",
                "cortisol",
                "cortisone",
                "estetrol",
                "mineralocorticoid",
                "norgesterone",
                "oxandrolone",
                "prednisolone steaglate",
                "progesterone",
                "stanozolol",
                "trenbolone",
                "triamcinolone",
                "triamcinolone acetonide"
            ],
            "TP": 8,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "steroid hormone",
        "domain": "biology",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]