[
    {
        "test": "decide-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "stanozolol",
            "boldenone",
            "bolandiol dipropionate",
            "oxymetholone",
            "oxandrolone",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "stanozolol",
                "boldenone",
                "bolandiol dipropionate",
                "oxymetholone",
                "oxandrolone"
            ],
            "mismatches": [
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 7,
            "FP": 1,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "phi-v4",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "phi-v4",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol dipropionate",
            "bolandiol",
            "oxandrolone",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol dipropionate",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "phi-v4",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "metholone",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "mismatches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 6,
            "FP": 8,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 4,
            "FP": 0,
            "FN": 4
        }
    },
    {
        "test": "decide-referents",
        "responder": "gemma-3",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "(5alpha,25S)-3-oxocholestan-26-oic acid",
            "(5alpha)-3-oxocholestan-26-oic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "(5alpha)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gemma-3",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gemma-3",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "oxandrolone",
            "bolandiol",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "mismatches": [
                "fluoxymesterone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "oxandrolone",
                "bolandiol"
            ],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 6,
            "FP": 6,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "(5alpha,25S)-3-oxocholestan-26-oic acid",
            "(5alpha)-3-oxocholestan-26-oic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "(5alpha)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o-mini",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o-mini",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o-mini",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "Medrysone",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "Medrysone",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 3,
            "FP": 0,
            "FN": 5
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4-32k",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "(5alpha,25S)-3-oxocholestan-26-oic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4-32k",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4-32k",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "(5alpha,25S)-3-oxocholestan-26-oic acid",
            "(5alpha)-3-oxocholestan-26-oic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "(5alpha)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "metholone",
            "prednisolone valerate acetate",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "mismatches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 8,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "o1-mini",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "o1-mini",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol dipropionate",
            "bolandiol",
            "oxandrolone",
            "LSM-4981",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol dipropionate",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [
                "LSM-4981",
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "o1-mini",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 4,
            "FP": 0,
            "FN": 4
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "(5alpha,25S)-3-oxocholestan-26-oic acid",
            "(5alpha)-3-oxocholestan-26-oic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "(5alpha)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "oxandrolone",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "oxandrolone"
            ],
            "mismatches": [
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 6,
            "FP": 1,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "(25S)-Delta(7)-dafachronic acid",
            "(5alpha,25S)-3-oxocholestan-26-oic acid",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "(5alpha)-3-oxocholestan-26-oic acid",
            "prednisolone valerate acetate",
            "bolandiol dipropionate",
            "bolandiol",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "mismatches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "(5alpha)-3-oxocholestan-26-oic acid",
                "bolandiol dipropionate",
                "bolandiol",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 6,
            "FP": 8,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "(5alpha,25S)-3-oxocholestan-26-oic acid",
            "(5alpha)-3-oxocholestan-26-oic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "(5alpha)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "Medrysone",
            "prednisolone valerate acetate",
            "oxymetholone",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Medrysone",
                "prednisolone valerate acetate"
            ],
            "mismatches": [
                "fluoxymesterone",
                "oxymetholone",
                "oxandrolone"
            ],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 2,
            "FP": 3,
            "FN": 6
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "prednisolone valerate acetate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "oxandrolone",
            "19-Nor-5alpha-pregnane-3,20-dione"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione"
            ],
            "mismatches": [
                "fluoxymesterone",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "oxandrolone"
            ],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 5,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "dafachronic acids",
        "domain": "biology",
        "response": [
            "(25S)-Delta(7)-dafachronic acid",
            "Delta(4)-dafachronic acid",
            "(25R)-Delta(4)-dafachronic acid",
            "Delta(1),Delta(7)-dafachronic acid",
            "Delta(7)-dafachronic acid",
            "(25S)-Delta(4)-dafachronic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(4)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25S)-Delta(4)-dafachronic acid"
            ],
            "mismatches": [],
            "true_referents": [
                "(25S)-Delta(4)-dafachronic acid",
                "(25S)-Delta(7)-dafachronic acid",
                "Delta(7)-dafachronic acid",
                "(25R)-Delta(4)-dafachronic acid",
                "Delta(1),Delta(7)-dafachronic acid",
                "(5alpha,25S)-3-oxocholestan-26-oic acid",
                "Delta(4)-dafachronic acid",
                "(5alpha)-3-oxocholestan-26-oic acid"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "anabolic androgenic steroid",
        "domain": "biology",
        "response": [
            "fluoxymesterone",
            "metholone",
            "bolandiol dipropionate",
            "oxymetholone",
            "stanozolol",
            "boldenone",
            "bolandiol",
            "oxandrolone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "fluoxymesterone",
                "metholone",
                "bolandiol dipropionate",
                "oxymetholone",
                "stanozolol",
                "boldenone",
                "bolandiol",
                "oxandrolone"
            ],
            "mismatches": [],
            "true_referents": [
                "fluoxymesterone",
                "bolandiol dipropionate",
                "oxymetholone",
                "metholone",
                "boldenone",
                "bolandiol",
                "oxandrolone",
                "stanozolol"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "corticosteroid hormone",
        "domain": "biology",
        "response": [
            "6alpha-hydroxyprednisone",
            "5a-Tetrahydrocortisol",
            "Medrysone",
            "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
            "prednisolone valerate acetate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "6alpha-hydroxyprednisone",
                "5a-Tetrahydrocortisol",
                "Medrysone",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "prednisolone valerate acetate"
            ],
            "mismatches": [],
            "true_referents": [
                "Medrysone",
                "prednisolone valerate acetate",
                "19-Nor-5alpha-pregnane-3,20-dione",
                "5a-Tetrahydrocortisol",
                "HDOPA",
                "9-Fluoro-11beta-hydroxy-16alpha-methylpregn-4-ene-3,20-dione",
                "6alpha-hydroxyprednisone",
                "LSM-4981"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    }
]