[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "methane (CH4)",
            "ethane (C2H6)",
            "propane (C3H8)",
            "butane (C4H10)",
            "pentane (C5H12)",
            "hexane (C6H14)",
            "heptane (C7H16)",
            "octane (C8H18)",
            "nonane (C9H20)",
            "decane (C10H22)",
            "undecane (C11H24)",
            "dodecane (C12H26)",
            "tridecane (C13H28)",
            "tetradecane (C14H30)",
            "pentadecane (C15H32)",
            "hexadecane (C16H34)",
            "heptadecane (C17H36)",
            "octadecane (C18H38)",
            "nonadecane (C19H40)",
            "eicosane (C20H42)",
            "docosane (C22H46)",
            "tetracosane (C24H50)",
            "hexacosane (C26H54)",
            "octacosane (C28H58)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methane (CH4)",
                "ethane (C2H6)",
                "butane (C4H10)",
                "pentane (C5H12)",
                "hexane (C6H14)",
                "heptane (C7H16)",
                "octane (C8H18)",
                "nonane (C9H20)",
                "decane (C10H22)",
                "undecane (C11H24)",
                "dodecane (C12H26)",
                "tridecane (C13H28)",
                "tetradecane (C14H30)",
                "pentadecane (C15H32)",
                "hexadecane (C16H34)",
                "heptadecane (C17H36)",
                "octadecane (C18H38)",
                "nonadecane (C19H40)",
                "docosane (C22H46)",
                "tetracosane (C24H50)",
                "hexacosane (C26H54)",
                "octacosane (C28H58)"
            ],
            "mismatches": [
                "propane (C3H8)",
                "eicosane (C20H42)"
            ],
            "true_referents": [
                "1,2-octacosanediol",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexacosane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octacosane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tridecane",
                "undecane"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Eicosane (C20H42)",
            "Heneicosane (C21H44)",
            "Docosane (C22H46)",
            "Tricosane (C23H48)",
            "Tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Undecane (C11H24)",
                "Dodecane (C12H26)",
                "Tridecane (C13H28)",
                "Tetradecane (C14H30)",
                "Pentadecane (C15H32)",
                "Hexadecane (C16H34)",
                "Heptadecane (C17H36)",
                "Octadecane (C18H38)",
                "Nonadecane (C19H40)",
                "Docosane (C22H46)",
                "Tricosane (C23H48)",
                "Tetracosane (C24H50)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Eicosane (C20H42)",
                "Heneicosane (C21H44)"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH\u2084)",
            "Ethane (C\u2082H\u2086)",
            "Propane (C\u2083H\u2088)",
            "Butane (C\u2084H\u2081\u2080)",
            "Pentane (C\u2085H\u2081\u2082)",
            "Hexane (C\u2086H\u2081\u2084)",
            "Heptane (C\u2087H\u2081\u2086)",
            "Octane (C\u2088H\u2081\u2088)",
            "Nonane (C\u2089H\u2082\u2080)",
            "Decane (C\u2081\u2080H\u2082\u2082)",
            "Undecane (C\u2081\u2081H\u2082\u2084)",
            "Dodecane (C\u2081\u2082H\u2082\u2086)",
            "Tridecane (C\u2081\u2083H\u2082\u2088)",
            "Tetradecane (C\u2081\u2084H\u2083\u2080)",
            "Pentadecane (C\u2081\u2085H\u2083\u2082)",
            "Hexadecane (C\u2081\u2086H\u2083\u2084)",
            "Heptadecane (C\u2081\u2087H\u2083\u2086)",
            "Octadecane (C\u2081\u2088H\u2083\u2088)",
            "Nonadecane (C\u2081\u2089H\u2084\u2080)",
            "Eicosane (C\u2082\u2080H\u2084\u2082)",
            "Heneicosane (C\u2082\u2081H\u2084\u2084)",
            "Docosane (C\u2082\u2082H\u2084\u2086)",
            "Tricosane (C\u2082\u2083H\u2084\u2088)",
            "Tetracosane (C\u2082\u2084H\u2085\u2080)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH\u2084)",
                "Ethane (C\u2082H\u2086)",
                "Propane (C\u2083H\u2088)",
                "Butane (C\u2084H\u2081\u2080)",
                "Pentane (C\u2085H\u2081\u2082)",
                "Hexane (C\u2086H\u2081\u2084)",
                "Heptane (C\u2087H\u2081\u2086)",
                "Octane (C\u2088H\u2081\u2088)",
                "Nonane (C\u2089H\u2082\u2080)",
                "Decane (C\u2081\u2080H\u2082\u2082)",
                "Undecane (C\u2081\u2081H\u2082\u2084)",
                "Dodecane (C\u2081\u2082H\u2082\u2086)",
                "Tridecane (C\u2081\u2083H\u2082\u2088)",
                "Tetradecane (C\u2081\u2084H\u2083\u2080)",
                "Pentadecane (C\u2081\u2085H\u2083\u2082)",
                "Hexadecane (C\u2081\u2086H\u2083\u2084)",
                "Heptadecane (C\u2081\u2087H\u2083\u2086)",
                "Octadecane (C\u2081\u2088H\u2083\u2088)",
                "Nonadecane (C\u2081\u2089H\u2084\u2080)",
                "Docosane (C\u2082\u2082H\u2084\u2086)",
                "Tricosane (C\u2082\u2083H\u2084\u2088)",
                "Tetracosane (C\u2082\u2084H\u2085\u2080)"
            ],
            "mismatches": [
                "Eicosane (C\u2082\u2080H\u2084\u2082)",
                "Heneicosane (C\u2082\u2081H\u2084\u2084)"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,4,6,10-tetramethylpentadecane",
                "2,4-dimethylhexane",
                "2,5,9-Trimethyldecane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2,9-dimethyldecane",
                "2-Methylhenicosane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "6-ethyl-2-methyl-Octane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "N-(eicosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "propane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH\u2084)",
            "Ethane (C\u2082H\u2086)",
            "Propane (C\u2083H\u2088)",
            "Butane (C\u2084H\u2081\u2080)",
            "Isobutane (2-Methylpropane)",
            "Pentane (C\u2085H\u2081\u2082)",
            "Isopentane (2-Methylbutane)",
            "Neopentane (2,2-Dimethylpropane)",
            "Hexane (C\u2086H\u2081\u2084)",
            "Cyclohexane (C\u2086H\u2081\u2082)",
            "Heptane (C\u2087H\u2081\u2086)",
            "Octane (C\u2088H\u2081\u2088)",
            "Nonane (C\u2089H\u2082\u2080)",
            "Decane (C\u2081\u2080H\u2082\u2082)",
            "Dodecane (C\u2081\u2082H\u2082\u2086)",
            "Tetradecane (C\u2081\u2084H\u2083\u2080)",
            "Hexadecane (C\u2081\u2086H\u2083\u2084)",
            "Octadecane (C\u2081\u2088H\u2083\u2088) - Stearic Acid component",
            "Eicosane (C\u2082\u2080H\u2084\u2082)",
            "Docosane (C\u2082\u2082H\u2084\u2086)",
            "Tetracosane (C\u2082\u2084H\u2085\u2080)",
            "Petroleum Jelly (mixture containing various alkanes)",
            "Paraffin Wax (mixture containing various alkanes)",
            "Natural Gas (primarily methane, with some ethane and propane)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methane",
                "ethane",
                "propane",
                "butane",
                "isobutane",
                "pentane",
                "isopentane",
                "neopentane",
                "hexane",
                "heptane",
                "octane",
                "nonane",
                "decane",
                "dodecane",
                "tetradecane",
                "hexadecane",
                "octadecane",
                "docosane",
                "tetracosane"
            ],
            "mismatches": [
                "cyclohexane",
                "eicosane",
                "petroleum jelly",
                "paraffin wax",
                "natural gas"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,4-dimethylhexane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-Methylpentane",
                "2-methylicosane",
                "3,3-dimethylhexane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "3-methylpentane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "6-Methylheptadecane",
                "6-ethyl-2-methyl-Octane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "N-(eicosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "methane",
                "neopentane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentane",
                "propane",
                "tetracosane",
                "tetradecane"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Icosane (C20H42)",
            "Heneicosane (C21H44)",
            "Docosane (C22H46)",
            "Tricosane (C23H48)",
            "Tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Undecane (C11H24)",
                "Dodecane (C12H26)",
                "Tridecane (C13H28)",
                "Tetradecane (C14H30)",
                "Pentadecane (C15H32)",
                "Hexadecane (C16H34)",
                "Heptadecane (C17H36)",
                "Octadecane (C18H38)",
                "Nonadecane (C19H40)",
                "Docosane (C22H46)",
                "Tricosane (C23H48)",
                "Tetracosane (C24H50)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Icosane (C20H42)",
                "Heneicosane (C21H44)"
            ],
            "true_referents": [
                "2,2,4,4,6,8,8-heptamethylnonane",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Eicosane (C20H42)",
            "Heneicosane (C21H44)",
            "Docosane (C22H46)",
            "Tricosane (C23H48)",
            "Tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Undecane (C11H24)",
                "Dodecane (C12H26)",
                "Tridecane (C13H28)",
                "Tetradecane (C14H30)",
                "Pentadecane (C15H32)",
                "Hexadecane (C16H34)",
                "Heptadecane (C17H36)",
                "Octadecane (C18H38)",
                "Nonadecane (C19H40)",
                "Docosane (C22H46)",
                "Tricosane (C23H48)",
                "Tetracosane (C24H50)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Eicosane (C20H42)",
                "Heneicosane (C21H44)"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Icosane (C20H42)",
            "Heneicosane (C21H44)",
            "Docosane (C22H46)",
            "Tricosane (C23H48)",
            "Tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Undecane (C11H24)",
                "Dodecane (C12H26)",
                "Tridecane (C13H28)",
                "Tetradecane (C14H30)",
                "Pentadecane (C15H32)",
                "Hexadecane (C16H34)",
                "Heptadecane (C17H36)",
                "Octadecane (C18H38)",
                "Nonadecane (C19H40)",
                "Docosane (C22H46)",
                "Tricosane (C23H48)",
                "Tetracosane (C24H50)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Icosane (C20H42)",
                "Heneicosane (C21H44)"
            ],
            "true_referents": [
                "2,2,4,4,6,8,8-heptamethylnonane",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Eicosane (C20H42)",
            "Heneicosane (C21H44)",
            "Docosane (C22H46)",
            "Tricosane (C23H48)",
            "Tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Undecane (C11H24)",
                "Dodecane (C12H26)",
                "Tridecane (C13H28)",
                "Tetradecane (C14H30)",
                "Pentadecane (C15H32)",
                "Hexadecane (C16H34)",
                "Heptadecane (C17H36)",
                "Octadecane (C18H38)",
                "Nonadecane (C19H40)",
                "Docosane (C22H46)",
                "Tricosane (C23H48)",
                "Tetracosane (C24H50)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Eicosane (C20H42)",
                "Heneicosane (C21H44)"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH\u2084)",
            "Ethane (C\u2082H\u2086)",
            "Propane (C\u2083H\u2088)",
            "Butane (C\u2084H\u2081\u2080)",
            "Pentane (C\u2085H\u2081\u2082)",
            "Hexane (C\u2086H\u2081\u2084)",
            "Heptane (C\u2087H\u2081\u2086)",
            "Octane (C\u2088H\u2081\u2088)",
            "Nonane (C\u2089H\u2082\u2080)",
            "Decane (C\u2081\u2080H\u2082\u2082)",
            "Undecane (C\u2081\u2081H\u2082\u2084)",
            "Dodecane (C\u2081\u2082H\u2082\u2086)",
            "Tridecane (C\u2081\u2083H\u2082\u2088)",
            "Tetradecane (C\u2081\u2084H\u2083\u2080)",
            "Pentadecane (C\u2081\u2085H\u2083\u2082)",
            "Hexadecane (C\u2081\u2086H\u2083\u2084)",
            "Heptadecane (C\u2081\u2087H\u2083\u2086)",
            "Octadecane (C\u2081\u2088H\u2083\u2088)",
            "Nonadecane (C\u2081\u2089H\u2084\u2080)",
            "Eicosane (C\u2082\u2080H\u2084\u2082)",
            "Heneicosane (C\u2082\u2081H\u2084\u2084)",
            "Docosane (C\u2082\u2082H\u2084\u2086)",
            "Tricosane (C\u2082\u2083H\u2084\u2088)",
            "Tetracosane (C\u2082\u2084H\u2085\u2080)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH\u2084)",
                "Ethane (C\u2082H\u2086)",
                "Propane (C\u2083H\u2088)",
                "Butane (C\u2084H\u2081\u2080)",
                "Pentane (C\u2085H\u2081\u2082)",
                "Hexane (C\u2086H\u2081\u2084)",
                "Heptane (C\u2087H\u2081\u2086)",
                "Octane (C\u2088H\u2081\u2088)",
                "Nonane (C\u2089H\u2082\u2080)",
                "Decane (C\u2081\u2080H\u2082\u2082)",
                "Undecane (C\u2081\u2081H\u2082\u2084)",
                "Dodecane (C\u2081\u2082H\u2082\u2086)",
                "Tridecane (C\u2081\u2083H\u2082\u2088)",
                "Tetradecane (C\u2081\u2084H\u2083\u2080)",
                "Pentadecane (C\u2081\u2085H\u2083\u2082)",
                "Hexadecane (C\u2081\u2086H\u2083\u2084)",
                "Heptadecane (C\u2081\u2087H\u2083\u2086)",
                "Octadecane (C\u2081\u2088H\u2083\u2088)",
                "Nonadecane (C\u2081\u2089H\u2084\u2080)",
                "Docosane (C\u2082\u2082H\u2084\u2086)",
                "Tricosane (C\u2082\u2083H\u2084\u2088)",
                "Tetracosane (C\u2082\u2084H\u2085\u2080)"
            ],
            "mismatches": [
                "Eicosane (C\u2082\u2080H\u2084\u2082)",
                "Heneicosane (C\u2082\u2081H\u2084\u2084)"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,4,6,10-tetramethylpentadecane",
                "2,4-dimethylhexane",
                "2,5,9-Trimethyldecane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2,9-dimethyldecane",
                "2-Methylhenicosane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "6-ethyl-2-methyl-Octane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "N-(eicosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "propane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Icosane (C20H42)",
            "Isobutane",
            "Neopentane",
            "Cyclopropane",
            "Cyclohexane"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Undecane (C11H24)",
                "Dodecane (C12H26)",
                "Tridecane (C13H28)",
                "Tetradecane (C14H30)",
                "Pentadecane (C15H32)",
                "Hexadecane (C16H34)",
                "Heptadecane (C17H36)",
                "Octadecane (C18H38)",
                "Nonadecane (C19H40)",
                "Icosane (C20H42)",
                "Isobutane",
                "Neopentane"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Cyclopropane",
                "Cyclohexane"
            ],
            "true_referents": [
                "2,2,4,4,6,8,8-heptamethylnonane",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-methyl-Pentadecane",
                "3,3-dimethylhexane",
                "3,5-dimethyl-Heptane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "dodecane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isooctane",
                "isopentane",
                "methane",
                "neopentane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentadecane",
                "pentane",
                "tetradecane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (C1)",
            "Ethane (C2)",
            "Propane (C3)",
            "Butane (C4)",
            "Pentane (C5)",
            "Hexane (C6)",
            "Heptane (C7)",
            "Octane (C8)",
            "Nonane (C9)",
            "Decane (C10)",
            "Undecane (C11)",
            "Dodecane (C12)",
            "Tridecane (C13)",
            "Tetradecane (C14)",
            "Pentadecane (C15)",
            "Hexadecane (C16)",
            "Heptadecane (C17)",
            "Octadecane (C18)",
            "Nonadecane (C19)",
            "Icosane (C20)",
            "Henicosane (C21)",
            "Docosane (C22)",
            "Tricosane (C23)",
            "Tetracosane (C24)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methane",
                "ethane",
                "propane",
                "butane",
                "pentane",
                "hexane",
                "heptane",
                "octane",
                "nonane",
                "decane",
                "undecane",
                "dodecane",
                "tridecane",
                "tetradecane",
                "pentadecane",
                "hexadecane",
                "heptadecane",
                "octadecane",
                "nonadecane",
                "icosane",
                "henicosane",
                "docosane",
                "tricosane",
                "tetracosane"
            ],
            "mismatches": [],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,6-Dimethyldecane",
                "2-Methylhenicosane",
                "2-methyl-Hexadecane",
                "2-methyl-Pentadecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3-ethyl-3-methyldecane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "Dodecane, 4-methyl-",
                "N-(docosanoyl)eicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "propane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Icosane (C20H42)",
            "Henicosane (C21H44)",
            "Docosane (C22H46)",
            "Tricosane (C23H48)",
            "Tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane (CH4)",
                "Ethane (C2H6)",
                "Butane (C4H10)",
                "Pentane (C5H12)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)",
                "Decane (C10H22)",
                "Undecane (C11H24)",
                "Dodecane (C12H26)",
                "Tridecane (C13H28)",
                "Tetradecane (C14H30)",
                "Pentadecane (C15H32)",
                "Hexadecane (C16H34)",
                "Heptadecane (C17H36)",
                "Octadecane (C18H38)",
                "Nonadecane (C19H40)",
                "Henicosane (C21H44)",
                "Docosane (C22H46)",
                "Tricosane (C23H48)",
                "Tetracosane (C24H50)"
            ],
            "mismatches": [
                "Propane (C3H8)",
                "Icosane (C20H42)"
            ],
            "true_referents": [
                "2,2,4,4,6,8,8-heptamethylnonane",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Methane (CH4)",
            "Ethane (C2H6)",
            "Propane (C3H8)",
            "Butane (C4H10)",
            "Pentane (C5H12)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)",
            "Decane (C10H22)",
            "Undecane (C11H24)",
            "Dodecane (C12H26)",
            "Tridecane (C13H28)",
            "Tetradecane (C14H30)",
            "Pentadecane (C15H32)",
            "Hexadecane (C16H34)",
            "Heptadecane (C17H36)",
            "Octadecane (C18H38)",
            "Nonadecane (C19H40)",
            "Eicosane (C20H42)",
            "Heneicosane (C21H44)",
            "Docosane (C22H46)",
            "Tricosane (C23H48)",
            "Tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane",
                "Ethane",
                "Butane",
                "Pentane",
                "Hexane",
                "Heptane",
                "Octane",
                "Nonane",
                "Decane",
                "Undecane",
                "Dodecane",
                "Tridecane",
                "Tetradecane",
                "Pentadecane",
                "Hexadecane",
                "Heptadecane",
                "Octadecane",
                "Nonadecane",
                "Docosane",
                "Tricosane",
                "Tetracosane"
            ],
            "mismatches": [
                "Propane",
                "Eicosane",
                "Heneicosane"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "2-methylicosane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "Hexane (C6H14)",
            "Octane (C8H18)",
            "Decane (C10H22)",
            "Hexadecane (C16H34)",
            "Octadecane (C18H38)",
            "Tetradecane (C14H30)",
            "Dodecane (C12H26)",
            "Nonane (C9H20)",
            "Undecane (C11H24)",
            "Tridecane (C13H28)",
            "Pentadecane (C15H32)",
            "Heptadecane (C17H36)",
            "Nonacosane (C29H60)",
            "Dotriacontane (C32H66)",
            "Octacosane (C28H58)",
            "Tetratriacontane (C34H70)",
            "Hexatriacontane (C36H74)",
            "Octatriacontane (C38H78)",
            "Decatriacontane (C40H82)",
            "Hexatriacontane (C42H86)",
            "Triacontane (C30H62)",
            "Octadecene (C18H36)",
            "Decene (C10H20)",
            "Hexene (C6H12)",
            "Octene (C8H16)",
            "Decene (C12H24)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexane (C6H14)",
                "Octane (C8H18)",
                "Decane (C10H22)",
                "Hexadecane (C16H34)",
                "Octadecane (C18H38)",
                "Tetradecane (C14H30)",
                "Dodecane (C12H26)",
                "Nonane (C9H20)",
                "Undecane (C11H24)",
                "Tridecane (C13H28)",
                "Pentadecane (C15H32)",
                "Heptadecane (C17H36)",
                "Nonacosane (C29H60)",
                "Dotriacontane (C32H66)",
                "Octacosane (C28H58)",
                "Tetratriacontane (C34H70)",
                "Hexatriacontane (C36H74)",
                "Triacontane (C30H62)"
            ],
            "mismatches": [
                "Octatriacontane (C38H78)",
                "Decatriacontane (C40H82)",
                "Hexatriacontane (C42H86)",
                "Octadecene (C18H36)",
                "Decene (C10H20)",
                "Hexene (C6H12)",
                "Octene (C8H16)",
                "Decene (C12H24)"
            ],
            "true_referents": [
                "1,2-octacosanediol",
                "1,2-octadecanediol",
                "10,22-Dimethyldotriacontane",
                "13,17-Dimethyltritetracontane",
                "15-Methyltetratriacontane",
                "2,2,4,4,6,8,8-heptamethylnonane",
                "2,3,4-Trimethyltriacontane",
                "2,3,5,8-tetramethyldecane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethyl-Nonane",
                "2,6-dimethylheptadecane",
                "2-methyl-Pentadecane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3,7-dimethyl-Decane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "5,6-dimethyl-Decane",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "decane",
                "dodecane",
                "dotriacontane",
                "heptadecane",
                "hexadecane",
                "hexane",
                "hexatriacontane",
                "isobutane",
                "nonacosane",
                "nonane",
                "octacosane",
                "octadecane",
                "octane",
                "pentadecane",
                "tetradecane",
                "tetratriacontane",
                "triacontane",
                "tridecane",
                "tritriacontane",
                "undecane"
            ],
            "TP": 18,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            "methane (CH4)",
            "ethane (C2H6)",
            "propane (C3H8)",
            "butane (C4H10)",
            "pentane (C5H12)",
            "hexane (C6H14)",
            "heptane (C7H16)",
            "octane (C8H18)",
            "nonane (C9H20)",
            "decane (C10H22)",
            "undecane (C11H24)",
            "dodecane (C12H26)",
            "tridecane (C13H28)",
            "tetradecane (C14H30)",
            "pentadecane (C15H32)",
            "hexadecane (C16H34)",
            "heptadecane (C17H36)",
            "octadecane (C18H38)",
            "nonadecane (C19H40)",
            "icosane (C20H42)",
            "heneicosane (C21H44)",
            "docosane (C22H46)",
            "tricosane (C23H48)",
            "tetracosane (C24H50)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methane (CH4)",
                "ethane (C2H6)",
                "butane (C4H10)",
                "pentane (C5H12)",
                "hexane (C6H14)",
                "heptane (C7H16)",
                "octane (C8H18)",
                "nonane (C9H20)",
                "decane (C10H22)",
                "undecane (C11H24)",
                "dodecane (C12H26)",
                "tridecane (C13H28)",
                "tetradecane (C14H30)",
                "pentadecane (C15H32)",
                "hexadecane (C16H34)",
                "heptadecane (C17H36)",
                "octadecane (C18H38)",
                "nonadecane (C19H40)",
                "docosane (C22H46)",
                "tricosane (C23H48)",
                "tetracosane (C24H50)"
            ],
            "mismatches": [
                "propane (C3H8)",
                "icosane (C20H42)",
                "heneicosane (C21H44)"
            ],
            "true_referents": [
                "2,2,4,4,6,8,8-heptamethylnonane",
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,4-dimethylhexane",
                "2,6,8-trimethyldecane",
                "2,6-Dimethyldecane",
                "2,6-dimethylheptadecane",
                "2-Methylhenicosane",
                "2-methyl-Pentadecane",
                "3,6-dimethyl-Decane",
                "3,6-dimethyl-Octane",
                "3-ethylpentane",
                "3-methyl-Tridecane",
                "3-methyl-pentadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "5,6-dimethyl-Decane",
                "9-Methylnonadecane",
                "N-(docosanoyl)-4-hydroxyeicosasphinganine",
                "Nonane, 2,5-dimethyl-",
                "Nonane, 4-methyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "butane",
                "decane",
                "docosane",
                "dodecane",
                "ethane",
                "henicosane",
                "heptacosane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "icosane",
                "isobutane",
                "isopentane",
                "methane",
                "nonadecane",
                "nonane",
                "octadecane",
                "octane",
                "pentacosane",
                "pentadecane",
                "pentane",
                "tetracosane",
                "tetradecane",
                "tricosane",
                "tridecane",
                "undecane"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "alkane",
        "domain": "chemistry",
        "response": [
            {
                "referent": "Methane",
                "identifier": "CH4"
            },
            {
                "referent": "Ethane",
                "identifier": "C2H6"
            },
            {
                "referent": "Propane",
                "identifier": "C3H8"
            },
            {
                "referent": "Butane",
                "identifier": "C4H10"
            },
            {
                "referent": "Pentane",
                "identifier": "C5H12"
            },
            {
                "referent": "Hexane",
                "identifier": "C6H14"
            },
            {
                "referent": "Heptane",
                "identifier": "C7H16"
            },
            {
                "referent": "Octane",
                "identifier": "C8H18"
            },
            {
                "referent": "Nonane",
                "identifier": "C9H20"
            },
            {
                "referent": "Decane",
                "identifier": "C10H22"
            },
            {
                "referent": "Undecane",
                "identifier": "C11H24"
            },
            {
                "referent": "Dodecane",
                "identifier": "C12H26"
            },
            {
                "referent": "Tridecane",
                "identifier": "C13H28"
            },
            {
                "referent": "Tetradecane",
                "identifier": "C14H30"
            },
            {
                "referent": "Pentadecane",
                "identifier": "C15H32"
            },
            {
                "referent": "Hexadecane",
                "identifier": "C16H34"
            },
            {
                "referent": "Heptadecane",
                "identifier": "C17H36"
            },
            {
                "referent": "Octadecane",
                "identifier": "C18H38"
            },
            {
                "referent": "Nonadecane",
                "identifier": "C19H40"
            },
            {
                "referent": "Eicosane",
                "identifier": "C20H42"
            },
            {
                "referent": "Heneicosane",
                "identifier": "C21H44"
            },
            {
                "referent": "Docosane",
                "identifier": "C22H46"
            },
            {
                "referent": "Tricosane",
                "identifier": "C23H48"
            },
            {
                "referent": "Tetracosane",
                "identifier": "C24H50"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methane",
                "Ethane",
                "Propane",
                "Hexane",
                "Heptane",
                "Octane",
                "Undecane",
                "Dodecane",
                "Tetradecane",
                "Pentadecane",
                "Hexadecane",
                "Heptadecane",
                "Octadecane",
                "Tricosane"
            ],
            "mismatches": [
                "Butane",
                "Pentane",
                "Nonane",
                "Decane",
                "Tridecane",
                "Nonadecane",
                "Eicosane",
                "Heneicosane",
                "Docosane",
                "Tetracosane"
            ],
            "true_referents": [
                "2,3,5,8-tetramethyldecane",
                "2,3-dimethylpentane",
                "2,6-Dimethyldecane",
                "3,6-dimethyl-Octane",
                "3-ethyl-3-methyldecane",
                "3-ethyl-5-(2-ethylbutyl)-octadecane",
                "4,6-dimethyldodecane",
                "4-methyl-Tetradecane",
                "Hexane, 2,2,5-trimethyl-",
                "Undecane, 2,9-dimethyl-",
                "Undecane, 2-methyl-",
                "dodecane",
                "ethane",
                "heptadecane",
                "heptane",
                "hexadecane",
                "hexane",
                "isobutane",
                "methane",
                "octadecane",
                "octane",
                "pentadecane",
                "pentane",
                "propane",
                "tetradecane",
                "tricosane",
                "undecane"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    }
]