[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "acetylene (C2H2)",
            "propyne (C3H4)",
            "1-butyne (C4H6)",
            "2-butyne (C4H6)",
            "1-pentyne (C5H8)",
            "2-pentyne (C5H8)",
            "3-pentyne (C5H8)",
            "1-hexyne (C6H10)",
            "2-hexyne (C6H10)",
            "3-hexyne (C6H10)",
            "1-heptyne (C7H12)",
            "2-heptyne (C7H12)",
            "3-heptyne (C7H12)",
            "1-octyne (C8H14)",
            "2-octyne (C8H14)",
            "3-octyne (C8H14)",
            "1-nonyne (C9H16)",
            "2-nonyne (C9H16)",
            "3-nonyne (C9H16)",
            "1-decyne (C10H18)",
            "2-decyne (C10H18)",
            "3-decyne (C10H18)",
            "1-undecyne (C11H20)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetylene (C2H2)",
                "propyne (C3H4)",
                "1-butyne (C4H6)",
                "2-butyne (C4H6)",
                "1-pentyne (C5H8)",
                "2-pentyne (C5H8)",
                "3-pentyne (C5H8)",
                "1-hexyne (C6H10)",
                "2-hexyne (C6H10)",
                "3-hexyne (C6H10)",
                "1-heptyne (C7H12)",
                "2-heptyne (C7H12)",
                "3-heptyne (C7H12)",
                "1-octyne (C8H14)",
                "2-octyne (C8H14)",
                "3-octyne (C8H14)",
                "1-nonyne (C9H16)",
                "2-nonyne (C9H16)",
                "3-nonyne (C9H16)",
                "1-decyne (C10H18)",
                "2-decyne (C10H18)",
                "3-decyne (C10H18)",
                "1-undecyne (C11H20)"
            ],
            "mismatches": [],
            "true_referents": [
                "1-butyne (C4H6)",
                "1-decyne (C10H18)",
                "1-heptyne (C7H12)",
                "1-hexyne (C6H10)",
                "1-nonyne (C9H16)",
                "1-octyne (C8H14)",
                "1-pentyne (C5H8)",
                "1-undecyne (C11H20)",
                "2-butyne (C4H6)",
                "2-decyne (C10H18)",
                "2-heptyne (C7H12)",
                "2-hexyne (C6H10)",
                "2-nonyne (C9H16)",
                "2-octyne (C8H14)",
                "2-pentyne (C5H8)",
                "3-decyne (C10H18)",
                "3-heptyne (C7H12)",
                "3-hexyne (C6H10)",
                "3-nonyne (C9H16)",
                "3-octyne (C8H14)",
                "3-pentyne (C5H8)",
                "acetylene (C2H2)",
                "propyne (C3H4)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Ethyne (Acetylene)",
            "Propyne",
            "1-Butyne",
            "2-Butyne",
            "1-Pentyne",
            "2-Pentyne",
            "3-Pentyne",
            "1-Hexyne",
            "2-Hexyne",
            "3-Hexyne",
            "1-Heptyne",
            "2-Heptyne",
            "3-Heptyne",
            "4-Heptyne",
            "1-Octyne",
            "2-Octyne",
            "3-Octyne",
            "4-Octyne",
            "5-Octyne",
            "1-Nonyne",
            "2-Nonyne",
            "3-Nonyne",
            "4-Nonyne",
            "5-Nonyne",
            "1-Decyne"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyne (Acetylene)",
                "Propyne",
                "1-Butyne",
                "2-Butyne",
                "1-Pentyne",
                "2-Pentyne",
                "3-Pentyne",
                "1-Hexyne",
                "2-Hexyne",
                "3-Hexyne",
                "1-Heptyne",
                "2-Heptyne",
                "3-Heptyne",
                "4-Heptyne",
                "1-Octyne",
                "2-Octyne",
                "3-Octyne",
                "4-Octyne",
                "5-Octyne",
                "1-Nonyne",
                "2-Nonyne",
                "3-Nonyne",
                "4-Nonyne",
                "5-Nonyne"
            ],
            "mismatches": [
                "1-Decyne"
            ],
            "true_referents": [
                "1-Butyne",
                "1-Heptyne",
                "1-Hexyne",
                "1-Nonyne",
                "1-Octyne",
                "1-Pentyne",
                "2-Butyne",
                "2-Heptyne",
                "2-Hexyne",
                "2-Nonyne",
                "2-Octyne",
                "2-Pentyne",
                "3-Heptyne",
                "3-Hexyne",
                "3-Nonyne",
                "3-Octyne",
                "3-Pentyne",
                "4-Heptyne",
                "4-Nonyne",
                "4-Octyne",
                "5-Nonyne",
                "5-Octyne",
                "Ethyne (Acetylene)",
                "Propyne"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Ethyne (Acetylene)",
            "Propyne",
            "1-Butyne",
            "2-Butyne",
            "1-Pentyne",
            "2-Pentyne",
            "1-Hexyne",
            "2-Hexyne",
            "3-Hexyne",
            "1-Heptyne",
            "2-Heptyne",
            "3-Heptyne",
            "1-Octyne",
            "2-Octyne",
            "3-Octyne",
            "4-Octyne",
            "1-Nonyne",
            "2-Nonyne",
            "3-Nonyne",
            "4-Nonyne",
            "1-Decyne",
            "2-Decyne",
            "3-Decyne",
            "4-Decyne"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyne (Acetylene)",
                "Propyne",
                "1-Butyne",
                "2-Butyne",
                "1-Pentyne",
                "2-Pentyne",
                "1-Hexyne",
                "2-Hexyne",
                "3-Hexyne",
                "1-Heptyne",
                "2-Heptyne",
                "3-Heptyne",
                "1-Octyne",
                "2-Octyne",
                "3-Octyne",
                "4-Octyne",
                "1-Nonyne",
                "2-Nonyne",
                "3-Nonyne",
                "4-Nonyne",
                "1-Decyne",
                "2-Decyne",
                "3-Decyne",
                "4-Decyne"
            ],
            "mismatches": [],
            "true_referents": [
                "1-Butyne",
                "1-Decyne",
                "1-Heptyne",
                "1-Hexyne",
                "1-Nonyne",
                "1-Octyne",
                "1-Pentyne",
                "2-Butyne",
                "2-Decyne",
                "2-Heptyne",
                "2-Hexyne",
                "2-Nonyne",
                "2-Octyne",
                "2-Pentyne",
                "3-Decyne",
                "3-Heptyne",
                "3-Hexyne",
                "3-Nonyne",
                "3-Octyne",
                "4-Decyne",
                "4-Nonyne",
                "4-Octyne",
                "Ethyne (Acetylene)",
                "Propyne"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Ethyne (C2H2)",
            "Propyne (C3H4)",
            "1-Butyne (C4H6)",
            "2-Butyne (C4H6)",
            "1-Pentyne (C5H8)",
            "2-Pentyne (C5H8)",
            "3-Pentyne (C5H8)",
            "1-Hexyne (C6H10)",
            "2-Hexyne (C6H10)",
            "3-Hexyne (C6H10)",
            "1-Heptyne (C7H12)",
            "2-Heptyne (C7H12)",
            "3-Heptyne (C7H12)",
            "1-Octyne (C8H14)",
            "2-Octyne (C8H14)",
            "3-Octyne (C8H14)",
            "1-Nonyne (C9H16)",
            "2-Nonyne (C9H16)",
            "3-Nonyne (C9H16)",
            "1-Decyne (C10H18)",
            "2-Decyne (C10H18)",
            "3-Decyne (C10H18)",
            "1-Undecyne (C11H20)",
            "1-Dodecyne (C12H22)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyne (C2H2)",
                "Propyne (C3H4)",
                "1-Butyne (C4H6)",
                "2-Butyne (C4H6)",
                "1-Pentyne (C5H8)",
                "2-Pentyne (C5H8)",
                "3-Pentyne (C5H8)",
                "1-Hexyne (C6H10)",
                "2-Hexyne (C6H10)",
                "3-Hexyne (C6H10)",
                "1-Heptyne (C7H12)",
                "2-Heptyne (C7H12)",
                "3-Heptyne (C7H12)",
                "1-Octyne (C8H14)",
                "2-Octyne (C8H14)",
                "3-Octyne (C8H14)",
                "1-Nonyne (C9H16)",
                "2-Nonyne (C9H16)",
                "3-Nonyne (C9H16)",
                "1-Decyne (C10H18)",
                "2-Decyne (C10H18)",
                "3-Decyne (C10H18)",
                "1-Undecyne (C11H20)",
                "1-Dodecyne (C12H22)"
            ],
            "mismatches": [],
            "true_referents": [
                "1-Butyne (C4H6)",
                "1-Decyne (C10H18)",
                "1-Dodecyne (C12H22)",
                "1-Heptyne (C7H12)",
                "1-Hexyne (C6H10)",
                "1-Nonyne (C9H16)",
                "1-Octyne (C8H14)",
                "1-Pentyne (C5H8)",
                "1-Undecyne (C11H20)",
                "2-Butyne (C4H6)",
                "2-Decyne (C10H18)",
                "2-Heptyne (C7H12)",
                "2-Hexyne (C6H10)",
                "2-Nonyne (C9H16)",
                "2-Octyne (C8H14)",
                "2-Pentyne (C5H8)",
                "3-Decyne (C10H18)",
                "3-Heptyne (C7H12)",
                "3-Hexyne (C6H10)",
                "3-Nonyne (C9H16)",
                "3-Octyne (C8H14)",
                "3-Pentyne (C5H8)",
                "Ethyne (C2H2)",
                "Propyne (C3H4)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Ethyne (C2H2)",
            "Propyne (C3H4)",
            "1-Butyne (C4H6)",
            "2-Butyne (C4H6)",
            "1-Pentyne (C5H8)",
            "2-Pentyne (C5H8)",
            "1-Hexyne (C6H10)",
            "2-Hexyne (C6H10)",
            "3-Hexyne (C6H10)",
            "1-Heptyne (C7H12)",
            "2-Heptyne (C7H12)",
            "3-Heptyne (C7H12)",
            "1-Octyne (C8H14)",
            "2-Octyne (C8H14)",
            "3-Octyne (C8H14)",
            "4-Octyne (C8H14)",
            "1-Nonyne (C9H16)",
            "2-Nonyne (C9H16)",
            "3-Nonyne (C9H16)",
            "4-Nonyne (C9H16)",
            "1-Decyne (C10H18)",
            "2-Decyne (C10H18)",
            "3-Decyne (C10H18)",
            "4-Decyne (C10H18)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyne (C2H2)",
                "Propyne (C3H4)",
                "1-Butyne (C4H6)",
                "2-Butyne (C4H6)",
                "1-Pentyne (C5H8)",
                "2-Pentyne (C5H8)",
                "1-Hexyne (C6H10)",
                "2-Hexyne (C6H10)",
                "3-Hexyne (C6H10)",
                "1-Heptyne (C7H12)",
                "2-Heptyne (C7H12)",
                "3-Heptyne (C7H12)",
                "1-Octyne (C8H14)",
                "2-Octyne (C8H14)",
                "3-Octyne (C8H14)",
                "4-Octyne (C8H14)",
                "1-Nonyne (C9H16)",
                "2-Nonyne (C9H16)",
                "3-Nonyne (C9H16)",
                "4-Nonyne (C9H16)",
                "1-Decyne (C10H18)",
                "2-Decyne (C10H18)",
                "3-Decyne (C10H18)",
                "4-Decyne (C10H18)"
            ],
            "mismatches": [],
            "true_referents": [
                "1-Butyne (C4H6)",
                "1-Decyne (C10H18)",
                "1-Heptyne (C7H12)",
                "1-Hexyne (C6H10)",
                "1-Nonyne (C9H16)",
                "1-Octyne (C8H14)",
                "1-Pentyne (C5H8)",
                "2-Butyne (C4H6)",
                "2-Decyne (C10H18)",
                "2-Heptyne (C7H12)",
                "2-Hexyne (C6H10)",
                "2-Nonyne (C9H16)",
                "2-Octyne (C8H14)",
                "2-Pentyne (C5H8)",
                "3-Decyne (C10H18)",
                "3-Heptyne (C7H12)",
                "3-Hexyne (C6H10)",
                "3-Nonyne (C9H16)",
                "3-Octyne (C8H14)",
                "4-Decyne (C10H18)",
                "4-Nonyne (C9H16)",
                "4-Octyne (C8H14)",
                "Ethyne (C2H2)",
                "Propyne (C3H4)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Acetylene (C2H2)",
            "Propyne (C3H4)",
            "Butyne-1 (C4H6)",
            "Butyne-2 (C4H6)",
            "Pentyne-1 (C5H8)",
            "Pentyne-2 (C5H8)",
            "Hexyne-1 (C6H10)",
            "Hexyne-2 (C6H10)",
            "Heptyne-1 (C7H12)",
            "Heptyne-2 (C7H12)",
            "Octyne-1 (C8H14)",
            "Octyne-2 (C8H14)",
            "Nonyne-1 (C9H16)",
            "Nonyne-2 (C9H16)",
            "Decyne-1 (C10H18)",
            "Decyne-2 (C10H18)",
            "Undecyne-1 (C11H20)",
            "Undecyne-2 (C11H20)",
            "Dodecyne-1 (C12H22)",
            "Dodecyne-2 (C12H22)",
            "Tridecyne-1 (C13H24)",
            "Tridecyne-2 (C13H24)",
            "Tetradecyne-1 (C14H26)",
            "Tetradecyne-2 (C14H26)",
            "Pentadecyne-1 (C15H28)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylene (C2H2)",
                "Propyne (C3H4)",
                "Butyne-1 (C4H6)",
                "Butyne-2 (C4H6)",
                "Pentyne-1 (C5H8)",
                "Pentyne-2 (C5H8)",
                "Hexyne-1 (C6H10)",
                "Hexyne-2 (C6H10)",
                "Heptyne-1 (C7H12)",
                "Heptyne-2 (C7H12)",
                "Octyne-1 (C8H14)",
                "Octyne-2 (C8H14)",
                "Nonyne-1 (C9H16)",
                "Nonyne-2 (C9H16)",
                "Decyne-1 (C10H18)",
                "Decyne-2 (C10H18)",
                "Undecyne-1 (C11H20)",
                "Undecyne-2 (C11H20)",
                "Dodecyne-1 (C12H22)",
                "Dodecyne-2 (C12H22)",
                "Tridecyne-1 (C13H24)",
                "Tridecyne-2 (C13H24)",
                "Tetradecyne-1 (C14H26)",
                "Tetradecyne-2 (C14H26)"
            ],
            "mismatches": [
                "Pentadecyne-1 (C15H28)"
            ],
            "true_referents": [
                "Acetylene (C2H2)",
                "Butyne-1 (C4H6)",
                "Butyne-2 (C4H6)",
                "Decyne-1 (C10H18)",
                "Decyne-2 (C10H18)",
                "Dodecyne-1 (C12H22)",
                "Dodecyne-2 (C12H22)",
                "Heptyne-1 (C7H12)",
                "Heptyne-2 (C7H12)",
                "Hexyne-1 (C6H10)",
                "Hexyne-2 (C6H10)",
                "Nonyne-1 (C9H16)",
                "Nonyne-2 (C9H16)",
                "Octyne-1 (C8H14)",
                "Octyne-2 (C8H14)",
                "Pentyne-1 (C5H8)",
                "Pentyne-2 (C5H8)",
                "Propyne (C3H4)",
                "Tetradecyne-1 (C14H26)",
                "Tetradecyne-2 (C14H26)",
                "Tridecyne-1 (C13H24)",
                "Tridecyne-2 (C13H24)",
                "Undecyne-1 (C11H20)",
                "Undecyne-2 (C11H20)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Ethyne (C2H2)",
            "Propyne (C3H4)",
            "But-1-yne (C4H6)",
            "Pent-1-yne (C5H8)",
            "Hex-1-yne (C6H10)",
            "Hept-1-yne (C7H12)",
            "Oct-1-yne (C8H14)",
            "Non-1-yne (C9H16)",
            "Dec-1-yne (C10H18)",
            "Undec-1-yne (C11H20)",
            "Dodec-1-yne (C12H22)",
            "Tridec-1-yne (C13H24)",
            "Tetradec-1-yne (C14H26)",
            "Pentadec-1-yne (C15H28)",
            "Hexadec-1-yne (C16H30)",
            "Heptadec-1-yne (C17H32)",
            "Octadec-1-yne (C18H34)",
            "Nonadec-1-yne (C19H36)",
            "Icos-1-yne (C20H38)",
            "Heneicos-1-yne (C21H40)",
            "Docos-1-yne (C22H42)",
            "Tricos-1-yne (C23H44)",
            "Tetracos-1-yne (C24H46)",
            "Pentacos-1-yne (C25H48)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyne (C2H2)",
                "Propyne (C3H4)",
                "But-1-yne (C4H6)",
                "Pent-1-yne (C5H8)",
                "Hex-1-yne (C6H10)",
                "Hept-1-yne (C7H12)",
                "Oct-1-yne (C8H14)",
                "Non-1-yne (C9H16)",
                "Dec-1-yne (C10H18)",
                "Undec-1-yne (C11H20)",
                "Dodec-1-yne (C12H22)",
                "Tridec-1-yne (C13H24)",
                "Tetradec-1-yne (C14H26)",
                "Pentadec-1-yne (C15H28)",
                "Hexadec-1-yne (C16H30)",
                "Heptadec-1-yne (C17H32)",
                "Octadec-1-yne (C18H34)",
                "Nonadec-1-yne (C19H36)",
                "Icos-1-yne (C20H38)",
                "Heneicos-1-yne (C21H40)",
                "Docos-1-yne (C22H42)",
                "Tricos-1-yne (C23H44)",
                "Tetracos-1-yne (C24H46)",
                "Pentacos-1-yne (C25H48)"
            ],
            "mismatches": [],
            "true_referents": [
                "But-1-yne (C4H6)",
                "Dec-1-yne (C10H18)",
                "Docos-1-yne (C22H42)",
                "Dodec-1-yne (C12H22)",
                "Ethyne (C2H2)",
                "Heneicos-1-yne (C21H40)",
                "Hept-1-yne (C7H12)",
                "Heptadec-1-yne (C17H32)",
                "Hex-1-yne (C6H10)",
                "Hexadec-1-yne (C16H30)",
                "Icos-1-yne (C20H38)",
                "Non-1-yne (C9H16)",
                "Nonadec-1-yne (C19H36)",
                "Oct-1-yne (C8H14)",
                "Octadec-1-yne (C18H34)",
                "Pent-1-yne (C5H8)",
                "Pentacos-1-yne (C25H48)",
                "Pentadec-1-yne (C15H28)",
                "Propyne (C3H4)",
                "Tetracos-1-yne (C24H46)",
                "Tetradec-1-yne (C14H26)",
                "Tricos-1-yne (C23H44)",
                "Tridec-1-yne (C13H24)",
                "Undec-1-yne (C11H20)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Acetylene (C2H2)",
            "Propyne (C3H4)",
            "Butyne (C4H6)",
            "Pentyne (C5H8)",
            "Hexyne (C6H10)",
            "Heptyne (C7H12)",
            "Octyne (C8H14)",
            "Nonyne (C9H16)",
            "Decyne (C10H18)",
            "Undecyne (C11H20)",
            "Dodecyne (C12H22)",
            "Tridecyne (C13H24)",
            "Tetradecyne (C14H26)",
            "Pentadecyne (C15H28)",
            "Hexadecyne (C16H30)",
            "Heptadecyne (C17H32)",
            "Octadecyne (C18H34)",
            "Nonadecyne (C19H36)",
            "Icosyne (C20H38)",
            "Heneicosyne (C21H40)",
            "Docosyne (C22H42)",
            "Tricosyne (C23H44)",
            "Tetracosyne (C24H46)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylene (C2H2)",
                "Propyne (C3H4)",
                "Butyne (C4H6)",
                "Pentyne (C5H8)",
                "Hexyne (C6H10)",
                "Heptyne (C7H12)",
                "Octyne (C8H14)",
                "Nonyne (C9H16)",
                "Decyne (C10H18)",
                "Undecyne (C11H20)",
                "Dodecyne (C12H22)",
                "Tridecyne (C13H24)",
                "Tetradecyne (C14H26)",
                "Pentadecyne (C15H28)",
                "Hexadecyne (C16H30)",
                "Heptadecyne (C17H32)",
                "Octadecyne (C18H34)",
                "Nonadecyne (C19H36)",
                "Icosyne (C20H38)",
                "Heneicosyne (C21H40)",
                "Docosyne (C22H42)",
                "Tricosyne (C23H44)",
                "Tetracosyne (C24H46)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylene (C2H2)",
                "Butyne (C4H6)",
                "Decyne (C10H18)",
                "Docosyne (C22H42)",
                "Dodecyne (C12H22)",
                "Heneicosyne (C21H40)",
                "Heptadecyne (C17H32)",
                "Heptyne (C7H12)",
                "Hexadecyne (C16H30)",
                "Hexyne (C6H10)",
                "Icosyne (C20H38)",
                "Nonadecyne (C19H36)",
                "Nonyne (C9H16)",
                "Octadecyne (C18H34)",
                "Octyne (C8H14)",
                "Pentadecyne (C15H28)",
                "Pentyne (C5H8)",
                "Propyne (C3H4)",
                "Tetracosyne (C24H46)",
                "Tetradecyne (C14H26)",
                "Tricosyne (C23H44)",
                "Tridecyne (C13H24)",
                "Undecyne (C11H20)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Ethyne (Acetylene)",
            "Propyne",
            "1-Butyne",
            "2-Butyne",
            "1-Pentyne",
            "2-Pentyne",
            "1-Hexyne",
            "2-Hexyne",
            "3-Hexyne",
            "1-Heptyne",
            "2-Heptyne",
            "3-Heptyne",
            "4-Heptyne",
            "1-Octyne",
            "2-Octyne",
            "3-Octyne",
            "4-Octyne",
            "5-Octyne",
            "1-Nonyne",
            "2-Nonyne",
            "3-Nonyne",
            "4-Nonyne",
            "5-Nonyne",
            "6-Nonyne",
            "1-Decyne"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyne (Acetylene)",
                "Propyne",
                "1-Butyne",
                "2-Butyne",
                "1-Pentyne",
                "2-Pentyne",
                "1-Hexyne",
                "2-Hexyne",
                "3-Hexyne",
                "1-Heptyne",
                "2-Heptyne",
                "3-Heptyne",
                "4-Heptyne",
                "1-Octyne",
                "2-Octyne",
                "3-Octyne",
                "4-Octyne",
                "5-Octyne",
                "1-Nonyne",
                "2-Nonyne",
                "3-Nonyne",
                "4-Nonyne",
                "5-Nonyne",
                "6-Nonyne"
            ],
            "mismatches": [
                "1-Decyne"
            ],
            "true_referents": [
                "1-Butyne",
                "1-Heptyne",
                "1-Hexyne",
                "1-Nonyne",
                "1-Octyne",
                "1-Pentyne",
                "2-Butyne",
                "2-Heptyne",
                "2-Hexyne",
                "2-Nonyne",
                "2-Octyne",
                "2-Pentyne",
                "3-Heptyne",
                "3-Hexyne",
                "3-Nonyne",
                "3-Octyne",
                "4-Heptyne",
                "4-Nonyne",
                "4-Octyne",
                "5-Nonyne",
                "5-Octyne",
                "6-Nonyne",
                "Ethyne (Acetylene)",
                "Propyne"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Acetylene (C2H2)",
            "Propyne (C3H4)",
            "1-Butyne (C4H6)",
            "2-Butyne (C4H6)",
            "1-Pentyne (C5H8)",
            "2-Pentyne (C5H8)",
            "3-Methyl-1-butyne (C5H8)",
            "1-Hexyne (C6H10)",
            "2-Hexyne (C6H10)",
            "3-Hexyne (C6H10)",
            "3-Methyl-1-pentyne (C6H10)",
            "4-Methyl-1-pentyne (C6H10)",
            "3,3-Dimethyl-1-butyne (C6H10)",
            "1-Heptyne (C7H12)",
            "2-Heptyne (C7H12)",
            "3-Heptyne (C7H12)",
            "1-Octyne (C8H14)",
            "2-Octyne (C8H14)",
            "3-Octyne (C8H14)",
            "4-Octyne (C8H14)",
            "1-Nonyne (C9H16)",
            "1-Decyne (C10H18)",
            "1-Undecyne (C11H20)",
            "1-Dodecyne (C12H22)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylene (C2H2)",
                "Propyne (C3H4)",
                "1-Butyne (C4H6)",
                "2-Butyne (C4H6)",
                "1-Pentyne (C5H8)",
                "2-Pentyne (C5H8)",
                "3-Methyl-1-butyne (C5H8)",
                "1-Hexyne (C6H10)",
                "2-Hexyne (C6H10)",
                "3-Hexyne (C6H10)",
                "3-Methyl-1-pentyne (C6H10)",
                "4-Methyl-1-pentyne (C6H10)",
                "3,3-Dimethyl-1-butyne (C6H10)",
                "1-Heptyne (C7H12)",
                "2-Heptyne (C7H12)",
                "3-Heptyne (C7H12)",
                "1-Octyne (C8H14)",
                "2-Octyne (C8H14)",
                "3-Octyne (C8H14)",
                "4-Octyne (C8H14)",
                "1-Nonyne (C9H16)",
                "1-Decyne (C10H18)",
                "1-Undecyne (C11H20)",
                "1-Dodecyne (C12H22)"
            ],
            "mismatches": [],
            "true_referents": [
                "1-Butyne (C4H6)",
                "1-Decyne (C10H18)",
                "1-Dodecyne (C12H22)",
                "1-Heptyne (C7H12)",
                "1-Hexyne (C6H10)",
                "1-Nonyne (C9H16)",
                "1-Octyne (C8H14)",
                "1-Pentyne (C5H8)",
                "1-Undecyne (C11H20)",
                "2-Butyne (C4H6)",
                "2-Heptyne (C7H12)",
                "2-Hexyne (C6H10)",
                "2-Octyne (C8H14)",
                "2-Pentyne (C5H8)",
                "3,3-Dimethyl-1-butyne (C6H10)",
                "3-Heptyne (C7H12)",
                "3-Hexyne (C6H10)",
                "3-Methyl-1-butyne (C5H8)",
                "3-Methyl-1-pentyne (C6H10)",
                "3-Octyne (C8H14)",
                "4-Methyl-1-pentyne (C6H10)",
                "4-Octyne (C8H14)",
                "Acetylene (C2H2)",
                "Propyne (C3H4)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Ethyne (Acetylene)",
            "Propyne",
            "1-Butyne",
            "2-Butyne",
            "1-Pentyne",
            "2-Pentyne",
            "3-Pentyne",
            "1-Hexyne",
            "2-Hexyne",
            "3-Hexyne",
            "1-Heptyne",
            "2-Heptyne",
            "3-Heptyne",
            "4-Heptyne",
            "1-Octyne",
            "2-Octyne",
            "3-Octyne",
            "4-Octyne",
            "1-Nonyne",
            "2-Nonyne",
            "3-Nonyne",
            "4-Nonyne",
            "5-Nonyne",
            "1-Decyne"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethyne (Acetylene)",
                "Propyne",
                "1-Butyne",
                "2-Butyne",
                "1-Pentyne",
                "2-Pentyne",
                "3-Pentyne",
                "1-Hexyne",
                "2-Hexyne",
                "3-Hexyne",
                "1-Heptyne",
                "2-Heptyne",
                "3-Heptyne",
                "4-Heptyne",
                "1-Octyne",
                "2-Octyne",
                "3-Octyne",
                "4-Octyne",
                "1-Nonyne",
                "2-Nonyne",
                "3-Nonyne",
                "4-Nonyne",
                "5-Nonyne",
                "1-Decyne"
            ],
            "mismatches": [],
            "true_referents": [
                "1-Butyne",
                "1-Decyne",
                "1-Heptyne",
                "1-Hexyne",
                "1-Nonyne",
                "1-Octyne",
                "1-Pentyne",
                "2-Butyne",
                "2-Heptyne",
                "2-Hexyne",
                "2-Nonyne",
                "2-Octyne",
                "2-Pentyne",
                "3-Heptyne",
                "3-Hexyne",
                "3-Nonyne",
                "3-Octyne",
                "3-Pentyne",
                "4-Heptyne",
                "4-Nonyne",
                "4-Octyne",
                "5-Nonyne",
                "Ethyne (Acetylene)",
                "Propyne"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Acetylene (C2H2)",
            "Propyne (C3H4)",
            "1-Butyne (C4H6)",
            "2-Butyne (C4H6)",
            "1-Pentyne (C5H8)",
            "2-Pentyne (C5H8)",
            "3-Methyl-1-butyne (C5H8)",
            "1-Hexyne (C6H10)",
            "2-Hexyne (C6H10)",
            "3-Hexyne (C6H10)",
            "4-Methyl-1-pentyne (C6H10)",
            "3-Methyl-1-pentyne (C6H10)",
            "1-Heptyne (C7H12)",
            "2-Heptyne (C7H12)",
            "3-Heptyne (C7H12)",
            "4-Heptyne (C7H12)",
            "1-Octyne (C8H14)",
            "2-Octyne (C8H14)",
            "3-Octyne (C8H14)",
            "4-Octyne (C8H14)",
            "1-Nonyne (C9H16)",
            "2-Nonyne (C9H16)",
            "3-Nonyne (C9H16)",
            "4-Nonyne (C9H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylene (C2H2)",
                "Propyne (C3H4)",
                "1-Butyne (C4H6)",
                "2-Butyne (C4H6)",
                "1-Pentyne (C5H8)",
                "2-Pentyne (C5H8)",
                "3-Methyl-1-butyne (C5H8)",
                "1-Hexyne (C6H10)",
                "2-Hexyne (C6H10)",
                "3-Hexyne (C6H10)",
                "4-Methyl-1-pentyne (C6H10)",
                "3-Methyl-1-pentyne (C6H10)",
                "1-Heptyne (C7H12)",
                "2-Heptyne (C7H12)",
                "3-Heptyne (C7H12)",
                "4-Heptyne (C7H12)",
                "1-Octyne (C8H14)",
                "2-Octyne (C8H14)",
                "3-Octyne (C8H14)",
                "4-Octyne (C8H14)",
                "1-Nonyne (C9H16)",
                "2-Nonyne (C9H16)",
                "3-Nonyne (C9H16)",
                "4-Nonyne (C9H16)"
            ],
            "mismatches": [],
            "true_referents": [
                "1-Butyne (C4H6)",
                "1-Heptyne (C7H12)",
                "1-Hexyne (C6H10)",
                "1-Nonyne (C9H16)",
                "1-Octyne (C8H14)",
                "1-Pentyne (C5H8)",
                "2-Butyne (C4H6)",
                "2-Heptyne (C7H12)",
                "2-Hexyne (C6H10)",
                "2-Nonyne (C9H16)",
                "2-Octyne (C8H14)",
                "2-Pentyne (C5H8)",
                "3-Heptyne (C7H12)",
                "3-Hexyne (C6H10)",
                "3-Methyl-1-butyne (C5H8)",
                "3-Methyl-1-pentyne (C6H10)",
                "3-Nonyne (C9H16)",
                "3-Octyne (C8H14)",
                "4-Heptyne (C7H12)",
                "4-Methyl-1-pentyne (C6H10)",
                "4-Nonyne (C9H16)",
                "4-Octyne (C8H14)",
                "Acetylene (C2H2)",
                "Propyne (C3H4)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Acetylene (C\u2082H\u2082)",
            "Propyne (C\u2083H\u2084)",
            "Butyne (C\u2084H\u2086)",
            "Pentyne (C\u2085H\u2088)",
            "Hexyne (C\u2086H\u2081\u2080)",
            "Heptyne (C\u2087H\u2081\u2082)",
            "Octyne (C\u2088H\u2081\u2084)",
            "Nonyne (C\u2089H\u2081\u2086)",
            "Decyne (C\u2081\u2080H\u2081\u2088)",
            "Undecyne (C\u2081\u2081H\u2082\u2080)",
            "Dodecyne (C\u2081\u2082H\u2082\u2082)",
            "Tridecyne (C\u2081\u2083H\u2082\u2084)",
            "Tetradecyne (C\u2081\u2084H\u2082\u2086)",
            "Pentadecyne (C\u2081\u2085H\u2082\u2088)",
            "Hexadecyne (C\u2081\u2086H\u2083\u2080)",
            "Heptadecyne (C\u2081\u2087H\u2083\u2082)",
            "Octadecyne (C\u2081\u2088H\u2083\u2084)",
            "Nonadecyne (C\u2081\u2089H\u2083\u2086)",
            "Icosyne (C\u2082\u2080H\u2083\u2088)",
            "Heneicosyne (C\u2082\u2081H\u2084\u2080)",
            "Docosyne (C\u2082\u2082H\u2084\u2082)",
            "Tricosyne (C\u2082\u2083H\u2084\u2084)",
            "Tetracosyne (C\u2082\u2084H\u2084\u2086)",
            "Pentacosyne (C\u2082\u2085H\u2084\u2088)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylene (C\u2082H\u2082)",
                "Propyne (C\u2083H\u2084)",
                "Butyne (C\u2084H\u2086)",
                "Pentyne (C\u2085H\u2088)",
                "Hexyne (C\u2086H\u2081\u2080)",
                "Heptyne (C\u2087H\u2081\u2082)",
                "Octyne (C\u2088H\u2081\u2084)",
                "Nonyne (C\u2089H\u2081\u2086)",
                "Decyne (C\u2081\u2080H\u2081\u2088)",
                "Undecyne (C\u2081\u2081H\u2082\u2080)",
                "Dodecyne (C\u2081\u2082H\u2082\u2082)",
                "Tridecyne (C\u2081\u2083H\u2082\u2084)",
                "Tetradecyne (C\u2081\u2084H\u2082\u2086)",
                "Pentadecyne (C\u2081\u2085H\u2082\u2088)",
                "Hexadecyne (C\u2081\u2086H\u2083\u2080)",
                "Heptadecyne (C\u2081\u2087H\u2083\u2082)",
                "Octadecyne (C\u2081\u2088H\u2083\u2084)",
                "Nonadecyne (C\u2081\u2089H\u2083\u2086)",
                "Icosyne (C\u2082\u2080H\u2083\u2088)",
                "Heneicosyne (C\u2082\u2081H\u2084\u2080)",
                "Docosyne (C\u2082\u2082H\u2084\u2082)",
                "Tricosyne (C\u2082\u2083H\u2084\u2084)",
                "Tetracosyne (C\u2082\u2084H\u2084\u2086)",
                "Pentacosyne (C\u2082\u2085H\u2084\u2088)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylene (C\u2082H\u2082)",
                "Butyne (C\u2084H\u2086)",
                "Decyne (C\u2081\u2080H\u2081\u2088)",
                "Docosyne (C\u2082\u2082H\u2084\u2082)",
                "Dodecyne (C\u2081\u2082H\u2082\u2082)",
                "Heneicosyne (C\u2082\u2081H\u2084\u2080)",
                "Heptadecyne (C\u2081\u2087H\u2083\u2082)",
                "Heptyne (C\u2087H\u2081\u2082)",
                "Hexadecyne (C\u2081\u2086H\u2083\u2080)",
                "Hexyne (C\u2086H\u2081\u2080)",
                "Icosyne (C\u2082\u2080H\u2083\u2088)",
                "Nonadecyne (C\u2081\u2089H\u2083\u2086)",
                "Nonyne (C\u2089H\u2081\u2086)",
                "Octadecyne (C\u2081\u2088H\u2083\u2084)",
                "Octyne (C\u2088H\u2081\u2084)",
                "Pentacosyne (C\u2082\u2085H\u2084\u2088)",
                "Pentadecyne (C\u2081\u2085H\u2082\u2088)",
                "Pentyne (C\u2085H\u2088)",
                "Propyne (C\u2083H\u2084)",
                "Tetracosyne (C\u2082\u2084H\u2084\u2086)",
                "Tetradecyne (C\u2081\u2084H\u2082\u2086)",
                "Tricosyne (C\u2082\u2083H\u2084\u2084)",
                "Tridecyne (C\u2081\u2083H\u2082\u2084)",
                "Undecyne (C\u2081\u2081H\u2082\u2080)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Acetylene (C2H2)",
            "Alkynes (C3H4)",
            "But-2-yne",
            "Cyclopropyne",
            "Cyclopropyne (C3H4)",
            "Ethynylbenzene",
            "Hex-3-yne",
            "Hex-5-yne",
            "Hex-6-yne",
            "Hexyne",
            "Methylacetylene",
            "Pent-2-yne",
            "Pent-3-yne",
            "Pent-4-yne",
            "Pent-5-yne",
            "Pent-6-yne",
            "Pent-7-yne",
            "Pent-8-yne",
            "Pent-9-yne",
            "Pent-10-yne",
            "Pent-11-yne",
            "Pent-12-yne",
            "Pent-13-yne",
            "Pent-14-yne",
            "Pent-15-yne",
            "Pent-16-yne",
            "Pent-17-yne",
            "Pent-18-yne",
            "Pent-19-yne",
            "Pent-20-yne",
            "Pent-21-yne",
            "Pent-22-yne",
            "Pent-23-yne",
            "Pent-24-yne"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylene (C2H2)",
                "Alkynes (C3H4)",
                "But-2-yne",
                "Cyclopropyne",
                "Cyclopropyne (C3H4)",
                "Ethynylbenzene",
                "Hex-3-yne",
                "Hex-5-yne",
                "Hex-6-yne",
                "Hexyne",
                "Methylacetylene",
                "Pent-2-yne",
                "Pent-3-yne",
                "Pent-4-yne",
                "Pent-5-yne",
                "Pent-6-yne",
                "Pent-7-yne",
                "Pent-8-yne",
                "Pent-9-yne",
                "Pent-10-yne",
                "Pent-11-yne",
                "Pent-12-yne",
                "Pent-13-yne",
                "Pent-14-yne"
            ],
            "mismatches": [
                "Pent-15-yne",
                "Pent-16-yne",
                "Pent-17-yne",
                "Pent-18-yne",
                "Pent-19-yne",
                "Pent-20-yne",
                "Pent-21-yne",
                "Pent-22-yne",
                "Pent-23-yne",
                "Pent-24-yne"
            ],
            "true_referents": [
                "Acetylene (C2H2)",
                "Alkynes (C3H4)",
                "But-2-yne",
                "Cyclopropyne",
                "Cyclopropyne (C3H4)",
                "Ethynylbenzene",
                "Hex-3-yne",
                "Hex-5-yne",
                "Hex-6-yne",
                "Hexyne",
                "Methylacetylene",
                "Pent-10-yne",
                "Pent-11-yne",
                "Pent-12-yne",
                "Pent-13-yne",
                "Pent-14-yne",
                "Pent-2-yne",
                "Pent-3-yne",
                "Pent-4-yne",
                "Pent-5-yne",
                "Pent-6-yne",
                "Pent-7-yne",
                "Pent-8-yne",
                "Pent-9-yne"
            ],
            "TP": 24,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            "Acetylene (C2H2)",
            "Propyne (C3H4)",
            "But-1-yne (C4H6)",
            "Pent-1-yne (C5H8)",
            "Hex-1-yne (C6H10)",
            "Hept-1-yne (C7H12)",
            "Oct-1-yne (C8H14)",
            "Non-1-yne (C9H16)",
            "Dec-1-yne (C10H18)",
            "Undec-1-yne (C11H20)",
            "Dodec-1-yne (C12H22)",
            "Tridec-1-yne (C13H24)",
            "Tetradec-1-yne (C14H26)",
            "Pentadec-1-yne (C15H28)",
            "Hexadec-1-yne (C16H30)",
            "Heptadec-1-yne (C17H32)",
            "Octadec-1-yne (C18H34)",
            "Nonadec-1-yne (C19H36)",
            "Icos-1-yne (C20H38)",
            "Heneicos-1-yne (C21H40)",
            "Docos-1-yne (C22H42)",
            "Tricos-1-yne (C23H44)",
            "Tetracos-1-yne (C24H46)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylene (C2H2)",
                "Propyne (C3H4)",
                "But-1-yne (C4H6)",
                "Pent-1-yne (C5H8)",
                "Hex-1-yne (C6H10)",
                "Hept-1-yne (C7H12)",
                "Oct-1-yne (C8H14)",
                "Non-1-yne (C9H16)",
                "Dec-1-yne (C10H18)",
                "Undec-1-yne (C11H20)",
                "Dodec-1-yne (C12H22)",
                "Tridec-1-yne (C13H24)",
                "Tetradec-1-yne (C14H26)",
                "Pentadec-1-yne (C15H28)",
                "Hexadec-1-yne (C16H30)",
                "Heptadec-1-yne (C17H32)",
                "Octadec-1-yne (C18H34)",
                "Nonadec-1-yne (C19H36)",
                "Icos-1-yne (C20H38)",
                "Heneicos-1-yne (C21H40)",
                "Docos-1-yne (C22H42)",
                "Tricos-1-yne (C23H44)",
                "Tetracos-1-yne (C24H46)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetylene (C2H2)",
                "But-1-yne (C4H6)",
                "Dec-1-yne (C10H18)",
                "Docos-1-yne (C22H42)",
                "Dodec-1-yne (C12H22)",
                "Heneicos-1-yne (C21H40)",
                "Hept-1-yne (C7H12)",
                "Heptadec-1-yne (C17H32)",
                "Hex-1-yne (C6H10)",
                "Hexadec-1-yne (C16H30)",
                "Icos-1-yne (C20H38)",
                "Non-1-yne (C9H16)",
                "Nonadec-1-yne (C19H36)",
                "Oct-1-yne (C8H14)",
                "Octadec-1-yne (C18H34)",
                "Pent-1-yne (C5H8)",
                "Pentadec-1-yne (C15H28)",
                "Propyne (C3H4)",
                "Tetracos-1-yne (C24H46)",
                "Tetradec-1-yne (C14H26)",
                "Tricos-1-yne (C23H44)",
                "Tridec-1-yne (C13H24)",
                "Undec-1-yne (C11H20)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "alkyne",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "ethene",
                "Canonical Name": "ethene"
            },
            {
                "Referent": "ethylene",
                "Canonical Name": "ethylene"
            },
            {
                "Referent": "acetylene",
                "Canonical Name": "acetylene"
            },
            {
                "Referent": "propyne",
                "Canonical Name": "propyne"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "ethene",
                "ethylene",
                "acetylene",
                "propyne"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Referent\": \"acetylene\", \"Canonical Name\": \"acetylene\"}",
                "{\"Referent\": \"ethene\", \"Canonical Name\": \"ethene\"}",
                "{\"Referent\": \"ethylene\", \"Canonical Name\": \"ethylene\"}",
                "{\"Referent\": \"propyne\", \"Canonical Name\": \"propyne\"}"
            ],
            "TP": 4,
            "FP": 0,
            "FN": 0
        }
    }
]