[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Fluorene (C13H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Biphenyl (C12H10)",
            "Terphenyl (C18H14)",
            "Quaterphenyl (C24H18)",
            "Anthraquinone (C14H8O2)",
            "Phenol (C6H5OH)",
            "Anisole (C8H10O)",
            "Naphthol (C10H7OH)",
            "Benzaldehyde (C7H6O)",
            "Benzamide (C7H7NO)",
            "Benzyl alcohol (C7H8O)",
            "Benzyl chloride (C7H7Cl)",
            "Benzylamine (C7H9N)",
            "Benzyl benzoate (C14H12O2)",
            "Benzyl cyanide (C8H7N)",
            "Benzyl methyl ether (C9H12O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "xylene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "fluorene",
                "pyrene",
                "chrysene",
                "anisole"
            ],
            "mismatches": [
                "biphenyl",
                "terphenyl",
                "quaterphenyl",
                "anthraquinone",
                "phenol",
                "naphthol",
                "benzaldehyde",
                "benzamide",
                "benzyl alcohol",
                "benzyl chloride",
                "benzylamine",
                "benzyl benzoate",
                "benzyl cyanide",
                "benzyl methyl ether"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1,3-Di-tert-butylbenzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "1-Ethyl-3,5-diisopropyl-benzene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "10-Hydroxybenzo[a]pyrene",
                "2,2',7,7'-Tetrachlorohypericin",
                "2,3-Dimethylbenzamide",
                "2,3-dihydroxy-N-methoxy-6-propylbenzamide",
                "2,4,6-tri-tert-butylaniline",
                "2,4-Dihydroxy-3,6-dimethylbenzaldehyde",
                "2-Isopropyl-N-methylaniline",
                "2-methylbenzyl alcohol",
                "3,5-bis(2-cyanopropan-2-yl)benzoic acid",
                "3,5-dimethylbenzaldehyde",
                "3-methylbenzyl alcohol",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "4-propylbenzoic acid",
                "4-t-Butylbenzoic acid",
                "7,7'-Dichlorohypericin",
                "7-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "9-Hydroxybenzo[a]pyrene",
                "Anisole, 2-isopropyl-4-methyl-",
                "Anisole, 4-sec-butyl",
                "Benzenepropanoic acid, 3,5-bis(1,1-dimethylethyl)-4-hydroxy-, methyl ester",
                "Dibenzo[a,h]pyrene",
                "N-(2,4-dimethylphenyl)formamide",
                "N3-[(4-tert-butylphenyl)methylideneamino]-1,2,4-triazole-3,4-diamine",
                "Naphtho[2,3-e]pyrene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "biphenylene",
                "chrysene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "hexaphenylene",
                "methylbenzene",
                "methylbenzyl alcohol",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "tetramethylbenzene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylbenzene (C8H10)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenol (C6H5OH)",
            "Aniline (C6H5NH2)",
            "Biphenyl (C12H10)",
            "Styrene (C8H8)",
            "Chlorobenzene (C6H5Cl)",
            "Nitrobenzene (C6H5NO2)",
            "Benzoic Acid (C7H6O2)",
            "Benzaldehyde (C7H6O)",
            "Benzyl Alcohol (C7H8O)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Terephthalic Acid (C8H6O4)",
            "Phthalic Acid (C8H6O4)",
            "Naphthol (C10H8O)",
            "Benzoyl Chloride (C7H5ClO)",
            "Acetophenone (C8H8O)",
            "Benzonitrile (C7H5N)",
            "Benzamide (C7H7NO)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "ethylbenzene",
                "xylene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "pyrene",
                "styrene"
            ],
            "mismatches": [
                "phenol",
                "aniline",
                "biphenyl",
                "chlorobenzene",
                "nitrobenzene",
                "benzoic acid",
                "benzaldehyde",
                "benzyl alcohol",
                "terephthalic acid",
                "phthalic acid",
                "naphthol",
                "benzoyl chloride",
                "acetophenone",
                "benzonitrile",
                "benzamide"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "10-Hydroxybenzo[a]pyrene",
                "2,2',7,7'-Tetrachlorohypericin",
                "2,3-Dimethylbenzamide",
                "2,3-dihydroxy-N-methoxy-6-propylbenzamide",
                "2,4-Dihydroxy-3,6-dimethylbenzaldehyde",
                "2,4-di-tert-butylphenol",
                "2,6-di-tert-butylphenol",
                "2-Isopropyl-N-methylaniline",
                "2-[3-(2-cyanopropan-2-yl)-5-(1,2,4-triazol-1-ylmethyl)phenyl]-3-hydroxy-2-methylpropanenitrile",
                "2-[3-(2-cyanopropan-2-yl)-5-(hydroxymethyl)phenyl]-2-methylpropanenitrile",
                "2-amino-1-(2-chlorophenyl)-1-propanol",
                "2-methylbenzyl alcohol",
                "3,5-dimethylbenzaldehyde",
                "3-(3,5-ditert-butyl-4-hydroxyphenyl)-2-isocyano-2-propenenitrile",
                "3-methylbenzyl alcohol",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-(4-hydroxyphenyl)-4-methyl-2-pentanone",
                "4-Propylphenol",
                "4-propylbenzoic acid",
                "4-t-Butylbenzoic acid",
                "4-tert-Octylphenol monoethoxylate",
                "6-Nitrobenzo[a]pyrene",
                "7,7'-Dichlorohypericin",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzenepropanoic acid, 3,5-bis(1,1-dimethylethyl)-4-hydroxy-, methyl ester",
                "Dibenzo[a,h]pyrene",
                "N-(2,4-dimethylphenyl)formamide",
                "N-(2-methoxycyclohexyl)-2,5-dimethylaniline",
                "N-(2-methoxycyclohexyl)-3,4-dimethylaniline",
                "Naphtho[2,3-e]pyrene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[c]phenanthrene",
                "biphenylene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "methylbenzene",
                "methylbenzyl alcohol",
                "naphthalene",
                "nitrosobenzene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Biphenyl (C12H10)",
            "Styrene (C8H8)",
            "Phenol (C6H5OH)",
            "Aniline (C6H5NH2)",
            "Chloro benzene (C6H5Cl)",
            "Nitrobenzene (C6H5NO2)",
            "Toluene (C6H5CH3)",
            "Benzaldehyde (C7H6O)",
            "Benzoyl chloride (C7H5ClO)",
            "Benzonitrile (C7H5N)",
            "Benzyl alcohol (C7H8O)",
            "Benzyl chloride (C7H7Cl)",
            "Benzylamine (C7H9N)",
            "Benzyl cyanide (C8H7N)",
            "Benzyl bromide (C7H7Br)",
            "Benzyl iodide (C7H7I)",
            "Benzyl acetate (C9H10O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "xylene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "pyrene",
                "styrene"
            ],
            "mismatches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Biphenyl (C12H10)",
                "Phenol (C6H5OH)",
                "Aniline (C6H5NH2)",
                "Chloro benzene (C6H5Cl)",
                "Nitrobenzene (C6H5NO2)",
                "Toluene (C6H5CH3)",
                "Benzaldehyde (C7H6O)",
                "Benzoyl chloride (C7H5ClO)",
                "Benzonitrile (C7H5N)",
                "Benzyl alcohol (C7H8O)",
                "Benzyl chloride (C7H7Cl)",
                "Benzylamine (C7H9N)",
                "Benzyl cyanide (C8H7N)",
                "Benzyl bromide (C7H7Br)",
                "Benzyl iodide (C7H7I)",
                "Benzyl acetate (C9H10O2)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1,3-Diisopropylbenzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "1-[3,5-Bis(benzyloxy)phenyl]acetone",
                "10-Hydroxybenzo[a]pyrene",
                "2,2',7,7'-Tetrachlorohypericin",
                "2,4,6-tri-tert-butylaniline",
                "2,4-Dihydroxy-3,6-dimethylbenzaldehyde",
                "2-(2-bromo-4-tert-butylphenoxy)-N-(2-furanylmethyl)acetamide",
                "2-Isopropyl-N-methylaniline",
                "2-[3-(2-cyanopropan-2-yl)-5-(1,2,4-triazol-1-ylmethyl)phenyl]-3-hydroxy-2-methylpropanenitrile",
                "2-[3-(2-cyanopropan-2-yl)-5-(hydroxymethyl)phenyl]-2-methylpropanenitrile",
                "2-amino-1-(2-chlorophenyl)-1-propanol",
                "2-methylbenzyl alcohol",
                "3,5-bis(2-cyanopropan-2-yl)benzoic acid",
                "3,5-dimethylbenzaldehyde",
                "3-(3,5-ditert-butyl-4-hydroxyphenyl)-2-isocyano-2-propenenitrile",
                "3-methylbenzyl alcohol",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "6-Nitrobenzo[a]pyrene",
                "7,7'-Dichlorohypericin",
                "Benzenepropanoic acid, 3,5-bis(1,1-dimethylethyl)-4-hydroxy-, methyl ester",
                "Dibenzo[a,h]pyrene",
                "N-(2-methoxycyclohexyl)-2,5-dimethylaniline",
                "N-(2-methoxycyclohexyl)-3,4-dimethylaniline",
                "N3-[(4-tert-butylphenyl)methylideneamino]-1,2,4-triazole-3,4-diamine",
                "Naphtho[2,3-e]pyrene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[c]phenanthrene",
                "biphenylene",
                "butylbenzene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "methylbenzyl alcohol",
                "naphthalene",
                "nitrosobenzene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 8,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Chrysene (C18H12)",
            "Pyrene (C16H10)",
            "Benzo[a]pyrene (C20H12)",
            "Coronene (C24H14)",
            "Tolvene (Methylbenzene, C7H8)",
            "Xylene (Dimethylbenzene, C8H10)",
            "Ethylbenzene (C8H10)",
            "Cumene (Isopropylbenzene, C9H12)",
            "Styrene (Vinylbenzene, C8H8)",
            "Phenol (C6H6O)",
            "Aniline (C6H7N)",
            "Benzoic Acid (C7H6O2)",
            "2-Naphthol (C10H8O)",
            "Indole (C8H7N)",
            "Quinoline (C9H7N)",
            "Isoquinoline (C9H7N)",
            "Acridine (C13H9N)",
            "Fluorene (C13H10)",
            "Biphenyl (C12H10)",
            "Diphenylmethane (C13H12)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Chrysene (C18H12)",
                "Pyrene (C16H10)",
                "Benzo[a]pyrene (C20H12)",
                "Coronene (C24H14)",
                "Ethylbenzene (C8H10)",
                "Cumene (Isopropylbenzene, C9H12)",
                "Fluorene (C13H10)",
                "Xylene (Dimethylbenzene, C8H10)",
                "Styrene (Vinylbenzene, C8H8)"
            ],
            "mismatches": [
                "Tolvene (Methylbenzene, C7H8)",
                "Phenol (C6H6O)",
                "Aniline (C6H7N)",
                "Benzoic Acid (C7H6O2)",
                "2-Naphthol (C10H8O)",
                "Indole (C8H7N)",
                "Quinoline (C9H7N)",
                "Isoquinoline (C9H7N)",
                "Acridine (C13H9N)",
                "Biphenyl (C12H10)",
                "Diphenylmethane (C13H12)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "1-Isopropyl-2-methylbenzene",
                "1-Methyl-2-propylbenzene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "2,3,5-trimethylhydroquinone",
                "2-Isopropyl-N-methylaniline",
                "2-Isopropylaniline",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "4-propylbenzoic acid",
                "4-t-Butylbenzoic acid",
                "5-phenyldodecane",
                "6-Nitrobenzo[a]pyrene",
                "7-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "9-Hydroxybenzo[a]pyrene",
                "Benzenepropanoic acid, 3,5-bis(1,1-dimethylethyl)-4-hydroxy-, methyl ester",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "cumene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "isobutylbenzene",
                "methylbenzene",
                "naphthalene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetramethylbenzene",
                "toluene-d8",
                "vinylarene",
                "xylene"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Biphenyl (C12H10)",
            "Styrene (C8H8)",
            "Cumene (C9H12)",
            "Ethylbenzene (C8H10)",
            "Aniline (C6H5NH2)",
            "Phenol (C6H5OH)",
            "Benzoic Acid (C7H6O2)",
            "Chlorobenzene (C6H5Cl)",
            "Nitrobenzene (C6H5NO2)",
            "Acetophenone (C8H8O)",
            "Benzaldehyde (C7H6O)",
            "Benzonitrile (C7H5N)",
            "Benzyl Alcohol (C7H8O)",
            "Quinoline (C9H7N)",
            "Isoquinoline (C9H7N)",
            "Pyrene (C16H10)",
            "Benz[a]anthracene (C18H12)",
            "Fluorene (C13H10)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "xylene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "styrene",
                "cumene",
                "ethylbenzene",
                "pyrene",
                "fluorene"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Aniline (C6H5NH2)",
                "Phenol (C6H5OH)",
                "Benzoic Acid (C7H6O2)",
                "Chlorobenzene (C6H5Cl)",
                "Nitrobenzene (C6H5NO2)",
                "Acetophenone (C8H8O)",
                "Benzaldehyde (C7H6O)",
                "Benzonitrile (C7H5N)",
                "Benzyl Alcohol (C7H8O)",
                "Quinoline (C9H7N)",
                "Isoquinoline (C9H7N)",
                "Benz[a]anthracene (C18H12)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "10-Hydroxybenzo[a]pyrene",
                "2,2',7,7'-Tetrachlorohypericin",
                "2,3,5-trimethylhydroquinone",
                "2,4-Dihydroxy-3,6-dimethylbenzaldehyde",
                "2-Isopropyl-N-methylaniline",
                "2-Isopropylaniline",
                "2-[3-(2-cyanopropan-2-yl)-5-(1,2,4-triazol-1-ylmethyl)phenyl]-3-hydroxy-2-methylpropanenitrile",
                "2-[3-(2-cyanopropan-2-yl)-5-(hydroxymethyl)phenyl]-2-methylpropanenitrile",
                "2-methylbenzyl alcohol",
                "3,5-dimethylbenzaldehyde",
                "3-(3,5-ditert-butyl-4-hydroxyphenyl)-2-isocyano-2-propenenitrile",
                "3-methylbenzyl alcohol",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-(4-hydroxyphenyl)-4-methyl-2-pentanone",
                "4-Propylphenol",
                "4-propylbenzoic acid",
                "4-t-Butylbenzoic acid",
                "6-Nitrobenzo[a]pyrene",
                "7-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "9-Hydroxybenzo[a]pyrene",
                "Benzenepropanoic acid, 3,5-bis(1,1-dimethylethyl)-4-hydroxy-, methyl ester",
                "Dibenzo[a,h]pyrene",
                "N-(2-methoxycyclohexyl)-2,5-dimethylaniline",
                "N-(2-methoxycyclohexyl)-3,4-dimethylaniline",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "biphenylene",
                "cumene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "methylbenzene",
                "methylbenzyl alcohol",
                "naphthalene",
                "nitrosobenzene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Phenanthrene (C14H10)",
            "Anthracene (C14H10)",
            "Biphenyl (C12H10)",
            "Phenol (C6H5OH)",
            "Styrene (C8H8)",
            "Aniline (C6H5NH2)",
            "Cresol (C7H8O)",
            "Fluorene (C13H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Triphenylene (C18H12)",
            "Acenaphthene (C12H10)",
            "Acenaphthylene (C12H8)",
            "Indole (C8H7N)",
            "Carbazole (C13H9N)",
            "Furan (C4H4O)",
            "Benzofuran (C8H6O)",
            "Benzothiophene (C8H6S)",
            "Benzimidazole (C7H6N2)",
            "Naphthylamine (C10H9N)",
            "Dibenzothiophene (C12H8S)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "xylene",
                "naphthalene",
                "phenanthrene",
                "anthracene",
                "fluorene",
                "pyrene",
                "chrysene",
                "triphenylene",
                "acenaphthylene"
            ],
            "mismatches": [
                "biphenyl",
                "phenol",
                "styrene",
                "aniline",
                "cresol",
                "acenaphthene",
                "indole",
                "carbazole",
                "furan",
                "benzofuran",
                "benzothiophene",
                "benzimidazole",
                "naphthylamine",
                "dibenzothiophene"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-(2',6'-dimethylphenyl)-2-n-propyl-1,2-dihydropyridazine-3,6-dione",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "2,3-Dimethylbenzamide",
                "2-Isopropyl-1,4-benzenediol",
                "2-Isopropyl-N-methylaniline",
                "3-(3,4-dimethylanilino)-1-thiophen-2-yl-1-propanone",
                "3-(3,5-dimethylanilino)-5-(2-furanyl)-1-cyclohex-2-enone",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Dibenzo[h,rst]pentaphene",
                "N-(2-methoxycyclohexyl)-2,5-dimethylaniline",
                "N-(2-methoxycyclohexyl)-3,4-dimethylaniline",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acenaphthylene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "biphenylene",
                "chrysene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 11,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Fluoranthene (C16H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Perylene (C20H12)",
            "Coronene (C24H12)",
            "Hexacene (C26H14)",
            "Heptacene (C28H16)",
            "Octacene (C30H18)",
            "Nonacene (C32H20)",
            "Decacene (C34H22)",
            "Undecacene (C36H24)",
            "Dodecacene (C38H26)",
            "Tridecacene (C40H28)",
            "Tetracene (C14H10)",
            "Pentacene (C22H14)",
            "Hexabenzocoronene (C42H18)",
            "Ovalene (C32H14)",
            "Circulene (C40H16)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Fluoranthene (C16H10)",
                "Chrysene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Perylene (C20H12)",
                "Coronene (C24H12)",
                "Hexacene (C26H14)",
                "Heptacene (C28H16)",
                "Octacene (C30H18)",
                "Nonacene (C32H20)",
                "Ovalene (C32H14)",
                "Pentacene (C22H14)"
            ],
            "mismatches": [
                "Decacene (C34H22)",
                "Undecacene (C36H24)",
                "Dodecacene (C38H26)",
                "Tridecacene (C40H28)",
                "Hexabenzocoronene (C42H18)",
                "Circulene (C40H16)"
            ],
            "true_referents": [
                "10-Hydroxybenzo[a]pyrene",
                "4-tert-Octylphenol monoethoxylate",
                "8-Hydroxybenzo[a]pyrene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "anthracene",
                "aromatic annulene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "chrysene",
                "circumcircumpyrene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "heptacene",
                "heptaphene",
                "hexacene",
                "hexacyclopentacoronene",
                "hexamethylbenzene",
                "hexaphene",
                "naphthalene",
                "nonacene",
                "octacene",
                "octaphenylene",
                "ovalene",
                "pentacene",
                "pentaphene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "quaterrylene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 17,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Phenanthrene (C14H10)",
            "Anthracene (C14H10)",
            "Pyrene (C16H10)",
            "Fluorene (C13H10)",
            "Fluoranthene (C16H10)",
            "Chrysene (C18H12)",
            "Biphenyl (C12H10)",
            "Stilbene (C14H12)",
            "Perylene (C20H12)",
            "Indene (C9H8)",
            "Indole (C8H7N)",
            "Quinoline (C9H7N)",
            "Phenol (C6H6O)",
            "Aniline (C6H7N)",
            "Naphthol (C10H8O)",
            "Naphthylamine (C10H9N)",
            "Phenylamine (C6H7N)",
            "Phenylmethanol (C7H8O)",
            "Phenylacetic acid (C8H8O2)",
            "Phenylalanine (C9H11NO2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Phenanthrene (C14H10)",
                "Anthracene (C14H10)",
                "Pyrene (C16H10)",
                "Fluorene (C13H10)",
                "Fluoranthene (C16H10)",
                "Chrysene (C18H12)",
                "Perylene (C20H12)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Stilbene (C14H12)",
                "Indene (C9H8)",
                "Indole (C8H7N)",
                "Quinoline (C9H7N)",
                "Phenol (C6H6O)",
                "Aniline (C6H7N)",
                "Naphthol (C10H8O)",
                "Naphthylamine (C10H9N)",
                "Phenylamine (C6H7N)",
                "Phenylmethanol (C7H8O)",
                "Phenylacetic acid (C8H8O2)",
                "Phenylalanine (C9H11NO2)"
            ],
            "true_referents": [
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "1-Methyl-2-propylbenzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "2-Isopropyl-N-methylaniline",
                "2-Isopropylaniline",
                "2-Methyl-1-phenyl-2-propanol",
                "3,4-Dihydroxyphenylacetone",
                "3,4-dimethylbenzyl alcohol",
                "3-Phenyl-1-propanol",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "5-phenylundecane",
                "6-Nitrobenzo[a]pyrene",
                "6-phenylundecane",
                "7-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "9-Hydroxybenzo[a]pyrene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "N3-[(4-tert-butylphenyl)methylideneamino]-1,2,4-triazole-3,4-diamine",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "biphenylene",
                "chrysene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "propylbenzene",
                "pyranthrene",
                "pyrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C\u2086H\u2086)",
            "Toluene (Methylbenzene)",
            "Aniline (Aminobenzene)",
            "Phenol",
            "Naphthalene",
            "Anthracene",
            "Phenanthrene",
            "Styrene (Ethenylbenzene)",
            "o-Xylene",
            "m-Xylene",
            "p-Xylene",
            "Ethylbenzene",
            "Cumene (Isopropylbenzene)",
            "Indene",
            "Fluorene",
            "Pyrene",
            "Chrysene",
            "Biphenyl",
            "Triphenylmethane",
            "Mesitylene (1,3,5-Trimethylbenzene)",
            "Resorcinol (1,3-Dihydroxybenzene)",
            "Carbazole",
            "Diphenylmethane",
            "Benzoic Acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C\u2086H\u2086)",
                "Toluene (Methylbenzene)",
                "Naphthalene",
                "Anthracene",
                "Phenanthrene",
                "Styrene (Ethenylbenzene)",
                "o-Xylene",
                "m-Xylene",
                "p-Xylene",
                "Ethylbenzene",
                "Cumene (Isopropylbenzene)",
                "Fluorene",
                "Pyrene",
                "Chrysene"
            ],
            "mismatches": [
                "Aniline (Aminobenzene)",
                "Phenol",
                "Indene",
                "Biphenyl",
                "Triphenylmethane",
                "Mesitylene (1,3,5-Trimethylbenzene)",
                "Resorcinol (1,3-Dihydroxybenzene)",
                "Carbazole",
                "Diphenylmethane",
                "Benzoic Acid"
            ],
            "true_referents": [
                "(S)-eberconazole",
                "1,2,3-trimethylbenzene",
                "1,2,4-trimethylbenzene",
                "1,3,5-trimethylbenzene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "1H-indene",
                "2-Isopropyl-N-methylaniline",
                "2-butyl-5-propylresorcinol",
                "3-Hydroxybenzo[a]pyrene",
                "4,5-Dihydroxybenzo[a]pyrene",
                "4-Propylphenol",
                "4-propylbenzoic acid",
                "4-t-Butylbenzoic acid",
                "Naphtho[2,3-e]pyrene",
                "anthracene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "butylbenzene",
                "chrysene",
                "cumene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "isobutylbenzene",
                "m-xylene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "propylbenzene",
                "proxazole",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetramethylbenzene",
                "tetraphenylene",
                "toluene",
                "trimethylbenzene",
                "triphenylene",
                "xylene"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Styrene (C8H8)",
            "Xylene (C8H10)",
            "Mesitylene (C9H12)",
            "Biphenyl (C12H10)",
            "Fluorene (C13H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Coronene (C24H12)",
            "Azulene (C10H8)",
            "Indene (C9H8)",
            "Acenaphthylene (C12H8)",
            "Fluoranthene (C16H10)",
            "Triphenylene (C18H12)",
            "Perylene (C20H12)",
            "Ovalene (C32H14)",
            "Tetracene (C18H12)",
            "Pentacene (C22H14)",
            "Fullerene (C60)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Styrene (C8H8)",
                "Fluorene (C13H10)",
                "Chrysene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Coronene (C24H12)",
                "Azulene (C10H8)",
                "Acenaphthylene (C12H8)",
                "Fluoranthene (C16H10)",
                "Triphenylene (C18H12)",
                "Perylene (C20H12)",
                "Ovalene (C32H14)",
                "Pentacene (C22H14)"
            ],
            "mismatches": [
                "Xylene (C8H10)",
                "Mesitylene (C9H12)",
                "Biphenyl (C12H10)",
                "Indene (C9H8)",
                "Tetracene (C18H12)",
                "Fullerene (C60)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "4-tert-Octylphenol monoethoxylate",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "alkylbenzene",
                "anthracene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "heptacene",
                "hexacene",
                "m-xylene",
                "naphthalene",
                "o-xylene",
                "ovalene",
                "p-xylene",
                "pentacene",
                "pentaphene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "quaterrylene",
                "styrene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Styrene (C8H8)",
            "Ethylbenzene (C8H10)",
            "Cumene (C9H12)",
            "Biphenyl (C12H10)",
            "Indene (C9H8)",
            "Azulene (C10H8)",
            "Acenaphthene (C12H10)",
            "Fluorene (C13H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Triphenylene (C18H12)",
            "Coronene (C24H12)",
            "Ovalene (C32H14)",
            "Hexahelicene (C26H16)",
            "Corannulene (C20H10)",
            "Sumanene (C21H12)",
            "Fullerene (C60)",
            "Fullerene (C70)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Styrene (C8H8)",
                "Ethylbenzene (C8H10)",
                "Cumene (C9H12)",
                "Azulene (C10H8)",
                "Fluorene (C13H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Triphenylene (C18H12)",
                "Coronene (C24H12)",
                "Ovalene (C32H14)",
                "Hexahelicene (C26H16)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Indene (C9H8)",
                "Acenaphthene (C12H10)",
                "Corannulene (C20H10)",
                "Sumanene (C21H12)",
                "Fullerene (C60)",
                "Fullerene (C70)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "4-tert-Octylphenol monoethoxylate",
                "8H-cyclopenta[a]acenaphthylene",
                "Dibenzo[a,h]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "cumene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "heptahelicene",
                "hexacene",
                "hexahelicene",
                "hexaphenylene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "octacene",
                "ovalene",
                "p-xylene",
                "pentacene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "quaterrylene",
                "styrene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Triphenylene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Perylene (C20H12)",
            "Coronene (C24H12)",
            "Biphenyl (C12H10)",
            "Diphenylmethane (C13H12)",
            "Triphenylmethane (C19H16)",
            "Fluorene (C13H10)",
            "Phenanthroline (C12H8N2)",
            "Acridine (C13H9N)",
            "Carbazole (C12H9N)",
            "Dibenzofuran (C12H8O)",
            "Dibenzothiophene (C12H8S)",
            "Xanthene (C13H10O)",
            "Thioxanthene (C13H10S)",
            "Azulene (C10H8)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Triphenylene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Perylene (C20H12)",
                "Coronene (C24H12)",
                "Fluorene (C13H10)",
                "Azulene (C10H8)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Diphenylmethane (C13H12)",
                "Triphenylmethane (C19H16)",
                "Phenanthroline (C12H8N2)",
                "Acridine (C13H9N)",
                "Carbazole (C12H9N)",
                "Dibenzofuran (C12H8O)",
                "Dibenzothiophene (C12H8S)",
                "Xanthene (C13H10O)",
                "Thioxanthene (C13H10S)"
            ],
            "true_referents": [
                "1-(4-tert-butylphenoxy)-3-(1H-1,2,4-triazol-5-ylthio)-2-propanol",
                "1-ethyl-2,4-dimethyl-Benzene",
                "10-Hydroxybenzo[a]pyrene",
                "3-(3,4-dimethylanilino)-1-thiophen-2-yl-1-propanone",
                "5-phenyldodecane",
                "Benzo[ghi]perylene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Dibenzo[h,rst]pentaphene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "bisanthene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "dibenzo[a,l]pyrene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "tetramethylbenzene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C6H5CH3)",
            "Ethylbenzene (C6H5CH2CH3)",
            "Xylene (C6H4(CH3)2)",
            "Cumene (C6H5CH(CH3)2)",
            "Styrene (C6H5CH=CH2)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Fluorene (C13H10)",
            "Acenaphthene (C12H10)",
            "Fluoranthene (C16H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Dibenzo[a,h]anthracene (C22H14)",
            "Coronene (C24H12)",
            "Triphenylene (C18H12)",
            "Benzo[a]anthracene (C18H12)",
            "Benzo[b]fluoranthene (C20H12)",
            "Benzo[k]fluoranthene (C20H12)",
            "Indeno[1,2,3-cd]pyrene (C22H12)",
            "Benzo[ghi]perylene (C22H12)",
            "Dibenzo[a,i]pyrene (C24H14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C6H5CH3)",
                "Ethylbenzene (C6H5CH2CH3)",
                "Xylene (C6H4(CH3)2)",
                "Cumene (C6H5CH(CH3)2)",
                "Styrene (C6H5CH=CH2)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Fluorene (C13H10)",
                "Fluoranthene (C16H10)",
                "Chrysene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Dibenzo[a,h]anthracene (C22H14)",
                "Coronene (C24H12)",
                "Triphenylene (C18H12)",
                "Benzo[b]fluoranthene (C20H12)",
                "Benzo[ghi]perylene (C22H12)",
                "Dibenzo[a,i]pyrene (C24H14)"
            ],
            "mismatches": [
                "Acenaphthene (C12H10)",
                "Benzo[a]anthracene (C18H12)",
                "Benzo[k]fluoranthene (C20H12)",
                "Indeno[1,2,3-cd]pyrene (C22H12)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "10-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "cumene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Phenanthrene",
            "Fluorene",
            "Chrysene",
            "Perylene",
            "Tetraphenylmethane",
            "Corannulene",
            "C60 fullerene",
            "Naphthalene",
            "Anthracene",
            "Tetraphenylporphyrin",
            "Pentacene",
            "Rubrene",
            "Coronene",
            "Triphenylene",
            "Cholanthrene",
            "Benzo[a]pyrene",
            "Benzo[c]phenanthrene",
            "Benzo[e]pyrene",
            "Benzo[g]chrysene",
            "Pyrene",
            "Indene",
            "Isoindene",
            "Indanthrene"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Phenanthrene",
                "Fluorene",
                "Chrysene",
                "Perylene",
                "Naphthalene",
                "Anthracene",
                "Pentacene",
                "Coronene",
                "Triphenylene",
                "Benzo[a]pyrene",
                "Benzo[c]phenanthrene",
                "Benzo[e]pyrene",
                "Pyrene"
            ],
            "mismatches": [
                "Tetraphenylmethane",
                "Corannulene",
                "C60 fullerene",
                "Tetraphenylporphyrin",
                "Rubrene",
                "Cholanthrene",
                "Benzo[g]chrysene",
                "Indene",
                "Isoindene",
                "Indanthrene"
            ],
            "true_referents": [
                "11,12-epoxy-3-methylcholanthrene",
                "1H-indene",
                "3-methylcholanthrene",
                "Anthanthrene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,e]pyrene",
                "Naphtho[2,3-e]pyrene",
                "[14]annulene",
                "aceanthrylene",
                "anthracene",
                "aromatic annulene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "chrysene",
                "circumcircumcoronene",
                "circumcoronene",
                "coronene",
                "cumene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "heptacene",
                "hexacene",
                "isoaminile",
                "naphthalene",
                "octaphenylene",
                "pentacene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "rubicene",
                "tetramethylbenzene",
                "tetranaphthylene",
                "tetraphene",
                "tetraphenylene",
                "triphenylene"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylbenzene (C8H10)",
            "Xylene (C8H10)",
            "Cumene (C9H12)",
            "Naphthalene (C10H8)",
            "Azulene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Triphenylene (C18H12)",
            "Coronene (C24H12)",
            "Ovaleyne (C24H12)",
            "Cyclohexylbenzene (C12H16)",
            "Biphenyl (C12H10)",
            "Terphenyl (C18H14)",
            "Quarterphenyl (C24H18)",
            "Perylene (C20H12)",
            "Pentaphene (C22H14)",
            "Hexaphene (C24H16)",
            "Heptaphene (C26H18)",
            "Octaphene (C28H20)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Ethylbenzene (C8H10)",
                "Xylene (C8H10)",
                "Cumene (C9H12)",
                "Naphthalene (C10H8)",
                "Azulene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Triphenylene (C18H12)",
                "Coronene (C24H12)",
                "Perylene (C20H12)",
                "Pentaphene (C22H14)",
                "Hexaphene (C24H16)",
                "Heptaphene (C26H18)",
                "Octaphene (C28H20)"
            ],
            "mismatches": [
                "Ovaleyne (C24H12)",
                "Cyclohexylbenzene (C12H16)",
                "Biphenyl (C12H10)",
                "Terphenyl (C18H14)",
                "Quarterphenyl (C24H18)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1,3-Di-tert-butylbenzene",
                "1-Methyl-2-propylbenzene",
                "10-Hydroxybenzo[a]pyrene",
                "4-tert-Octylphenol",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[h,rst]pentaphene",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "anthracene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[c]phenanthrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "cumene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "heptaphene",
                "hexaphene",
                "hexaphenylene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "octacene",
                "octaphene",
                "octaphenylene",
                "ovalene",
                "p-xylene",
                "pentaphene",
                "perylene",
                "phenanthrene",
                "propylbenzene",
                "pyranthrene",
                "pyrene",
                "tetramethylbenzene",
                "tetraphene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 18,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "arene",
        "domain": "chemistry",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]