[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Fluorene (C13H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Biphenyl (C12H10)",
            "Naphthylphenanthrene (C24H16)",
            "Acenaphthylene (C12H8)",
            "Acenaphthene (C14H10)",
            "Fluoranthenes (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Coronene (C24H12)",
            "Pyrene (C16H10)",
            "Perylene (C20H12)",
            "Acenaphthylene (C12H8)",
            "Dibenzo[a,h]anthracene (C22H14)",
            "Dibenzo[a,l]pyrene (C22H14)",
            "Benzo[k]fluoranthene (C20H12)",
            "Indeno[1,2,3-cd]pyrene (C22H14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Fluorene (C13H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Acenaphthylene (C12H8)",
                "Benzo[a]pyrene (C20H12)",
                "Coronene (C24H12)",
                "Perylene (C20H12)",
                "Dibenzo[a,h]anthracene (C22H14)",
                "Dibenzo[a,l]pyrene (C22H14)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Naphthylphenanthrene (C24H16)",
                "Acenaphthene (C14H10)",
                "Fluoranthenes (C18H12)",
                "Benzo[k]fluoranthene (C20H12)",
                "Indeno[1,2,3-cd]pyrene (C22H14)"
            ],
            "true_referents": [
                "10-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "dibenzo[a,l]pyrene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "pentaphene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 15,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylbenzene (C8H10)",
            "Styrene (C8H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Anthracene (C14H10)",
            "Indene (C9H8)",
            "Fluorene (C13H10)",
            "Acenaphthene (C12H10)",
            "Acenaphthylene (C12H8)",
            "Phenanthrene (C14H10)",
            "Perylene (C20H12)",
            "Fulvene (C6H6)",
            "Cyclopentadiene (C5H6)",
            "Tetralin (C10H12)",
            "Diphenyl (C12H10)",
            "Biphenyl (C12H10)",
            "Carbazole (C12H9N)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Ethylbenzene (C8H10)",
                "Styrene (C8H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Acenaphthylene (C12H8)",
                "Fluorene (C13H10)",
                "Perylene (C20H12)"
            ],
            "mismatches": [
                "Indene (C9H8)",
                "Acenaphthene (C12H10)",
                "Fulvene (C6H6)",
                "Cyclopentadiene (C5H6)",
                "Tetralin (C10H12)",
                "Diphenyl (C12H10)",
                "Biphenyl (C12H10)",
                "Carbazole (C12H9N)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "5-phenyldodecane",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "cyclopenta[l]phenanthrene",
                "cyclopentadienide",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "propylbenzene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetramethylbenzene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 14,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Biphenyl (C12H10)",
            "Styrene (C8H8)",
            "Indene (C9H8)",
            "Fluorene (C13H10)",
            "Azulene (C10H8)",
            "Acenaphthene (C12H10)",
            "Fluoranthen (C16H10)",
            "Benzo[a]pyrene (C20H12)",
            "Chrysene (C18H12)",
            "Coronene (C24H12)",
            "Perylene (C20H12)",
            "Phenylacetylene (C8H6)",
            "Benzofuran (C8H6O)",
            "Indole (C8H7N)",
            "Pyrrole (C4H5N)",
            "Thiophene (C4H4S)",
            "Furan (C4H4O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Styrene (C8H8)",
                "Fluorene (C13H10)",
                "Azulene (C10H8)",
                "Benzo[a]pyrene (C20H12)",
                "Chrysene (C18H12)",
                "Coronene (C24H12)",
                "Perylene (C20H12)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Indene (C9H8)",
                "Acenaphthene (C12H10)",
                "Fluoranthen (C16H10)",
                "Phenylacetylene (C8H6)",
                "Benzofuran (C8H6O)",
                "Indole (C8H7N)",
                "Pyrrole (C4H5N)",
                "Thiophene (C4H4S)",
                "Furan (C4H4O)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "3-(3,4-dimethylanilino)-1-thiophen-2-yl-1-propanone",
                "3-(3,5-dimethylanilino)-5-(2-furanyl)-1-cyclohex-2-enone",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "heptaphene",
                "hexaphenylene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (Bz)",
            "Naphthalene (Nap)",
            "Anthracene (Ant)",
            "Phenanthrene (Phen)",
            "Chrysene (Chr)",
            "Benzo[a]pyrene (BaP)",
            "Fluoranthene (Flu)",
            "Pyrene (Pyr)",
            "Indene (Ind)",
            "Styrene (Sty)",
            "Toluene (Tol)",
            "Ethylbenzene (EtBz)",
            "Xylene (Xyl)",
            "Cumene (Cum)",
            "Mesitylene (Mes)",
            "Pseudocumene (Pcum)",
            "Trimethylbenzene (TMB)",
            "Tetralin (Tet)",
            "Decahydronaphthalene (Dec)",
            "Acenaphthene (Ace)",
            "Fluorene (Fluo)",
            "Dibenzofuran (DBF)",
            "Dibenzothiophene (DBT)",
            "Benzo[c]phenanthrene (BCP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "chrysene",
                "benzo[a]pyrene",
                "fluoranthene",
                "pyrene",
                "styrene",
                "toluene",
                "ethylbenzene",
                "xylene",
                "cumene",
                "trimethylbenzene",
                "fluorene",
                "benzo[c]phenanthrene"
            ],
            "mismatches": [
                "indene",
                "mesitylene",
                "pseudocumene",
                "tetralin",
                "decahydronaphthalene",
                "acenaphthene",
                "dibenzofuran",
                "dibenzothiophene"
            ],
            "true_referents": [
                "1,2,3-trimethylbenzene",
                "1,2,4-trimethylbenzene",
                "1,3-Di-tert-butylbenzene",
                "1-vinylnaphthalene",
                "1H-indene",
                "Anthanthrene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[h,rst]pentaphene",
                "Erythrohydrobupropion",
                "Naphtho[2,3-e]pyrene",
                "Trametol",
                "aceanthrylene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "butylbenzene",
                "chrysene",
                "circumcircumpyrene",
                "cumene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "helicene",
                "m-xylene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "pentamethylbenzene",
                "phenalene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "trimethylbenzene",
                "vinylarene",
                "vinylnaphthalene",
                "xylene"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Biphenyl (C12H10)",
            "Styrene (C8H8)",
            "Xylene (C8H10)",
            "Ethylbenzene (C8H10)",
            "Cumene (C9H12)",
            "Mesitylene (C9H12)",
            "Durene (C10H14)",
            "Acenaphthene (C12H10)",
            "Fluorene (C13H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Benzo[e]pyrene (C20H12)",
            "Coronene (C24H12)",
            "Perylene (C20H12)",
            "Indene (C9H8)",
            "Indole (C8H7N)",
            "Quinoline (C9H7N)",
            "Isoquinoline (C9H7N)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "styrene",
                "ethylbenzene",
                "cumene",
                "durene",
                "fluorene",
                "pyrene",
                "chrysene",
                "benzo[a]pyrene",
                "benzo[e]pyrene",
                "coronene",
                "perylene"
            ],
            "mismatches": [
                "biphenyl",
                "xylene",
                "mesitylene",
                "acenaphthene",
                "indene",
                "indole",
                "quinoline",
                "isoquinoline"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "2,3,5-trimethylhydroquinone",
                "2-Isopropyl-N-methylaniline",
                "2-Isopropylaniline",
                "6-Nitrobenzo[a]pyrene",
                "7-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "9-Hydroxybenzo[a]pyrene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acene",
                "acephenanthrylene",
                "alkylbenzene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "cumene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "durene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "m-xylene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Xylene (C8H10)",
            "Styrene (C8H8)",
            "Biphenyl (C12H10)",
            "Fluorene (C13H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Triphenylene (C18H12)",
            "Coronene (C24H12)",
            "Acridine (C13H9N)",
            "Phenol (C6H5OH)",
            "Indole (C8H7N)",
            "Furan (C4H4O)",
            "Benzofuran (C8H6O)",
            "Benzothiophene (C8H6S)",
            "Naphthofluorescein (C20H14O2)",
            "Cymene (C10H12)",
            "Dibenzothiophene (C12H8S)",
            "Quinoline (C9H7N)",
            "Isoquinoline (C9H7N)",
            "Carbazole (C13H9N)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "xylene",
                "styrene",
                "fluorene",
                "pyrene",
                "chrysene",
                "triphenylene",
                "coronene"
            ],
            "mismatches": [
                "biphenyl",
                "acridine",
                "phenol",
                "indole",
                "furan",
                "benzofuran",
                "benzothiophene",
                "naphthofluorescein",
                "cymene",
                "dibenzothiophene",
                "quinoline",
                "isoquinoline",
                "carbazole"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "2,3,5-trimethylhydroquinone",
                "2-Isopropyl-N-methylaniline",
                "2-Isopropylaniline",
                "3-(3,4-dimethylanilino)-1-thiophen-2-yl-1-propanone",
                "3-(3,5-dimethylanilino)-5-(2-furanyl)-1-cyclohex-2-enone",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "6-Nitrobenzo[a]pyrene",
                "7,12-dimethyltetraphene",
                "7-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "9-Hydroxybenzo[a]pyrene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[h,rst]pentaphene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "p-Cymen-8-ol",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 12,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Fluoranthene (C16H10)",
            "Perylene (C20H12)",
            "Coronene (C24H12)",
            "Styrene (C8H8)",
            "Xylene (C8H10)",
            "Ethylbenzene (C8H10)",
            "Propylbenzene (C9H12)",
            "Butylbenzene (C10H14)",
            "Pentylbenzene (C11H16)",
            "Hexylbenzene (C12H18)",
            "Heptylbenzene (C13H20)",
            "Octylbenzene (C14H22)",
            "Nonane (C15H24)",
            "Decylbenzene (C16H26)",
            "Undecylbenzene (C17H28)",
            "Dodecylbenzene (C18H30)",
            "Tridecylbenzene (C19H32)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Fluoranthene (C16H10)",
                "Perylene (C20H12)",
                "Coronene (C24H12)",
                "Styrene (C8H8)",
                "Xylene (C8H10)",
                "Ethylbenzene (C8H10)",
                "Propylbenzene (C9H12)",
                "Butylbenzene (C10H14)"
            ],
            "mismatches": [
                "Pentylbenzene (C11H16)",
                "Hexylbenzene (C12H18)",
                "Heptylbenzene (C13H20)",
                "Octylbenzene (C14H22)",
                "Nonane (C15H24)",
                "Decylbenzene (C16H26)",
                "Undecylbenzene (C17H28)",
                "Dodecylbenzene (C18H30)",
                "Tridecylbenzene (C19H32)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1,3-Di-tert-butylbenzene",
                "1-Methyl-2-propylbenzene",
                "10-Hydroxybenzo[a]pyrene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "alkylbenzene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "butylbenzene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "hexamethylbenzene",
                "methylbenzene",
                "naphthalene",
                "nonacene",
                "o-xylene",
                "p-xylene",
                "pentamethylbenzene",
                "perylene",
                "phenanthrene",
                "propylbenzene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Phenanthrene (C14H10)",
            "Anthracene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Fluorene (C13H10)",
            "Indene (C9H8)",
            "Styrene (C8H8)",
            "Xylene (C8H10)",
            "Aniline (C6H7N)",
            "Naphthoquinone (C10H6O2)",
            "Naphthols (C10H8O)",
            "Phenol (C6H6O)",
            "Biphenyl (C12H10)",
            "Naphthalene derivatives",
            "Benzene derivatives",
            "Toluene derivatives",
            "Polycyclic aromatic hydrocarbons",
            "Aromatic compounds",
            "Aromatic hydrocarbons",
            "Organic compounds",
            "Chemical compounds"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "naphthalene",
                "phenanthrene",
                "anthracene",
                "pyrene",
                "chrysene",
                "fluorene",
                "styrene"
            ],
            "mismatches": [
                "indene",
                "xylene",
                "aniline",
                "naphthoquinone",
                "naphthols",
                "phenol",
                "biphenyl",
                "naphthalene derivatives",
                "benzene derivatives",
                "toluene derivatives",
                "polycyclic aromatic hydrocarbons",
                "aromatic compounds",
                "aromatic hydrocarbons",
                "organic compounds",
                "chemical compounds"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "2-Isopropyl-N-methylaniline",
                "4,4a,5,6,7,8-Hexahydro-6-(p-hydroxyphenyl)-2(3H)-naphthalenone",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "8-Hydroxybenzo[a]pyrene",
                "Dibenzo[a,h]pyrene",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "butylbenzene",
                "chrysene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "toluene",
                "toluene-d8",
                "xylene"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylbenzene (C8H10)",
            "o-Xylene (C8H10)",
            "m-Xylene (C8H10)",
            "p-Xylene (C8H10)",
            "Cumene (Isopropylbenzene, C9H12)",
            "Naphthalene (C10H8)",
            "1-Methylnaphthalene (C11H10)",
            "2-Methylnaphthalene (C11H10)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Fluorene (C13H10)",
            "Biphenyl (C12H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Styrene (C8H8)",
            "Mesitylene (C9H12)",
            "Hexamethylbenzene (C12H18)",
            "Terphenyl (C18H14)",
            "Triphenylene (C18H12)",
            "Phenylacetylene (C8H6)",
            "Hexa-peri-hexabenzocoronene (C54H18)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "benzene",
                "toluene",
                "ethylbenzene",
                "o-xylene",
                "m-xylene",
                "p-xylene",
                "cumene",
                "naphthalene",
                "anthracene",
                "phenanthrene",
                "fluorene",
                "pyrene",
                "chrysene",
                "benzo[a]pyrene",
                "styrene",
                "hexamethylbenzene",
                "triphenylene"
            ],
            "mismatches": [
                "1-Methylnaphthalene (C11H10)",
                "2-Methylnaphthalene (C11H10)",
                "Biphenyl (C12H10)",
                "Mesitylene (C9H12)",
                "Terphenyl (C18H14)",
                "Phenylacetylene (C8H6)",
                "Hexa-peri-hexabenzocoronene (C54H18)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1,3-Di-tert-butylbenzene",
                "1-Isopropyl-2-methylbenzene",
                "1-vinylnaphthalene",
                "10-Hydroxybenzo[a]pyrene",
                "2-vinylnaphthalene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "alkylbenzene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "cumene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "hexacyclopentacoronene",
                "hexamethylbenzene",
                "hexaphenylene",
                "isobutylbenzene",
                "m-xylene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetramethylbenzene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "vinylnaphthalene",
                "xylene"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Styrene (C8H8)",
            "Xylene (C8H10)",
            "Biphenyl (C12H10)",
            "Indene (C9H8)",
            "Fluorene (C13H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Azulene (C10H8)",
            "Acenaphthylene (C12H8)",
            "Fluoranthene (C16H10)",
            "Triphenylene (C18H12)",
            "Coronene (C24H12)",
            "Perylene (C20H12)",
            "Indole (C8H7N)",
            "Quinoline (C9H7N)",
            "Isoquinoline (C9H7N)",
            "Furan (C4H4O)",
            "Thiophene (C4H4S)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Styrene (C8H8)",
                "Xylene (C8H10)",
                "Fluorene (C13H10)",
                "Chrysene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Azulene (C10H8)",
                "Acenaphthylene (C12H8)",
                "Fluoranthene (C16H10)",
                "Triphenylene (C18H12)",
                "Coronene (C24H12)",
                "Perylene (C20H12)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Indene (C9H8)",
                "Indole (C8H7N)",
                "Quinoline (C9H7N)",
                "Isoquinoline (C9H7N)",
                "Furan (C4H4O)",
                "Thiophene (C4H4S)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "2,3,5-trimethylhydroquinone",
                "2-Isopropyl-N-methylaniline",
                "2-Isopropylaniline",
                "3-(3,4-dimethylanilino)-1-thiophen-2-yl-1-propanone",
                "3-(3,5-dimethylanilino)-5-(2-furanyl)-1-cyclohex-2-enone",
                "6-Nitrobenzo[a]pyrene",
                "7-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "9-Hydroxybenzo[a]pyrene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "azulene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "heptaphene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylbenzene (C8H10)",
            "o-Xylene (C8H10)",
            "m-Xylene (C8H10)",
            "p-Xylene (C8H10)",
            "Styrene (C8H8)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Coronene (C24H12)",
            "Biphenyl (C12H10)",
            "Fluorene (C13H10)",
            "Acenaphthene (C12H10)",
            "Acenaphthylene (C12H8)",
            "Fluoranthene (C16H10)",
            "Benz[a]anthracene (C18H12)",
            "Triphenylene (C18H12)",
            "Benzo[b]fluoranthene (C20H12)",
            "Benzo[k]fluoranthene (C20H12)",
            "Indeno[1,2,3-cd]pyrene (C22H12)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Ethylbenzene (C8H10)",
                "o-Xylene (C8H10)",
                "m-Xylene (C8H10)",
                "p-Xylene (C8H10)",
                "Styrene (C8H8)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Coronene (C24H12)",
                "Acenaphthylene (C12H8)",
                "Fluoranthene (C16H10)",
                "Triphenylene (C18H12)",
                "Benzo[b]fluoranthene (C20H12)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Fluorene (C13H10)",
                "Acenaphthene (C12H10)",
                "Benz[a]anthracene (C18H12)",
                "Benzo[k]fluoranthene (C20H12)",
                "Indeno[1,2,3-cd]pyrene (C22H12)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "10-Hydroxybenzo[a]pyrene",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "m-xylene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Triphenylene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Perylene (C20H12)",
            "Pentacene (C22H14)",
            "Coronene (C24H12)",
            "Ovalene (C32H14)",
            "Circumcoronene (C54H18)",
            "Hexabenzo[bc,ef,kl,no,qr,uv]coronene (C42H18)",
            "Kekulene (C48H24)",
            "Dicyclopentapyrene (C26H16)",
            "Dibenzo[a,l]pyrene (C24H14)",
            "Benzo[a]anthracene (C18H12)",
            "Benzo[b]fluoranthene (C20H12)",
            "Benzo[k]fluoranthene (C20H12)",
            "Indeno[1,2,3-cd]pyrene (C22H12)",
            "Dibenzo[a,h]anthracene (C22H14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Triphenylene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Perylene (C20H12)",
                "Pentacene (C22H14)",
                "Coronene (C24H12)",
                "Ovalene (C32H14)",
                "Circumcoronene (C54H18)",
                "Kekulene (C48H24)",
                "Dibenzo[a,l]pyrene (C24H14)",
                "Benzo[b]fluoranthene (C20H12)",
                "Dibenzo[a,h]anthracene (C22H14)"
            ],
            "mismatches": [
                "Hexabenzo[bc,ef,kl,no,qr,uv]coronene (C42H18)",
                "Dicyclopentapyrene (C26H16)",
                "Benzo[a]anthracene (C18H12)",
                "Benzo[k]fluoranthene (C20H12)",
                "Indeno[1,2,3-cd]pyrene (C22H12)"
            ],
            "true_referents": [
                "10-Hydroxybenzo[a]pyrene",
                "4-tert-Octylphenol monoethoxylate",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Dibenzo[e,l]pyrene",
                "Naphtho[2,3-e]pyrene",
                "acene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "circumcircumcoronene",
                "circumcoronene",
                "coronene",
                "cyclopenta[l]phenanthrene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "dibenzo[a,l]pyrene",
                "ethylbenzene",
                "fluoranthene",
                "hexacene",
                "hexacyclopentacoronene",
                "kekulene",
                "naphthalene",
                "o-xylene",
                "ovalene",
                "p-xylene",
                "pentacene",
                "pentaphene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "quaterrylene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C6H5CH3)",
            "Xylene (C6H4(CH3)2)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Fluorene (C13H10)",
            "Acenaphthene (C12H10)",
            "Acenaphthylene (C12H8)",
            "Benzo[a]pyrene (C20H12)",
            "Benzo[b]fluoranthene (C20H12)",
            "Benzo[k]fluoranthene (C20H12)",
            "Dibenz[a,h]anthracene (C22H14)",
            "Indeno[1,2,3-cd]pyrene (C22H12)",
            "Chrysene (C18H12)",
            "Benzo[a]anthracene (C18H12)",
            "Fluoranthene (C16H10)",
            "Triphenylene (C18H12)",
            "Perylene (C20H12)",
            "Coronene (C24H12)",
            "Ovalene (C32H14)",
            "Benzo[ghi]perylene (C22H12)",
            "Dibenzo[a,l]pyrene (C24H14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C6H5CH3)",
                "Xylene (C6H4(CH3)2)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Fluorene (C13H10)",
                "Acenaphthylene (C12H8)",
                "Benzo[a]pyrene (C20H12)",
                "Benzo[b]fluoranthene (C20H12)",
                "Dibenz[a,h]anthracene (C22H14)",
                "Chrysene (C18H12)",
                "Fluoranthene (C16H10)",
                "Triphenylene (C18H12)",
                "Perylene (C20H12)",
                "Coronene (C24H12)",
                "Ovalene (C32H14)",
                "Benzo[ghi]perylene (C22H12)",
                "Dibenzo[a,l]pyrene (C24H14)"
            ],
            "mismatches": [
                "Acenaphthene (C12H10)",
                "Benzo[k]fluoranthene (C20H12)",
                "Indeno[1,2,3-cd]pyrene (C22H12)",
                "Benzo[a]anthracene (C18H12)"
            ],
            "true_referents": [
                "10-Hydroxybenzo[a]pyrene",
                "4-tert-Octylphenol monoethoxylate",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,e]pyrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "dibenzo[a,l]pyrene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "ovalene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "quaterrylene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C6H5CH3)",
            "Xylene (C6H4(CH3)2)",
            "Ethylbenzene (C6H5CH2CH3)",
            "Styrene (C6H5CH=CH2)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Fluorene (C13H8)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Perylene (C20H12)",
            "Coronene (C24H12)",
            "Fluoranthene (C16H10)",
            "Triphenylene (C18H12)",
            "Benzo[a]pyrene (C20H12)",
            "Benzo[e]pyrene (C20H12)",
            "Benzo[k]fluoranthene (C22H12)",
            "Indene (C8H6)",
            "Carbazole (C12H9N)",
            "Acenaphthene (C12H8)",
            "Acenaphthylene (C12H8)",
            "Phenol (C6H5OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C6H5CH3)",
                "Xylene (C6H4(CH3)2)",
                "Ethylbenzene (C6H5CH2CH3)",
                "Styrene (C6H5CH=CH2)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Fluorene (C13H8)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Perylene (C20H12)",
                "Coronene (C24H12)",
                "Fluoranthene (C16H10)",
                "Triphenylene (C18H12)",
                "Benzo[a]pyrene (C20H12)",
                "Benzo[e]pyrene (C20H12)",
                "Acenaphthylene (C12H8)"
            ],
            "mismatches": [
                "Benzo[k]fluoranthene (C22H12)",
                "Indene (C8H6)",
                "Carbazole (C12H9N)",
                "Acenaphthene (C12H8)",
                "Phenol (C6H5OH)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1-(4-hydroxyphenyl)propane-1,2-diol",
                "1-ethyl-2,4-dimethyl-Benzene",
                "10-Hydroxybenzo[a]pyrene",
                "1H-indene",
                "4-(3,5-Diphenylcyclohexyl)phenol",
                "4-Propylphenol",
                "8H-cyclopenta[a]acenaphthylene",
                "Benzo[ghi]perylene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[a]pyrene",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "benzo[e]pyrene",
                "biphenylene",
                "chrysene",
                "coronene",
                "dibenz[a,h]anthracene",
                "dibenzo[a,d][7]annulene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "perylene",
                "phenanthrene",
                "pyranthrene",
                "pyrene",
                "styrene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 18,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "arene",
        "domain": "chemistry",
        "response": [
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Ethylbenzene (C8H10)",
            "Xylene (C8H10)",
            "Cumene (C9H12)",
            "Naphthalene (C10H8)",
            "Anthracene (C14H10)",
            "Phenanthrene (C14H10)",
            "Pyrene (C16H10)",
            "Chrysene (C18H12)",
            "Triphenylene (C18H12)",
            "Biphenyl (C12H10)",
            "Terphenyl (C18H14)",
            "Fluorene (C13H10)",
            "Fluoranthene (C16H10)",
            "Acenaphthene (C12H10)",
            "Acenaphthylene (C12H8)",
            "Diphenylmethane (C13H12)",
            "Diphenylethane (C14H14)",
            "Stilbene (C14H12)",
            "Benzanthracene (C18H12)",
            "Cholanthrene (C20H16)",
            "Triphenylmethane (C19H16)",
            "Dibenzo(a,h)anthracene (C22H14)",
            "Dibenzo(def,mno)chrysene (C24H14)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Ethylbenzene (C8H10)",
                "Xylene (C8H10)",
                "Cumene (C9H12)",
                "Naphthalene (C10H8)",
                "Anthracene (C14H10)",
                "Phenanthrene (C14H10)",
                "Pyrene (C16H10)",
                "Chrysene (C18H12)",
                "Triphenylene (C18H12)",
                "Fluorene (C13H10)",
                "Fluoranthene (C16H10)",
                "Acenaphthylene (C12H8)"
            ],
            "mismatches": [
                "Biphenyl (C12H10)",
                "Terphenyl (C18H14)",
                "Acenaphthene (C12H10)",
                "Diphenylmethane (C13H12)",
                "Diphenylethane (C14H14)",
                "Stilbene (C14H12)",
                "Benzanthracene (C18H12)",
                "Cholanthrene (C20H16)",
                "Triphenylmethane (C19H16)",
                "Dibenzo(a,h)anthracene (C22H14)",
                "Dibenzo(def,mno)chrysene (C24H14)"
            ],
            "true_referents": [
                "(1-Ethylpropyl)benzene",
                "1,3-Di-tert-butylbenzene",
                "1-Methyl-2-propylbenzene",
                "1-ethyl-2,4-dimethyl-Benzene",
                "10-Hydroxybenzo[a]pyrene",
                "11,12-epoxy-3-methylcholanthrene",
                "3-methylcholanthrene",
                "5-phenyldodecane",
                "8-Hydroxybenzo[a]pyrene",
                "8H-cyclopenta[a]acenaphthylene",
                "Anthanthrene",
                "Dibenzo[a,h]pyrene",
                "Dibenzo[a,i]pyrene",
                "Naphtho[2,3-e]pyrene",
                "aceanthrylene",
                "acenaphthylene",
                "acene",
                "acephenanthrylene",
                "anthracene",
                "benzene",
                "benzene-d6",
                "benzo[b]fluoranthene",
                "benzo[c]phenanthrene",
                "biphenylene",
                "chrysene",
                "cumene",
                "dibenz[a,h]anthracene",
                "ethylbenzene",
                "fluoranthene",
                "fluorene",
                "methylbenzene",
                "naphthalene",
                "o-xylene",
                "p-xylene",
                "phenanthrene",
                "propylbenzene",
                "pyranthrene",
                "pyrene",
                "tetramethylbenzene",
                "tetraphenylene",
                "toluene",
                "toluene-d8",
                "triphenylene",
                "xylene"
            ],
            "TP": 14,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "arene",
        "domain": "chemistry",
        "response": "Sorry - this model is unable to respond to this request.",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]