[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (C2H5OH)",
            "Acetone (CH3COCH3)",
            "Dimethyl sulfoxide (DMSO)",
            "Formic acid (HCOOH)",
            "Glycerol (C3H8O3)",
            "Methanol (CH3OH)",
            "Nitromethane (CH3NO2)",
            "Propylene glycol (C3H8O2)",
            "Ammonia (NH3)",
            "Sulfuric acid (H2SO4)",
            "Phenol (C6H5OH)",
            "Urea ((NH2)2CO)",
            "Dimethylformamide (DMF)",
            "Hydrogen peroxide (H2O2)",
            "Isopropanol (C3H8O)",
            "Acetic acid (CH3COOH)",
            "Boric acid (H3BO3)",
            "Dichloromethane (CH2Cl2)",
            "Formamide (HCONH2)",
            "Sodium hydroxide (NaOH)",
            "Hydrochloric acid (HCl)",
            "Carbonic acid (H2CO3)",
            "Lactic acid (C3H6O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Water (H2O)",
                "Ethanol (C2H5OH)",
                "Acetone (CH3COCH3)",
                "Dimethyl sulfoxide (DMSO)",
                "Formic acid (HCOOH)",
                "Methanol (CH3OH)",
                "Nitromethane (CH3NO2)",
                "Acetic acid (CH3COOH)",
                "Dichloromethane (CH2Cl2)",
                "Formamide (HCONH2)"
            ],
            "mismatches": [
                "Glycerol (C3H8O3)",
                "Propylene glycol (C3H8O2)",
                "Ammonia (NH3)",
                "Sulfuric acid (H2SO4)",
                "Phenol (C6H5OH)",
                "Urea ((NH2)2CO)",
                "Dimethylformamide (DMF)",
                "Hydrogen peroxide (H2O2)",
                "Isopropanol (C3H8O)",
                "Boric acid (H3BO3)",
                "Sodium hydroxide (NaOH)",
                "Hydrochloric acid (HCl)",
                "Carbonic acid (H2CO3)",
                "Lactic acid (C3H6O3)"
            ],
            "true_referents": [
                "((18)O)water",
                "(+)-ureidoglycolic acid",
                "(2,6-dihydroxyphenyl)acetic acid",
                "(2-hydroxyphenyl)acetic acid",
                "(3,5-dihydroxyphenyl)acetic acid",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "Methyl (xi)-3-nonenoate",
                "N,N-dimethylformamide",
                "N-Boc-D-4-HO-3,5-Cl2-PhGly-D-4-HO-3,5-I2-PhGly-OMe",
                "N-benzylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "aminoethyl nitrate",
                "chloroacetic acid",
                "chlorophyll a",
                "cyanoacetic acid",
                "dichloromethane",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl (2R)-lactate",
                "ethyl (2S)-lactate",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "haloacetic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 3-aminopyrazine-2-carboxylate",
                "nitric acid",
                "nitromethane",
                "propyl acetate",
                "sulfoacetic acid",
                "triethyl borate",
                "water"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Acetone (C3H6O)",
            "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
            "Acetonitrile (CH3CN)",
            "Dimethylformamide (DMF, (CH3)2NC(O)H)",
            "Formamide (HCONH2)",
            "Ethylene glycol (C2H6O2)",
            "Glycerol (C3H8O3)",
            "Propylene glycol (C3H8O2)",
            "Tetrahydrofuran (THF, C4H8O)",
            "Dichloromethane (DCM, CH2Cl2)",
            "Pyridine (C5H5N)",
            "Urea (CH4N2O)",
            "Hydrogen peroxide (H2O2)",
            "Ammonia (NH3)",
            "Acetic acid (CH3COOH)",
            "Ethylene carbonate (C3H4O3)",
            "Propylene carbonate (C4H6O3)",
            "Nitromethane (CH3NO2)",
            "Methyl tert-butyl ether (MTBE, C5H12O)",
            "Dimethyl sulfoxide-d6 (DMSO-d6, (CD3)2SO)",
            "N,N-Dimethylformamide-d7 (DMF-d7, (CD3)2NC(O)D)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetic acid",
                "acetonitrile",
                "dichloromethane",
                "dimethyl sulfoxide",
                "formamide",
                "methanol",
                "nitromethane",
                "water"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Acetone (C3H6O)",
                "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
                "Acetonitrile (CH3CN)",
                "Dimethylformamide (DMF, (CH3)2NC(O)H)",
                "Formamide (HCONH2)",
                "Ethylene glycol (C2H6O2)",
                "Glycerol (C3H8O3)",
                "Propylene glycol (C3H8O2)",
                "Tetrahydrofuran (THF, C4H8O)",
                "Pyridine (C5H5N)",
                "Urea (CH4N2O)",
                "Hydrogen peroxide (H2O2)",
                "Ammonia (NH3)",
                "Ethylene carbonate (C3H4O3)",
                "Propylene carbonate (C4H6O3)",
                "Methyl tert-butyl ether (MTBE, C5H12O)",
                "Dimethyl sulfoxide-d6 (DMSO-d6, (CD3)2SO)",
                "N,N-Dimethylformamide-d7 (DMF-d7, (CD3)2NC(O)D)"
            ],
            "true_referents": [
                "((18)O)water",
                "(+)-ureidoglycolic acid",
                "(-)-ureidoglycolic acid",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "Methyl (xi)-3-nonenoate",
                "N(3)-(4-methoxyfumaroyl)-2,3-diaminopropionic acid",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "chlorophyll a",
                "dichloromethane",
                "diethyl 2-sulfanylbutanedioate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl (R)-3-hydroxybutanoate",
                "ethyl 2-methylbutyrate",
                "ethyl 3-hexenoate",
                "ethyl 3-oxohexanoate",
                "ethyl acetate",
                "ethyl cyclohexanecarboxylate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-phenylpropanoate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "sulfoacetic acid",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 8,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (MeOH)",
            "Ethanol (EtOH)",
            "Isopropanol (IPA)",
            "Acetone",
            "Dimethyl Sulfoxide (DMSO)",
            "N,N-Dimethylformamide (DMF)",
            "Acetic Acid",
            "Formic Acid",
            "Nitromethane",
            "Propylene Carbonate",
            "Ethylene Glycol",
            "Glycerol",
            "2-Propanol",
            "Tetrahydrofuran (THF)",
            "Dimethylacetamide (DMA)",
            "Acetonitrile (MeCN)",
            "1-Butanol",
            "Ethylene Glycol Monomethyl Ether",
            "Ethylene Glycol Monoethyl Ether",
            "Ethylene Glycol Monobutyl Ether",
            "Acetylacetone",
            "Ethanolamine",
            "Dimethylformamide (DMF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone",
                "Dimethyl Sulfoxide (DMSO)",
                "N,N-Dimethylformamide (DMF)",
                "Acetic Acid",
                "Formic Acid",
                "Nitromethane",
                "Acetonitrile (MeCN)",
                "1-Butanol",
                "Ethylene Glycol Monoethyl Ether"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (MeOH)",
                "Ethanol (EtOH)",
                "Isopropanol (IPA)",
                "Propylene Carbonate",
                "Ethylene Glycol",
                "Glycerol",
                "2-Propanol",
                "Tetrahydrofuran (THF)",
                "Dimethylacetamide (DMA)",
                "Ethylene Glycol Monomethyl Ether",
                "Ethylene Glycol Monobutyl Ether",
                "Acetylacetone",
                "Ethanolamine",
                "Dimethylformamide (DMF)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(S)-propane-1,2-diol",
                "3-methyl-2-butanol",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-furfurylformamide",
                "acetate ester",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "benzyl acetate",
                "butan-1-ol",
                "butyl butanoate",
                "diethylene glycol monoethyl ether",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethanolammonium nitrate",
                "ethyl acetate",
                "ethyl acetoacetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "ethylammonium nitrate",
                "formic acid",
                "glycol ether",
                "hydroxyacetone",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl ester",
                "methyl formate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "propane-1,2-diol",
                "propyl acetate",
                "propyl decanoate",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H\u2082O)",
            "Formamide (HCONH\u2082)",
            "Methanol (CH\u2083OH)",
            "Ethanol (C\u2082H\u2085OH)",
            "Propanol (C\u2083H\u2087OH)",
            "Isopropanol (C\u2083H\u2088O)",
            "Acetone (CH\u2083COCH\u2083)",
            "Dimethyl sulfoxide (DMSO)",
            "Dimethylformamide (DMF)",
            "Acetonitrile (CH\u2083CN)",
            "N,N-Dimethylacetamide (DMAc)",
            "N-Methylpyrrolidone (NMP)",
            "Propylene Carbonate (C\u2084H\u2086O\u2083)",
            "Glycerol (C\u2083H\u2088O\u2083)",
            "Ethylene Glycol (C\u2082H\u2086O\u2082)",
            "Tetrahydrofuran (THF)",
            "Dioxane (C\u2084H\u2088O\u2082)",
            "Pyridine (C\u2085H\u2085N)",
            "Sulfolane (C\u2084H\u2081\u2080SO\u2082)",
            "1-Butanol (C\u2084H\u2089OH)",
            "2-Butanol (C\u2084H\u2081\u2080O)",
            "tert-Butanol (C\u2084H\u2081\u2080O)",
            "Formic Acid (HCOOH)",
            "Acetic Acid (CH\u2083COOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "water",
                "formamide",
                "methanol",
                "ethanol",
                "acetone",
                "dimethyl sulfoxide",
                "acetonitrile",
                "N-methylpyrrolidin-2-one",
                "sulfolane",
                "formic acid",
                "acetic acid"
            ],
            "mismatches": [
                "propanol",
                "isopropanol",
                "dimethylformamide",
                "N,N-dimethylacetamide",
                "propylene carbonate",
                "glycerol",
                "ethylene glycol",
                "tetrahydrofuran",
                "dioxane",
                "pyridine",
                "1-butanol",
                "2-butanol",
                "tert-butanol"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "1-hexadecyl-2-acetyl-3-decanoyl-sn-glycerol",
                "2-ethoxyethanol",
                "3-methyl-2-butanol",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-ethylphenylacetamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "N-phenylacetamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "butane-1,4-diol",
                "butyl butanoate",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl (R)-3-hydroxybutanoate",
                "ethyl acetate",
                "ethyl glyoxylate",
                "formamide",
                "formic acid",
                "glycol ether",
                "haloacetic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-(4-hydroxyphenyl)propionate",
                "methyl 5-(hydroxymethyl)pyrrolidine-3-carboxylate",
                "phenylacetonitrile",
                "piperidine",
                "propane-1,3-diol",
                "propyl acetate",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Propanol (C3H7OH)",
            "Isopropanol (C3H8O)",
            "Acetone (C3H6O)",
            "Acetonitrile (CH3CN)",
            "Dimethyl sulfoxide (DMSO)",
            "Dimethylformamide (DMF)",
            "Formic acid (HCOOH)",
            "Acetic acid (CH3COOH)",
            "Hydrogen peroxide (H2O2)",
            "Glycerol (C3H8O3)",
            "Ethylene glycol (C2H6O2)",
            "Propylene glycol (C3H8O2)",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Pyridine (C5H5N)",
            "Tetrahydrofuran (THF)",
            "Methyl ethyl ketone (MEK)",
            "N,N-Dimethylacetamide (DMAc)",
            "Nitromethane (CH3NO2)",
            "1,4-Dioxane (C4H8O2)",
            "Triethylamine (TEA)",
            "Butanol (C4H9OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone",
                "Acetonitrile",
                "Dimethyl sulfoxide",
                "Formic acid",
                "Acetic acid",
                "Nitromethane",
                "N-Methyl-2-pyrrolidone"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Propanol (C3H7OH)",
                "Isopropanol (C3H8O)",
                "Dimethylformamide (DMF)",
                "Hydrogen peroxide (H2O2)",
                "Glycerol (C3H8O3)",
                "Ethylene glycol (C2H6O2)",
                "Propylene glycol (C3H8O2)",
                "Pyridine (C5H5N)",
                "Tetrahydrofuran (THF)",
                "Methyl ethyl ketone (MEK)",
                "N,N-Dimethylacetamide (DMAc)",
                "1,4-Dioxane (C4H8O2)",
                "Triethylamine (TEA)",
                "Butanol (C4H9OH)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "3-methyl-2-butanol",
                "Methyl (xi)-3-nonenoate",
                "Methyl 4,8-decadienoate",
                "N,N-dimethylformamide",
                "N-ethylphenylacetamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "N-phenylacetamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "butane-1,4-diol",
                "butyl butanoate",
                "chlorophyll a",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 11-methyldodec-2-enoate",
                "ethyl 2-methylbutyrate",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "formic acid",
                "haloacetic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "propyl decanoate",
                "pyrrolidin-2-one",
                "tetrahydrofurfuryl alcohol",
                "triethanolammonium nitrate",
                "triethyl phosphate",
                "water"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (C2H5OH)",
            "Methanol (CH3OH)",
            "Acetic acid (CH3COOH)",
            "Dimethyl sulfoxide (DMSO)",
            "Acetonitrile (CH3CN)",
            "Glycerol (C3H8O3)",
            "Formamide (HCONH2)",
            "Propylene glycol (C3H8O2)",
            "Sodium chloride solution (NaCl in H2O)",
            "Ammonium hydroxide (NH4OH)",
            "Isopropanol (C3H8O)",
            "Hydrochloric acid (HCl in H2O)",
            "Benzyl alcohol (C6H5CH2OH)",
            "Tetrahydrofuran (THF)",
            "Nitromethane (CH3NO2)",
            "Pyridine (C5H5N)",
            "Chloroform (CHCl3)",
            "Sodium bicarbonate solution (NaHCO3 in H2O)",
            "Urea (NH2CONH2)",
            "Ethylene glycol (C2H6O2)",
            "Hexamethylphosphoramide (HMPA)",
            "Thionyl chloride (SOCl2 in solvent)",
            "Dimethylformamide (DMF)",
            "Sodium acetate solution (CH3COONa in H2O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methanol",
                "Acetic acid",
                "Dimethyl sulfoxide",
                "Acetonitrile",
                "Formamide",
                "Nitromethane"
            ],
            "mismatches": [
                "Water (H2O)",
                "Ethanol (C2H5OH)",
                "Glycerol (C3H8O3)",
                "Propylene glycol (C3H8O2)",
                "Sodium chloride solution (NaCl in H2O)",
                "Ammonium hydroxide (NH4OH)",
                "Isopropanol (C3H8O)",
                "Hydrochloric acid (HCl in H2O)",
                "Benzyl alcohol (C6H5CH2OH)",
                "Tetrahydrofuran (THF)",
                "Pyridine (C5H5N)",
                "Chloroform (CHCl3)",
                "Sodium bicarbonate solution (NaHCO3 in H2O)",
                "Urea (NH2CONH2)",
                "Ethylene glycol (C2H6O2)",
                "Hexamethylphosphoramide (HMPA)",
                "Thionyl chloride (SOCl2 in solvent)",
                "Dimethylformamide (DMF)",
                "Sodium acetate solution (CH3COONa in H2O)"
            ],
            "true_referents": [
                "((18)O)water",
                "(+)-ureidoglycolic acid",
                "(-)-ureidoglycolic acid",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(S)-hydratropic acid",
                "1,1,1-trichloroethane",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-Acetamidoethylphosphonate",
                "2-ethoxyethanol",
                "2-thienylacetic acid",
                "2-thienylacetonitrile",
                "4,5,6,7-tetrachloro-2-benzofuran-1(3H)-one",
                "4-acetoxybenzyl alcohol",
                "CMP-5'-phosphonoformic acid",
                "Methyl (xi)-3-nonenoate",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetonitrile",
                "aminoethyl nitrate",
                "ammonium formate",
                "dichloromethane",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 3-phenyl-2-\\{[4-(toluene-4-sulfonyl)thiomorpholine-3-carbonyl]amino\\}propionate",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl p-coumarate acetate",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 6,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (MeOH)",
            "Ethanol (EtOH)",
            "Isopropanol (IPA)",
            "Acetone (C3H6O)",
            "Acetonitrile (CH3CN)",
            "Dimethyl Sulfoxide (DMSO)",
            "Formamide (NH2CHO)",
            "Glycerol (C3H8O3)",
            "Hydrogen Fluoride (HF)",
            "Nitromethane (CH3NO2)",
            "Propylene Carbonate (C4H6O3)",
            "Sulfolane (C4H8O2S)",
            "Tetrahydrofuran (THF)",
            "Dimethylformamide (DMF)",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Ethylene Glycol (C2H6O2)",
            "1,4-Dioxane (C4H8O2)",
            "Diethyl Ether (C4H10O)",
            "Triethylamine (C6H15N)",
            "Pyridine (C5H5N)",
            "Ammonia (NH3)",
            "Hydrogen Peroxide (H2O2)",
            "Methyl Ethyl Ketone (C4H8O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetonitrile (CH3CN)",
                "Dimethyl Sulfoxide (DMSO)",
                "Formamide (NH2CHO)",
                "Nitromethane (CH3NO2)",
                "Sulfolane (C4H8O2S)"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (MeOH)",
                "Ethanol (EtOH)",
                "Isopropanol (IPA)",
                "Acetone (C3H6O)",
                "Glycerol (C3H8O3)",
                "Hydrogen Fluoride (HF)",
                "Propylene Carbonate (C4H6O3)",
                "Tetrahydrofuran (THF)",
                "Dimethylformamide (DMF)",
                "N-Methyl-2-pyrrolidone (NMP)",
                "Ethylene Glycol (C2H6O2)",
                "1,4-Dioxane (C4H8O2)",
                "Diethyl Ether (C4H10O)",
                "Triethylamine (C6H15N)",
                "Pyridine (C5H5N)",
                "Ammonia (NH3)",
                "Hydrogen Peroxide (H2O2)",
                "Methyl Ethyl Ketone (C4H8O)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "(R)-propane-1,2-diol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "4-Me-6E,8E-16:2 methyl ester",
                "8(9)-EET methyl ester",
                "Isopropyl propionate",
                "Methyl (xi)-3-nonenoate",
                "Methyl 4,8-decadienoate",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "chlorophyll a",
                "diethylene glycol monoethyl ether",
                "difluoroacetic acid",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl (6Z,9Z,12Z)-hexadeca-6,9,12,15-tetraenoate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "fluoroacetic acid",
                "formamide",
                "formic acid",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-phenylpropanoate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "trifluoroacetic acid",
                "water"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water",
            "Methanol",
            "Ethanol",
            "Isopropanol",
            "Acetone",
            "Acetonitrile",
            "DMSO (Dimethyl sulfoxide)",
            "DMF (Dimethylformamide)",
            "Ethyl acetate",
            "Methyl ethyl ketone",
            "Acetic acid",
            "Formic acid",
            "Propionic acid",
            "Butyric acid",
            "Glycerol",
            "Pyridine",
            "Tetrahydrofuran (THF)",
            "Diethyl ether",
            "Chloroform",
            "Carbon tetrachloride",
            "Dichloromethane",
            "Acetonitrile",
            "N,N-Dimethylacetamide (DMAc)",
            "N-Methyl-2-pyrrolidone (NMP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Water",
                "Methanol",
                "Ethanol",
                "Acetone",
                "Acetonitrile",
                "Dimethyl sulfoxide",
                "Ethyl acetate",
                "Acetic acid",
                "Formic acid",
                "Dichloromethane",
                "N-methylpyrrolidin-2-one"
            ],
            "mismatches": [
                "Isopropanol",
                "DMF (Dimethylformamide)",
                "Methyl ethyl ketone",
                "Propionic acid",
                "Butyric acid",
                "Glycerol",
                "Pyridine",
                "Tetrahydrofuran (THF)",
                "Diethyl ether",
                "Chloroform",
                "Carbon tetrachloride",
                "N,N-Dimethylacetamide (DMAc)"
            ],
            "true_referents": [
                "((18)O)water",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "5-formamidopyrimidine",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N-ethylphenylacetamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "N-phenylacetamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "acetonitrile oxide",
                "benzyl acetate",
                "butyl acetate",
                "butyl butanoate",
                "butyl propionate",
                "dichloromethane",
                "diethylene glycol monoethyl ether",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 2-methylbutyrate",
                "ethyl acetate",
                "ethyl acetoacetate",
                "ethyl ester",
                "ethyl hexadecanoate",
                "flavunoidine(2+)",
                "formic acid",
                "isoflurane",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl ester",
                "methyl formate",
                "phenylacetonitrile",
                "piperidine",
                "propyl butyrate",
                "propyl propionate",
                "pyrrolidin-2-one",
                "tetrahydrofurfuryl alcohol",
                "trichloroacetic acid",
                "water"
            ],
            "TP": 11,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H\u2082O)",
            "Methanol (MeOH)",
            "Ethanol (EtOH)",
            "Acetonitrile (MeCN)",
            "Dimethyl sulfoxide (DMSO)",
            "Dimethylformamide (DMF)",
            "Acetone (Me\u2082CO)",
            "Formamide (HCONH\u2082)",
            "Glycerol",
            "Propylene glycol",
            "Ethylene glycol",
            "N-Methyl-2-pyrrolidone (NMP)",
            "N,N-Dimethylacetamide (DMA)",
            "Pyridine",
            "Urea",
            "Sulfolane",
            "Triethylene glycol",
            "Hexafluoroisopropanol (HFIPA)",
            "1,4-Dioxane",
            "Sorbitol",
            "Trihydroxyethane",
            "Formic acid",
            "Acetic acid (AcOH)",
            "Butanol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "water",
                "methanol",
                "ethanol",
                "acetonitrile",
                "dimethyl sulfoxide",
                "formamide",
                "sulfolane",
                "formic acid",
                "acetic acid"
            ],
            "mismatches": [
                "dimethylformamide",
                "acetone",
                "glycerol",
                "propylene glycol",
                "ethylene glycol",
                "n-methyl-2-pyrrolidone",
                "n,n-dimethylacetamide",
                "pyridine",
                "urea",
                "triethylene glycol",
                "hexafluoroisopropanol",
                "1,4-dioxane",
                "sorbitol",
                "trihydroxyethane",
                "butanol"
            ],
            "true_referents": [
                "((18)O)water",
                "(+)-ureidoglycolic acid",
                "(-)-ureidoglycolic acid",
                "(2-methylphenyl)acetonitrile",
                "1,1,1,3,3,3-hexafluoropropan-2-ol",
                "1,4-diacetoxybutane",
                "3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,10-heptadecafluoro-1-decanol",
                "3-methyl-2-butanol",
                "5-formamidopyrimidine",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "butane-1,4-diol",
                "butyl butanoate",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 3-hydroxybutyrate",
                "ethyl 3-hydroxyhexanoate",
                "ethyl 3-hydroxytridecanoate",
                "ethyl acetoacetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "ethyl sorbate",
                "flavunoidine(2+)",
                "formamide",
                "formic acid",
                "glycol ether",
                "methanol",
                "methanol-d4",
                "methoxyacetic acid",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl acetate",
                "methyl ester",
                "methyl formate",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "propyl butyrate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "triethanolammonium nitrate",
                "trifluoroacetic acid",
                "ureidoglycolic acid",
                "water"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (EtOH)",
            "Methanol (MeOH)",
            "Acetone",
            "Dimethyl sulfoxide (DMSO)",
            "Formic acid",
            "Acetic acid",
            "Propylene glycol",
            "Glycerol",
            "Ethylene glycol",
            "Acetonitrile",
            "N,N-Dimethylformamide (DMF)",
            "Tetrahydrofuran (THF)",
            "1,4-Dioxane",
            "Pyridine",
            "Formamide",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Hexamethylphosphoramide (HMPA)",
            "Nitromethane",
            "Ethyl acetate",
            "Isopropanol (IPA)",
            "Ammonia",
            "Hydrogen peroxide (H2O2)",
            "Hydrochloric acid (HCl)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone",
                "Dimethyl sulfoxide",
                "Formic acid",
                "Acetic acid",
                "Acetonitrile",
                "N,N-Dimethylformamide",
                "Formamide",
                "Nitromethane",
                "Ethyl acetate"
            ],
            "mismatches": [
                "Water (H2O)",
                "Ethanol (EtOH)",
                "Methanol (MeOH)",
                "Propylene glycol",
                "Glycerol",
                "Ethylene glycol",
                "Tetrahydrofuran (THF)",
                "1,4-Dioxane",
                "Pyridine",
                "N-Methyl-2-pyrrolidone (NMP)",
                "Hexamethylphosphoramide (HMPA)",
                "Isopropanol (IPA)",
                "Ammonia",
                "Hydrogen peroxide (H2O2)",
                "Hydrochloric acid (HCl)"
            ],
            "true_referents": [
                "((18)O)water",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "1,4-diacetoxybutane",
                "2-Acetamidoethylphosphonate",
                "5-formamidopyrimidine",
                "CMP-5'-phosphonoformic acid",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "acetonitrile oxide",
                "aminoethyl nitrate",
                "benzyl acetate",
                "butane-1,4-diol",
                "chloroacetic acid",
                "chlorophyll a",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl acetoacetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "flavunoidine(2+)",
                "formamide",
                "formamidine",
                "formic acid",
                "glycol ether",
                "haloacetic acid",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl ester",
                "methyl formate",
                "methyl p-coumarate acetate",
                "nitrates",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "pyrrolidin-2-one",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (MeOH)",
            "Ethanol (EtOH)",
            "Isopropanol (IPA)",
            "Acetone (CH3COCH3)",
            "Acetonitrile (ACN)",
            "Dimethylformamide (DMF)",
            "Dimethyl sulfoxide (DMSO)",
            "Ethylene glycol (EG)",
            "Propylene glycol (PG)",
            "Glycerol",
            "Formic acid (FA)",
            "Acetic acid (AcOH)",
            "Propionic acid",
            "Butyric acid",
            "Formamide",
            "N-Methylformamide (NMF)",
            "N,N-Dimethylacetamide (DMA)",
            "Hexamethylphosphoramide (HMPA)",
            "Sulfolane",
            "Ethyl acetate (EtOAc)",
            "Tetrahydrofuran (THF)",
            "1,4-Dioxane",
            "Pyridine"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methanol (MeOH)",
                "Ethanol (EtOH)",
                "Acetone (CH3COCH3)",
                "Acetonitrile (ACN)",
                "Dimethylformamide (DMF)",
                "Dimethyl sulfoxide (DMSO)",
                "Formic acid (FA)",
                "Acetic acid (AcOH)",
                "Formamide",
                "N-Methylformamide (NMF)",
                "Sulfolane",
                "Ethyl acetate (EtOAc)"
            ],
            "mismatches": [
                "Water (H2O)",
                "Isopropanol (IPA)",
                "Ethylene glycol (EG)",
                "Propylene glycol (PG)",
                "Glycerol",
                "Propionic acid",
                "Butyric acid",
                "N,N-Dimethylacetamide (DMA)",
                "Hexamethylphosphoramide (HMPA)",
                "Tetrahydrofuran (THF)",
                "1,4-Dioxane",
                "Pyridine"
            ],
            "true_referents": [
                "((18)O)water",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "1,4-diacetoxybutane",
                "2-Acetamidoethylphosphonate",
                "5-formamidopyrimidine",
                "CMP-5'-phosphonoformic acid",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-cyclohexylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "acetonitrile oxide",
                "butane-1,4-diol",
                "butyl acetate",
                "butyl butanoate",
                "butyl propionate",
                "difluoroacetic acid",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl acetoacetate",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "ethyl phenylglyoxylate",
                "flavunoidine(2+)",
                "formamide",
                "formamidine",
                "formic acid",
                "glycol ether",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methoxyacetic acid",
                "methyl ester",
                "methyl p-coumarate acetate",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "propyl butyrate",
                "propyl propionate",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Ammonia (NH3)",
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Acetone (CH3COCH3)",
            "Acetonitrile (CH3CN)",
            "Dimethyl sulfoxide (DMSO)",
            "N,N-Dimethylformamide (DMF)",
            "Ethylene glycol (HOCH2CH2OH)",
            "Glycerol (C3H8O3)",
            "Formamide (HCONH2)",
            "Hexafluoroisopropanol (HFIP)",
            "Trifluoroethanol (CF3CH2OH)",
            "Nitromethane (CH3NO2)",
            "Pyridine (C5H5N)",
            "Sulfolane (C4H8SO2)",
            "Hexamethylphosphoramide (HMPA)",
            "N-Methylpyrrolidone (NMP)",
            "Ethyl acetate (CH3COOC2H5)",
            "Tetrahydrofuran (THF)",
            "1,4-Dioxane (C4H8O2)",
            "Propylene carbonate (C4H6O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetic acid (CH3COOH)",
                "Formic acid (HCOOH)",
                "Acetone (CH3COCH3)",
                "Acetonitrile (CH3CN)",
                "Dimethyl sulfoxide (DMSO)",
                "N,N-Dimethylformamide (DMF)",
                "Formamide (HCONH2)",
                "Trifluoroethanol (CF3CH2OH)",
                "Nitromethane (CH3NO2)",
                "Sulfolane (C4H8SO2)",
                "N-Methylpyrrolidone (NMP)",
                "Ethyl acetate (CH3COOC2H5)"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Ammonia (NH3)",
                "Ethylene glycol (HOCH2CH2OH)",
                "Glycerol (C3H8O3)",
                "Hexafluoroisopropanol (HFIP)",
                "Pyridine (C5H5N)",
                "Hexamethylphosphoramide (HMPA)",
                "Tetrahydrofuran (THF)",
                "1,4-Dioxane (C4H8O2)",
                "Propylene carbonate (C4H6O3)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2,2,2-trifluoroethoxy)acetic acid",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(R)-propane-1,2-diol",
                "1,1,1,3,3,3-hexafluoropropan-2-ol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2,2,2-trifluoroethanol",
                "2-Acetamidoethylphosphonate",
                "2-ethoxyethanol",
                "3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,10-heptadecafluoro-1-decanol",
                "CMP-5'-phosphonoformic acid",
                "Methyl (xi)-3-nonenoate",
                "Methyl 4,8-decadienoate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "haloacetic acid",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-phenylpropanoate",
                "methyl 5-(hydroxymethyl)pyrrolidine-3-carboxylate",
                "methyl p-coumarate acetate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "trifluoroacetic acid",
                "water"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (MeOH)",
            "Ethanol (EtOH)",
            "Propanol (PrOH)",
            "Butanol (BuOH)",
            "Glycerol (Gly)",
            "Dimethyl Sulfoxide (DMSO)",
            "Acetone (Me2CO)",
            "Acetonitrile (MeCN)",
            "Formamide (FA)",
            "Acetic Acid (AcOH)",
            "Formic Acid (HCOOH)",
            "Ammonia (NH3)",
            "Pyridine (Py)",
            "Ethylene Glycol (EG)",
            "Propylene Glycol (PG)",
            "Tetrahydrofuran (THF)",
            "Dioxane (Diox)",
            "Dimethylformamide (DMF)",
            "Dimethylacetamide (DMA)",
            "Hexamethylphosphoramide (HMPA)",
            "Sulfolane (Sulfo)",
            "Nitromethane (MeNO2)",
            "Nitroethane (EtNO2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "dimethyl sulfoxide",
                "acetone",
                "acetonitrile",
                "formamide",
                "acetic acid",
                "formic acid",
                "sulfolane",
                "nitromethane"
            ],
            "mismatches": [
                "water (H2O)",
                "methanol (MeOH)",
                "ethanol (EtOH)",
                "propanol (PrOH)",
                "butanol (BuOH)",
                "glycerol (Gly)",
                "ammonia (NH3)",
                "pyridine (Py)",
                "ethylene glycol (EG)",
                "propylene glycol (PG)",
                "tetrahydrofuran (THF)",
                "dioxane (Diox)",
                "dimethylformamide (DMF)",
                "dimethylacetamide (DMA)",
                "hexamethylphosphoramide (HMPA)",
                "nitroethane (EtNO2)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-PYR-41",
                "(R)-malaoxon",
                "(R)-propane-1,2-diol",
                "(S)-malaoxon",
                "(Z)-PYR-41",
                "2-Acetamidoethylphosphonate",
                "2-nitropropane",
                "3-methyl-2-butanol",
                "CMP-5'-phosphonoformic acid",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "butyl butanoate",
                "dimethyl fumarate",
                "dimethyl maleate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 2-phenylethyl dimethylmalonate",
                "ethyl acetoacetate",
                "ethyl glyoxylate",
                "ethyl phenylglyoxylate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "glycol ether",
                "glycolate ester",
                "glycolic acid",
                "haloacetic acid",
                "methanol",
                "methanol-d4",
                "methoxyacetic acid",
                "methyl acetate",
                "methyl p-coumarate acetate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propan-1-ol",
                "propyl acetate",
                "propyl decanoate",
                "propyl hexanoate",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Acetone (CH3COCH3)",
            "Dimethyl sulfoxide (DMSO)",
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Glacial acetic acid",
            "Hydrogen peroxide (H2O2)",
            "Ammonia (NH3)",
            "Methylene chloride (CH2Cl2)",
            "Ethylene glycol (C2H4(OH)2)",
            "Glycerol (C3H8O3)",
            "Urea (CO(NH2)2)",
            "Formamide (HCO(NH2)2)",
            "Dimethylformamide (DMF)",
            "N-methyl-2-pyrrolidone (NMP)",
            "Pyridine (C5H5N)",
            "Acetonitrile (CH3CN)",
            "Dimethylacetamide (DMA)",
            "Triethylene glycol (C6H12O4)",
            "1,4-Dioxane (C4H8O2)",
            "N,N-Dimethylformamide (DMF)",
            "Ethylene carbonate (C3H4O3)",
            "Propylene carbonate (C3H4O3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetic acid",
                "acetone",
                "acetonitrile",
                "dimethyl sulfoxide",
                "ethanol",
                "formamide",
                "methanol",
                "N-methylpyrrolidin-2-one",
                "N,N-dimethylformamide",
                "water"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Acetone (CH3COCH3)",
                "Dimethyl sulfoxide (DMSO)",
                "Acetic acid (CH3COOH)",
                "Formic acid (HCOOH)",
                "Glacial acetic acid",
                "Hydrogen peroxide (H2O2)",
                "Ammonia (NH3)",
                "Methylene chloride (CH2Cl2)",
                "Ethylene glycol (C2H4(OH)2)",
                "Glycerol (C3H8O3)",
                "Urea (CO(NH2)2)",
                "Formamide (HCO(NH2)2)",
                "Dimethylformamide (DMF)",
                "N-methyl-2-pyrrolidone (NMP)",
                "Pyridine (C5H5N)",
                "Acetonitrile (CH3CN)",
                "Dimethylacetamide (DMA)",
                "Triethylene glycol (C6H12O4)",
                "1,4-Dioxane (C4H8O2)",
                "N,N-Dimethylformamide (DMF)",
                "Ethylene carbonate (C3H4O3)",
                "Propylene carbonate (C3H4O3)"
            ],
            "true_referents": [
                "((18)O)water",
                "(+)-ureidoglycolic acid",
                "(-)-ureidoglycolic acid",
                "(2-methylphenyl)acetonitrile",
                "(R)-propane-1,2-diol",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "Methyl 4,8-decadienoate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "chloroacetic acid",
                "chlorophyll a",
                "dichloromethane",
                "diethylene glycol monoethyl ether",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl (2R)-hydroxy(phenyl)acetate",
                "ethyl 3-hexenoate",
                "ethyl 3-oxohexanoate",
                "ethyl acetate",
                "ethyl cyclohexanecarboxylate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "glycol ether",
                "haloacetic acid",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-aminopyrazine-2-carboxylate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "nitric acid",
                "phenylacetonitrile",
                "piperidine",
                "pyrrolidin-2-one",
                "water"
            ],
            "TP": 10,
            "FP": 25,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (EtOH)",
            "Methanol (MeOH)",
            "Dimethyl sulfoxide (DMSO)",
            "Acetone",
            "Dimethylformamide (DMF)",
            "Formamide",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Ethylene glycol",
            "Propylene glycol",
            "Glycerol",
            "1,2-Dichloroethane",
            "2-Propanol (i-PrOH)",
            "1-Propanol (n-PrOH)",
            "2-Butanol (i-BuOH)",
            "1-Butanol (n-BuOH)",
            "Tetrahydrofuran (THF)",
            "2-Methyltetrahydrofuran (2-MeTHF)",
            "Dimethylacetamide (DMA)",
            "Hexamethylphosphoramide (HMPA)",
            "Acetonitrile (MeCN)",
            "Isopropanol (i-PrOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dimethyl sulfoxide",
                "Acetone",
                "Formamide",
                "Acetonitrile",
                "propan-1-ol",
                "propan-2-ol",
                "water",
                "N-methylpyrrolidin-2-one"
            ],
            "mismatches": [
                "Ethanol (EtOH)",
                "Methanol (MeOH)",
                "Dimethylformamide (DMF)",
                "Ethylene glycol",
                "Propylene glycol",
                "Glycerol",
                "1,2-Dichloroethane",
                "2-Butanol (i-BuOH)",
                "1-Butanol (n-BuOH)",
                "Tetrahydrofuran (THF)",
                "2-Methyltetrahydrofuran (2-MeTHF)",
                "Dimethylacetamide (DMA)",
                "Hexamethylphosphoramide (HMPA)",
                "Isopropanol (i-PrOH)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(R)-methoprene",
                "(R)-propane-1,2-diol",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "2-Acetamidoethylphosphonate",
                "3-methyl-2-butanol",
                "CMP-5'-phosphonoformic acid",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "benzyl acetate",
                "butane-1,4-diol",
                "butyl butanoate",
                "dichloromethane",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "formamide",
                "formamidine",
                "glycol ether",
                "isobutyl acetate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl ester",
                "methyl p-coumarate acetate",
                "phenylacetonitrile",
                "propan-1-ol",
                "propan-2-ol",
                "propane-1,2-diol",
                "propyl acetate",
                "propyl hexanoate",
                "pyrrolidin-2-one",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 8,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Water",
                "Canonical Name": "H2O"
            },
            {
                "Referent": "Ethanol",
                "Canonical Name": "C2H5OH"
            },
            {
                "Referent": "Dimethyl sulfoxide",
                "Canonical Name": "C2H6OS"
            },
            {
                "Referent": "Acetone",
                "Canonical Name": "C3H6O"
            },
            {
                "Referent": "Dichloromethane",
                "Canonical Name": "CH2Cl2"
            },
            {
                "Referent": "Tetrahydrofuran",
                "Canonical Name": "C4H8O2"
            },
            {
                "Referent": "Dimethylformamide",
                "Canonical Name": "C3H6NO2"
            },
            {
                "Referent": "Dioxane",
                "Canonical Name": "C4H8O2"
            },
            {
                "Referent": "Dimethyl sulfoxide",
                "Canonical Name": "C2H6OS"
            },
            {
                "Referent": "Dimethylformamide",
                "Canonical Name": "C3H6NO2"
            },
            {
                "Referent": "Dioxane",
                "Canonical Name": "C4H8O2"
            },
            {
                "Referent": "Dimethyl sulfoxide",
                "Canonical Name": "C2H6OS"
            },
            {
                "Referent": "Dimethylformamide",
                "Canonical Name": "C3H6NO2"
            },
            {
                "Referent": "Dioxane",
                "Canonical Name": "C4H8O2"
            },
            {
                "Referent": "Dimethyl sulfoxide",
                "Canonical Name": "C2H6OS"
            },
            {
                "Referent": "Dimethylformamide",
                "Canonical Name": "C3H6NO2"
            },
            {
                "Referent": "Dioxane",
                "Canonical Name": "C4H8O2"
            },
            {
                "Referent": "Dimethyl sulfoxide",
                "Canonical Name": "C2H6OS"
            },
            {
                "Referent": "Dimethylformamide",
                "Canonical Name": "C3H6NO2"
            },
            {
                "Referent": "Dioxane",
                "Canonical Name": "C4H8O2"
            },
            {
                "Referent": "Dimethyl sulfoxide",
                "Canonical Name": "C2H6OS"
            },
            {
                "Referent": "Dimethylformamide",
                "Canonical Name": "C3H6NO2"
            },
            {
                "Referent": "Dioxane",
                "Canonical Name": "C4H8O2"
            },
            {
                "Referent": "Dimethyl sulfoxide",
                "Canonical Name": "C2H6OS"
            },
            {
                "Referent": "Dimethylformamide",
                "Canonical Name": "C3H6NO2"
            },
            {
                "Referent": "Dioxane",
                "Canonical Name": "C4H8O2"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Water",
                "Ethanol",
                "Dimethyl sulfoxide",
                "Dichloromethane"
            ],
            "mismatches": [
                "Acetone",
                "Tetrahydrofuran",
                "Dimethylformamide",
                "Dioxane"
            ],
            "true_referents": [
                "((18)O)water",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "1,1,1,3,3,3-hexafluoropropan-2-ol",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "N,N-dimethylformamide",
                "acetone d6",
                "dichloromethane",
                "dimethyl sulfoxide",
                "ethanol",
                "methanol",
                "propane-1,3-diol",
                "sulfoacetic acid",
                "water"
            ],
            "TP": 4,
            "FP": 4,
            "FN": 0
        }
    }
]