[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (EtOH)",
            "Acetone",
            "Dimethyl sulfoxide (DMSO)",
            "Formamide",
            "Glycerol",
            "Propylene glycol",
            "N,N-Dimethylformamide (DMF)",
            "Isopropanol",
            "Acetonitrile",
            "Dimethylformamide (DMF)",
            "Hexamethylphosphoramide (HMPA)",
            "Dimethyl sulfoxide (DMSO)",
            "Dimethylacetamide (DMAc)",
            "Triethylamine",
            "Methanol",
            "Ethylene glycol",
            "Nitromethane",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Sulfolane",
            "Dioxane",
            "Tetrahydrofuran (THF)",
            "Acetic acid",
            "Phosphate buffered saline (PBS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone",
                "Dimethyl sulfoxide (DMSO)",
                "Formamide",
                "N,N-Dimethylformamide (DMF)",
                "Acetonitrile",
                "Methanol",
                "Nitromethane",
                "N-Methyl-2-pyrrolidone (NMP)",
                "Sulfolane",
                "Acetic acid"
            ],
            "mismatches": [
                "Water (H2O)",
                "Ethanol (EtOH)",
                "Glycerol",
                "Propylene glycol",
                "Isopropanol",
                "Dimethylformamide (DMF)",
                "Hexamethylphosphoramide (HMPA)",
                "Dimethylacetamide (DMAc)",
                "Triethylamine",
                "Ethylene glycol",
                "Dioxane",
                "Tetrahydrofuran (THF)",
                "Phosphate buffered saline (PBS)"
            ],
            "true_referents": [
                "((18)O)water",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(R)-propane-1,2-diol",
                "2-Acetamidoethylphosphonate",
                "CMP-5'-phosphonoformic acid",
                "Dimethyl adipate",
                "Isopropyl propionate",
                "Methyl 3-mercaptobutanoate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "acetonitrile oxide",
                "aminoethyl nitrate",
                "benzyl acetate",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 2-methylbutyrate",
                "ethyl acetoacetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "formamide",
                "formamidine",
                "formic acid",
                "glycol ether",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methoxyethane",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl ester",
                "methyl p-coumarate acetate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "propyl acetate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "triethanolammonium nitrate",
                "water"
            ],
            "TP": 10,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Acetone ((CH3)2CO)",
            "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
            "Acetonitrile (CH3CN)",
            "Ethylene glycol (C2H6O2)",
            "Glycerol (C3H8O3)",
            "Propylene carbonate (C4H6O3)",
            "Nitromethane (CH3NO2)",
            "Formamide (CH3NO)",
            "Dimethylformamide (DMF, C3H7NO)",
            "N,N-Dimethylacetamide (DMA, C4H9NO)",
            "Propylene glycol (C3H8O2)",
            "Butan-2-one (Methyl ethyl ketone, MEK, C4H8O)",
            "2-Propanol (Isopropanol, C3H8O)",
            "1,4-Dioxane (C4H8O2)",
            "Tetrahydrofuran (THF, C4H8O)",
            "Acetic acid (CH3COOH)",
            "Pyridine (C5H5N)",
            "Ethylene carbonate (C3H4O3)",
            "Acrylonitrile (C3H3N)",
            "Formic acid (HCOOH)",
            "Methyl acetate (C3H6O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetonitrile",
                "acetic acid",
                "acrylonitrile",
                "formamide",
                "dimethyl sulfoxide",
                "formic acid",
                "methanol",
                "methyl acetate",
                "nitromethane",
                "acetone"
            ],
            "mismatches": [
                "Water (H2O)",
                "Ethanol (C2H5OH)",
                "Ethylene glycol (C2H6O2)",
                "Glycerol (C3H8O3)",
                "Propylene carbonate (C4H6O3)",
                "N,N-Dimethylacetamide (DMA, C4H9NO)",
                "Propylene glycol (C3H8O2)",
                "2-Propanol (Isopropanol, C3H8O)",
                "1,4-Dioxane (C4H8O2)",
                "Tetrahydrofuran (THF, C4H8O)",
                "Pyridine (C5H5N)",
                "Ethylene carbonate (C3H4O3)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "3-phenylpropionitrile",
                "Methyl (xi)-3-nonenoate",
                "Methyl 4,8-decadienoate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "acrylonitrile",
                "aminoethyl nitrate",
                "butan-1-ol",
                "butan-2-one",
                "butane-1,4-diol",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 3-hexenoate",
                "ethyl 3-oxohexanoate",
                "ethyl acetate",
                "ethyl cyclohexanecarboxylate",
                "ethyl ester",
                "ethyl glyoxylate",
                "formamide",
                "formic acid",
                "haloacetic acid",
                "isopropyl ester",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-phenylpropanoate",
                "methyl acetate",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "sulfoacetic acid",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 10,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Isopropanol (C3H8O)",
            "Acetone (C3H6O)",
            "Dimethylformamide (DMF)",
            "Dimethyl sulfoxide (DMSO)",
            "Ethylene glycol (C2H6O2)",
            "Glycerol (C3H8O3)",
            "Propylene glycol (C3H8O2)",
            "Acetic acid (CH3COOH)",
            "Formic acid (HCOOH)",
            "Acetonitrile (CH3CN)",
            "N,N-Dimethylacetamide (DMA)",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Tetrahydrofuran (THF)",
            "1,4-Dioxane (C4H8O2)",
            "Ethyl acetate (C4H8O2)",
            "Propyl acetate (C5H10O2)",
            "Butyl acetate (C6H12O2)",
            "Methyl ethyl ketone (MEK)",
            "Acetylacetone (C5H8O2)",
            "Propionic acid (C3H6O2)",
            "Phenol (C6H5OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetic acid",
                "acetone",
                "acetonitrile",
                "dimethyl sulfoxide",
                "ethyl acetate",
                "formic acid",
                "N-methylpyrrolidin-2-one",
                "propyl acetate",
                "water"
            ],
            "mismatches": [
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Isopropanol (C3H8O)",
                "Dimethylformamide (DMF)",
                "Ethylene glycol (C2H6O2)",
                "Glycerol (C3H8O3)",
                "Propylene glycol (C3H8O2)",
                "N,N-Dimethylacetamide (DMA)",
                "Tetrahydrofuran (THF)",
                "1,4-Dioxane (C4H8O2)",
                "Methyl ethyl ketone (MEK)",
                "Acetylacetone (C5H8O2)",
                "Propionic acid (C3H6O2)",
                "Phenol (C6H5OH)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2,6-dihydroxyphenyl)acetic acid",
                "(2-hydroxyphenyl)acetic acid",
                "(2-methylphenyl)acetonitrile",
                "(3,5-dihydroxyphenyl)acetic acid",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "(R)-propane-1,2-diol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-(3-hydroxyphenyl)propionic acid",
                "2-(pentyloxy)ethyl acetate",
                "2-ethoxyethanol",
                "3-(4'-acetoxyphenyl)propionic acid",
                "Methyl 4,8-decadienoate",
                "N,N-dimethylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetate ester",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "butyl acetate",
                "butyl butanoate",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 11-methyldodec-2-enoate",
                "ethyl 2-methylbutyrate",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "formic acid",
                "haloacetic acid",
                "hydroxyacetone",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "phenylacetonitrile",
                "propyl acetate",
                "propyl propionate",
                "pyrrolidin-2-one",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 9,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (EtOH)",
            "Methanol (MeOH)",
            "Formamide (HCONH2)",
            "Dimethyl sulfoxide (DMSO)",
            "Dimethylformamide (DMF)",
            "Acetonitrile (MeCN)",
            "Propanol (PrOH)",
            "Isopropanol (IPA)",
            "Butanol (BuOH)",
            "Glycerol (Gly)",
            "Ethylene Glycol (EG)",
            "Propylene Glycol (PG)",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Sulfolane (Tetramethylene sulfone)",
            "Acetic Acid (AcOH)",
            "Formic Acid (HCOOH)",
            "Pyridine (C5H5N)",
            "Acetone (Propanone)",
            "Tetrahydrofuran (THF)",
            "Dioxane (1,4-Dioxane)",
            "Dimethylacetamide (DMAc)",
            "Propionitrile (Ethyl cyanide)",
            "1-Propanol (n-Propanol)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dimethyl sulfoxide",
                "acetonitrile",
                "formamide",
                "propionitrile",
                "sulfolane",
                "water",
                "ethanol",
                "methanol",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "formic acid",
                "acetone"
            ],
            "mismatches": [
                "Propanol (PrOH)",
                "Isopropanol (IPA)",
                "Butanol (BuOH)",
                "Glycerol (Gly)",
                "Ethylene Glycol (EG)",
                "Propylene Glycol (PG)",
                "Pyridine (C5H5N)",
                "Tetrahydrofuran (THF)",
                "Dioxane (1,4-Dioxane)",
                "Dimethylacetamide (DMAc)",
                "1-Propanol (n-Propanol)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "3-methyl-2-butanol",
                "Dimethyl adipate",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "butane-1,4-diol",
                "butyl butanoate",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetoacetate",
                "ethyl glyoxylate",
                "ethyl phenylglyoxylate",
                "formamide",
                "formic acid",
                "glycol ether",
                "glycolate ester",
                "glycolic acid",
                "haloacetic acid",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methoxyacetic acid",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "phenylacetonitrile",
                "piperidine",
                "propan-1-ol",
                "propane-1,2-diol",
                "propionitrile",
                "propyl acetate",
                "propyl decanoate",
                "propyl hexanoate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 12,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Propanol (C3H7OH)",
            "Isopropanol (C3H8O)",
            "Acetone (C3H6O)",
            "Acetic Acid (CH3COOH)",
            "Formic Acid (CH2O2)",
            "Dimethyl Sulfoxide (DMSO)",
            "Ethylene Glycol (C2H6O2)",
            "Glycerol (C3H8O3)",
            "Acetonitrile (CH3CN)",
            "Hydrogen Peroxide (H2O2)",
            "Nitromethane (CH3NO2)",
            "Propylene Carbonate (C4H6O3)",
            "Pyridine (C5H5N)",
            "Dimethylformamide (DMF)",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Trifluoroacetic Acid (TFA)",
            "Ethylene Carbonate (C3H4O3)",
            "Sulfolane (C4H8O2S)",
            "Formamide (CH3NO)",
            "Tetrahydrofuran (THF)",
            "Butanol (C4H9OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetonitrile",
                "acetone",
                "acetic acid",
                "formic acid",
                "dimethyl sulfoxide",
                "nitromethane",
                "sulfolane",
                "formamide"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Propanol (C3H7OH)",
                "Isopropanol (C3H8O)",
                "Ethylene Glycol (C2H6O2)",
                "Glycerol (C3H8O3)",
                "Hydrogen Peroxide (H2O2)",
                "Propylene Carbonate (C4H6O3)",
                "Pyridine (C5H5N)",
                "Dimethylformamide (DMF)",
                "N-Methyl-2-pyrrolidone (NMP)",
                "Trifluoroacetic Acid (TFA)",
                "Ethylene Carbonate (C3H4O3)",
                "Tetrahydrofuran (THF)",
                "Butanol (C4H9OH)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "3-methyl-2-butanol",
                "Methyl (xi)-3-nonenoate",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "butane-1,4-diol",
                "butyl butanoate",
                "chlorophyll a",
                "difluoroacetic acid",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 3-hexenoate",
                "ethyl 3-oxohexanoate",
                "ethyl acetate",
                "ethyl cyclohexanecarboxylate",
                "ethyl ester",
                "ethyl glyoxylate",
                "fluoroacetic acid",
                "formamide",
                "formic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-phenylpropanoate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl formate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "propyl decanoate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "trifluoroacetic acid",
                "water"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (C2H5OH)",
            "Acetic Acid (CH3COOH)",
            "Dimethyl Sulfoxide (DMSO)",
            "Methanol (CH3OH)",
            "Glycerol (C3H8O3)",
            "Formic Acid (HCOOH)",
            "Acetonitrile (CH3CN)",
            "Isopropanol (C3H8O)",
            "Propylene Glycol (C3H8O2)",
            "Sodium Acetate (CH3COONa) in solution",
            "Ammonium Hydroxide (NH4OH)",
            "Hydrochloric Acid (HCl) in solution",
            "Sodium Bicarbonate (NaHCO3) in solution",
            "Potassium Chloride (KCl) in solution",
            "Sodium Sulfate (Na2SO4) in solution",
            "Tetrahydrofuran (THF)",
            "Pyridine (C5H5N)",
            "Chloroform (CHCl3)",
            "Acetone (C3H6O)",
            "N,N-Dimethylformamide (DMF)",
            "N,N-Dimethylacetamide (DMA)",
            "Ethylene Glycol (C2H6O2)",
            "Cyclohexanone (C6H10O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ethanol",
                "Acetic Acid",
                "Dimethyl Sulfoxide",
                "Methanol",
                "Formic Acid",
                "Acetonitrile",
                "Acetone",
                "N,N-Dimethylformamide"
            ],
            "mismatches": [
                "Water (H2O)",
                "Glycerol (C3H8O3)",
                "Isopropanol (C3H8O)",
                "Propylene Glycol (C3H8O2)",
                "Sodium Acetate (CH3COONa) in solution",
                "Ammonium Hydroxide (NH4OH)",
                "Hydrochloric Acid (HCl) in solution",
                "Sodium Bicarbonate (NaHCO3) in solution",
                "Potassium Chloride (KCl) in solution",
                "Sodium Sulfate (Na2SO4) in solution",
                "Tetrahydrofuran (THF)",
                "Pyridine (C5H5N)",
                "Chloroform (CHCl3)",
                "N,N-Dimethylacetamide (DMA)",
                "Ethylene Glycol (C2H6O2)",
                "Cyclohexanone (C6H10O)"
            ],
            "true_referents": [
                "((18)O)water",
                "(1-hydroxycyclohexyl)acetic acid",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "1,1,1-trichloroethane",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "4,5,6,7-tetrachloro-2-benzofuran-1(3H)-one",
                "Indole-3-acetic acid sodium salt",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-furfurylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "ammonium formate",
                "carpronium chloride",
                "chloroacetic acid",
                "cyclohexylacetic acid",
                "dichloroacetic acid",
                "dichloromethane",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 3-oxohexanoate",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formic acid",
                "haloacetic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl acetate",
                "methyl-4-hydroxybenzoate O-sulfate",
                "phenylacetonitrile",
                "piperidine",
                "potassium formate",
                "propyl acetate",
                "protic solvent",
                "sulfoacetic acid",
                "tetrahydrofurfuryl alcohol",
                "triethanolammonium nitrate",
                "water"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Acetone (C3H6O)",
            "Ethanol (C2H5OH)",
            "Methanol (CH3OH)",
            "Dimethyl Sulfoxide (DMSO)",
            "Acetic Acid (CH3COOH)",
            "Formic Acid (HCOOH)",
            "Propylene Glycol (C3H8O2)",
            "Glycerol (C3H8O3)",
            "Isopropanol (C3H8O)",
            "Hydrogen Peroxide (H2O2)",
            "Ammonia (NH3)",
            "Sulfolane (C4H8O2S)",
            "Ethylene Glycol (C2H6O2)",
            "Diethyl Ether (C4H10O)",
            "Butanol (C4H10O)",
            "Pyridine (C5H5N)",
            "Dimethylformamide (C3H7NO)",
            "Tetrahydrofuran (C4H8O)",
            "Methyl Ethyl Ketone (C4H8O)",
            "Acetonitrile (C2H3N)",
            "Nitromethane (CH3NO2)",
            "Dimethylacetamide (C4H9NO)",
            "Ethyl Acetate (C4H8O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (C3H6O)",
                "Ethanol (C2H5OH)",
                "Methanol (CH3OH)",
                "Dimethyl Sulfoxide (DMSO)",
                "Acetic Acid (CH3COOH)",
                "Formic Acid (HCOOH)",
                "Sulfolane (C4H8O2S)",
                "Acetonitrile (C2H3N)",
                "Nitromethane (CH3NO2)",
                "Ethyl Acetate (C4H8O2)"
            ],
            "mismatches": [
                "Water (H2O)",
                "Propylene Glycol (C3H8O2)",
                "Glycerol (C3H8O3)",
                "Isopropanol (C3H8O)",
                "Hydrogen Peroxide (H2O2)",
                "Ammonia (NH3)",
                "Ethylene Glycol (C2H6O2)",
                "Diethyl Ether (C4H10O)",
                "Butanol (C4H10O)",
                "Pyridine (C5H5N)",
                "Dimethylformamide (C3H7NO)",
                "Tetrahydrofuran (C4H8O)",
                "Methyl Ethyl Ketone (C4H8O)",
                "Dimethylacetamide (C4H9NO)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "2alpha-acetoxy-1alpha-hydroxy-6beta,9beta,15-tribenzoyloxy-beta-dihydroagarofuran",
                "3-methyl-2-butanol",
                "4-Me-6E,8E-16:2 methyl ester",
                "8(9)-EET methyl ester",
                "Methyl (xi)-3-nonenoate",
                "Methyl 9-undecenoate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "aminoethyl nitrate",
                "butane-1,4-diol",
                "butyl butanoate",
                "chlorophyll a",
                "diethylene glycol monoethyl ether",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formic acid",
                "haloacetic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 4,9-dimethyldecanoate",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "water",
            "ethanol",
            "methanol",
            "acetone",
            "isopropyl alcohol",
            "ethylene glycol",
            "glycerol",
            "dimethyl sulfoxide (DMSO)",
            "acetic acid",
            "propylene glycol",
            "diethyl ether",
            "butanol",
            "formamide",
            "pyridine",
            "dimethylformamide (DMF)",
            "dimethylacetamide (DMA)",
            "tetrahydrofuran (THF)",
            "ethyl acetate",
            "acetonitrile",
            "N,N-dimethylpropionamide (DMPA)",
            "N-methyl-2-pyrrolidone (NMP)",
            "dimethyl sulfoxide-d6 (DMSO-d6)",
            "hexamethylphosphoramide (HMPA)",
            "chloroform"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "water",
                "ethanol",
                "methanol",
                "acetone",
                "dimethyl sulfoxide",
                "acetic acid",
                "formamide",
                "ethyl acetate",
                "acetonitrile",
                "N-methylpyrrolidin-2-one"
            ],
            "mismatches": [
                "isopropyl alcohol",
                "ethylene glycol",
                "glycerol",
                "propylene glycol",
                "diethyl ether",
                "butanol",
                "pyridine",
                "dimethylformamide (DMF)",
                "dimethylacetamide (DMA)",
                "tetrahydrofuran (THF)",
                "N,N-dimethylpropionamide (DMPA)",
                "dimethyl sulfoxide-d6 (DMSO-d6)",
                "hexamethylphosphoramide (HMPA)",
                "chloroform"
            ],
            "true_referents": [
                "((18)O)water",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "2-Acetamidoethylphosphonate",
                "3-methyl-2-butanol",
                "5-formamidopyrimidine",
                "CMP-5'-phosphonoformic acid",
                "Dimethyl succinate",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "acetonitrile oxide",
                "benzyl acetate",
                "butyl butanoate",
                "dichloromethane",
                "diethylene glycol monoethyl ether",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl acetoacetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "flavunoidine(2+)",
                "formamide",
                "formamidine",
                "formic acid",
                "glycol ether",
                "isoflurane",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl ester",
                "methyl p-coumarate acetate",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 10,
            "FP": 14,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H\u2082O)",
            "Methanol (CH\u2083OH)",
            "Ethanol (C\u2082H\u2085OH)",
            "Isopropanol (Propan-2-ol)",
            "Acetone (CH\u2083COCH\u2083)",
            "Dimethyl sulfoxide (DMSO, (CH\u2083)\u2082SO)",
            "Dimethylformamide (DMF, HCON(CH\u2083)\u2082)",
            "Acetonitrile (CH\u2083CN)",
            "Ethylene glycol (HOCH\u2082CH\u2082OH)",
            "Propylene glycol (CH\u2083CH(OH)CH\u2082OH)",
            "Glycerol (C\u2083H\u2088O\u2083)",
            "N,N-Dimethylacetamide (DMAc, CH\u2083CON(CH\u2083)\u2082)",
            "Formic acid (HCOOH)",
            "Pyridine (C\u2085H\u2085N)",
            "N-Methyl-2-pyrrolidone (NMP, C\u2085H\u2089NO)",
            "Hexafluoroisopropanol (HFIP)",
            "Sulfolane (C\u2084H\u2088O\u2082S)",
            "Dimethyl carbonate (DMC, (CH\u2083O)\u2082CO)",
            "Formamide (HCONH\u2082)",
            "Triethylamine (Et\u2083N)",
            "1,4-Dioxane (C\u2084H\u2088O\u2082)",
            "N,N-Dimethylsulfoxide (DMSO, (CH\u2083)\u2082SO)",
            "Dimethylacetamide (DMAc, CH\u2083CON(CH\u2083)\u2082)",
            "Ethylene glycol monomethyl ether (EGME)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Water (H\u2082O)",
                "Methanol (CH\u2083OH)",
                "Ethanol (C\u2082H\u2085OH)",
                "Acetone (CH\u2083COCH\u2083)",
                "Dimethyl sulfoxide (DMSO, (CH\u2083)\u2082SO)",
                "Acetonitrile (CH\u2083CN)",
                "Formic acid (HCOOH)",
                "N-Methyl-2-pyrrolidone (NMP, C\u2085H\u2089NO)",
                "Sulfolane (C\u2084H\u2088O\u2082S)",
                "Formamide (HCONH\u2082)"
            ],
            "mismatches": [
                "Isopropanol (Propan-2-ol)",
                "Ethylene glycol (HOCH\u2082CH\u2082OH)",
                "Propylene glycol (CH\u2083CH(OH)CH\u2082OH)",
                "Glycerol (C\u2083H\u2088O\u2083)",
                "N,N-Dimethylacetamide (DMAc, CH\u2083CON(CH\u2083)\u2082)",
                "Pyridine (C\u2085H\u2085N)",
                "Hexafluoroisopropanol (HFIP)",
                "Dimethyl carbonate (DMC, (CH\u2083O)\u2082CO)",
                "Triethylamine (Et\u2083N)",
                "1,4-Dioxane (C\u2084H\u2088O\u2082)",
                "N,N-Dimethylsulfoxide (DMSO, (CH\u2083)\u2082SO)",
                "Dimethylacetamide (DMAc, CH\u2083CON(CH\u2083)\u2082)",
                "Ethylene glycol monomethyl ether (EGME)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "1,1,1,3,3,3-hexafluoropropan-2-ol",
                "1-hexadecyl-2-acetyl-3-decanoyl-sn-glycerol",
                "2,2,2-trifluoroethanol",
                "2-ethoxyethanol",
                "2-phenylacetamide",
                "3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,10-heptadecafluoro-1-decanol",
                "Dimethyl succinate",
                "Ethyl 3-(N-butylacetamido)propionate",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-acetyl-S-(1Z)-propenyl-cysteine-sulfoxide",
                "N-benzylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "butane-1,4-diol",
                "dichloromethane",
                "diethylene glycol monoethyl ether",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 3-hexenoate",
                "ethyl 3-nonenoate",
                "ethyl acetate",
                "ethyl glyoxylate",
                "formamide",
                "formic acid",
                "glycol ether",
                "haloacetic acid",
                "isopropyl ester",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "phenylacetonitrile",
                "piperidine",
                "propyl acetate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "trifluoroacetic acid",
                "water"
            ],
            "TP": 10,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (C2H5OH)",
            "Methanol (CH3OH)",
            "Acetone (CH3COCH3)",
            "Dimethyl sulfoxide (DMSO)",
            "Formic acid (HCOOH)",
            "Acetic acid (CH3COOH)",
            "Propanol (C3H7OH)",
            "Isopropanol (C3H7OH)",
            "Glycerol (C3H8O3)",
            "Ethylene glycol (C2H6O2)",
            "Acetonitrile (CH3CN)",
            "Formamide (CH3NO)",
            "N,N-Dimethylformamide (DMF)",
            "Tetrahydrofuran (THF)",
            "1,4-Dioxane",
            "Pyridine (C5H5N)",
            "Ammonia (NH3)",
            "Hydrogen peroxide (H2O2)",
            "Hydrazine (N2H4)",
            "Sulfuric acid (H2SO4)",
            "Nitric acid (HNO3)",
            "Hydrochloric acid (HCl)",
            "Phosphoric acid (H3PO4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetonitrile",
                "N,N-dimethylformamide",
                "acetic acid",
                "acetone",
                "dimethyl sulfoxide",
                "ethanol",
                "formamide",
                "methanol",
                "nitric acid",
                "water"
            ],
            "mismatches": [
                "Water (H2O)",
                "Ethanol (C2H5OH)",
                "Methanol (CH3OH)",
                "Acetone (CH3COCH3)",
                "Dimethyl sulfoxide (DMSO)",
                "Formic acid (HCOOH)",
                "Acetic acid (CH3COOH)",
                "Propanol (C3H7OH)",
                "Isopropanol (C3H7OH)",
                "Glycerol (C3H8O3)",
                "Ethylene glycol (C2H6O2)",
                "Acetonitrile (CH3CN)",
                "Formamide (CH3NO)",
                "N,N-Dimethylformamide (DMF)",
                "Tetrahydrofuran (THF)",
                "1,4-Dioxane",
                "Pyridine (C5H5N)",
                "Ammonia (NH3)",
                "Hydrogen peroxide (H2O2)",
                "Hydrazine (N2H4)",
                "Sulfuric acid (H2SO4)",
                "Hydrochloric acid (HCl)",
                "Phosphoric acid (H3PO4)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "1,4-diacetoxybutane",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "2-hydrazinoethanol",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "butane-1,4-diol",
                "chloroacetic acid",
                "chlorophyll a",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "haloacetic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "methanol",
                "methanol-d4",
                "methyl 2-diazo-2-acetamidohexanoate",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "nitrates",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "phosphonoformic acid",
                "piperidine",
                "propyl decanoate",
                "sulfoacetic acid",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 10,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (MeOH)",
            "Ethanol (EtOH)",
            "Isopropanol (IPA)",
            "Acetone (CH3COCH3)",
            "Acetonitrile (MeCN)",
            "Dimethylformamide (DMF)",
            "Dimethyl sulfoxide (DMSO)",
            "Ethylene glycol",
            "Propylene glycol",
            "Glycerol",
            "Formic acid (HCOOH)",
            "Acetic acid (CH3COOH)",
            "Propionic acid (CH3CH2COOH)",
            "Butyric acid (CH3CH2CH2COOH)",
            "Formamide (HCONH2)",
            "N-Methylformamide (HCONHCH3)",
            "N,N-Dimethylformamide ((CH3)2NCHO)",
            "N,N-Dimethylacetamide (DMA)",
            "1-Methyl-2-pyrrolidinone (NMP)",
            "Sulfolane",
            "Hexamethylphosphoramide (HMPA)",
            "Propylene carbonate (PC)",
            "Ethylene carbonate (EC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetonitrile",
                "dimethyl sulfoxide",
                "formic acid",
                "acetic acid",
                "formamide",
                "N-methylformamide",
                "N,N-dimethylformamide",
                "sulfolane"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (MeOH)",
                "Ethanol (EtOH)",
                "Isopropanol (IPA)",
                "Acetone (CH3COCH3)",
                "Ethylene glycol",
                "Propylene glycol",
                "Glycerol",
                "Propionic acid (CH3CH2COOH)",
                "Butyric acid (CH3CH2CH2COOH)",
                "N,N-Dimethylacetamide (DMA)",
                "1-Methyl-2-pyrrolidinone (NMP)",
                "Hexamethylphosphoramide (HMPA)",
                "Propylene carbonate (PC)",
                "Ethylene carbonate (EC)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "2-Acetamidoethylphosphonate",
                "3-phenylpropyl acetate",
                "CMP-5'-phosphonoformic acid",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-cyclohexylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "butyl acetate",
                "butyl butanoate",
                "butyl propionate",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl 3-hexenoate",
                "ethyl acetate",
                "ethyl cyclohexanecarboxylate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "formamide",
                "formic acid",
                "glycol ether",
                "haloacetic acid",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 5-(hydroxymethyl)pyrrolidine-3-carboxylate",
                "methyl ester",
                "methyl p-coumarate acetate",
                "phenylacetonitrile",
                "propyl acetate",
                "propyl propionate",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "water"
            ],
            "TP": 8,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "water (H2O)",
            "methanol (CH3OH)",
            "ethanol (C2H5OH)",
            "propanol (C3H7OH)",
            "butanol (C4H9OH)",
            "ammonia (NH3)",
            "hydrogen peroxide (H2O2)",
            "acetic acid (CH3COOH)",
            "formic acid (HCOOH)",
            "nitric acid (HNO3)",
            "sulfuric acid (H2SO4)",
            "hydrochloric acid (HCl)",
            "phosphoric acid (H3PO4)",
            "acetone (C3H6O)",
            "formaldehyde (CH2O)",
            "dimethyl sulfoxide (DMSO)",
            "ethylene glycol (C2H6O2)",
            "glycerol (C3H8O3)",
            "acetonitrile (CH3CN)",
            "hexane (C6H14)",
            "carbon tetrachloride (CCl4)",
            "dichloromethane (CH2Cl2)",
            "chloroform (CHCl3)",
            "carbon disulfide (CS2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetic acid",
                "acetone",
                "dimethyl sulfoxide",
                "acetonitrile",
                "dichloromethane",
                "formic acid",
                "nitric acid",
                "water"
            ],
            "mismatches": [
                "water (H2O)",
                "methanol (CH3OH)",
                "ethanol (C2H5OH)",
                "propanol (C3H7OH)",
                "butanol (C4H9OH)",
                "ammonia (NH3)",
                "hydrogen peroxide (H2O2)",
                "sulfuric acid (H2SO4)",
                "hydrochloric acid (HCl)",
                "phosphoric acid (H3PO4)",
                "formaldehyde (CH2O)",
                "ethylene glycol (C2H6O2)",
                "glycerol (C3H8O3)",
                "hexane (C6H14)",
                "carbon tetrachloride (CCl4)",
                "chloroform (CHCl3)",
                "carbon disulfide (CS2)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(E)-hex-2-enyl acetate",
                "(R)-propane-1,2-diol",
                "(S)-methoprene",
                "(S)-propane-1,2-diol",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "3-methyl-2-butanol",
                "4,5,6,7-tetrachloro-2-benzofuran-1(3H)-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "bis(4-chlorophenyl)acetic acid",
                "butane-1,4-diol",
                "butyl butanoate",
                "chloroacetic acid",
                "chlorophyll a",
                "chlorophyll c2",
                "dichloromethane",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "ethyl hexanoate",
                "ethylammonium nitrate",
                "formic acid",
                "haloacetic acid",
                "methanol",
                "methanol-d4",
                "nitrates",
                "nitric acid",
                "nitromethane",
                "phenylacetonitrile",
                "phosphonoformic acid",
                "propyl decanoate",
                "sulfoacetic acid",
                "water"
            ],
            "TP": 8,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Acetone (CH3COCH3)",
            "Dimethyl sulfoxide (DMSO)",
            "Formamide (CH3NO)",
            "Pyridine (C5H5N)",
            "Acetic acid (CH3COOH)",
            "Ammonia (NH3)",
            "Hydrogen peroxide (H2O2)",
            "Glycerol (C3H8O3)",
            "Ethylene glycol (C2H6O2)",
            "Propylene glycol (C3H8O2)",
            "Isopropanol (C3H8O)",
            "Butanol (C4H10O)",
            "Hexane (C6H14)",
            "Chloroform (CHCl3)",
            "Dichloromethane (CH2Cl2)",
            "Diethyl ether (C4H10O)",
            "Tetrahydrofuran (C4H8O)",
            "Dimethylformamide (C3H7NO)",
            "Dimethylacetamide (C4H9NO)",
            "Pyrrole (C4H5N)",
            "Furan (C4H4O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Methanol",
                "Ethanol",
                "Acetone",
                "Dimethyl sulfoxide",
                "Formamide",
                "Acetic acid",
                "Dichloromethane",
                "water",
                "N,N-dimethylformamide"
            ],
            "mismatches": [
                "Water (H2O)",
                "Methanol (CH3OH)",
                "Ethanol (C2H5OH)",
                "Acetone (CH3COCH3)",
                "Dimethyl sulfoxide (DMSO)",
                "Formamide (CH3NO)",
                "Pyridine (C5H5N)",
                "Acetic acid (CH3COOH)",
                "Ammonia (NH3)",
                "Hydrogen peroxide (H2O2)",
                "Glycerol (C3H8O3)",
                "Ethylene glycol (C2H6O2)",
                "Propylene glycol (C3H8O2)",
                "Isopropanol (C3H8O)",
                "Butanol (C4H10O)",
                "Hexane (C6H14)",
                "Chloroform (CHCl3)",
                "Diethyl ether (C4H10O)",
                "Tetrahydrofuran (C4H8O)",
                "Dimethylacetamide (C4H9NO)",
                "Pyrrole (C4H5N)",
                "Furan (C4H4O)"
            ],
            "true_referents": [
                "((18)O)water",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-ethoxyethanol",
                "2alpha-acetoxy-1alpha-hydroxy-6beta,9beta,15-tribenzoyloxy-beta-dihydroagarofuran",
                "3-methyl-2-butanol",
                "4,5,6,7-tetrachloro-2-benzofuran-1(3H)-one",
                "4-[dimethylamino(oxo)methyl]-3,5-dimethyl-1H-pyrrole-2-carboxylic acid ethyl ester",
                "Methyl 9-undecenoate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-methylformamide",
                "acetic acid",
                "acetone",
                "acetone d6",
                "butane-1,4-diol",
                "butyl butanoate",
                "chlorophyll a",
                "dichloromethane",
                "diethylene glycol monoethyl ether",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "ethyl hexadecanoate",
                "ethyl hexanoate",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 4,9-dimethyldecanoate",
                "methyl 5-(hydroxymethyl)pyrrolidine-3-carboxylate",
                "methyl N-(2,6-dimethylphenyl)-N-2-furoylalaninate",
                "nitric acid",
                "piperidine",
                "propyl acetate",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 9,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Methanol (CH3OH)",
            "Ethanol (C2H5OH)",
            "Acetone (CH3COCH3)",
            "Dimethylformamide (DMF)",
            "Dimethyl sulfoxide (DMSO)",
            "Acetonitrile (CH3CN)",
            "Formamide (HCOONH2)",
            "Formic acid (HCOOH)",
            "Acetic acid (CH3COOH)",
            "Glycerol (C3H8O3)",
            "Urea (CO(NH2)2)",
            "Ammonia (NH3)",
            "Hydrogen peroxide (H2O2)",
            "Methyl acetate (CH3COOCH3)",
            "Ethyl acetate (C2H5COOCH3)",
            "Isopropanol (C3H7OH)",
            "Tetrahydrofuran (THF)",
            "N,N-Dimethylformamide (DMF)",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Pyridine (C5H5N)",
            "Dioxane (C4H8O2)",
            "Sodium hydroxide (NaOH)",
            "Potassium hydroxide (KOH)",
            "Lithium hydroxide (LiOH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "methanol",
                "ethanol",
                "acetone",
                "dimethyl sulfoxide",
                "acetonitrile",
                "formamide",
                "formic acid",
                "acetic acid",
                "methyl acetate",
                "ethyl acetate",
                "N,N-dimethylformamide",
                "N-methylpyrrolidin-2-one"
            ],
            "mismatches": [
                "water (H2O)",
                "glycerol (C3H8O3)",
                "urea (CO(NH2)2)",
                "ammonia (NH3)",
                "hydrogen peroxide (H2O2)",
                "isopropanol (C3H7OH)",
                "tetrahydrofuran (THF)",
                "pyridine (C5H5N)",
                "dioxane (C4H8O2)",
                "sodium hydroxide (NaOH)",
                "potassium hydroxide (KOH)",
                "lithium hydroxide (LiOH)"
            ],
            "true_referents": [
                "((18)O)water",
                "(+)-ureidoglycolic acid",
                "(-)-ureidoglycolic acid",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(R)-propane-1,2-diol",
                "(S)-propane-1,2-diol",
                "(benzyloxy)acetic acid",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "Isopropyl propionate",
                "N,N-dimethylformamide",
                "N,N-dimethylformamide dimethyl acetal",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "chlorophyll a",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl ester",
                "ethylammonium nitrate",
                "formamide",
                "formic acid",
                "haloacetic acid",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 3-aminopyrazine-2-carboxylate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl acetate",
                "nitric acid",
                "phenylacetonitrile",
                "piperidine",
                "potassium formate",
                "pyrrolidin-2-one",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (C2H5OH)",
            "Methanol (CH3OH)",
            "Dimethyl sulfoxide (DMSO)",
            "Dimethylformamide (DMF)",
            "Acetone (C3H6O)",
            "Isopropanol (C3H8O)",
            "Ethylene glycol (C2H4O2)",
            "Glycerol (C3H8O3)",
            "Formamide (CH3NO)",
            "Acetonitrile (C2H3N)",
            "N-Methyl-2-pyrrolidone (NMP)",
            "Pyridine (C5H5N)",
            "Dimethylacetamide (DMA)",
            "Hexamethylphosphoramide (HMPA)",
            "Sulfolane (C4H8SO2)",
            "Tetrahydrofuran (THF)",
            "1,2-Dichloroethane (C2H4Cl2)",
            "1,1-Dichloroethane (C2H4Cl2)",
            "2-Methoxyethanol (C3H8O2)",
            "2-Ethoxyethanol (C4H10O2)",
            "Benzonitrile (C7H5N)",
            "Acetic acid (C2H4O2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dimethyl sulfoxide",
                "Formamide",
                "Sulfolane",
                "2-Ethoxyethanol",
                "2-Methoxyethanol",
                "Acetic acid"
            ],
            "mismatches": [
                "Water (H2O)",
                "Ethanol (C2H5OH)",
                "Methanol (CH3OH)",
                "Dimethylformamide (DMF)",
                "Acetone (C3H6O)",
                "Isopropanol (C3H8O)",
                "Ethylene glycol (C2H4O2)",
                "Glycerol (C3H8O3)",
                "Acetonitrile (C2H3N)",
                "N-Methyl-2-pyrrolidone (NMP)",
                "Pyridine (C5H5N)",
                "Dimethylacetamide (DMA)",
                "Hexamethylphosphoramide (HMPA)",
                "Tetrahydrofuran (THF)",
                "1,2-Dichloroethane (C2H4Cl2)",
                "1,1-Dichloroethane (C2H4Cl2)",
                "Benzonitrile (C7H5N)"
            ],
            "true_referents": [
                "((18)O)water",
                "(2-methylphenyl)acetonitrile",
                "(3R,3aS,6aR)-hexahydrofuro[2,3-b]furan-3-yl glycolate",
                "(E)-hex-2-enyl acetate",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "1-O-octadecyl-2-acetyl-sn-glycerol",
                "2-(2-methoxyethoxy)ethanol",
                "2-Acetamidoethylphosphonate",
                "2-ethoxyethanol",
                "2-ethylhexyl acetate",
                "2-methoxyethanol",
                "3-phenylpropionitrile",
                "CMP-5'-phosphonoformic acid",
                "N,N-dimethylformamide",
                "N-benzylformamide",
                "N-furfurylformamide",
                "N-methylformamide",
                "N-methylpyrrolidin-2-one",
                "acetic acid",
                "acetone",
                "acetone d6",
                "acetonitrile",
                "acrylonitrile",
                "dichloromethane",
                "dimethyl fumarate",
                "dimethyl sebacate",
                "dimethyl sulfoxide",
                "ethanol",
                "ethyl acetate",
                "ethyl ester",
                "ethyl glyoxylate",
                "formamide",
                "isopropyl dodecanoate",
                "isopropyl ester",
                "isopropyl palmitate",
                "methanol",
                "methanol-d4",
                "methyl 2-methylpropyl 2,6-dimethyl-4-(2-nitrophenyl)-1,4-dihydropyridine-3,5-dicarboxylate",
                "methyl 5-(hydroxymethyl)pyrrolidine-2-carboxylate",
                "methyl p-coumarate acetate",
                "phenylacetonitrile",
                "piperidine",
                "pyrrolidin-2-one",
                "sulfoacetic acid",
                "sulfolane",
                "tetrahydrofurfuryl alcohol",
                "water"
            ],
            "TP": 6,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "polar solvent",
        "domain": "chemistry",
        "response": [
            "water",
            "ethanol",
            "acetone",
            "methanol",
            "chloroform",
            "dichloromethane",
            "formic acid",
            "acetic acid",
            "propanol",
            "butanol",
            "pentanol",
            "hexanol",
            "heptanol",
            "octanol",
            "nonanol",
            "decanol",
            "dodecanol",
            "tetradecanol",
            "hexadecanol",
            "octadecanol",
            "nonadecanol",
            "eicosanol",
            "docosanol",
            "tetracosanol",
            "pentacosanol",
            "hexacosanol",
            "heptacosanol",
            "octacosanol",
            "nonacosanol",
            "triacontanol",
            "tetratriacontanol",
            "pentatriacontanol",
            "hexatriacontanol",
            "heptatriacontanol",
            "octatriacontanol",
            "nonatriacontanol",
            "triacontanol",
            "tetratriacontanol",
            "pentatriacontanol",
            "hexatriacontanol",
            "heptatriacontanol",
            "octatriacontanol",
            "nonatriacontanol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "water",
                "ethanol",
                "acetone",
                "methanol",
                "dichloromethane",
                "formic acid",
                "acetic acid"
            ],
            "mismatches": [
                "chloroform",
                "propanol",
                "butanol",
                "pentanol",
                "hexanol",
                "heptanol",
                "octanol",
                "nonanol",
                "decanol",
                "dodecanol",
                "tetradecanol",
                "hexadecanol",
                "octadecanol",
                "nonadecanol",
                "eicosanol",
                "docosanol",
                "tetracosanol",
                "pentacosanol",
                "hexacosanol",
                "heptacosanol",
                "octacosanol",
                "nonacosanol",
                "triacontanol",
                "tetratriacontanol",
                "pentatriacontanol",
                "hexatriacontanol",
                "heptatriacontanol",
                "octatriacontanol",
                "nonatriacontanol"
            ],
            "true_referents": [
                "((18)O)water",
                "1,1,1-trichloroethane",
                "1,3-dichloropropan-2-ol",
                "13-Docosenoic acid, methyl ester",
                "3-methyl-2-butanol",
                "Methyl Hexacosanoate",
                "Octadecadienoic acid, methyl ester",
                "acetic acid",
                "acetone",
                "acetone d6",
                "benzyl acetate",
                "butyl butanoate",
                "butyl octanoate",
                "decan-1-ol",
                "dichloromethane",
                "ethanol",
                "ethyl (4Z,7Z,10Z,13Z,16Z)-docosapentaenoate",
                "ethyl acetoacetate",
                "ethyl arachidonate",
                "ethyl decanoate",
                "ethyl heptanoate",
                "ethyl hexadecanoate",
                "ethyl hexanoate",
                "ethyl nonanoate",
                "ethyl octadecanoate",
                "ethyl octanoate",
                "formic acid",
                "isoflurane",
                "isopropyl dodecanoate",
                "methanol",
                "methanol-d4",
                "methyl arachidonate",
                "methyl formate",
                "methyl heptadecanoate",
                "methyl hexanoate",
                "methyl nonadecanoate",
                "methyl nonanoate",
                "methyl tetracosanoate",
                "methyl tetradecanoate",
                "octyl acetate",
                "pentan-2-ol",
                "propyl acetate",
                "propyl decanoate",
                "water"
            ],
            "TP": 7,
            "FP": 29,
            "FN": 0
        }
    }
]