[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "hexane (C6H14)",
            "pentane (C5H12)",
            "heptane (C7H16)",
            "octane (C8H18)",
            "cyclohexane (C6H12)",
            "toluene (C7H8)",
            "benzene (C6H6)",
            "xylene (C8H10)",
            "diethyl ether (C4H10O)",
            "tetrahydrofuran (C4H8O)",
            "carbon tetrachloride (CCl4)",
            "dichloromethane (CH2Cl2)",
            "trichloromethane (CHCl3)",
            "carbon disulfide (CS2)",
            "nitromethane (CH3NO2)",
            "carbon tetrabromide (CBr4)",
            "chlorobenzene (C6H5Cl)",
            "dichlorobenzene (C6H4Cl2)",
            "trichlorobenzene (C6H3Cl3)",
            "tetrachlorobenzene (C6H2Cl4)",
            "dimethyl sulfoxide (C2H6SO)",
            "ethyl acetate (C4H8O2)",
            "acetone (C3H6O)",
            "methanol (CH3OH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "hexane (C6H14)",
                "pentane (C5H12)",
                "heptane (C7H16)",
                "octane (C8H18)",
                "cyclohexane (C6H12)",
                "toluene (C7H8)",
                "benzene (C6H6)",
                "xylene (C8H10)",
                "diethyl ether (C4H10O)",
                "tetrahydrofuran (C4H8O)",
                "carbon tetrachloride (CCl4)",
                "dichloromethane (CH2Cl2)",
                "trichloromethane (CHCl3)",
                "carbon disulfide (CS2)",
                "nitromethane (CH3NO2)",
                "carbon tetrabromide (CBr4)",
                "chlorobenzene (C6H5Cl)",
                "dichlorobenzene (C6H4Cl2)",
                "trichlorobenzene (C6H3Cl3)",
                "tetrachlorobenzene (C6H2Cl4)",
                "dimethyl sulfoxide (C2H6SO)",
                "ethyl acetate (C4H8O2)",
                "acetone (C3H6O)",
                "methanol (CH3OH)"
            ],
            "mismatches": [],
            "true_referents": [
                "acetone (C3H6O)",
                "benzene (C6H6)",
                "carbon disulfide (CS2)",
                "carbon tetrabromide (CBr4)",
                "carbon tetrachloride (CCl4)",
                "chlorobenzene (C6H5Cl)",
                "cyclohexane (C6H12)",
                "dichlorobenzene (C6H4Cl2)",
                "dichloromethane (CH2Cl2)",
                "diethyl ether (C4H10O)",
                "dimethyl sulfoxide (C2H6SO)",
                "ethyl acetate (C4H8O2)",
                "heptane (C7H16)",
                "hexane (C6H14)",
                "methanol (CH3OH)",
                "nitromethane (CH3NO2)",
                "octane (C8H18)",
                "pentane (C5H12)",
                "tetrachlorobenzene (C6H2Cl4)",
                "tetrahydrofuran (C4H8O)",
                "toluene (C7H8)",
                "trichlorobenzene (C6H3Cl3)",
                "trichloromethane (CHCl3)",
                "xylene (C8H10)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Acetone (CH3COCH3)",
            "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
            "Dimethylformamide (DMF, (CH3)2NC(O)H)",
            "Ethanol (EtOH, C2H5OH)",
            "Ethyl acetate (CH3COOCH2CH3)",
            "Hexane (C6H14)",
            "Methanol (MeOH, CH3OH)",
            "Tetrahydrofuran (THF, C4H8O)",
            "Toluene (C7H8)",
            "Carbon tetrachloride (CCl4)",
            "Chloroform (CHCl3)",
            "Diethyl ether (C4H10O)",
            "Benzene (C6H6)",
            "Cyclohexane (C6H12)",
            "Acetonitrile (CH3CN)",
            "Pyridine (C5H5N)",
            "Acetic acid (CH3COOH)",
            "Propylene carbonate (C4H6O3)",
            "1,4-Dioxane (C4H8O2)",
            "Isopropanol (C3H8O)",
            "Methyl tert-butyl ether (MTBE, C5H12O)",
            "Anisole (C7H8O)",
            "Glycerol (C3H8O3)",
            "1-Propanol (C3H8O)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (CH3COCH3)",
                "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
                "Dimethylformamide (DMF, (CH3)2NC(O)H)",
                "Ethanol (EtOH, C2H5OH)",
                "Ethyl acetate (CH3COOCH2CH3)",
                "Hexane (C6H14)",
                "Methanol (MeOH, CH3OH)",
                "Tetrahydrofuran (THF, C4H8O)",
                "Toluene (C7H8)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Diethyl ether (C4H10O)",
                "Benzene (C6H6)",
                "Cyclohexane (C6H12)",
                "Acetonitrile (CH3CN)",
                "Pyridine (C5H5N)",
                "Acetic acid (CH3COOH)",
                "Propylene carbonate (C4H6O3)",
                "1,4-Dioxane (C4H8O2)",
                "Isopropanol (C3H8O)",
                "Methyl tert-butyl ether (MTBE, C5H12O)",
                "Anisole (C7H8O)",
                "Glycerol (C3H8O3)",
                "1-Propanol (C3H8O)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,4-Dioxane (C4H8O2)",
                "1-Propanol (C3H8O)",
                "Acetic acid (CH3COOH)",
                "Acetone (CH3COCH3)",
                "Acetonitrile (CH3CN)",
                "Anisole (C7H8O)",
                "Benzene (C6H6)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (C6H12)",
                "Diethyl ether (C4H10O)",
                "Dimethyl sulfoxide (DMSO, (CH3)2SO)",
                "Dimethylformamide (DMF, (CH3)2NC(O)H)",
                "Ethanol (EtOH, C2H5OH)",
                "Ethyl acetate (CH3COOCH2CH3)",
                "Glycerol (C3H8O3)",
                "Hexane (C6H14)",
                "Isopropanol (C3H8O)",
                "Methanol (MeOH, CH3OH)",
                "Methyl tert-butyl ether (MTBE, C5H12O)",
                "Propylene carbonate (C4H6O3)",
                "Pyridine (C5H5N)",
                "Tetrahydrofuran (THF, C4H8O)",
                "Toluene (C7H8)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Acetone (C3H6O)",
            "Acetonitrile (CH3CN)",
            "Benzene (C6H6)",
            "Carbon tetrachloride (CCl4)",
            "Chloroform (CHCl3)",
            "Cyclohexane (C6H12)",
            "Dichloromethane (DCM)",
            "Diethyl ether (C4H10O)",
            "Dimethylformamide (DMF)",
            "Dimethyl sulfoxide (DMSO)",
            "Ethyl acetate (CH3COOCH2CH3)",
            "Hexane (C6H14)",
            "Methyl tert-butyl ether (MTBE)",
            "N,N-Dimethylacetamide (DMA)",
            "Nitromethane (CH3NO2)",
            "Propylene carbonate (C4H6O3)",
            "Tetrahydrofuran (THF)",
            "Toluene (C7H8)",
            "Triethylamine (N(C2H5)3)",
            "Xylene (C8H10)",
            "1,4-Dioxane (C4H8O2)",
            "1,2-Dichloroethane (DCE)",
            "1,2-Dimethoxyethane (DME)",
            "Ethylene glycol dimethyl ether (Glyme)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (C3H6O)",
                "Acetonitrile (CH3CN)",
                "Benzene (C6H6)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (C6H12)",
                "Dichloromethane (DCM)",
                "Diethyl ether (C4H10O)",
                "Dimethylformamide (DMF)",
                "Dimethyl sulfoxide (DMSO)",
                "Ethyl acetate (CH3COOCH2CH3)",
                "Hexane (C6H14)",
                "Methyl tert-butyl ether (MTBE)",
                "N,N-Dimethylacetamide (DMA)",
                "Nitromethane (CH3NO2)",
                "Propylene carbonate (C4H6O3)",
                "Tetrahydrofuran (THF)",
                "Toluene (C7H8)",
                "Triethylamine (N(C2H5)3)",
                "Xylene (C8H10)",
                "1,4-Dioxane (C4H8O2)",
                "1,2-Dichloroethane (DCE)",
                "1,2-Dimethoxyethane (DME)",
                "Ethylene glycol dimethyl ether (Glyme)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,2-Dichloroethane (DCE)",
                "1,2-Dimethoxyethane (DME)",
                "1,4-Dioxane (C4H8O2)",
                "Acetone (C3H6O)",
                "Acetonitrile (CH3CN)",
                "Benzene (C6H6)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (C6H12)",
                "Dichloromethane (DCM)",
                "Diethyl ether (C4H10O)",
                "Dimethyl sulfoxide (DMSO)",
                "Dimethylformamide (DMF)",
                "Ethyl acetate (CH3COOCH2CH3)",
                "Ethylene glycol dimethyl ether (Glyme)",
                "Hexane (C6H14)",
                "Methyl tert-butyl ether (MTBE)",
                "N,N-Dimethylacetamide (DMA)",
                "Nitromethane (CH3NO2)",
                "Propylene carbonate (C4H6O3)",
                "Tetrahydrofuran (THF)",
                "Toluene (C7H8)",
                "Triethylamine (N(C2H5)3)",
                "Xylene (C8H10)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Diethyl ether (Et2O)",
            "Dichloromethane (DCM)",
            "Tetrahydrofuran (THF)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Hexane (C6H14)",
            "Pentane (C5H12)",
            "Chloroform (CHCl3)",
            "Carbon tetrachloride (CCl4)",
            "Dimethylformamide (DMF)",
            "Dimethyl sulfoxide (DMSO)",
            "Acetonitrile (CH3CN)",
            "Ethyl acetate (EtOAc)",
            "Acetone (C3H6O)",
            "Dioxane (C4H8O2)",
            "Carbon disulfide (CS2)",
            "1,2-Dichloroethane (DCE)",
            "Cyclohexane (C6H12)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Methyl tert-butyl ether (MTBE)",
            "Diisopropyl ether (DIPE)",
            "n-Butyl ether (EtO-Bu)",
            "1,4-Dioxane (1,4-Diox)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Diethyl ether (Et2O)",
                "Dichloromethane (DCM)",
                "Tetrahydrofuran (THF)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Hexane (C6H14)",
                "Pentane (C5H12)",
                "Chloroform (CHCl3)",
                "Carbon tetrachloride (CCl4)",
                "Dimethylformamide (DMF)",
                "Dimethyl sulfoxide (DMSO)",
                "Acetonitrile (CH3CN)",
                "Ethyl acetate (EtOAc)",
                "Acetone (C3H6O)",
                "Dioxane (C4H8O2)",
                "Carbon disulfide (CS2)",
                "1,2-Dichloroethane (DCE)",
                "Cyclohexane (C6H12)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Methyl tert-butyl ether (MTBE)",
                "Diisopropyl ether (DIPE)",
                "n-Butyl ether (EtO-Bu)",
                "1,4-Dioxane (1,4-Diox)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,2-Dichloroethane (DCE)",
                "1,4-Dioxane (1,4-Diox)",
                "Acetone (C3H6O)",
                "Acetonitrile (CH3CN)",
                "Benzene (C6H6)",
                "Carbon disulfide (CS2)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (C6H12)",
                "Dichloromethane (DCM)",
                "Diethyl ether (Et2O)",
                "Diisopropyl ether (DIPE)",
                "Dimethyl sulfoxide (DMSO)",
                "Dimethylformamide (DMF)",
                "Dioxane (C4H8O2)",
                "Ethyl acetate (EtOAc)",
                "Heptane (C7H16)",
                "Hexane (C6H14)",
                "Methyl tert-butyl ether (MTBE)",
                "Octane (C8H18)",
                "Pentane (C5H12)",
                "Tetrahydrofuran (THF)",
                "Toluene (C7H8)",
                "n-Butyl ether (EtO-Bu)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Carbon tetrachloride (CCl4)",
            "Chloroform (CHCl3)",
            "Dichloromethane (DCM)",
            "Tetrachloroethylene (C2Cl4)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Cyclohexane (C6H12)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Xylene (C8H10)",
            "Ethylbenzene (C8H10)",
            "Diethyl ether (C4H10O)",
            "Tetrahydrofuran (THF)",
            "Dioxane (C4H8O2)",
            "Carbon disulfide (CS2)",
            "Acetonitrile (CH3CN)",
            "Dimethyl sulfoxide (DMSO)",
            "Dimethylformamide (DMF)",
            "Acetone (C3H6O)",
            "Methyl ethyl ketone (MEK)",
            "Tetramethylsilane (TMS)",
            "Hexamethylphosphoramide (HMPA)",
            "Dichloroethane (DCE)",
            "Trichloroethylene (TCE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Dichloromethane (DCM)",
                "Tetrachloroethylene (C2Cl4)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Cyclohexane (C6H12)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Xylene (C8H10)",
                "Ethylbenzene (C8H10)",
                "Diethyl ether (C4H10O)",
                "Tetrahydrofuran (THF)",
                "Dioxane (C4H8O2)",
                "Carbon disulfide (CS2)",
                "Acetonitrile (CH3CN)",
                "Dimethyl sulfoxide (DMSO)",
                "Dimethylformamide (DMF)",
                "Acetone (C3H6O)",
                "Methyl ethyl ketone (MEK)",
                "Tetramethylsilane (TMS)",
                "Hexamethylphosphoramide (HMPA)",
                "Dichloroethane (DCE)",
                "Trichloroethylene (TCE)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetone (C3H6O)",
                "Acetonitrile (CH3CN)",
                "Benzene (C6H6)",
                "Carbon disulfide (CS2)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (C6H12)",
                "Dichloroethane (DCE)",
                "Dichloromethane (DCM)",
                "Diethyl ether (C4H10O)",
                "Dimethyl sulfoxide (DMSO)",
                "Dimethylformamide (DMF)",
                "Dioxane (C4H8O2)",
                "Ethylbenzene (C8H10)",
                "Heptane (C7H16)",
                "Hexamethylphosphoramide (HMPA)",
                "Hexane (C6H14)",
                "Methyl ethyl ketone (MEK)",
                "Tetrachloroethylene (C2Cl4)",
                "Tetrahydrofuran (THF)",
                "Tetramethylsilane (TMS)",
                "Toluene (C7H8)",
                "Trichloroethylene (TCE)",
                "Xylene (C8H10)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Hexane (C6H14)",
            "Benzene (C6H6)",
            "Toluene (C7H8)",
            "Cyclohexane (C6H12)",
            "Chloroform (CHCl3)",
            "Dichloromethane (CH2Cl2)",
            "Carbon tetrachloride (CCl4)",
            "Diethyl ether (C4H10O)",
            "Acetone (C3H6O)",
            "Ethyl acetate (C4H8O2)",
            "Xylene (C8H10)",
            "Petroleum ether (C5H12 to C6H14)",
            "Decane (C10H22)",
            "Octane (C8H18)",
            "Heptane (C7H16)",
            "Nonane (C9H20)",
            "Benzyl alcohol (C7H8O)",
            "Dimethyl sulfoxide (DMSO) (C2H6OS)",
            "Nitrobenzene (C6H5NO2)",
            "Cyclic ethers (e.g., tetrahydrofuran)",
            "Propylene carbonate (C4H6O3)",
            "1,4-Dioxane (C4H8O2)",
            "Ethylene glycol dimethyl ether (C4H10O2)",
            "1,2-Dichloroethane (C2H4Cl2)",
            "Bromoform (CHBr3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexane (C6H14)",
                "Benzene (C6H6)",
                "Toluene (C7H8)",
                "Cyclohexane (C6H12)",
                "Chloroform (CHCl3)",
                "Dichloromethane (CH2Cl2)",
                "Carbon tetrachloride (CCl4)",
                "Diethyl ether (C4H10O)",
                "Acetone (C3H6O)",
                "Ethyl acetate (C4H8O2)",
                "Xylene (C8H10)",
                "Petroleum ether (C5H12 to C6H14)",
                "Decane (C10H22)",
                "Octane (C8H18)",
                "Heptane (C7H16)",
                "Nonane (C9H20)",
                "Benzyl alcohol (C7H8O)",
                "Dimethyl sulfoxide (DMSO) (C2H6OS)",
                "Nitrobenzene (C6H5NO2)",
                "Cyclic ethers (e.g., tetrahydrofuran)",
                "Propylene carbonate (C4H6O3)",
                "1,4-Dioxane (C4H8O2)",
                "Ethylene glycol dimethyl ether (C4H10O2)",
                "1,2-Dichloroethane (C2H4Cl2)"
            ],
            "mismatches": [
                "Bromoform (CHBr3)"
            ],
            "true_referents": [
                "1,2-Dichloroethane (C2H4Cl2)",
                "1,4-Dioxane (C4H8O2)",
                "Acetone (C3H6O)",
                "Benzene (C6H6)",
                "Benzyl alcohol (C7H8O)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclic ethers (e.g., tetrahydrofuran)",
                "Cyclohexane (C6H12)",
                "Decane (C10H22)",
                "Dichloromethane (CH2Cl2)",
                "Diethyl ether (C4H10O)",
                "Dimethyl sulfoxide (DMSO) (C2H6OS)",
                "Ethyl acetate (C4H8O2)",
                "Ethylene glycol dimethyl ether (C4H10O2)",
                "Heptane (C7H16)",
                "Hexane (C6H14)",
                "Nitrobenzene (C6H5NO2)",
                "Nonane (C9H20)",
                "Octane (C8H18)",
                "Petroleum ether (C5H12 to C6H14)",
                "Propylene carbonate (C4H6O3)",
                "Toluene (C7H8)",
                "Xylene (C8H10)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Dimethyl Sulfoxide (DMSO)",
            "Acetone",
            "Ethanol",
            "Methanol",
            "Isopropanol",
            "Toluene",
            "Xylene",
            "Chloroform",
            "Dichloromethane",
            "Diethyl Ether",
            "Hexane",
            "Pentane",
            "Benzene",
            "Cyclohexane",
            "Carbon Tetrachloride",
            "Pyridine",
            "Tetrahydrofuran (THF)",
            "Acetonitrile",
            "N,N-Dimethylformamide (DMF)",
            "Dioxane",
            "1,4-Dioxane",
            "Isobutyl Alcohol",
            "Methyl Ethyl Ketone (MEK)",
            "Ethylene Glycol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dimethyl Sulfoxide (DMSO)",
                "Acetone",
                "Ethanol",
                "Methanol",
                "Isopropanol",
                "Toluene",
                "Xylene",
                "Chloroform",
                "Dichloromethane",
                "Diethyl Ether",
                "Hexane",
                "Pentane",
                "Benzene",
                "Cyclohexane",
                "Carbon Tetrachloride",
                "Pyridine",
                "Tetrahydrofuran (THF)",
                "Acetonitrile",
                "N,N-Dimethylformamide (DMF)",
                "Dioxane",
                "1,4-Dioxane",
                "Isobutyl Alcohol",
                "Methyl Ethyl Ketone (MEK)",
                "Ethylene Glycol"
            ],
            "mismatches": [],
            "true_referents": [
                "1,4-Dioxane",
                "Acetone",
                "Acetonitrile",
                "Benzene",
                "Carbon Tetrachloride",
                "Chloroform",
                "Cyclohexane",
                "Dichloromethane",
                "Diethyl Ether",
                "Dimethyl Sulfoxide (DMSO)",
                "Dioxane",
                "Ethanol",
                "Ethylene Glycol",
                "Hexane",
                "Isobutyl Alcohol",
                "Isopropanol",
                "Methanol",
                "Methyl Ethyl Ketone (MEK)",
                "N,N-Dimethylformamide (DMF)",
                "Pentane",
                "Pyridine",
                "Tetrahydrofuran (THF)",
                "Toluene",
                "Xylene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "dimethyl sulfoxide (DMSO)",
            "acetone",
            "ethyl acetate",
            "methanol",
            "acetonitrile",
            "tetrahydrofuran (THF)",
            "diethyl ether",
            "chloroform",
            "dichloromethane",
            "benzene",
            "toluene",
            "hexane",
            "ethyl ether",
            "petroleum ether",
            "carbon tetrachloride",
            "ethyl acetate",
            "ethyl alcohol",
            "isopropyl alcohol",
            "butanol",
            "ethylbenzene",
            "methyl tert-butyl ether (MTBE)",
            "ethyl methyl ketone (EMK)",
            "diisopropyl ether",
            "methyl ethyl ketone (MEK)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "dimethyl sulfoxide (DMSO)",
                "acetone",
                "ethyl acetate",
                "methanol",
                "acetonitrile",
                "tetrahydrofuran (THF)",
                "diethyl ether",
                "chloroform",
                "dichloromethane",
                "benzene",
                "toluene",
                "hexane",
                "ethyl ether",
                "petroleum ether",
                "carbon tetrachloride",
                "ethyl alcohol",
                "isopropyl alcohol",
                "butanol",
                "ethylbenzene",
                "methyl tert-butyl ether (MTBE)",
                "diisopropyl ether",
                "methyl ethyl ketone (MEK)"
            ],
            "mismatches": [
                "ethyl methyl ketone (EMK)"
            ],
            "true_referents": [
                "acetone",
                "acetonitrile",
                "benzene",
                "butanol",
                "carbon tetrachloride",
                "chloroform",
                "dichloromethane",
                "diethyl ether",
                "diisopropyl ether",
                "dimethyl sulfoxide (DMSO)",
                "ethyl acetate",
                "ethyl alcohol",
                "ethyl ether",
                "ethyl methyl ketone (EMK)",
                "ethylbenzene",
                "hexane",
                "isopropyl alcohol",
                "methanol",
                "methyl ethyl ketone (MEK)",
                "methyl tert-butyl ether (MTBE)",
                "petroleum ether",
                "tetrahydrofuran (THF)",
                "toluene"
            ],
            "TP": 22,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Hexane",
            "Heptane",
            "Octane",
            "Nonane",
            "Decane",
            "Cyclohexane",
            "Benzene",
            "Toluene",
            "Xylene",
            "Carbon tetrachloride (CCl\u2084)",
            "Carbon disulfide (CS\u2082)",
            "Perfluorohexane",
            "Perfluorooctane",
            "Nitrobenzene",
            "Mesitylene",
            "Methylcyclohexane",
            "Styrene",
            "n-Butylbenzene",
            "Cumene",
            "Ethylbenzene",
            "p-Dichlorobenzene",
            "o-Dichlorobenzene",
            "m-Dichlorobenzene",
            "n-Decane"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexane",
                "Heptane",
                "Octane",
                "Nonane",
                "Decane",
                "Cyclohexane",
                "Benzene",
                "Toluene",
                "Xylene",
                "Carbon tetrachloride (CCl\u2084)",
                "Carbon disulfide (CS\u2082)",
                "Perfluorohexane",
                "Perfluorooctane",
                "Nitrobenzene",
                "Mesitylene",
                "Methylcyclohexane",
                "Styrene",
                "n-Butylbenzene",
                "Cumene",
                "Ethylbenzene",
                "p-Dichlorobenzene",
                "o-Dichlorobenzene",
                "m-Dichlorobenzene",
                "n-Decane"
            ],
            "mismatches": [],
            "true_referents": [
                "Benzene",
                "Carbon disulfide (CS\u2082)",
                "Carbon tetrachloride (CCl\u2084)",
                "Cumene",
                "Cyclohexane",
                "Decane",
                "Ethylbenzene",
                "Heptane",
                "Hexane",
                "Mesitylene",
                "Methylcyclohexane",
                "Nitrobenzene",
                "Nonane",
                "Octane",
                "Perfluorohexane",
                "Perfluorooctane",
                "Styrene",
                "Toluene",
                "Xylene",
                "m-Dichlorobenzene",
                "n-Butylbenzene",
                "n-Decane",
                "o-Dichlorobenzene",
                "p-Dichlorobenzene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Hexane",
            "Benzene",
            "Carbon tetrachloride",
            "Diethyl ether",
            "Toluene",
            "Chloroform",
            "Dichloromethane",
            "Carbon disulfide",
            "Tetrachloroethylene",
            "Cyclohexane",
            "Pentane",
            "Heptane",
            "Xylene",
            "1,4-Dioxane",
            "Tetrahydrofuran (THF)",
            "Dimethyl sulfoxide (DMSO)",
            "Acetone",
            "Ethyl acetate",
            "Petroleum ether",
            "Methyl tert-butyl ether (MTBE)",
            "Diisopropyl ether",
            "Acetonitrile",
            "Dimethylformamide (DMF)",
            "N-Methyl-2-pyrrolidone (NMP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexane",
                "Benzene",
                "Carbon tetrachloride",
                "Diethyl ether",
                "Toluene",
                "Chloroform",
                "Dichloromethane",
                "Carbon disulfide",
                "Tetrachloroethylene",
                "Cyclohexane",
                "Pentane",
                "Heptane",
                "Xylene",
                "1,4-Dioxane",
                "Tetrahydrofuran (THF)",
                "Dimethyl sulfoxide (DMSO)",
                "Acetone",
                "Ethyl acetate",
                "Petroleum ether",
                "Methyl tert-butyl ether (MTBE)",
                "Acetonitrile",
                "Dimethylformamide (DMF)",
                "N-Methyl-2-pyrrolidone (NMP)"
            ],
            "mismatches": [
                "Diisopropyl ether"
            ],
            "true_referents": [
                "1,4-Dioxane",
                "Acetone",
                "Acetonitrile",
                "Benzene",
                "Carbon disulfide",
                "Carbon tetrachloride",
                "Chloroform",
                "Cyclohexane",
                "Dichloromethane",
                "Diethyl ether",
                "Diisopropyl ether",
                "Dimethyl sulfoxide (DMSO)",
                "Dimethylformamide (DMF)",
                "Ethyl acetate",
                "Heptane",
                "Hexane",
                "Methyl tert-butyl ether (MTBE)",
                "N-Methyl-2-pyrrolidone (NMP)",
                "Pentane",
                "Petroleum ether",
                "Tetrachloroethylene",
                "Tetrahydrofuran (THF)",
                "Toluene",
                "Xylene"
            ],
            "TP": 23,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (EtOH)",
            "Methanol (MeOH)",
            "Acetone (CH3COCH3)",
            "Dimethyl sulfoxide (DMSO)",
            "Acetonitrile (CH3CN)",
            "Tetrahydrofuran (THF)",
            "Dichloromethane (DCM)",
            "Chloroform (CHCl3)",
            "Diethyl ether (Et2O)",
            "Ethyl acetate (EtOAc)",
            "Toluene (C6H5CH3)",
            "Benzene (C6H6)",
            "Hexane (C6H14)",
            "Cyclohexane (C6H12)",
            "Carbon tetrachloride (CCl4)",
            "Pyridine (C5H5N)",
            "Nitromethane (CH3NO2)",
            "Propylene carbonate (C4H6O3)",
            "Sulfolane (C4H8SO2)",
            "Glycerol (C3H8O3)",
            "Propylene glycol (C3H8O2)",
            "Ethylene glycol (C2H6O2)",
            "Dimethylformamide (DMF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Water (H2O)",
                "Ethanol (EtOH)",
                "Methanol (MeOH)",
                "Acetone (CH3COCH3)",
                "Dimethyl sulfoxide (DMSO)",
                "Acetonitrile (CH3CN)",
                "Tetrahydrofuran (THF)",
                "Dichloromethane (DCM)",
                "Chloroform (CHCl3)",
                "Diethyl ether (Et2O)",
                "Ethyl acetate (EtOAc)",
                "Toluene (C6H5CH3)",
                "Benzene (C6H6)",
                "Hexane (C6H14)",
                "Cyclohexane (C6H12)",
                "Carbon tetrachloride (CCl4)",
                "Pyridine (C5H5N)",
                "Nitromethane (CH3NO2)",
                "Propylene carbonate (C4H6O3)",
                "Sulfolane (C4H8SO2)",
                "Glycerol (C3H8O3)",
                "Propylene glycol (C3H8O2)",
                "Ethylene glycol (C2H6O2)",
                "Dimethylformamide (DMF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetone (CH3COCH3)",
                "Acetonitrile (CH3CN)",
                "Benzene (C6H6)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (C6H12)",
                "Dichloromethane (DCM)",
                "Diethyl ether (Et2O)",
                "Dimethyl sulfoxide (DMSO)",
                "Dimethylformamide (DMF)",
                "Ethanol (EtOH)",
                "Ethyl acetate (EtOAc)",
                "Ethylene glycol (C2H6O2)",
                "Glycerol (C3H8O3)",
                "Hexane (C6H14)",
                "Methanol (MeOH)",
                "Nitromethane (CH3NO2)",
                "Propylene carbonate (C4H6O3)",
                "Propylene glycol (C3H8O2)",
                "Pyridine (C5H5N)",
                "Sulfolane (C4H8SO2)",
                "Tetrahydrofuran (THF)",
                "Toluene (C6H5CH3)",
                "Water (H2O)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "water (H2O)",
            "ammonia (NH3)",
            "methane (CH4)",
            "carbon dioxide (CO2)",
            "hydrogen sulfide (H2S)",
            "sulfur dioxide (SO2)",
            "nitrogen (N2)",
            "oxygen (O2)",
            "argon (Ar)",
            "neon (Ne)",
            "helium (He)",
            "krypton (Kr)",
            "xenon (Xe)",
            "radon (Rn)",
            "ozone (O3)",
            "carbon monoxide (CO)",
            "nitrous oxide (N2O)",
            "dinitrogen oxide (N2O)",
            "sulfur trioxide (SO3)",
            "silicon tetrafluoride (SiF4)",
            "boron trifluoride (BF3)",
            "tetrachloromethane (CCl4)",
            "hexachloroethane (C2Cl6)",
            "trichloronitromethane (CCl3NO2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "water (H2O)",
                "ammonia (NH3)",
                "methane (CH4)",
                "carbon dioxide (CO2)",
                "hydrogen sulfide (H2S)",
                "sulfur dioxide (SO2)",
                "nitrogen (N2)",
                "oxygen (O2)",
                "argon (Ar)",
                "neon (Ne)",
                "helium (He)",
                "krypton (Kr)",
                "xenon (Xe)",
                "radon (Rn)",
                "ozone (O3)",
                "carbon monoxide (CO)",
                "nitrous oxide (N2O)",
                "dinitrogen oxide (N2O)",
                "sulfur trioxide (SO3)",
                "silicon tetrafluoride (SiF4)",
                "boron trifluoride (BF3)",
                "tetrachloromethane (CCl4)",
                "hexachloroethane (C2Cl6)",
                "trichloronitromethane (CCl3NO2)"
            ],
            "mismatches": [],
            "true_referents": [
                "ammonia (NH3)",
                "argon (Ar)",
                "boron trifluoride (BF3)",
                "carbon dioxide (CO2)",
                "carbon monoxide (CO)",
                "dinitrogen oxide (N2O)",
                "helium (He)",
                "hexachloroethane (C2Cl6)",
                "hydrogen sulfide (H2S)",
                "krypton (Kr)",
                "methane (CH4)",
                "neon (Ne)",
                "nitrogen (N2)",
                "nitrous oxide (N2O)",
                "oxygen (O2)",
                "ozone (O3)",
                "radon (Rn)",
                "silicon tetrafluoride (SiF4)",
                "sulfur dioxide (SO2)",
                "sulfur trioxide (SO3)",
                "tetrachloromethane (CCl4)",
                "trichloronitromethane (CCl3NO2)",
                "water (H2O)",
                "xenon (Xe)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Acetone (ACE)",
            "Benzene (BEN)",
            "Carbon disulfide (CS2)",
            "Carbon tetrachloride (CCl4)",
            "Chloroform (CHCl3)",
            "Cyclohexane (CYC)",
            "Diethyl ether (DEE)",
            "Dimethyl sulfoxide (DMSO)",
            "Ethyl acetate (EA)",
            "Ethylene glycol (EG)",
            "Formamide (FA)",
            "Hexane (HEX)",
            "Isobutyl alcohol (IBA)",
            "Isopropyl alcohol (IPA)",
            "Methanol (MeOH)",
            "N,N-Dimethylformamide (DMF)",
            "N-Methylpyrrolidone (NMP)",
            "Pentane (PEN)",
            "Pyridine (PYR)",
            "Tetrahydrofuran (THF)",
            "Toluene (TOL)",
            "Triethylamine (TEA)",
            "Xylene (XYL)",
            "Dimethyl ether (DME)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetone (ACE)",
                "Benzene (BEN)",
                "Carbon disulfide (CS2)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (CYC)",
                "Diethyl ether (DEE)",
                "Dimethyl sulfoxide (DMSO)",
                "Ethyl acetate (EA)",
                "Ethylene glycol (EG)",
                "Formamide (FA)",
                "Hexane (HEX)",
                "Isobutyl alcohol (IBA)",
                "Isopropyl alcohol (IPA)",
                "Methanol (MeOH)",
                "N,N-Dimethylformamide (DMF)",
                "N-Methylpyrrolidone (NMP)",
                "Pentane (PEN)",
                "Pyridine (PYR)",
                "Tetrahydrofuran (THF)",
                "Toluene (TOL)",
                "Triethylamine (TEA)",
                "Xylene (XYL)",
                "Dimethyl ether (DME)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetone (ACE)",
                "Benzene (BEN)",
                "Carbon disulfide (CS2)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Cyclohexane (CYC)",
                "Diethyl ether (DEE)",
                "Dimethyl ether (DME)",
                "Dimethyl sulfoxide (DMSO)",
                "Ethyl acetate (EA)",
                "Ethylene glycol (EG)",
                "Formamide (FA)",
                "Hexane (HEX)",
                "Isobutyl alcohol (IBA)",
                "Isopropyl alcohol (IPA)",
                "Methanol (MeOH)",
                "N,N-Dimethylformamide (DMF)",
                "N-Methylpyrrolidone (NMP)",
                "Pentane (PEN)",
                "Pyridine (PYR)",
                "Tetrahydrofuran (THF)",
                "Toluene (TOL)",
                "Triethylamine (TEA)",
                "Xylene (XYL)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Water (H2O)",
            "Ethanol (C2H5OH)",
            "Methanol (CH3OH)",
            "Acetone (C3H6O)",
            "Dichloromethane (CH2Cl2)",
            "Tetrahydrofuran (C4H8O)",
            "Dimethylformamide (C3H7NO)",
            "N-Methyl-2-pyrrolidone (C5H9NO)",
            "Acetonitrile (C2H3N)",
            "Pyridine (C5H5N)",
            "Triethylamine (C6H15N)",
            "N,N-Dimethylformamide (C3H7NO)",
            "1,4-Dioxane (C4H8O2)",
            "1,2-Dimethoxyethane (C4H10O2)",
            "Diethyl ether (C4H10O)",
            "Toluene (C6H5CH3)",
            "Xylene (C8H10)",
            "Benzene (C6H6)",
            "Chloroform (CHCl3)",
            "Carbon tetrachloride (CCl4)",
            "Hexane (C6H14)",
            "Heptane (C7H16)",
            "Octane (C8H18)",
            "Nonane (C9H20)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Water (H2O)",
                "Ethanol (C2H5OH)",
                "Methanol (CH3OH)",
                "Acetone (C3H6O)",
                "Dichloromethane (CH2Cl2)",
                "Tetrahydrofuran (C4H8O)",
                "Dimethylformamide (C3H7NO)",
                "N-Methyl-2-pyrrolidone (C5H9NO)",
                "Acetonitrile (C2H3N)",
                "Pyridine (C5H5N)",
                "Triethylamine (C6H15N)",
                "N,N-Dimethylformamide (C3H7NO)",
                "1,4-Dioxane (C4H8O2)",
                "1,2-Dimethoxyethane (C4H10O2)",
                "Diethyl ether (C4H10O)",
                "Toluene (C6H5CH3)",
                "Xylene (C8H10)",
                "Benzene (C6H6)",
                "Chloroform (CHCl3)",
                "Carbon tetrachloride (CCl4)",
                "Hexane (C6H14)",
                "Heptane (C7H16)",
                "Octane (C8H18)",
                "Nonane (C9H20)"
            ],
            "mismatches": [],
            "true_referents": [
                "1,2-Dimethoxyethane (C4H10O2)",
                "1,4-Dioxane (C4H8O2)",
                "Acetone (C3H6O)",
                "Acetonitrile (C2H3N)",
                "Benzene (C6H6)",
                "Carbon tetrachloride (CCl4)",
                "Chloroform (CHCl3)",
                "Dichloromethane (CH2Cl2)",
                "Diethyl ether (C4H10O)",
                "Dimethylformamide (C3H7NO)",
                "Ethanol (C2H5OH)",
                "Heptane (C7H16)",
                "Hexane (C6H14)",
                "Methanol (CH3OH)",
                "N,N-Dimethylformamide (C3H7NO)",
                "N-Methyl-2-pyrrolidone (C5H9NO)",
                "Nonane (C9H20)",
                "Octane (C8H18)",
                "Pyridine (C5H5N)",
                "Tetrahydrofuran (C4H8O)",
                "Toluene (C6H5CH3)",
                "Triethylamine (C6H15N)",
                "Water (H2O)",
                "Xylene (C8H10)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": [
            "Dimethyl sulfoxide (DMSO)",
            "Acetone",
            "Dichloromethane",
            "Tetrahydrofuran (THF)",
            "Dioxane",
            "Benzene",
            "Toluene",
            "Xylene",
            "Ether",
            "1,2-Dimethoxyethane",
            "1,4-Dioxane",
            "Cyclohexane",
            "Hexane",
            "Pentane",
            "Heptane",
            "Octane",
            "Nonane",
            "Decane",
            "Undecane",
            "Dodecane",
            "Tridecane",
            "Tetradecane",
            "Pentadecane",
            "Hexadecane"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Dimethyl sulfoxide (DMSO)",
                "Acetone",
                "Dichloromethane",
                "Tetrahydrofuran (THF)",
                "Dioxane",
                "Benzene",
                "Toluene",
                "Xylene",
                "Ether",
                "1,2-Dimethoxyethane",
                "1,4-Dioxane",
                "Cyclohexane",
                "Hexane",
                "Pentane",
                "Heptane",
                "Octane",
                "Nonane",
                "Decane",
                "Undecane",
                "Dodecane",
                "Tridecane",
                "Tetradecane",
                "Pentadecane",
                "Hexadecane"
            ],
            "mismatches": [],
            "true_referents": [
                "1,2-Dimethoxyethane",
                "1,4-Dioxane",
                "Acetone",
                "Benzene",
                "Cyclohexane",
                "Decane",
                "Dichloromethane",
                "Dimethyl sulfoxide (DMSO)",
                "Dioxane",
                "Dodecane",
                "Ether",
                "Heptane",
                "Hexadecane",
                "Hexane",
                "Nonane",
                "Octane",
                "Pentadecane",
                "Pentane",
                "Tetradecane",
                "Tetrahydrofuran (THF)",
                "Toluene",
                "Tridecane",
                "Undecane",
                "Xylene"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "aprotic solvent",
        "domain": "chemistry",
        "response": "{\n    \"referent\": \"ethanol\"\n},\n{\n    \"referent\": \"diethyl ether\"\n},\n{\n    \"referent\": \"acetone\"\n}",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]