[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Zinc ion (Zn^2+)",
            "Iron ion (Fe^2+)",
            "Copper ion (Cu^2+)",
            "Calcium ion (Ca^2+)",
            "Magnesium ion (Mg^2+)",
            "Manganese ion (Mn^2+)",
            "Nickel ion (Ni^2+)",
            "Cobalt ion (Co^2+)",
            "Mercury ion (Hg^2+)",
            "Silver ion (Ag^+)",
            "Potassium ion (K^+)",
            "Sodium ion (Na^+)",
            "Lithium ion (Li^+)",
            "Barium ion (Ba^2+)",
            "Strontium ion (Sr^2+)",
            "Beryllium ion (Be^2+)",
            "Aluminum ion (Al^3+)",
            "Scandium ion (Sc^3+)",
            "Titanium ion (Ti^4+)",
            "Vanadium ion (V^2+)",
            "Vanadium ion (V^3+)",
            "Chromium ion (Cr^2+)",
            "Chromium ion (Cr^3+)",
            "Molybdenum ion (Mo^2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Zinc ion (Zn^2+)",
                "Iron ion (Fe^2+)",
                "Copper ion (Cu^2+)",
                "Calcium ion (Ca^2+)",
                "Magnesium ion (Mg^2+)",
                "Manganese ion (Mn^2+)",
                "Nickel ion (Ni^2+)",
                "Cobalt ion (Co^2+)",
                "Mercury ion (Hg^2+)",
                "Silver ion (Ag^+)",
                "Potassium ion (K^+)",
                "Sodium ion (Na^+)",
                "Lithium ion (Li^+)",
                "Barium ion (Ba^2+)",
                "Strontium ion (Sr^2+)",
                "Beryllium ion (Be^2+)",
                "Aluminum ion (Al^3+)",
                "Scandium ion (Sc^3+)",
                "Titanium ion (Ti^4+)",
                "Vanadium ion (V^2+)",
                "Vanadium ion (V^3+)",
                "Chromium ion (Cr^2+)",
                "Chromium ion (Cr^3+)",
                "Molybdenum ion (Mo^2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aluminum ion (Al^3+)",
                "Barium ion (Ba^2+)",
                "Beryllium ion (Be^2+)",
                "Calcium ion (Ca^2+)",
                "Chromium ion (Cr^2+)",
                "Chromium ion (Cr^3+)",
                "Cobalt ion (Co^2+)",
                "Copper ion (Cu^2+)",
                "Iron ion (Fe^2+)",
                "Lithium ion (Li^+)",
                "Magnesium ion (Mg^2+)",
                "Manganese ion (Mn^2+)",
                "Mercury ion (Hg^2+)",
                "Molybdenum ion (Mo^2+)",
                "Nickel ion (Ni^2+)",
                "Potassium ion (K^+)",
                "Scandium ion (Sc^3+)",
                "Silver ion (Ag^+)",
                "Sodium ion (Na^+)",
                "Strontium ion (Sr^2+)",
                "Titanium ion (Ti^4+)",
                "Vanadium ion (V^2+)",
                "Vanadium ion (V^3+)",
                "Zinc ion (Zn^2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca^2+)",
            "Magnesium ion (Mg^2+)",
            "Iron(II) ion (Fe^2+)",
            "Copper(I) ion (Cu^+)",
            "Zinc ion (Zn^2+)",
            "Nickel(II) ion (Ni^2+)",
            "Cobalt(II) ion (Co^2+)",
            "Strontium ion (Sr^2+)",
            "Barium ion (Ba^2+)",
            "Manganese(II) ion (Mn^2+)",
            "Cadmium ion (Cd^2+)",
            "Lead(II) ion (Pb^2+)",
            "Beryllium ion (Be^2+)",
            "Radium ion (Ra^2+)",
            "Titanium(II) ion (Ti^2+)",
            "Vanadium(II) ion (V^2+)",
            "Chromium(II) ion (Cr^2+)",
            "Hafnium(II) ion (Hf^2+)",
            "Tungsten(II) ion (W^2+)",
            "Ruthenium(II) ion (Ru^2+)",
            "Osmium(II) ion (Os^2+)",
            "Iridium(II) ion (Ir^2+)",
            "Platinum(II) ion (Pt^2+)",
            "Gold(I) ion (Au^+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca^2+)",
                "Magnesium ion (Mg^2+)",
                "Iron(II) ion (Fe^2+)",
                "Copper(I) ion (Cu^+)",
                "Zinc ion (Zn^2+)",
                "Nickel(II) ion (Ni^2+)",
                "Cobalt(II) ion (Co^2+)",
                "Strontium ion (Sr^2+)",
                "Barium ion (Ba^2+)",
                "Manganese(II) ion (Mn^2+)",
                "Cadmium ion (Cd^2+)",
                "Lead(II) ion (Pb^2+)",
                "Beryllium ion (Be^2+)",
                "Radium ion (Ra^2+)",
                "Titanium(II) ion (Ti^2+)",
                "Vanadium(II) ion (V^2+)",
                "Chromium(II) ion (Cr^2+)",
                "Hafnium(II) ion (Hf^2+)",
                "Tungsten(II) ion (W^2+)",
                "Ruthenium(II) ion (Ru^2+)",
                "Osmium(II) ion (Os^2+)",
                "Iridium(II) ion (Ir^2+)",
                "Platinum(II) ion (Pt^2+)",
                "Gold(I) ion (Au^+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba^2+)",
                "Beryllium ion (Be^2+)",
                "Cadmium ion (Cd^2+)",
                "Calcium ion (Ca^2+)",
                "Chromium(II) ion (Cr^2+)",
                "Cobalt(II) ion (Co^2+)",
                "Copper(I) ion (Cu^+)",
                "Gold(I) ion (Au^+)",
                "Hafnium(II) ion (Hf^2+)",
                "Iridium(II) ion (Ir^2+)",
                "Iron(II) ion (Fe^2+)",
                "Lead(II) ion (Pb^2+)",
                "Magnesium ion (Mg^2+)",
                "Manganese(II) ion (Mn^2+)",
                "Nickel(II) ion (Ni^2+)",
                "Osmium(II) ion (Os^2+)",
                "Platinum(II) ion (Pt^2+)",
                "Radium ion (Ra^2+)",
                "Ruthenium(II) ion (Ru^2+)",
                "Strontium ion (Sr^2+)",
                "Titanium(II) ion (Ti^2+)",
                "Tungsten(II) ion (W^2+)",
                "Vanadium(II) ion (V^2+)",
                "Zinc ion (Zn^2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Beryllium ion (Be2+)",
            "Magnesium ion (Mg2+)",
            "Calcium ion (Ca2+)",
            "Strontium ion (Sr2+)",
            "Barium ion (Ba2+)",
            "Radium ion (Ra2+)",
            "Zinc ion (Zn2+)",
            "Cadmium ion (Cd2+)",
            "Mercury ion (Hg2+)",
            "Beryllium ion (Be2+)",
            "Chromium ion (Cr2+)",
            "Nickel ion (Ni2+)",
            "Copper ion (Cu2+)",
            "Iron ion (Fe2+)",
            "Cobalt ion (Co2+)",
            "Manganese ion (Mn2+)",
            "Titanium ion (Ti2+)",
            "Vanadium ion (V2+)",
            "Tin ion (Sn2+)",
            "Lead ion (Pb2+)",
            "Plutonium ion (Pu2+)",
            "Uranium ion (U2+)",
            "Neptunium ion (Np2+)",
            "Americium ion (Am2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Beryllium ion (Be2+)",
                "Magnesium ion (Mg2+)",
                "Calcium ion (Ca2+)",
                "Strontium ion (Sr2+)",
                "Barium ion (Ba2+)",
                "Radium ion (Ra2+)",
                "Zinc ion (Zn2+)",
                "Cadmium ion (Cd2+)",
                "Mercury ion (Hg2+)",
                "Chromium ion (Cr2+)",
                "Nickel ion (Ni2+)",
                "Copper ion (Cu2+)",
                "Iron ion (Fe2+)",
                "Cobalt ion (Co2+)",
                "Manganese ion (Mn2+)",
                "Titanium ion (Ti2+)",
                "Vanadium ion (V2+)",
                "Tin ion (Sn2+)",
                "Lead ion (Pb2+)",
                "Plutonium ion (Pu2+)",
                "Uranium ion (U2+)",
                "Neptunium ion (Np2+)",
                "Americium ion (Am2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Americium ion (Am2+)",
                "Barium ion (Ba2+)",
                "Beryllium ion (Be2+)",
                "Cadmium ion (Cd2+)",
                "Calcium ion (Ca2+)",
                "Chromium ion (Cr2+)",
                "Cobalt ion (Co2+)",
                "Copper ion (Cu2+)",
                "Iron ion (Fe2+)",
                "Lead ion (Pb2+)",
                "Magnesium ion (Mg2+)",
                "Manganese ion (Mn2+)",
                "Mercury ion (Hg2+)",
                "Neptunium ion (Np2+)",
                "Nickel ion (Ni2+)",
                "Plutonium ion (Pu2+)",
                "Radium ion (Ra2+)",
                "Strontium ion (Sr2+)",
                "Tin ion (Sn2+)",
                "Titanium ion (Ti2+)",
                "Uranium ion (U2+)",
                "Vanadium ion (V2+)",
                "Zinc ion (Zn2+)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Magnesium ion (Mg\u00b2\u207a)",
            "Calcium ion (Ca\u00b2\u207a)",
            "Iron(II) ion (Fe\u00b2\u207a)",
            "Zinc ion (Zn\u00b2\u207a)",
            "Strontium ion (Sr\u00b2\u207a)",
            "Barium ion (Ba\u00b2\u207a)",
            "Copper(II) ion (Cu\u00b2\u207a)",
            "Nickel(II) ion (Ni\u00b2\u207a)",
            "Manganese(II) ion (Mn\u00b2\u207a)",
            "Cobalt(II) ion (Co\u00b2\u207a)",
            "Potassium ion (K\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
            "Sodium ion (Na\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
            "Aluminum ion (Al\u00b3\u207a) - *formed via loss of three electrons, but represents the concept of metal cation formation*",
            "Titanium(II) ion (Ti\u00b2\u207a)",
            "Vanadium(II) ion (V\u00b2\u207a)",
            "Chromium(II) ion (Cr\u00b2\u207a)",
            "Cadmium ion (Cd\u00b2\u207a)",
            "Lead(II) ion (Pb\u00b2\u207a)",
            "Tin(II) ion (Sn\u00b2\u207a)",
            "Mercury(II) ion (Hg\u00b2\u207a)",
            "Silver ion (Ag\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
            "Platinum(II) ion (Pt\u00b2\u207a)",
            "Palladium(II) ion (Pd\u00b2\u207a)",
            "Rhodium(II) ion (Rh\u00b2\u207a)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Magnesium ion (Mg\u00b2\u207a)",
                "Calcium ion (Ca\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)",
                "Strontium ion (Sr\u00b2\u207a)",
                "Barium ion (Ba\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)",
                "Nickel(II) ion (Ni\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Potassium ion (K\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
                "Sodium ion (Na\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
                "Aluminum ion (Al\u00b3\u207a) - *formed via loss of three electrons, but represents the concept of metal cation formation*",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Vanadium(II) ion (V\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Silver ion (Ag\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
                "Platinum(II) ion (Pt\u00b2\u207a)",
                "Palladium(II) ion (Pd\u00b2\u207a)",
                "Rhodium(II) ion (Rh\u00b2\u207a)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aluminum ion (Al\u00b3\u207a) - *formed via loss of three electrons, but represents the concept of metal cation formation*",
                "Barium ion (Ba\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Calcium ion (Ca\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Magnesium ion (Mg\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Nickel(II) ion (Ni\u00b2\u207a)",
                "Palladium(II) ion (Pd\u00b2\u207a)",
                "Platinum(II) ion (Pt\u00b2\u207a)",
                "Potassium ion (K\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
                "Rhodium(II) ion (Rh\u00b2\u207a)",
                "Silver ion (Ag\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
                "Sodium ion (Na\u207a) - *formed via loss of single electron, but represents the concept of metal cation formation*",
                "Strontium ion (Sr\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Vanadium(II) ion (V\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca2+)",
            "Magnesium ion (Mg2+)",
            "Barium ion (Ba2+)",
            "Strontium ion (Sr2+)",
            "Zinc ion (Zn2+)",
            "Cadmium ion (Cd2+)",
            "Cobalt ion (Co2+)",
            "Nickel ion (Ni2+)",
            "Copper(II) ion (Cu2+)",
            "Iron(II) ion (Fe2+)",
            "Manganese(II) ion (Mn2+)",
            "Mercury(II) ion (Hg2+)",
            "Lead(II) ion (Pb2+)",
            "Tin(II) ion (Sn2+)",
            "Chromium(II) ion (Cr2+)",
            "Titanium(II) ion (Ti2+)",
            "Vanadium(II) ion (V2+)",
            "Palladium(II) ion (Pd2+)",
            "Platinum(II) ion (Pt2+)",
            "Beryllium ion (Be2+)",
            "Radium ion (Ra2+)",
            "Thallium(II) ion (Tl2+)",
            "Osmium(II) ion (Os2+)",
            "Ruthenium(II) ion (Ru2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca2+)",
                "Magnesium ion (Mg2+)",
                "Barium ion (Ba2+)",
                "Strontium ion (Sr2+)",
                "Zinc ion (Zn2+)",
                "Cadmium ion (Cd2+)",
                "Cobalt ion (Co2+)",
                "Nickel ion (Ni2+)",
                "Copper(II) ion (Cu2+)",
                "Iron(II) ion (Fe2+)",
                "Manganese(II) ion (Mn2+)",
                "Mercury(II) ion (Hg2+)",
                "Lead(II) ion (Pb2+)",
                "Tin(II) ion (Sn2+)",
                "Chromium(II) ion (Cr2+)",
                "Titanium(II) ion (Ti2+)",
                "Vanadium(II) ion (V2+)",
                "Palladium(II) ion (Pd2+)",
                "Platinum(II) ion (Pt2+)",
                "Beryllium ion (Be2+)",
                "Radium ion (Ra2+)",
                "Thallium(II) ion (Tl2+)",
                "Osmium(II) ion (Os2+)",
                "Ruthenium(II) ion (Ru2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba2+)",
                "Beryllium ion (Be2+)",
                "Cadmium ion (Cd2+)",
                "Calcium ion (Ca2+)",
                "Chromium(II) ion (Cr2+)",
                "Cobalt ion (Co2+)",
                "Copper(II) ion (Cu2+)",
                "Iron(II) ion (Fe2+)",
                "Lead(II) ion (Pb2+)",
                "Magnesium ion (Mg2+)",
                "Manganese(II) ion (Mn2+)",
                "Mercury(II) ion (Hg2+)",
                "Nickel ion (Ni2+)",
                "Osmium(II) ion (Os2+)",
                "Palladium(II) ion (Pd2+)",
                "Platinum(II) ion (Pt2+)",
                "Radium ion (Ra2+)",
                "Ruthenium(II) ion (Ru2+)",
                "Strontium ion (Sr2+)",
                "Thallium(II) ion (Tl2+)",
                "Tin(II) ion (Sn2+)",
                "Titanium(II) ion (Ti2+)",
                "Vanadium(II) ion (V2+)",
                "Zinc ion (Zn2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca\u00b2+)",
            "Magnesium ion (Mg\u00b2+)",
            "Barium ion (Ba\u00b2+)",
            "Strontium ion (Sr\u00b2+)",
            "Lead(II) ion (Pb\u00b2+)",
            "Copper(II) ion (Cu\u00b2+)",
            "Zinc ion (Zn\u00b2+)",
            "Cadmium ion (Cd\u00b2+)",
            "Iron(II) ion (Fe\u00b2+)",
            "Nickel(II) ion (Ni\u00b2+)",
            "Cobalt(II) ion (Co\u00b2+)",
            "Tin(II) ion (Sn\u00b2+)",
            "Mercury(II) ion (Hg\u00b2+)",
            "Chromium(II) ion (Cr\u00b2+)",
            "Manganese(II) ion (Mn\u00b2+)",
            "Titanium(II) ion (Ti\u00b2+)",
            "Ruthenium(II) ion (Ru\u00b2+)",
            "Palladium(II) ion (Pd\u00b2+)",
            "Radium ion (Ra\u00b2+)",
            "Thallium(II) ion (Tl\u00b2+)",
            "Gold(II) ion (Au\u00b2+)",
            "Silver(II) ion (Ag\u00b2+)",
            "Vanadium(II) ion (V\u00b2+)",
            "Lithium ion (Li\u00b2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca\u00b2+)",
                "Magnesium ion (Mg\u00b2+)",
                "Barium ion (Ba\u00b2+)",
                "Strontium ion (Sr\u00b2+)",
                "Lead(II) ion (Pb\u00b2+)",
                "Copper(II) ion (Cu\u00b2+)",
                "Zinc ion (Zn\u00b2+)",
                "Cadmium ion (Cd\u00b2+)",
                "Iron(II) ion (Fe\u00b2+)",
                "Nickel(II) ion (Ni\u00b2+)",
                "Cobalt(II) ion (Co\u00b2+)",
                "Tin(II) ion (Sn\u00b2+)",
                "Mercury(II) ion (Hg\u00b2+)",
                "Chromium(II) ion (Cr\u00b2+)",
                "Manganese(II) ion (Mn\u00b2+)",
                "Titanium(II) ion (Ti\u00b2+)",
                "Ruthenium(II) ion (Ru\u00b2+)",
                "Palladium(II) ion (Pd\u00b2+)",
                "Radium ion (Ra\u00b2+)",
                "Thallium(II) ion (Tl\u00b2+)",
                "Gold(II) ion (Au\u00b2+)",
                "Silver(II) ion (Ag\u00b2+)",
                "Vanadium(II) ion (V\u00b2+)",
                "Lithium ion (Li\u00b2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba\u00b2+)",
                "Cadmium ion (Cd\u00b2+)",
                "Calcium ion (Ca\u00b2+)",
                "Chromium(II) ion (Cr\u00b2+)",
                "Cobalt(II) ion (Co\u00b2+)",
                "Copper(II) ion (Cu\u00b2+)",
                "Gold(II) ion (Au\u00b2+)",
                "Iron(II) ion (Fe\u00b2+)",
                "Lead(II) ion (Pb\u00b2+)",
                "Lithium ion (Li\u00b2+)",
                "Magnesium ion (Mg\u00b2+)",
                "Manganese(II) ion (Mn\u00b2+)",
                "Mercury(II) ion (Hg\u00b2+)",
                "Nickel(II) ion (Ni\u00b2+)",
                "Palladium(II) ion (Pd\u00b2+)",
                "Radium ion (Ra\u00b2+)",
                "Ruthenium(II) ion (Ru\u00b2+)",
                "Silver(II) ion (Ag\u00b2+)",
                "Strontium ion (Sr\u00b2+)",
                "Thallium(II) ion (Tl\u00b2+)",
                "Tin(II) ion (Sn\u00b2+)",
                "Titanium(II) ion (Ti\u00b2+)",
                "Vanadium(II) ion (V\u00b2+)",
                "Zinc ion (Zn\u00b2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca2+)",
            "Magnesium ion (Mg2+)",
            "Iron(II) ion (Fe2+)",
            "Zinc ion (Zn2+)",
            "Copper(II) ion (Cu2+)",
            "Barium ion (Ba2+)",
            "Strontium ion (Sr2+)",
            "Lead(II) ion (Pb2+)",
            "Nickel(II) ion (Ni2+)",
            "Cobalt(II) ion (Co2+)",
            "Manganese(II) ion (Mn2+)",
            "Cadmium ion (Cd2+)",
            "Mercury(II) ion (Hg2+)",
            "Chromium(II) ion (Cr2+)",
            "Tin(II) ion (Sn2+)",
            "Silver ion (Ag+)",
            "Gold(III) ion (Au3+)",
            "Aluminium ion (Al3+)",
            "Chromium(III) ion (Cr3+)",
            "Iron(III) ion (Fe3+)",
            "Cobalt(III) ion (Co3+)",
            "Manganese(III) ion (Mn3+)",
            "Ruthenium(III) ion (Ru3+)",
            "Rhodium(III) ion (Rh3+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca2+)",
                "Magnesium ion (Mg2+)",
                "Iron(II) ion (Fe2+)",
                "Zinc ion (Zn2+)",
                "Copper(II) ion (Cu2+)",
                "Barium ion (Ba2+)",
                "Strontium ion (Sr2+)",
                "Lead(II) ion (Pb2+)",
                "Nickel(II) ion (Ni2+)",
                "Cobalt(II) ion (Co2+)",
                "Manganese(II) ion (Mn2+)",
                "Cadmium ion (Cd2+)",
                "Mercury(II) ion (Hg2+)",
                "Chromium(II) ion (Cr2+)",
                "Tin(II) ion (Sn2+)",
                "Silver ion (Ag+)",
                "Gold(III) ion (Au3+)",
                "Aluminium ion (Al3+)",
                "Chromium(III) ion (Cr3+)",
                "Iron(III) ion (Fe3+)",
                "Cobalt(III) ion (Co3+)",
                "Manganese(III) ion (Mn3+)",
                "Ruthenium(III) ion (Ru3+)",
                "Rhodium(III) ion (Rh3+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aluminium ion (Al3+)",
                "Barium ion (Ba2+)",
                "Cadmium ion (Cd2+)",
                "Calcium ion (Ca2+)",
                "Chromium(II) ion (Cr2+)",
                "Chromium(III) ion (Cr3+)",
                "Cobalt(II) ion (Co2+)",
                "Cobalt(III) ion (Co3+)",
                "Copper(II) ion (Cu2+)",
                "Gold(III) ion (Au3+)",
                "Iron(II) ion (Fe2+)",
                "Iron(III) ion (Fe3+)",
                "Lead(II) ion (Pb2+)",
                "Magnesium ion (Mg2+)",
                "Manganese(II) ion (Mn2+)",
                "Manganese(III) ion (Mn3+)",
                "Mercury(II) ion (Hg2+)",
                "Nickel(II) ion (Ni2+)",
                "Rhodium(III) ion (Rh3+)",
                "Ruthenium(III) ion (Ru3+)",
                "Silver ion (Ag+)",
                "Strontium ion (Sr2+)",
                "Tin(II) ion (Sn2+)",
                "Zinc ion (Zn2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca2+)",
            "Magnesium ion (Mg2+)",
            "Iron(II) ion (Fe2+)",
            "Zinc ion (Zn2+)",
            "Copper(II) ion (Cu2+)",
            "Lead(II) ion (Pb2+)",
            "Barium ion (Ba2+)",
            "Strontium ion (Sr2+)",
            "Manganese(II) ion (Mn2+)",
            "Cadmium ion (Cd2+)",
            "Nickel(II) ion (Ni2+)",
            "Cobalt(II) ion (Co2+)",
            "Mercury(II) ion (Hg2+)",
            "Silver ion (Ag+)",
            "Beryllium ion (Be2+)",
            "Aluminum ion (Al3+)",
            "Lithium ion (Li+)",
            "Sodium ion (Na+)",
            "Potassium ion (K+)",
            "Rubidium ion (Rb+)",
            "Cesium ion (Cs+)",
            "Silver ion (Ag+)",
            "Gold ion (Au+)",
            "Platinum ion (Pt2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca2+)",
                "Magnesium ion (Mg2+)",
                "Iron(II) ion (Fe2+)",
                "Zinc ion (Zn2+)",
                "Copper(II) ion (Cu2+)",
                "Lead(II) ion (Pb2+)",
                "Barium ion (Ba2+)",
                "Strontium ion (Sr2+)",
                "Manganese(II) ion (Mn2+)",
                "Cadmium ion (Cd2+)",
                "Nickel(II) ion (Ni2+)",
                "Cobalt(II) ion (Co2+)",
                "Mercury(II) ion (Hg2+)",
                "Silver ion (Ag+)",
                "Beryllium ion (Be2+)",
                "Aluminum ion (Al3+)",
                "Lithium ion (Li+)",
                "Sodium ion (Na+)",
                "Potassium ion (K+)",
                "Rubidium ion (Rb+)",
                "Cesium ion (Cs+)",
                "Gold ion (Au+)",
                "Platinum ion (Pt2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aluminum ion (Al3+)",
                "Barium ion (Ba2+)",
                "Beryllium ion (Be2+)",
                "Cadmium ion (Cd2+)",
                "Calcium ion (Ca2+)",
                "Cesium ion (Cs+)",
                "Cobalt(II) ion (Co2+)",
                "Copper(II) ion (Cu2+)",
                "Gold ion (Au+)",
                "Iron(II) ion (Fe2+)",
                "Lead(II) ion (Pb2+)",
                "Lithium ion (Li+)",
                "Magnesium ion (Mg2+)",
                "Manganese(II) ion (Mn2+)",
                "Mercury(II) ion (Hg2+)",
                "Nickel(II) ion (Ni2+)",
                "Platinum ion (Pt2+)",
                "Potassium ion (K+)",
                "Rubidium ion (Rb+)",
                "Silver ion (Ag+)",
                "Sodium ion (Na+)",
                "Strontium ion (Sr2+)",
                "Zinc ion (Zn2+)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca\u00b2\u207a)",
            "Magnesium ion (Mg\u00b2\u207a)",
            "Iron(II) ion (Fe\u00b2\u207a)",
            "Zinc ion (Zn\u00b2\u207a)",
            "Copper(II) ion (Cu\u00b2\u207a)",
            "Cobalt(II) ion (Co\u00b2\u207a)",
            "Manganese(II) ion (Mn\u00b2\u207a)",
            "Lead(II) ion (Pb\u00b2\u207a)",
            "Cadmium ion (Cd\u00b2\u207a)",
            "Barium ion (Ba\u00b2\u207a)",
            "Strontium ion (Sr\u00b2\u207a)",
            "Titanium(II) ion (Ti\u00b2\u207a)",
            "Chromium(II) ion (Cr\u00b2\u207a)",
            "Vanadium(II) ion (V\u00b2\u207a)",
            "Mercury(II) ion (Hg\u00b2\u207a)",
            "Beryllium ion (Be\u00b2\u207a)",
            "Tin(II) ion (Sn\u00b2\u207a)",
            "Europium(II) ion (Eu\u00b2\u207a)",
            "Samarium(II) ion (Sm\u00b2\u207a)",
            "Gadolinium(II) ion (Gd\u00b2\u207a)",
            "Molybdenum(II) ion (Mo\u00b2\u207a)",
            "Nickel ion (Ni\u00b2\u207a)",
            "Ruthenium(II) ion (Ru\u00b2\u207a)",
            "Palladium(II) ion (Pd\u00b2\u207a)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca\u00b2\u207a)",
                "Magnesium ion (Mg\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Barium ion (Ba\u00b2\u207a)",
                "Strontium ion (Sr\u00b2\u207a)",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Vanadium(II) ion (V\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Beryllium ion (Be\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Europium(II) ion (Eu\u00b2\u207a)",
                "Samarium(II) ion (Sm\u00b2\u207a)",
                "Gadolinium(II) ion (Gd\u00b2\u207a)",
                "Molybdenum(II) ion (Mo\u00b2\u207a)",
                "Nickel ion (Ni\u00b2\u207a)",
                "Ruthenium(II) ion (Ru\u00b2\u207a)",
                "Palladium(II) ion (Pd\u00b2\u207a)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba\u00b2\u207a)",
                "Beryllium ion (Be\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Calcium ion (Ca\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)",
                "Europium(II) ion (Eu\u00b2\u207a)",
                "Gadolinium(II) ion (Gd\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Magnesium ion (Mg\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Molybdenum(II) ion (Mo\u00b2\u207a)",
                "Nickel ion (Ni\u00b2\u207a)",
                "Palladium(II) ion (Pd\u00b2\u207a)",
                "Ruthenium(II) ion (Ru\u00b2\u207a)",
                "Samarium(II) ion (Sm\u00b2\u207a)",
                "Strontium ion (Sr\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Vanadium(II) ion (V\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca\u00b2\u207a)",
            "Magnesium ion (Mg\u00b2\u207a)",
            "Barium ion (Ba\u00b2\u207a)",
            "Strontium ion (Sr\u00b2\u207a)",
            "Zinc ion (Zn\u00b2\u207a)",
            "Iron(II) ion (Fe\u00b2\u207a)",
            "Copper(II) ion (Cu\u00b2\u207a)",
            "Nickel(II) ion (Ni\u00b2\u207a)",
            "Cobalt(II) ion (Co\u00b2\u207a)",
            "Lead(II) ion (Pb\u00b2\u207a)",
            "Manganese(II) ion (Mn\u00b2\u207a)",
            "Tin(II) ion (Sn\u00b2\u207a)",
            "Mercury(II) ion (Hg\u00b2\u207a)",
            "Cadmium ion (Cd\u00b2\u207a)",
            "Beryllium ion (Be\u00b2\u207a)",
            "Radium ion (Ra\u00b2\u207a)",
            "Chromium(II) ion (Cr\u00b2\u207a)",
            "Platinum(II) ion (Pt\u00b2\u207a)",
            "Palladium(II) ion (Pd\u00b2\u207a)",
            "Vanadium(II) ion (V\u00b2\u207a)",
            "Titanium(II) ion (Ti\u00b2\u207a)",
            "Europium(II) ion (Eu\u00b2\u207a)",
            "Samarium(II) ion (Sm\u00b2\u207a)",
            "Ytterbium(II) ion (Yb\u00b2\u207a)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca\u00b2\u207a)",
                "Magnesium ion (Mg\u00b2\u207a)",
                "Barium ion (Ba\u00b2\u207a)",
                "Strontium ion (Sr\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)",
                "Nickel(II) ion (Ni\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Beryllium ion (Be\u00b2\u207a)",
                "Radium ion (Ra\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Platinum(II) ion (Pt\u00b2\u207a)",
                "Palladium(II) ion (Pd\u00b2\u207a)",
                "Vanadium(II) ion (V\u00b2\u207a)",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Europium(II) ion (Eu\u00b2\u207a)",
                "Samarium(II) ion (Sm\u00b2\u207a)",
                "Ytterbium(II) ion (Yb\u00b2\u207a)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba\u00b2\u207a)",
                "Beryllium ion (Be\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Calcium ion (Ca\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)",
                "Europium(II) ion (Eu\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Magnesium ion (Mg\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Nickel(II) ion (Ni\u00b2\u207a)",
                "Palladium(II) ion (Pd\u00b2\u207a)",
                "Platinum(II) ion (Pt\u00b2\u207a)",
                "Radium ion (Ra\u00b2\u207a)",
                "Samarium(II) ion (Sm\u00b2\u207a)",
                "Strontium ion (Sr\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Vanadium(II) ion (V\u00b2\u207a)",
                "Ytterbium(II) ion (Yb\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Magnesium ion (Mg2+)",
            "Calcium ion (Ca2+)",
            "Iron(II) ion (Fe2+)",
            "Copper(II) ion (Cu2+)",
            "Zinc ion (Zn2+)",
            "Barium ion (Ba2+)",
            "Strontium ion (Sr2+)",
            "Nickel(II) ion (Ni2+)",
            "Cobalt(II) ion (Co2+)",
            "Manganese(II) ion (Mn2+)",
            "Cadmium ion (Cd2+)",
            "Tin(II) ion (Sn2+)",
            "Lead(II) ion (Pb2+)",
            "Chromium(II) ion (Cr2+)",
            "Vanadium(II) ion (V2+)",
            "Titanium(II) ion (Ti2+)",
            "Mercury(II) ion (Hg2+)",
            "Platinum(II) ion (Pt2+)",
            "Palladium(II) ion (Pd2+)",
            "Beryllium ion (Be2+)",
            "Scandium(II) ion (Sc2+)",
            "Yttrium(II) ion (Y2+)",
            "Lanthanum(II) ion (La2+)",
            "Radium ion (Ra2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Magnesium ion (Mg2+)",
                "Calcium ion (Ca2+)",
                "Iron(II) ion (Fe2+)",
                "Copper(II) ion (Cu2+)",
                "Zinc ion (Zn2+)",
                "Barium ion (Ba2+)",
                "Strontium ion (Sr2+)",
                "Nickel(II) ion (Ni2+)",
                "Cobalt(II) ion (Co2+)",
                "Manganese(II) ion (Mn2+)",
                "Cadmium ion (Cd2+)",
                "Tin(II) ion (Sn2+)",
                "Lead(II) ion (Pb2+)",
                "Chromium(II) ion (Cr2+)",
                "Vanadium(II) ion (V2+)",
                "Titanium(II) ion (Ti2+)",
                "Mercury(II) ion (Hg2+)",
                "Platinum(II) ion (Pt2+)",
                "Palladium(II) ion (Pd2+)",
                "Beryllium ion (Be2+)",
                "Scandium(II) ion (Sc2+)",
                "Yttrium(II) ion (Y2+)",
                "Lanthanum(II) ion (La2+)",
                "Radium ion (Ra2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba2+)",
                "Beryllium ion (Be2+)",
                "Cadmium ion (Cd2+)",
                "Calcium ion (Ca2+)",
                "Chromium(II) ion (Cr2+)",
                "Cobalt(II) ion (Co2+)",
                "Copper(II) ion (Cu2+)",
                "Iron(II) ion (Fe2+)",
                "Lanthanum(II) ion (La2+)",
                "Lead(II) ion (Pb2+)",
                "Magnesium ion (Mg2+)",
                "Manganese(II) ion (Mn2+)",
                "Mercury(II) ion (Hg2+)",
                "Nickel(II) ion (Ni2+)",
                "Palladium(II) ion (Pd2+)",
                "Platinum(II) ion (Pt2+)",
                "Radium ion (Ra2+)",
                "Scandium(II) ion (Sc2+)",
                "Strontium ion (Sr2+)",
                "Tin(II) ion (Sn2+)",
                "Titanium(II) ion (Ti2+)",
                "Vanadium(II) ion (V2+)",
                "Yttrium(II) ion (Y2+)",
                "Zinc ion (Zn2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca2+)",
            "Magnesium ion (Mg2+)",
            "Zinc ion (Zn2+)",
            "Iron ion (Fe2+)",
            "Copper ion (Cu2+)",
            "Nickel ion (Ni2+)",
            "Cobalt ion (Co2+)",
            "Manganese ion (Mn2+)",
            "Barium ion (Ba2+)",
            "Strontium ion (Sr2+)",
            "Lead ion (Pb2+)",
            "Cadmium ion (Cd2+)",
            "Mercury ion (Hg2+)",
            "Tin ion (Sn2+)",
            "Chromium ion (Cr2+)",
            "Vanadium ion (V2+)",
            "Titanium ion (Ti2+)",
            "Zirconium ion (Zr2+)",
            "Hafnium ion (Hf2+)",
            "Beryllium ion (Be2+)",
            "Radium ion (Ra2+)",
            "Thallium ion (Tl2+)",
            "Palladium ion (Pd2+)",
            "Platinum ion (Pt2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca2+)",
                "Magnesium ion (Mg2+)",
                "Zinc ion (Zn2+)",
                "Iron ion (Fe2+)",
                "Copper ion (Cu2+)",
                "Nickel ion (Ni2+)",
                "Cobalt ion (Co2+)",
                "Manganese ion (Mn2+)",
                "Barium ion (Ba2+)",
                "Strontium ion (Sr2+)",
                "Lead ion (Pb2+)",
                "Cadmium ion (Cd2+)",
                "Mercury ion (Hg2+)",
                "Tin ion (Sn2+)",
                "Chromium ion (Cr2+)",
                "Vanadium ion (V2+)",
                "Titanium ion (Ti2+)",
                "Zirconium ion (Zr2+)",
                "Hafnium ion (Hf2+)",
                "Beryllium ion (Be2+)",
                "Radium ion (Ra2+)",
                "Thallium ion (Tl2+)",
                "Palladium ion (Pd2+)",
                "Platinum ion (Pt2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba2+)",
                "Beryllium ion (Be2+)",
                "Cadmium ion (Cd2+)",
                "Calcium ion (Ca2+)",
                "Chromium ion (Cr2+)",
                "Cobalt ion (Co2+)",
                "Copper ion (Cu2+)",
                "Hafnium ion (Hf2+)",
                "Iron ion (Fe2+)",
                "Lead ion (Pb2+)",
                "Magnesium ion (Mg2+)",
                "Manganese ion (Mn2+)",
                "Mercury ion (Hg2+)",
                "Nickel ion (Ni2+)",
                "Palladium ion (Pd2+)",
                "Platinum ion (Pt2+)",
                "Radium ion (Ra2+)",
                "Strontium ion (Sr2+)",
                "Thallium ion (Tl2+)",
                "Tin ion (Sn2+)",
                "Titanium ion (Ti2+)",
                "Vanadium ion (V2+)",
                "Zinc ion (Zn2+)",
                "Zirconium ion (Zr2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Calcium ion (Ca2+)",
            "Magnesium ion (Mg2+)",
            "Barium ion (Ba2+)",
            "Strontium ion (Sr2+)",
            "Radium ion (Ra2+)",
            "Beryllium ion (Be2+)",
            "Zinc ion (Zn2+)",
            "Cadmium ion (Cd2+)",
            "Mercury(II) ion (Hg2+)",
            "Tin(II) ion (Sn2+)",
            "Lead(II) ion (Pb2+)",
            "Copper(II) ion (Cu2+)",
            "Nickel(II) ion (Ni2+)",
            "Cobalt(II) ion (Co2+)",
            "Iron(II) ion (Fe2+)",
            "Manganese(II) ion (Mn2+)",
            "Chromium(II) ion (Cr2+)",
            "Vanadium(II) ion (V2+)",
            "Titanium(II) ion (Ti2+)",
            "Scandium(II) ion (Sc2+)",
            "Yttrium(II) ion (Y2+)",
            "Zirconium(II) ion (Zr2+)",
            "Niobium(II) ion (Nb2+)",
            "Molybdenum(II) ion (Mo2+)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Calcium ion (Ca2+)",
                "Magnesium ion (Mg2+)",
                "Barium ion (Ba2+)",
                "Strontium ion (Sr2+)",
                "Radium ion (Ra2+)",
                "Beryllium ion (Be2+)",
                "Zinc ion (Zn2+)",
                "Cadmium ion (Cd2+)",
                "Mercury(II) ion (Hg2+)",
                "Tin(II) ion (Sn2+)",
                "Lead(II) ion (Pb2+)",
                "Copper(II) ion (Cu2+)",
                "Nickel(II) ion (Ni2+)",
                "Cobalt(II) ion (Co2+)",
                "Iron(II) ion (Fe2+)",
                "Manganese(II) ion (Mn2+)",
                "Chromium(II) ion (Cr2+)",
                "Vanadium(II) ion (V2+)",
                "Titanium(II) ion (Ti2+)",
                "Scandium(II) ion (Sc2+)",
                "Yttrium(II) ion (Y2+)",
                "Zirconium(II) ion (Zr2+)",
                "Niobium(II) ion (Nb2+)",
                "Molybdenum(II) ion (Mo2+)"
            ],
            "mismatches": [],
            "true_referents": [
                "Barium ion (Ba2+)",
                "Beryllium ion (Be2+)",
                "Cadmium ion (Cd2+)",
                "Calcium ion (Ca2+)",
                "Chromium(II) ion (Cr2+)",
                "Cobalt(II) ion (Co2+)",
                "Copper(II) ion (Cu2+)",
                "Iron(II) ion (Fe2+)",
                "Lead(II) ion (Pb2+)",
                "Magnesium ion (Mg2+)",
                "Manganese(II) ion (Mn2+)",
                "Mercury(II) ion (Hg2+)",
                "Molybdenum(II) ion (Mo2+)",
                "Nickel(II) ion (Ni2+)",
                "Niobium(II) ion (Nb2+)",
                "Radium ion (Ra2+)",
                "Scandium(II) ion (Sc2+)",
                "Strontium ion (Sr2+)",
                "Tin(II) ion (Sn2+)",
                "Titanium(II) ion (Ti2+)",
                "Vanadium(II) ion (V2+)",
                "Yttrium(II) ion (Y2+)",
                "Zinc ion (Zn2+)",
                "Zirconium(II) ion (Zr2+)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Magnesium ion (Mg2+)",
            "Calcium ion (Ca2+)",
            "Strontium ion (Sr2+)",
            "Barium ion (Ba2+)",
            "Radium ion (Ra2+)",
            "Beryllium ion (Be2+)",
            "Magnesium(II) ion",
            "Calcium(II) ion",
            "Strontium(II) ion",
            "Barium(II) ion",
            "Radon ion (Ra2+)",
            "Beryllium(II) ion",
            "Manganese ion (Mn2+)",
            "Iron ion (Fe2+)",
            "Cobalt ion (Co2+)",
            "Nickel ion (Ni2+)",
            "Copper ion (Cu2+)",
            "Zinc ion (Zn2+)",
            "Magnesium(II) cation",
            "Calcium(II) cation",
            "Strontium(II) cation",
            "Barium(II) cation",
            "Radon(II) cation",
            "Beryllium(II) cation",
            "Manganese(II) ion",
            "Iron(II) ion",
            "Cobalt(II) ion",
            "Nickel(II) ion",
            "Copper(II) ion",
            "Zinc(II) ion"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Magnesium ion (Mg2+)",
                "Calcium ion (Ca2+)",
                "Strontium ion (Sr2+)",
                "Barium ion (Ba2+)",
                "Radon ion (Ra2+)",
                "Beryllium ion (Be2+)",
                "Magnesium(II) ion",
                "Calcium(II) ion",
                "Strontium(II) ion",
                "Barium(II) cation",
                "Radon(II) cation",
                "Beryllium(II) cation",
                "Manganese ion (Mn2+)",
                "Iron ion (Fe2+)",
                "Cobalt ion (Co2+)",
                "Nickel ion (Ni2+)",
                "Copper ion (Cu2+)",
                "Zinc ion (Zn2+)",
                "Magnesium(II) cation",
                "Calcium(II) cation",
                "Strontium(II) cation",
                "Barium(II) ion",
                "Radium ion (Ra2+)",
                "Beryllium(II) ion"
            ],
            "mismatches": [
                "Manganese(II) ion",
                "Iron(II) ion",
                "Cobalt(II) ion",
                "Nickel(II) ion",
                "Copper(II) ion",
                "Zinc(II) ion"
            ],
            "true_referents": [
                "Barium ion (Ba2+)",
                "Barium(II) cation",
                "Barium(II) ion",
                "Beryllium ion (Be2+)",
                "Beryllium(II) cation",
                "Beryllium(II) ion",
                "Calcium ion (Ca2+)",
                "Calcium(II) cation",
                "Calcium(II) ion",
                "Cobalt ion (Co2+)",
                "Copper ion (Cu2+)",
                "Iron ion (Fe2+)",
                "Magnesium ion (Mg2+)",
                "Magnesium(II) cation",
                "Magnesium(II) ion",
                "Manganese ion (Mn2+)",
                "Nickel ion (Ni2+)",
                "Radium ion (Ra2+)",
                "Radon ion (Ra2+)",
                "Radon(II) cation",
                "Strontium ion (Sr2+)",
                "Strontium(II) cation",
                "Strontium(II) ion",
                "Zinc ion (Zn2+)"
            ],
            "TP": 24,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": [
            "Aluminum ion (Al\u00b3\u207a)",
            "Beryllium ion (Be\u00b2\u207a)",
            "Magnesium ion (Mg\u00b2\u207a)",
            "Calcium ion (Ca\u00b2\u207a)",
            "Strontium ion (Sr\u00b2\u207a)",
            "Barium ion (Ba\u00b2\u207a)",
            "Radium ion (Ra\u00b2\u207a)",
            "Zinc ion (Zn\u00b2\u207a)",
            "Cadmium ion (Cd\u00b2\u207a)",
            "Mercury(I) ion (Hg\u00b2\u207a)",
            "Mercury(II) ion (Hg\u00b2\u207a)",
            "Lead(II) ion (Pb\u00b2\u207a)",
            "Tin(II) ion (Sn\u00b2\u207a)",
            "Titanium(II) ion (Ti\u00b2\u207a)",
            "Zirconium(II) ion (Zr\u00b2\u207a)",
            "Hafnium(II) ion (Hf\u00b2\u207a)",
            "Chromium(II) ion (Cr\u00b2\u207a)",
            "Manganese(II) ion (Mn\u00b2\u207a)",
            "Iron(II) ion (Fe\u00b2\u207a)",
            "Cobalt(II) ion (Co\u00b2\u207a)",
            "Nickel(II) ion (Ni\u00b2\u207a)",
            "Copper(II) ion (Cu\u00b2\u207a)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aluminum ion (Al\u00b3\u207a)",
                "Beryllium ion (Be\u00b2\u207a)",
                "Magnesium ion (Mg\u00b2\u207a)",
                "Calcium ion (Ca\u00b2\u207a)",
                "Strontium ion (Sr\u00b2\u207a)",
                "Barium ion (Ba\u00b2\u207a)",
                "Radium ion (Ra\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Mercury(I) ion (Hg\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Zirconium(II) ion (Zr\u00b2\u207a)",
                "Hafnium(II) ion (Hf\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Nickel(II) ion (Ni\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aluminum ion (Al\u00b3\u207a)",
                "Barium ion (Ba\u00b2\u207a)",
                "Beryllium ion (Be\u00b2\u207a)",
                "Cadmium ion (Cd\u00b2\u207a)",
                "Calcium ion (Ca\u00b2\u207a)",
                "Chromium(II) ion (Cr\u00b2\u207a)",
                "Cobalt(II) ion (Co\u00b2\u207a)",
                "Copper(II) ion (Cu\u00b2\u207a)",
                "Hafnium(II) ion (Hf\u00b2\u207a)",
                "Iron(II) ion (Fe\u00b2\u207a)",
                "Lead(II) ion (Pb\u00b2\u207a)",
                "Magnesium ion (Mg\u00b2\u207a)",
                "Manganese(II) ion (Mn\u00b2\u207a)",
                "Mercury(I) ion (Hg\u00b2\u207a)",
                "Mercury(II) ion (Hg\u00b2\u207a)",
                "Nickel(II) ion (Ni\u00b2\u207a)",
                "Radium ion (Ra\u00b2\u207a)",
                "Strontium ion (Sr\u00b2\u207a)",
                "Tin(II) ion (Sn\u00b2\u207a)",
                "Titanium(II) ion (Ti\u00b2\u207a)",
                "Zinc ion (Zn\u00b2\u207a)",
                "Zirconium(II) ion (Zr\u00b2\u207a)"
            ],
            "TP": 22,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "divalent metal cation",
        "domain": "chemistry",
        "response": "[\n  {\n    \"name\": \"Aluminum ion\" (Al3+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Calcium ion\" (Ca2+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Copper ion\" (Cu2+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Gold ion\" (Au3+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Iron ion\" (Fe3+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Magnesium ion\" (Mg2+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Manganese ion\" (Mn2+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Nickel ion\" (Ni2+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Potassium ion\" (K+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Sodium ion\" (Na+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  },\n  {\n    \"name\": \"Zinc ion\" (Zn2+),\n    \"description\": \"It is a positively charged ion formed when a metal atom loses two electrons.\"\n  }\n]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]