[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (R)",
            "Arabinose (A)",
            "Xylose (X)",
            "Lyxose (L)",
            "Ribulose (Ru)",
            "Xylulose (Xu)",
            "Aribinose (Ab)",
            "Lyxulose (Lu)",
            "Deoxyribose (dR)",
            "Deoxyarabinose (dA)",
            "Deoxyxylose (dX)",
            "Deoxylyxose (dL)",
            "Deoxyribose-1-phosphate (dR-1-P)",
            "Deoxyarabinose-1-phosphate (dA-1-P)",
            "Deoxyxylose-1-phosphate (dX-1-P)",
            "Deoxylyxose-1-phosphate (dL-1-P)",
            "Deoxyribose-5-phosphate (dR-5-P)",
            "Deoxyarabinose-5-phosphate (dA-5-P)",
            "Deoxyxylose-5-phosphate (dX-5-P)",
            "Deoxylyxose-5-phosphate (dL-5-P)",
            "Ribulose-5-phosphate (Ru-5-P)",
            "Xylulose-5-phosphate (Xu-5-P)",
            "Arabinose-5-phosphate (Ab-5-P)",
            "Lyxulose-5-phosphate (Lu-5-P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose (R)",
                "Arabinose (A)",
                "Xylose (X)",
                "Lyxose (L)",
                "Ribulose (Ru)",
                "Xylulose (Xu)",
                "Lyxulose (Lu)",
                "Ribulose-5-phosphate (Ru-5-P)",
                "Xylulose-5-phosphate (Xu-5-P)"
            ],
            "mismatches": [
                "Aribinose (Ab)",
                "Deoxyribose (dR)",
                "Deoxyarabinose (dA)",
                "Deoxyxylose (dX)",
                "Deoxylyxose (dL)",
                "Deoxyribose-1-phosphate (dR-1-P)",
                "Deoxyarabinose-1-phosphate (dA-1-P)",
                "Deoxyxylose-1-phosphate (dX-1-P)",
                "Deoxylyxose-1-phosphate (dL-1-P)",
                "Deoxyribose-5-phosphate (dR-5-P)",
                "Deoxyarabinose-5-phosphate (dA-5-P)",
                "Deoxyxylose-5-phosphate (dX-5-P)",
                "Deoxylyxose-5-phosphate (dL-5-P)",
                "Arabinose-5-phosphate (Ab-5-P)",
                "Lyxulose-5-phosphate (Lu-5-P)"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "1-deoxy-D-xylulose 5-phosphate",
                "2-deoxyribose 5-phosphate",
                "4-amino-4-deoxyarabinose",
                "5-deoxyribose phosphate",
                "D-arabinose",
                "D-arabinose 5-phosphate",
                "D-lyxopyranose",
                "D-lyxose",
                "D-ribofuranose",
                "D-ribofuranose 5-phosphate",
                "D-ribose",
                "D-ribose 1-phosphate",
                "D-ribose 5-phosphate",
                "D-ribulose",
                "D-xylose",
                "D-xylose 5-phosphate",
                "D-xylulose",
                "L-arabinose",
                "L-lyxose",
                "L-ribulose",
                "L-xylose",
                "L-xylulose",
                "L-xylulose 5-phosphate",
                "Lyxulose",
                "alpha-D-arabinofuranose",
                "alpha-D-xylose 1-phosphate",
                "arabinose",
                "arabinose phosphate",
                "deoxyadenosine phosphate",
                "deoxyribulose phosphate",
                "lyxose",
                "ribose",
                "ribose phosphate",
                "ribulose",
                "ribulose 5-phosphate",
                "sambubiose",
                "xylose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (C5H10O5)",
            "Deoxyribose (C5H10O4)",
            "Lyxose (C5H10O5)",
            "Arabinose (C5H10O5)",
            "Xylose (C5H10O5)",
            "Ribulose (C5H10O5)",
            "Xylulose (C5H10O5)",
            "Ribose 1-phosphate",
            "Deoxyribose 1-phosphate",
            "Lyxose 1-phosphate",
            "Arabinose 1-phosphate",
            "Xylose 1-phosphate",
            "Ribulose 1-phosphate",
            "Xylulose 1-phosphate",
            "Ribulose 5-phosphate",
            "Xylulose 5-phosphate",
            "Lyxose 5-phosphate",
            "Arabinose 5-phosphate",
            "Ribose 5-phosphate",
            "Deoxyribose 5-phosphate",
            "Ribose 2-phosphate",
            "Deoxyribose 2-phosphate",
            "Lyxose 2-phosphate",
            "Arabinose 2-phosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Lyxose",
                "Arabinose",
                "Xylose",
                "Ribulose",
                "Xylulose",
                "Ribose 1-phosphate",
                "Arabinose 5-phosphate",
                "Ribulose 1-phosphate",
                "Xylulose 1-phosphate",
                "Ribulose 5-phosphate",
                "Xylulose 5-phosphate",
                "Ribose 5-phosphate"
            ],
            "mismatches": [
                "Ribose (C5H10O5)",
                "Deoxyribose (C5H10O4)",
                "Lyxose 1-phosphate",
                "Xylose 1-phosphate",
                "Deoxyribose 1-phosphate",
                "Lyxose 5-phosphate",
                "Deoxyribose 5-phosphate",
                "Ribose 2-phosphate",
                "Deoxyribose 2-phosphate",
                "Lyxose 2-phosphate",
                "Arabinose 2-phosphate"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "2-deoxyribose 1-phosphate",
                "2-deoxyribose 5-phosphate",
                "2-deoxyribose phosphate",
                "5-deoxy-D-ribose",
                "5-deoxyribose phosphate",
                "D-arabinose",
                "D-arabinose 5-phosphate",
                "D-lyxose",
                "D-ribose 1-phosphate",
                "D-ribose 5-phosphate",
                "D-xylose",
                "D-xylulose",
                "L-arabinose",
                "L-arabinose 1-phosphate",
                "L-lyxose",
                "L-ribose",
                "L-xylose",
                "L-xylulose 1-phosphate",
                "arabinose",
                "arabinose phosphate",
                "deoxyribonucleotide",
                "deoxyribulose phosphate",
                "lyxose",
                "ribose",
                "ribose diphosphate",
                "ribose monophosphate",
                "ribose phosphate",
                "ribulose",
                "ribulose 1-phosphate",
                "ribulose 5-phosphate",
                "xylose",
                "xylose phosphate",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 12,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Ribulose",
            "Xylulose",
            "Xylose (Xyl)",
            "Lyxose",
            "Arabinose (Ara)",
            "Ribofuranose",
            "Ribopyranose",
            "Xylopyranose",
            "Xylofuranose",
            "Lyxopyranose",
            "Lyxofuranose",
            "Arabinofuranose",
            "Arabinopyranose",
            "D-Ribose",
            "L-Ribose",
            "D-Xylose",
            "L-Xylose",
            "D-Lyxose",
            "L-Lyxose",
            "D-Arabinose",
            "L-Arabinose",
            "D-Ribulose",
            "L-Ribulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribulose",
                "Xylulose",
                "Lyxose",
                "Ribofuranose",
                "Ribopyranose",
                "Xylopyranose",
                "Xylofuranose",
                "Lyxopyranose",
                "Lyxofuranose",
                "Arabinofuranose",
                "Arabinopyranose",
                "D-Ribose",
                "L-Ribose",
                "D-Xylose",
                "L-Xylose",
                "D-Lyxose",
                "L-Lyxose",
                "D-Arabinose",
                "L-Arabinose",
                "D-Ribulose",
                "L-Ribulose"
            ],
            "mismatches": [
                "Ribose (Rib)",
                "Xylose (Xyl)",
                "Arabinose (Ara)"
            ],
            "true_referents": [
                "D-arabinofuranose",
                "D-arabinopyranose",
                "D-arabinose",
                "D-lyxofuranose",
                "D-lyxopyranose",
                "D-lyxose",
                "D-ribose",
                "D-ribose 1-phosphate",
                "D-ribulose",
                "D-ribulose 1-phosphate",
                "D-xylofuranose",
                "D-xylopyranose",
                "D-xylose",
                "D-xylulose",
                "L-arabinofuranose",
                "L-arabinopyranose",
                "L-arabinose",
                "L-lyxofuranose",
                "L-lyxopyranose",
                "L-lyxose",
                "L-ribofuranose",
                "L-ribopyranose",
                "L-ribose",
                "L-ribulose",
                "L-ribulose 5-phosphate",
                "L-xylofuranose",
                "L-xylopyranose",
                "L-xylose",
                "L-xylulose",
                "arabinose",
                "beta-D-arabinofuranose",
                "beta-D-lyxopyranose",
                "beta-D-xylofuranose",
                "beta-D-xylose",
                "beta-L-arabinopyranose",
                "beta-L-lyxofuranose",
                "beta-L-ribose",
                "beta-L-xylopyranose",
                "lyxose",
                "ribofuranose",
                "ribopyranose",
                "ribose",
                "ribose phosphate",
                "ribulose",
                "ribulose phosphate",
                "xylose",
                "xylulose"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Deoxyribose (dRib)",
            "Arabinose (Ara)",
            "Xylose (Xyl)",
            "Lyxose (Lxy)",
            "Allulose (All)",
            "Altrose (Alt)",
            "Glucose (Glc)",
            "Mannose (Man)",
            "Galactose (Gal)",
            "Fructose (Fru)",
            "Sorbose (Sorb)",
            "Tagatose (Tag)",
            "Psicose (Psi)",
            "Gulose (Gul)",
            "Idose (Ido)",
            "Talose (Tal)",
            "Rhamnose (Rha)",
            "Fucose (Fuc)",
            "2-Deoxyribose (2dRib)",
            "3-Deoxyribose (3dRib)",
            "Xylulose (Xylu)",
            "Ribulose (Ribu)",
            "Deoxyxylose (dXu)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose (Rib)",
                "Arabinose (Ara)",
                "Xylose (Xyl)",
                "Lyxose (Lxy)",
                "Xylulose (Xylu)",
                "Ribulose (Ribu)"
            ],
            "mismatches": [
                "Deoxyribose (dRib)",
                "Allulose (All)",
                "Altrose (Alt)",
                "Glucose (Glc)",
                "Mannose (Man)",
                "Galactose (Gal)",
                "Fructose (Fru)",
                "Sorbose (Sorb)",
                "Tagatose (Tag)",
                "Psicose (Psi)",
                "Gulose (Gul)",
                "Idose (Ido)",
                "Talose (Tal)",
                "Rhamnose (Rha)",
                "Fucose (Fuc)",
                "2-Deoxyribose (2dRib)",
                "3-Deoxyribose (3dRib)",
                "Deoxyxylose (dXu)"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "2'-deoxyribonucleotide",
                "2-deoxy-D-ribose",
                "2-deoxyribose phosphate",
                "3'-deoxyribonucleotide",
                "5-deoxy-D-ribose",
                "D-arabinose",
                "D-lyxose",
                "D-xylose",
                "D-xylulose",
                "GDP-6-deoxy-alpha-D-altrose",
                "GDP-D-glucose",
                "GDP-L-fucose",
                "GDP-beta-L-galactose",
                "GDP-fucose",
                "GDP-mannose",
                "L-arabinose",
                "L-lyxose",
                "L-ribose",
                "L-ribulose",
                "L-xylose",
                "L-xylulose",
                "Lyxulose",
                "UDP-D-apiose",
                "UDP-alpha-D-apiose",
                "arabinose",
                "dTDP-4-dehydro-6-deoxy-alpha-D-gulose",
                "dTDP-L-talose",
                "dTDP-rhamnose",
                "deoxyribulose phosphate",
                "ketopentose",
                "lyxose",
                "pentopyranose",
                "ribonucleoside 3'-phosphate",
                "ribose",
                "ribose phosphate",
                "ribulose",
                "ribulose phosphate",
                "tinapoyl-CoA",
                "xylose",
                "xylulose"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose",
            "Deoxyribose",
            "Arabinose",
            "Lyxose",
            "Xylose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose",
                "Arabinose",
                "Lyxose",
                "Xylose"
            ],
            "mismatches": [
                "Deoxyribose"
            ],
            "true_referents": [
                "D-arabinose",
                "D-lyxose",
                "L-arabinose",
                "L-lyxose",
                "L-ribose",
                "arabinose",
                "beta-D-xylose",
                "deoxyribonucleotide",
                "deoxyribulose phosphate",
                "lyxose",
                "ribose",
                "ribose phosphate",
                "xylose",
                "xylulose"
            ],
            "TP": 4,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Arabinose (Ara)",
            "Xylose (Xyl)",
            "Lyxose (Lyx)",
            "Ribulose (Ru5P)",
            "D-Ribose (D-Rib)",
            "D-Arabinose (D-Ara)",
            "D-Xylose (D-Xyl)",
            "D-Lyxose (D-Lyx)",
            "D-Ribulose (D-Ru5P)",
            "L-Ribose (L-Rib)",
            "L-Arabinose (L-Ara)",
            "L-Xylose (L-Xyl)",
            "L-Lyxose (L-Lyx)",
            "L-Ribulose (L-Ru5P)",
            "2-Deoxy-D-ribose (2dR)",
            "D-Ribofuranose (D-RibF)",
            "L-Ribofuranose (L-RibF)",
            "D-Arabinofuranose (D-AraF)",
            "L-Arabinofuranose (L-AraF)",
            "D-Xylofuranose (D-XylF)",
            "L-Xylofuranose (L-XylF)",
            "D-Lyxofuranose (D-LyxF)",
            "L-Lyxofuranose (L-LyxF)",
            "D-Ribulofuranose (D-Ru5F)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose (Rib)",
                "Arabinose (Ara)",
                "Xylose (Xyl)",
                "Lyxose (Lyx)",
                "D-Ribose (D-Rib)",
                "D-Arabinose (D-Ara)",
                "D-Xylose (D-Xyl)",
                "D-Lyxose (D-Lyx)",
                "L-Ribose (L-Rib)",
                "L-Arabinose (L-Ara)",
                "L-Xylose (L-Xyl)",
                "L-Lyxose (L-Lyx)",
                "2-Deoxy-D-ribose (2dR)",
                "D-Ribofuranose (D-RibF)",
                "L-Ribofuranose (L-RibF)",
                "D-Arabinofuranose (D-AraF)",
                "L-Arabinofuranose (L-AraF)",
                "D-Xylofuranose (D-XylF)",
                "L-Xylofuranose (L-XylF)",
                "D-Lyxofuranose (D-LyxF)",
                "L-Lyxofuranose (L-LyxF)"
            ],
            "mismatches": [
                "Ribulose (Ru5P)",
                "D-Ribulose (D-Ru5P)",
                "L-Ribulose (L-Ru5P)",
                "D-Ribulofuranose (D-Ru5F)"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "2-deoxy-D-ribose",
                "2-deoxy-beta-D-ribose 1-phosphate",
                "D-arabinofuranose",
                "D-arabinopyranose",
                "D-arabinose",
                "D-lyxofuranose",
                "D-lyxose",
                "D-ribofuranose",
                "D-ribose",
                "D-ribose 1-phosphate",
                "D-ribulose",
                "D-ribulose 5-phosphate",
                "D-xylofuranose",
                "D-xylose",
                "D-xylulose",
                "L-arabinofuranose",
                "L-arabinose",
                "L-lyxofuranose",
                "L-lyxose",
                "L-ribofuranose",
                "L-ribose",
                "L-ribulose 5-phosphate",
                "L-xylofuranose",
                "L-xylopyranose",
                "L-xylose",
                "L-xylulose",
                "alpha-D-arabinofuranose",
                "alpha-L-xylofuranose",
                "arabinose",
                "beta-D-arabinofuranose",
                "beta-D-lyxofuranose",
                "beta-D-xylofuranose",
                "beta-L-arabinofuranose",
                "beta-L-lyxofuranose",
                "beta-L-ribose",
                "beta-L-xylofuranose",
                "lyxose",
                "ribofuranose",
                "ribose",
                "ribose phosphate",
                "ribulose 5-phosphate",
                "xylose"
            ],
            "TP": 21,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Arabinose (Ara)",
            "Lyxose (Lyx)",
            "Xylose (Xyl)",
            "Ribulose (Rul)",
            "Xylulose (Xul)",
            "Arabinulose (Aul)",
            "Lyxulose (Lux)",
            "Ribonic acid (Rba)",
            "Arabinonic acid (Aba)",
            "Lyxonic acid (Lba)",
            "Xylonic acid (Xba)",
            "Riburonate (Rbu)",
            "Arabinuronate (Abu)",
            "Lyxuronate (Lbu)",
            "Xyluronate (Xbu)",
            "Ribitol (Rbt)",
            "Arabitol (Abt)",
            "Lyxitol (Lbt)",
            "Xylitol (Xbt)",
            "Ribulitol (Rulit)",
            "Xylulitol (Xulit)",
            "Arabinulitol (Aulit)",
            "Lyxulitol (Luxit)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose",
                "Arabinose",
                "Lyxose",
                "Xylose",
                "Ribulose",
                "Xylulose",
                "Lyxulose"
            ],
            "mismatches": [
                "Arabinulose",
                "Ribonic acid",
                "Arabinonic acid",
                "Lyxonic acid",
                "Xylonic acid",
                "Riburonate",
                "Arabinuronate",
                "Lyxuronate",
                "Xyluronate",
                "Ribitol",
                "Arabitol",
                "Lyxitol",
                "Xylitol",
                "Ribulitol",
                "Xylulitol",
                "Arabinulitol",
                "Lyxulitol"
            ],
            "true_referents": [
                "D-arabinofuranose",
                "D-arabinose",
                "D-lyxose",
                "D-xylose",
                "D-xylulose",
                "L-arabinofuranose",
                "L-arabinose",
                "L-lyxofuranose",
                "L-lyxopyranose",
                "L-lyxose",
                "L-ribose",
                "L-ribulose",
                "L-xylose",
                "L-xylulose",
                "Lyxulose",
                "UDP-2-acetamido-2-deoxy-alpha-D-ribo-hex-3-uloseuronic acid",
                "UDP-L-arabinose",
                "UDP-L-iduronic acid",
                "alpha-L-arabinofuranose",
                "alpha-L-arabinose 1-phosphate",
                "arabinose",
                "arabinose phosphate",
                "beta-D-xylose",
                "beta-L-lyxopyranose",
                "lyxose",
                "ribofuranose",
                "ribose",
                "ribose monophosphate",
                "ribose phosphate",
                "ribulosamine",
                "ribulose",
                "ribulose derivative",
                "ribulose phosphate",
                "xylose",
                "xylulose",
                "xylulose derivative",
                "xylulose phosphate"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (R)",
            "Arabinose (A)",
            "Lyxose (L)",
            "Xylose (X)",
            "Deoxyribose (D)",
            "Altrose (At)",
            "Galactose (Gal)",
            "Glucose (Glc)",
            "Mannose (Man)",
            "Gulose (Gu)",
            "Idose (Id)",
            "Talose (Tal)",
            "Allose (Al)",
            "Glucuronic acid (GlcA)",
            "Iduronic acid (IdoA)",
            "Galacturonic acid (GalA)",
            "Mannuronic acid (ManA)",
            "Taluronic acid (TalA)",
            "Xyluronic acid (XylA)",
            "Fucose (Fuc)",
            "Rhamnose (Rha)",
            "Sorbose (Sor)",
            "Tagatose (Tag)",
            "Psicose (Psi)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Arabinose (A)",
                "Lyxose (L)",
                "Ribose (R)",
                "Xylose (X)"
            ],
            "mismatches": [
                "Deoxyribose (D)",
                "Altrose (At)",
                "Galactose (Gal)",
                "Glucose (Glc)",
                "Mannose (Man)",
                "Gulose (Gu)",
                "Idose (Id)",
                "Talose (Tal)",
                "Allose (Al)",
                "Glucuronic acid (GlcA)",
                "Iduronic acid (IdoA)",
                "Galacturonic acid (GalA)",
                "Mannuronic acid (ManA)",
                "Taluronic acid (TalA)",
                "Xyluronic acid (XylA)",
                "Fucose (Fuc)",
                "Rhamnose (Rha)",
                "Sorbose (Sor)",
                "Tagatose (Tag)",
                "Psicose (Psi)"
            ],
            "true_referents": [
                "D-arabinose",
                "D-lyxose",
                "D-ribose",
                "D-xylose",
                "D-xylulose",
                "GDP-6-deoxy-alpha-D-altrose",
                "GDP-D-glucose",
                "GDP-L-fucose",
                "GDP-beta-L-galactose",
                "GDP-fucose",
                "GDP-mannose",
                "IDP",
                "L-arabinose",
                "L-lyxose",
                "L-xylose",
                "L-xylulose",
                "UDP-2,3-diacetamido-2,3-dideoxy-alpha-D-mannuronic acid",
                "UDP-D-apiose",
                "UDP-D-galacturonic acid",
                "UDP-D-glucuronic acid",
                "UDP-L-iduronic acid",
                "UDP-alpha-D-galacturonic acid",
                "UDP-alpha-D-glucuronic acid",
                "UDP-beta-L-iduronic acid",
                "aldehydo-D-lyxose",
                "aldehydo-D-xylose",
                "aldehydo-L-lyxose",
                "arabinose",
                "dTDP-4-dehydro-6-deoxy-alpha-D-gulose",
                "dTDP-L-talose",
                "dTDP-rhamnose",
                "deoxyribulose phosphate",
                "ketopentose",
                "lyxose",
                "pentopyranose",
                "ribose",
                "ribose phosphate",
                "tinapoyl-CoA",
                "xylose",
                "xylulose"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Arabinose (Ara)",
            "Xylose (Xyl)",
            "Lyxose (Lyx)",
            "Allose",
            "Altrose",
            "Talose",
            "Idose",
            "Ribulose",
            "Xylulose",
            "Arabinulose",
            "Lyxulose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose (Rib)",
                "Arabinose (Ara)",
                "Xylose (Xyl)",
                "Lyxose (Lyx)",
                "Ribulose",
                "Xylulose",
                "Lyxulose"
            ],
            "mismatches": [
                "Allose",
                "Altrose",
                "Talose",
                "Idose",
                "Arabinulose"
            ],
            "true_referents": [
                "D-arabinose",
                "D-lyxose",
                "D-xylose",
                "GDP-6-deoxy-alpha-D-altrose",
                "L-arabinose",
                "L-lyxose",
                "L-ribose",
                "L-ribulose",
                "L-xylose",
                "L-xylulose",
                "Lyxulose",
                "UDP-D-apiose",
                "UDP-alpha-D-apiose",
                "arabinose",
                "dTDP-talose",
                "lyxose",
                "ribose",
                "ribose phosphate",
                "ribulose",
                "ribulose phosphate",
                "xylose",
                "xylulose"
            ],
            "TP": 7,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose",
            "Deoxyribose",
            "Xylose",
            "Arabinose",
            "Lyxose",
            "Ribulose",
            "Xylulose",
            "Apiose",
            "Hamamelose",
            "Ribulofuranose",
            "Xylofuranose",
            "Arabinofuranose",
            "Lyxofuranose",
            "Ribopyranose",
            "Xylopyranose",
            "Arabinopyranose",
            "Lyxopyranose",
            "2-Deoxy-ribose",
            "2-Deoxy-xylose",
            "2-Deoxy-arabinose",
            "2-Deoxy-lyxose",
            "3-Deoxy-xylose",
            "4-Deoxy-xylose",
            "5-Deoxy-xylose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose",
                "Xylose",
                "Arabinose",
                "Lyxose",
                "Ribulose",
                "Xylulose",
                "Hamamelose",
                "Ribopyranose",
                "Xylofuranose",
                "Arabinofuranose",
                "Lyxofuranose",
                "Xylopyranose",
                "Arabinopyranose",
                "Lyxopyranose"
            ],
            "mismatches": [
                "Deoxyribose",
                "Apiose",
                "Ribulofuranose",
                "2-Deoxy-ribose",
                "2-Deoxy-xylose",
                "2-Deoxy-arabinose",
                "2-Deoxy-lyxose",
                "3-Deoxy-xylose",
                "4-Deoxy-xylose",
                "5-Deoxy-xylose"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "1-deoxy-D-xylulose 5-phosphate",
                "2-O-Methyl-D-xylose",
                "2-deoxy-D-ribose",
                "2-deoxy-L-arabinose",
                "2-deoxy-beta-D-ribose 1-phosphate",
                "4-amino-4-deoxy-L-arabinose",
                "D-arabinofuranose",
                "D-arabinopyranose",
                "D-arabinose",
                "D-hamamelose",
                "D-lyxofuranose",
                "D-lyxopyranose",
                "D-lyxose",
                "D-xylofuranose",
                "D-xylopyranose",
                "D-xylose",
                "L-arabinofuranose",
                "L-arabinopyranose",
                "L-arabinose",
                "L-lyxofuranose",
                "L-lyxopyranose",
                "L-lyxose",
                "L-ribopyranose",
                "L-ribose",
                "L-ribulose",
                "L-xylofuranose",
                "L-xylopyranose",
                "L-xylulose",
                "UDP-alpha-D-apiose",
                "aldehydo-D-lyxose",
                "arabinose",
                "beta-D-arabinofuranose",
                "beta-D-hamamelose",
                "beta-D-lyxopyranose",
                "beta-D-xylofuranose",
                "beta-D-xylose",
                "beta-L-arabinofuranose",
                "beta-L-arabinopyranose",
                "beta-L-lyxofuranose",
                "beta-L-xylopyranose",
                "deoxyribonucleotide",
                "deoxyribulose phosphate",
                "lyxose",
                "ribofuranose",
                "ribopyranose",
                "ribose",
                "ribose phosphate",
                "ribulose",
                "ribulose phosphate",
                "xylose",
                "xylulose"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Arabinose (Ara)",
            "Xylose (Xyl)",
            "Lyxose (Lyx)",
            "Ribulose (Rbu)",
            "Xylulose (Xlu)",
            "Deoxyribose (dRib)",
            "Deoxyxylose",
            "Deoxyarabinose",
            "Deoxylyxose",
            "Ribitol",
            "Arabitol",
            "Xylitol",
            "Deoxyribitol",
            "Deoxyarabitol",
            "Deoxyxylitol",
            "Ribonic acid",
            "Arabinonic acid",
            "Xylonic acid",
            "Lyxonic acid",
            "Deoxyribonic acid",
            "Deoxyarabinonic acid",
            "Deoxyxylonic acid",
            "Deoxylyxonic acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose",
                "Arabinose",
                "Xylose",
                "Lyxose",
                "Ribulose",
                "Xylulose"
            ],
            "mismatches": [
                "Deoxyribose",
                "Deoxyxylose",
                "Deoxyarabinose",
                "Deoxylyxose",
                "Ribitol",
                "Arabitol",
                "Xylitol",
                "Deoxyribitol",
                "Deoxyarabitol",
                "Deoxyxylitol",
                "Ribonic acid",
                "Arabinonic acid",
                "Xylonic acid",
                "Lyxonic acid",
                "Deoxyribonic acid",
                "Deoxyarabinonic acid",
                "Deoxyxylonic acid",
                "Deoxylyxonic acid"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "4-amino-4-deoxyarabinose",
                "D-arabinofuranose",
                "D-arabinopyranose",
                "D-arabinose",
                "D-lyxopyranose",
                "D-lyxose",
                "D-lyxosylamine",
                "D-ribofuranose",
                "D-xylose",
                "D-xylulose",
                "L-arabinose",
                "L-lyxose",
                "L-ribose",
                "L-ribulose",
                "L-xylose",
                "L-xylulose",
                "Lyxulose",
                "adenyl ribonucleotide",
                "aldehydo-arabinose",
                "arabinose",
                "arabinose phosphate",
                "beta-D-lyxopyranose",
                "beta-D-xylose",
                "deoxyribonucleotide",
                "deoxyribulose phosphate",
                "lyxose",
                "pyrimidine ribonucleotide",
                "ribopyranose",
                "ribose",
                "ribose monophosphate",
                "ribose phosphate",
                "ribulose",
                "ribulose phosphate",
                "xylose",
                "xylose phosphate",
                "xylulose"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Xylose (Xyl)",
            "Arabinose (Ara)",
            "Lyxose (Lyx)",
            "Ribulose (Rbu)",
            "Xylulose (Xyu)",
            "Arabinoxylose (Arx)",
            "Arabinoxylitol (Axt)",
            "Arabitol (Abt)",
            "Xylitol (Xyt)",
            "Ribitol (Rbt)",
            "Ribonic acid (Rba)",
            "Xylonic acid (Xya)",
            "Arabinonic acid (Aaa)",
            "Lyxonic acid (Lxa)",
            "Ribonolactone (Rbl)",
            "Xylono\u03b3-lactone (Xyl)",
            "Arabinono-\u03b3-lactone (Aal)",
            "Lyxono-\u03b3-lactone (Lxl)",
            "Ribofuranose (Rbf)",
            "Xylofuranose (Xyf)",
            "Arabinofuranose (Aaf)",
            "Lyxofuranose (Lxf)",
            "Ribopyranose (Rbp)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose (Rib)",
                "Xylose (Xyl)",
                "Arabinose (Ara)",
                "Lyxose (Lyx)",
                "Ribulose (Rbu)",
                "Xylulose (Xyu)",
                "Ribofuranose (Rbf)",
                "Xylofuranose (Xyf)",
                "Arabinofuranose (Aaf)",
                "Lyxofuranose (Lxf)",
                "Ribopyranose (Rbp)"
            ],
            "mismatches": [
                "Arabinoxylose (Arx)",
                "Arabinoxylitol (Axt)",
                "Arabitol (Abt)",
                "Xylitol (Xyt)",
                "Ribitol (Rbt)",
                "Ribonic acid (Rba)",
                "Xylonic acid (Xya)",
                "Arabinonic acid (Aaa)",
                "Lyxonic acid (Lxa)",
                "Ribonolactone (Rbl)",
                "Xylono\u03b3-lactone (Xyl)",
                "Arabinono-\u03b3-lactone (Aal)",
                "Lyxono-\u03b3-lactone (Lxl)"
            ],
            "true_referents": [
                "D-arabinofuranose",
                "D-arabinose",
                "D-lyxofuranose",
                "D-lyxose",
                "D-xylofuranose",
                "D-xylose",
                "D-xylulose",
                "L-arabinofuranose",
                "L-arabinose",
                "L-lyxofuranose",
                "L-lyxose",
                "L-ribofuranose",
                "L-ribopyranose",
                "L-ribose",
                "L-ribulose",
                "L-xylofuranose",
                "L-xylose",
                "L-xylulose",
                "aldehydo-L-arabinose",
                "aldehydo-L-lyxose",
                "alpha-D-arabinofuranose",
                "alpha-L-arabinofuranose",
                "alpha-L-arabinopyranose",
                "alpha-L-arabinose 1-phosphate",
                "alpha-L-lyxopyranose",
                "arabinose",
                "arabinose phosphate",
                "beta-D-Xylf-(1->6)-beta-D-Glcp",
                "beta-D-xylofuranose",
                "beta-D-xylose",
                "beta-L-lyxofuranose",
                "lyxose",
                "ribofuranose",
                "ribopyranose",
                "ribose",
                "ribose monophosphate",
                "ribose phosphate",
                "ribulosamine",
                "ribulose",
                "ribulose derivative",
                "ribulose phosphate",
                "xylose",
                "xylulose"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Deoxyribose (dRib)",
            "Arabinose (Ara)",
            "Xylose (Xyl)",
            "Lyxose (Lyx)",
            "Erythrulose (Ery)",
            "Threose (Thr)",
            "Erythrulose (Ery)",
            "Erythritol (Eri)",
            "Xylitol (Xol)",
            "Arabitol (Abi)",
            "Ribitol (Rib)",
            "Threitol (Thr)",
            "Erythritol (Eri)",
            "Erythrulose (Ery)",
            "Threose (Thr)",
            "Ribose (Rib)",
            "Deoxyribose (dRib)",
            "Arabinose (Ara)",
            "Xylose (Xyl)",
            "Lyxose (Lyx)",
            "Erythrulose (Ery)",
            "Threose (Thr)",
            "Erythritol (Eri)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose (Rib)",
                "Arabinose (Ara)",
                "Xylose (Xyl)",
                "Lyxose (Lyx)"
            ],
            "mismatches": [
                "Deoxyribose (dRib)",
                "Erythrulose (Ery)",
                "Threose (Thr)",
                "Erythritol (Eri)",
                "Xylitol (Xol)",
                "Arabitol (Abi)",
                "Ribitol (Rib)",
                "Threitol (Thr)"
            ],
            "true_referents": [
                "1-Deoxy-1-(nonylamino)pentitol",
                "D-1-Deoxy-erythro-hexo-2,3-diulose",
                "D-arabinose",
                "D-lyxose",
                "D-xylose",
                "D-xylulose",
                "L-arabinose",
                "L-lyxose",
                "L-ribose",
                "L-threonyl-AMP(1-)",
                "L-xylose",
                "Lyxulose",
                "aldehydo-arabinose",
                "arabinose",
                "arabinose phosphate",
                "beta-D-xylose",
                "deoxyribulose phosphate",
                "erucoyl-CoA",
                "lyxose",
                "ribose",
                "ribose phosphate",
                "ribulose derivative",
                "thioribose",
                "xylose",
                "xylulose"
            ],
            "TP": 4,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Ribose (Rib)",
            "Arabinose",
            "Xylose",
            "Lyxose",
            "Allose",
            "Altrose",
            "Gulose",
            "Idose",
            "Talose",
            "Ribulose",
            "Xylulose",
            "Lyxulose",
            "Allose",
            "Tagatose",
            "Psicose",
            "Ribose-5-phosphate",
            "Xylulose-5-phosphate",
            "Ribulose-5-phosphate",
            "Xylulose-1,5-bisphosphate",
            "Ribulose-1,5-bisphosphate",
            "Ribose-1-phosphate",
            "Ribose-5-phosphate",
            "Ribose-1,5-bisphosphate"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose (Rib)",
                "Arabinose",
                "Xylose",
                "Lyxose",
                "Ribulose",
                "Xylulose",
                "Lyxulose",
                "Ribose-5-phosphate",
                "Xylulose-5-phosphate",
                "Ribulose-5-phosphate",
                "Xylulose-1,5-bisphosphate",
                "Ribulose-1,5-bisphosphate",
                "Ribose-1-phosphate",
                "Ribose-1,5-bisphosphate"
            ],
            "mismatches": [
                "Allose",
                "Altrose",
                "Gulose",
                "Idose",
                "Talose",
                "Allose",
                "Tagatose",
                "Psicose"
            ],
            "true_referents": [
                "D-arabinose",
                "D-lyxose",
                "D-ribose 1,5-bisphosphate",
                "D-ribose 1-phosphate",
                "D-ribose 5-phosphate",
                "D-ribulose 1,5-bisphosphate",
                "D-xylulose 1,5-bisphosphate",
                "GDP-6-deoxy-alpha-D-altrose",
                "L-arabinose",
                "L-lyxose",
                "L-ribose",
                "L-ribulose",
                "L-xylulose",
                "Lyxulose",
                "UDP-D-apiose",
                "UDP-alpha-D-apiose",
                "arabinose",
                "beta-D-xylose",
                "choloyl-CoAs",
                "dTDP-4-dehydro-6-deoxy-beta-D-gulose",
                "dTDP-talose",
                "lyxose",
                "pentopyranose",
                "ribose",
                "ribose bisphosphate",
                "ribose monophosphate",
                "ribose phosphate",
                "ribulose",
                "ribulose 5-phosphate",
                "ribulose phosphate",
                "xylose",
                "xylulose",
                "xylulose 5-phosphate"
            ],
            "TP": 14,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            "Arabinose (Ara)",
            "Ribose (Rib)",
            "Lyxose (Lyx)",
            "Xylose (Xyl)",
            "Ribulose (Rbu)",
            "Arabinulose",
            "Ribulose-1-phosphate",
            "Ribose-1-phosphate",
            "Arabinose-1-phosphate",
            "Lyxofuranose",
            "Xylofuranose",
            "Arabinofuranose",
            "Ribofuranose",
            "Lyxopyranose",
            "Xylopyranose",
            "Arabinopyranose",
            "Ribopyranose",
            "D-Arabinose",
            "D-Ribose",
            "D-Lyxose",
            "D-Xylose",
            "L-Arabinose",
            "L-Ribose",
            "L-Lyxose",
            "L-Xylose"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Ribose-1-phosphate",
                "Lyxofuranose",
                "Xylofuranose",
                "Arabinofuranose",
                "Ribofuranose",
                "Lyxopyranose",
                "Xylopyranose",
                "Arabinopyranose",
                "Ribopyranose",
                "D-Arabinose",
                "D-Ribose",
                "D-Lyxose",
                "D-Xylose",
                "L-Arabinose",
                "L-Ribose",
                "L-Lyxose",
                "L-Xylose"
            ],
            "mismatches": [
                "Arabinose (Ara)",
                "Ribose (Rib)",
                "Lyxose (Lyx)",
                "Xylose (Xyl)",
                "Ribulose (Rbu)",
                "Arabinulose",
                "Ribulose-1-phosphate",
                "Arabinose-1-phosphate"
            ],
            "true_referents": [
                "D-arabinofuranose",
                "D-arabinopyranose",
                "D-arabinose",
                "D-lyxofuranose",
                "D-lyxopyranose",
                "D-lyxose",
                "D-ribose",
                "D-ribose 1-phosphate",
                "D-xylofuranose",
                "D-xylopyranose",
                "D-xylose",
                "D-xylulose",
                "L-arabinofuranose",
                "L-arabinopyranose",
                "L-arabinose",
                "L-arabinose 1-phosphate",
                "L-lyxofuranose",
                "L-lyxopyranose",
                "L-lyxose",
                "L-ribofuranose",
                "L-ribopyranose",
                "L-ribose",
                "L-ribulose",
                "L-xylofuranose",
                "L-xylopyranose",
                "L-xylose",
                "alpha-L-arabinose 1-phosphate",
                "arabinose",
                "beta-D-arabinofuranose",
                "beta-D-lyxopyranose",
                "beta-D-xylofuranose",
                "beta-D-xylose",
                "beta-L-arabinopyranose",
                "beta-L-lyxofuranose",
                "beta-L-ribose",
                "beta-L-xylopyranose",
                "lyxose",
                "ribofuranose",
                "ribopyranose",
                "ribose",
                "ribose monophosphate",
                "ribose phosphate",
                "ribulose",
                "ribulose 1-phosphate",
                "ribulose phosphate",
                "xylose",
                "xylulose"
            ],
            "TP": 17,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "pentose",
        "domain": "chemistry",
        "response": [
            {
                "Referent": "Glucose (C6H12O6)",
                "Canonical name": "Glucose"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Glucose (C6H12O6)"
            ],
            "true_referents": [
                "1-deoxy-D-xylulose",
                "UDP-4-dehydro-6-deoxy-D-glucose"
            ],
            "TP": 0,
            "FP": 1,
            "FN": 0
        }
    }
]