[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Glucokinase (GCK)",
            "Phosphoglycerate Kinase (PGK1)",
            "Phosphofructokinase (PFK1)",
            "Pyruvate Kinase (PKM)",
            "Phosphoglycerate Mutase (PGAM1)",
            "Enolase (ENO1)",
            "Phosphoglucomutase (PGM1)",
            "Triosephosphate Isomerase (TPI1)",
            "Glyceraldehyde-3-Phosphate Dehydrogenase (GAPDH)",
            "Lactate Dehydrogenase (LDHA)",
            "Phosphoglycerate Dehydrogenase (PHGDH)",
            "Phosphoglycerate Kinase 2 (PGK2)",
            "Pyruvate Dehydrogenase E1 Subunit Alpha (PDHA1)",
            "Pyruvate Dehydrogenase E1 Subunit Beta (PDHB)",
            "Pyruvate Dehydrogenase Kinase 1 (PDK1)",
            "Pyruvate Dehydrogenase Phosphatase Catalytic Subunit (PDP1)",
            "Phosphoglycerate Dehydrogenase (PHGDH)",
            "Pyruvate Kinase Liver and RBC (PKLR)",
            "Pyruvate Kinase Muscle (PKM2)",
            "Pyruvate Carboxylase (PC)",
            "Pyruvate Dehydrogenase E1-Component Subunit Alpha (PDHA2)",
            "Pyruvate Dehydrogenase E1-Component Subunit Beta (PDHB)",
            "Pyruvate Dehydrogenase Kinase 4 (PDK4)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Glucokinase (GCK)",
                "Phosphoglycerate Kinase (PGK1)",
                "Phosphofructokinase (PFK1)",
                "Pyruvate Kinase (PKM)",
                "Phosphoglycerate Mutase (PGAM1)",
                "Enolase (ENO1)",
                "Phosphoglucomutase (PGM1)",
                "Triosephosphate Isomerase (TPI1)",
                "Glyceraldehyde-3-Phosphate Dehydrogenase (GAPDH)",
                "Lactate Dehydrogenase (LDHA)",
                "Phosphoglycerate Dehydrogenase (PHGDH)",
                "Phosphoglycerate Kinase 2 (PGK2)",
                "Pyruvate Dehydrogenase E1 Subunit Alpha (PDHA1)",
                "Pyruvate Dehydrogenase E1 Subunit Beta (PDHB)",
                "Pyruvate Dehydrogenase Kinase 1 (PDK1)",
                "Pyruvate Dehydrogenase Phosphatase Catalytic Subunit (PDP1)",
                "Pyruvate Kinase Liver and RBC (PKLR)",
                "Pyruvate Kinase Muscle (PKM2)",
                "Pyruvate Carboxylase (PC)",
                "Pyruvate Dehydrogenase E1-Component Subunit Alpha (PDHA2)",
                "Pyruvate Dehydrogenase E1-Component Subunit Beta (PDHB)",
                "Pyruvate Dehydrogenase Kinase 4 (PDK4)"
            ],
            "mismatches": [],
            "true_referents": [
                "Enolase (ENO1)",
                "Glucokinase (GCK)",
                "Glyceraldehyde-3-Phosphate Dehydrogenase (GAPDH)",
                "Hexokinase (HK)",
                "Lactate Dehydrogenase (LDHA)",
                "Phosphofructokinase (PFK1)",
                "Phosphoglucomutase (PGM1)",
                "Phosphoglycerate Dehydrogenase (PHGDH)",
                "Phosphoglycerate Kinase (PGK1)",
                "Phosphoglycerate Kinase 2 (PGK2)",
                "Phosphoglycerate Mutase (PGAM1)",
                "Pyruvate Carboxylase (PC)",
                "Pyruvate Dehydrogenase E1 Subunit Alpha (PDHA1)",
                "Pyruvate Dehydrogenase E1 Subunit Beta (PDHB)",
                "Pyruvate Dehydrogenase E1-Component Subunit Alpha (PDHA2)",
                "Pyruvate Dehydrogenase E1-Component Subunit Beta (PDHB)",
                "Pyruvate Dehydrogenase Kinase 1 (PDK1)",
                "Pyruvate Dehydrogenase Kinase 4 (PDK4)",
                "Pyruvate Dehydrogenase Phosphatase Catalytic Subunit (PDP1)",
                "Pyruvate Kinase (PKM)",
                "Pyruvate Kinase Liver and RBC (PKLR)",
                "Pyruvate Kinase Muscle (PKM2)",
                "Triosephosphate Isomerase (TPI1)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Phosphoglucose isomerase (PGI)",
            "Phosphofructokinase-1 (PFK-1)",
            "Aldolase (ALDO)",
            "Triosephosphate isomerase (TPI)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK)",
            "Phosphoglycerate mutase (PGAM)",
            "Enolase (ENO)",
            "Pyruvate kinase (PK)",
            "Glucokinase (GK)",
            "Fructose-1,6-bisphosphate aldolase (ALDOA)",
            "Fructose-bisphosphate aldolase B (ALDOB)",
            "Fructose-bisphosphate aldolase C (ALDOC)",
            "Triose phosphate isomerase A (TPI1)",
            "Triose phosphate isomerase B (TPI2)",
            "Glyceraldehyde-3-phosphate dehydrogenase 1 (GAPDH1)",
            "Glyceraldehyde-3-phosphate dehydrogenase 2 (GAPDH2)",
            "Phosphoglycerate kinase 1 (PGK1)",
            "Phosphoglycerate kinase 2 (PGK2)",
            "Phosphoglycerate mutase 1 (PGAM1)",
            "Phosphoglycerate mutase 2 (PGAM2)",
            "Enolase 1 (ENO1)",
            "Enolase 2 (ENO2)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Phosphoglucose isomerase (PGI)",
                "Phosphofructokinase-1 (PFK-1)",
                "Aldolase (ALDO)",
                "Triosephosphate isomerase (TPI)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Enolase (ENO)",
                "Pyruvate kinase (PK)",
                "Glucokinase (GK)",
                "Fructose-1,6-bisphosphate aldolase (ALDOA)",
                "Fructose-bisphosphate aldolase B (ALDOB)",
                "Fructose-bisphosphate aldolase C (ALDOC)",
                "Triose phosphate isomerase A (TPI1)",
                "Triose phosphate isomerase B (TPI2)",
                "Glyceraldehyde-3-phosphate dehydrogenase 1 (GAPDH1)",
                "Glyceraldehyde-3-phosphate dehydrogenase 2 (GAPDH2)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate kinase 2 (PGK2)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase (ALDO)",
                "Enolase (ENO)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Fructose-1,6-bisphosphate aldolase (ALDOA)",
                "Fructose-bisphosphate aldolase B (ALDOB)",
                "Fructose-bisphosphate aldolase C (ALDOC)",
                "Glucokinase (GK)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Glyceraldehyde-3-phosphate dehydrogenase 1 (GAPDH1)",
                "Glyceraldehyde-3-phosphate dehydrogenase 2 (GAPDH2)",
                "Hexokinase (HK)",
                "Phosphofructokinase-1 (PFK-1)",
                "Phosphoglucose isomerase (PGI)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate kinase 2 (PGK2)",
                "Phosphoglycerate mutase (PGAM)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Pyruvate kinase (PK)",
                "Triose phosphate isomerase A (TPI1)",
                "Triose phosphate isomerase B (TPI2)",
                "Triosephosphate isomerase (TPI)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Glucokinase (GCK)",
            "Phosphofructokinase (PFK)",
            "Aldolase (ALDOA)",
            "Triosephosphate isomerase (TPI1)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK)",
            "Phosphoglycerate mutase (PGAM)",
            "Enolase (ENO)",
            "Pyruvate kinase (PKM)",
            "Glucose-6-phosphate isomerase (GPI)",
            "Fructose-bisphosphate aldolase (ALDOB)",
            "Fructose-bisphosphate aldolase C (ALDOC)",
            "Phosphoglycerate mutase 2 (PGAM2)",
            "Phosphoglycerate mutase 1 (PGAM1)",
            "Triosephosphate isomerase 2 (TPI2)",
            "Pyruvate kinase isozyme M2 (PKM2)",
            "Pyruvate kinase isozyme L/R (PKLR)",
            "Enolase 1 (ENO1)",
            "Enolase 2 (ENO2)",
            "Enolase 3 (ENO3)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDHS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Glucokinase (GCK)",
                "Phosphofructokinase (PFK)",
                "Aldolase (ALDOA)",
                "Triosephosphate isomerase (TPI1)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Enolase (ENO)",
                "Pyruvate kinase (PKM)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Fructose-bisphosphate aldolase (ALDOB)",
                "Fructose-bisphosphate aldolase C (ALDOC)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Triosephosphate isomerase 2 (TPI2)",
                "Pyruvate kinase isozyme M2 (PKM2)",
                "Pyruvate kinase isozyme L/R (PKLR)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDHS)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase (ALDOA)",
                "Enolase (ENO)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Fructose-bisphosphate aldolase (ALDOB)",
                "Fructose-bisphosphate aldolase C (ALDOC)",
                "Glucokinase (GCK)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDHS)",
                "Hexokinase (HK)",
                "Phosphofructokinase (PFK)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Pyruvate kinase (PKM)",
                "Pyruvate kinase isozyme L/R (PKLR)",
                "Pyruvate kinase isozyme M2 (PKM2)",
                "Triosephosphate isomerase (TPI1)",
                "Triosephosphate isomerase 2 (TPI2)"
            ],
            "TP": 22,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Glucose-6-phosphate isomerase (GPI)",
            "Phosphofructokinase-1 (PFK1)",
            "Aldolase (ALDOA)",
            "Triosephosphate isomerase (TPI1)",
            "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK1)",
            "Phosphoglycerate mutase (PGM1)",
            "Enolase (ENO1)",
            "Pyruvate kinase (PKM)",
            "Fructose-1,6-bisphosphatase (FBP1)",
            "Glucose-6-phosphatase (G6Pase)",
            "Pyruvate carboxylase (PC)",
            "Phosphoenolpyruvate carboxykinase (PEPCK)",
            "Lactate dehydrogenase A (LDHA)",
            "Aldolase B, Fructose Aldolase (ALDOB)",
            "Pyruvate dehydrogenase kinase 1 (PDK1)",
            "6-Phosphofructo-2-fructokinase (PFKFB3)",
            "Fructose bisphosphate aldolase (ALDOC)",
            "Glycerol-3-phosphate dehydrogenase (GPD1)",
            "Phosphoglycerate dehydrogenase (PHGDH)",
            "Transketolase (TKT)",
            "Transaldolase (TALDO1)",
            "Fumase (FUM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Phosphofructokinase-1 (PFK1)",
                "Aldolase (ALDOA)",
                "Triosephosphate isomerase (TPI1)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK1)",
                "Phosphoglycerate mutase (PGM1)",
                "Enolase (ENO1)",
                "Pyruvate kinase (PKM)",
                "Fructose-1,6-bisphosphatase (FBP1)",
                "Glucose-6-phosphatase (G6Pase)",
                "Pyruvate carboxylase (PC)",
                "Phosphoenolpyruvate carboxykinase (PEPCK)",
                "Lactate dehydrogenase A (LDHA)",
                "Aldolase B, Fructose Aldolase (ALDOB)",
                "Pyruvate dehydrogenase kinase 1 (PDK1)",
                "6-Phosphofructo-2-fructokinase (PFKFB3)",
                "Fructose bisphosphate aldolase (ALDOC)",
                "Glycerol-3-phosphate dehydrogenase (GPD1)",
                "Phosphoglycerate dehydrogenase (PHGDH)",
                "Transketolase (TKT)",
                "Transaldolase (TALDO1)",
                "Fumase (FUM)"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphofructo-2-fructokinase (PFKFB3)",
                "Aldolase (ALDOA)",
                "Aldolase B, Fructose Aldolase (ALDOB)",
                "Enolase (ENO1)",
                "Fructose bisphosphate aldolase (ALDOC)",
                "Fructose-1,6-bisphosphatase (FBP1)",
                "Fumase (FUM)",
                "Glucose-6-phosphatase (G6Pase)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Glycerol-3-phosphate dehydrogenase (GPD1)",
                "Hexokinase (HK)",
                "Lactate dehydrogenase A (LDHA)",
                "Phosphoenolpyruvate carboxykinase (PEPCK)",
                "Phosphofructokinase-1 (PFK1)",
                "Phosphoglycerate dehydrogenase (PHGDH)",
                "Phosphoglycerate kinase (PGK1)",
                "Phosphoglycerate mutase (PGM1)",
                "Pyruvate carboxylase (PC)",
                "Pyruvate dehydrogenase kinase 1 (PDK1)",
                "Pyruvate kinase (PKM)",
                "Transaldolase (TALDO1)",
                "Transketolase (TKT)",
                "Triosephosphate isomerase (TPI1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase 1 (HK1)",
            "Hexokinase 2 (HK2)",
            "Hexokinase 3 (HK3)",
            "Hexokinase 4 (HK4 or GCK)",
            "Phosphoglucose isomerase (GPI)",
            "Phosphofructokinase 1 (PFK1)",
            "Phosphofructokinase 2 (PFK2)",
            "Aldolase A (ALDOA)",
            "Aldolase B (ALDOB)",
            "Aldolase C (ALDOC)",
            "Triosephosphate isomerase (TPI1)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase 1 (PGK1)",
            "Phosphoglycerate mutase 1 (PGAM1)",
            "Phosphoglycerate mutase 2 (PGAM2)",
            "Enolase 1 (ENO1)",
            "Enolase 2 (ENO2)",
            "Enolase 3 (ENO3)",
            "Pyruvate kinase M1/2 (PKM)",
            "Pyruvate kinase L/R (PKLR)",
            "Glucokinase (GCK)",
            "6-Phosphofructo-2-kinase (PFKFB1)",
            "6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 2 (PFKFB2)",
            "6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 3 (PFKFB3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Hexokinase 4 (HK4 or GCK)",
                "Phosphoglucose isomerase (GPI)",
                "Phosphofructokinase 1 (PFK1)",
                "Phosphofructokinase 2 (PFK2)",
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Triosephosphate isomerase (TPI1)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Pyruvate kinase M1/2 (PKM)",
                "Pyruvate kinase L/R (PKLR)",
                "Glucokinase (GCK)",
                "6-Phosphofructo-2-kinase (PFKFB1)",
                "6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 2 (PFKFB2)",
                "6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 3 (PFKFB3)"
            ],
            "mismatches": [],
            "true_referents": [
                "6-Phosphofructo-2-kinase (PFKFB1)",
                "6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 2 (PFKFB2)",
                "6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 3 (PFKFB3)",
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Glucokinase (GCK)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Hexokinase 4 (HK4 or GCK)",
                "Phosphofructokinase 1 (PFK1)",
                "Phosphofructokinase 2 (PFK2)",
                "Phosphoglucose isomerase (GPI)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Pyruvate kinase L/R (PKLR)",
                "Pyruvate kinase M1/2 (PKM)",
                "Triosephosphate isomerase (TPI1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "HK1 (Hexokinase 1)",
            "HK2 (Hexokinase 2)",
            "HK3 (Hexokinase 3)",
            "PKM (Pyruvate Kinase M)",
            "PKLR (Pyruvate Kinase, Liver and Red Blood Cell)",
            "PFKFB1 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 1)",
            "PFKFB2 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 2)",
            "PFKFB3 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 3)",
            "PFKFB4 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 4)",
            "ALDOA (Aldolase A)",
            "ALDOB (Aldolase B)",
            "ALDOC (Aldolase C)",
            "TPI1 (Triosephosphate Isomerase 1)",
            "GAPDH (Glyceraldehyde-3-phosphate Dehydrogenase)",
            "PGK1 (Phosphoglycerate Kinase 1)",
            "PGAM1 (Phosphoglycerate Mutase 1)",
            "ENO1 (Enolase 1)",
            "ENO2 (Enolase 2)",
            "PKM2 (Pyruvate Kinase M2)",
            "LDHA (Lactate Dehydrogenase A)",
            "LDHB (Lactate Dehydrogenase B)",
            "LDHC (Lactate Dehydrogenase C)",
            "GPI (Glucose-6-Phosphate Isomerase)",
            "PHKA1 (Phosphorylase Kinase Alpha 1)",
            "PHKB (Phosphorylase Kinase Beta)",
            "PHKG1 (Phosphorylase Kinase Gamma 1)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HK1 (Hexokinase 1)",
                "HK2 (Hexokinase 2)",
                "HK3 (Hexokinase 3)",
                "PKM (Pyruvate Kinase M)",
                "PKLR (Pyruvate Kinase, Liver and Red Blood Cell)",
                "PFKFB1 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 1)",
                "PFKFB2 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 2)",
                "PFKFB3 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 3)",
                "PFKFB4 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 4)",
                "ALDOA (Aldolase A)",
                "ALDOB (Aldolase B)",
                "ALDOC (Aldolase C)",
                "TPI1 (Triosephosphate Isomerase 1)",
                "GAPDH (Glyceraldehyde-3-phosphate Dehydrogenase)",
                "PGK1 (Phosphoglycerate Kinase 1)",
                "PGAM1 (Phosphoglycerate Mutase 1)",
                "ENO1 (Enolase 1)",
                "ENO2 (Enolase 2)",
                "PKM2 (Pyruvate Kinase M2)",
                "LDHA (Lactate Dehydrogenase A)",
                "LDHB (Lactate Dehydrogenase B)",
                "LDHC (Lactate Dehydrogenase C)",
                "GPI (Glucose-6-Phosphate Isomerase)",
                "PHKA1 (Phosphorylase Kinase Alpha 1)"
            ],
            "mismatches": [
                "PHKB (Phosphorylase Kinase Beta)",
                "PHKG1 (Phosphorylase Kinase Gamma 1)"
            ],
            "true_referents": [
                "ALDOA (Aldolase A)",
                "ALDOB (Aldolase B)",
                "ALDOC (Aldolase C)",
                "ENO1 (Enolase 1)",
                "ENO2 (Enolase 2)",
                "GAPDH (Glyceraldehyde-3-phosphate Dehydrogenase)",
                "GPI (Glucose-6-Phosphate Isomerase)",
                "HK1 (Hexokinase 1)",
                "HK2 (Hexokinase 2)",
                "HK3 (Hexokinase 3)",
                "LDHA (Lactate Dehydrogenase A)",
                "LDHB (Lactate Dehydrogenase B)",
                "LDHC (Lactate Dehydrogenase C)",
                "PFKFB1 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 1)",
                "PFKFB2 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 2)",
                "PFKFB3 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 3)",
                "PFKFB4 (6-Phosphofructo-2-kinase/fructose-2,6-bisphosphatase 4)",
                "PGAM1 (Phosphoglycerate Mutase 1)",
                "PGK1 (Phosphoglycerate Kinase 1)",
                "PHKA1 (Phosphorylase Kinase Alpha 1)",
                "PKLR (Pyruvate Kinase, Liver and Red Blood Cell)",
                "PKM (Pyruvate Kinase M)",
                "PKM2 (Pyruvate Kinase M2)",
                "TPI1 (Triosephosphate Isomerase 1)"
            ],
            "TP": 24,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase 1 (HK1)",
            "Hexokinase 2 (HK2)",
            "Hexokinase 3 (HK3)",
            "Glucokinase (GCK)",
            "Glucose-6-phosphate isomerase (GPI)",
            "Phosphofructokinase, liver type (PFKL)",
            "Phosphofructokinase, muscle type (PFKM)",
            "Phosphofructokinase, platelet type (PFKP)",
            "Aldolase A (ALDOA)",
            "Aldolase B (ALDOB)",
            "Aldolase C (ALDOC)",
            "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
            "Triosephosphate isomerase 1 (TPI1)",
            "Phosphoglycerate kinase 1 (PGK1)",
            "Phosphoglycerate mutase 1 (PGAM1)",
            "Phosphoglycerate mutase 2 (PGAM2)",
            "Enolase 1 (ENO1)",
            "Enolase 2 (ENO2)",
            "Enolase 3 (ENO3)",
            "Pyruvate kinase M1/2 (PKM)",
            "Pyruvate kinase L/R (PKLR)",
            "Lactate dehydrogenase A (LDHA)",
            "Lactate dehydrogenase B (LDHB)",
            "Lactate dehydrogenase C (LDHC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Glucokinase (GCK)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Phosphofructokinase, liver type (PFKL)",
                "Phosphofructokinase, muscle type (PFKM)",
                "Phosphofructokinase, platelet type (PFKP)",
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Triosephosphate isomerase 1 (TPI1)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Pyruvate kinase M1/2 (PKM)",
                "Pyruvate kinase L/R (PKLR)",
                "Lactate dehydrogenase A (LDHA)",
                "Lactate dehydrogenase B (LDHB)",
                "Lactate dehydrogenase C (LDHC)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Glucokinase (GCK)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Lactate dehydrogenase A (LDHA)",
                "Lactate dehydrogenase B (LDHB)",
                "Lactate dehydrogenase C (LDHC)",
                "Phosphofructokinase, liver type (PFKL)",
                "Phosphofructokinase, muscle type (PFKM)",
                "Phosphofructokinase, platelet type (PFKP)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Pyruvate kinase L/R (PKLR)",
                "Pyruvate kinase M1/2 (PKM)",
                "Triosephosphate isomerase 1 (TPI1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Phosphofructokinase (PFK)",
            "Aldolase (ALDO)",
            "Triosephosphate isomerase (TPI)",
            "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK)",
            "Phosphoglycerate mutase (PGM)",
            "Enolase (ENO)",
            "Pyruvate kinase (PK)",
            "Glucose-6-phosphate isomerase (GPI)",
            "Fructose-1,6-bisphosphatase (FBP)",
            "Pyruvate dehydrogenase (PDH)",
            "Phosphoenolpyruvate carboxykinase (PEPCK)",
            "Lactate dehydrogenase (LDH)",
            "Pyruvate carboxylase (PC)",
            "Phosphoenolpyruvate carboxylase (PEPC)",
            "Malate dehydrogenase (MDH)",
            "Isocitrate dehydrogenase (IDH)",
            "Alpha-ketoglutarate dehydrogenase (AKGDH)",
            "Succinyl-CoA synthetase (SCS)",
            "Succinate dehydrogenase (SDH)",
            "Fumarase (FH)",
            "Malate dehydrogenase (MDH)",
            "Citrate synthase (CS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Phosphofructokinase (PFK)",
                "Aldolase (ALDO)",
                "Triosephosphate isomerase (TPI)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGM)",
                "Enolase (ENO)",
                "Pyruvate kinase (PK)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Fructose-1,6-bisphosphatase (FBP)",
                "Pyruvate dehydrogenase (PDH)",
                "Phosphoenolpyruvate carboxykinase (PEPCK)",
                "Lactate dehydrogenase (LDH)",
                "Pyruvate carboxylase (PC)",
                "Phosphoenolpyruvate carboxylase (PEPC)",
                "Malate dehydrogenase (MDH)",
                "Isocitrate dehydrogenase (IDH)",
                "Alpha-ketoglutarate dehydrogenase (AKGDH)",
                "Succinyl-CoA synthetase (SCS)",
                "Succinate dehydrogenase (SDH)",
                "Fumarase (FH)",
                "Citrate synthase (CS)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase (ALDO)",
                "Alpha-ketoglutarate dehydrogenase (AKGDH)",
                "Citrate synthase (CS)",
                "Enolase (ENO)",
                "Fructose-1,6-bisphosphatase (FBP)",
                "Fumarase (FH)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase (HK)",
                "Isocitrate dehydrogenase (IDH)",
                "Lactate dehydrogenase (LDH)",
                "Malate dehydrogenase (MDH)",
                "Phosphoenolpyruvate carboxykinase (PEPCK)",
                "Phosphoenolpyruvate carboxylase (PEPC)",
                "Phosphofructokinase (PFK)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGM)",
                "Pyruvate carboxylase (PC)",
                "Pyruvate dehydrogenase (PDH)",
                "Pyruvate kinase (PK)",
                "Succinate dehydrogenase (SDH)",
                "Succinyl-CoA synthetase (SCS)",
                "Triosephosphate isomerase (TPI)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase 1 (HK1)",
            "Hexokinase 2 (HK2)",
            "Hexokinase 3 (HK3)",
            "Glucokinase (GCK)",
            "Phosphoglucose isomerase (GPI)",
            "Phosphofructokinase, muscle type (PFKM)",
            "Phosphofructokinase, liver type (PFKL)",
            "Phosphofructokinase, platelet type (PFKP)",
            "Aldolase A (ALDOA)",
            "Aldolase B (ALDOB)",
            "Aldolase C (ALDOC)",
            "Triose phosphate isomerase 1 (TPI1)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase 1 (PGK1)",
            "Phosphoglycerate kinase 2 (PGK2)",
            "Phosphoglycerate mutase 1 (PGAM1)",
            "Phosphoglycerate mutase 2 (PGAM2)",
            "Phosphoglycerate mutase 4 (PGAM4)",
            "Enolase 1 (ENO1)",
            "Enolase 2 (ENO2)",
            "Enolase 3 (ENO3)",
            "Pyruvate kinase, muscle isoform (PKM)",
            "Pyruvate kinase, liver and red blood cell isoform (PKLR)",
            "Pyruvate kinase-related isoform (PKR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Glucokinase (GCK)",
                "Phosphoglucose isomerase (GPI)",
                "Phosphofructokinase, muscle type (PFKM)",
                "Phosphofructokinase, liver type (PFKL)",
                "Phosphofructokinase, platelet type (PFKP)",
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Triose phosphate isomerase 1 (TPI1)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate kinase 2 (PGK2)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Phosphoglycerate mutase 4 (PGAM4)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Pyruvate kinase, muscle isoform (PKM)",
                "Pyruvate kinase, liver and red blood cell isoform (PKLR)",
                "Pyruvate kinase-related isoform (PKR)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Glucokinase (GCK)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Phosphofructokinase, liver type (PFKL)",
                "Phosphofructokinase, muscle type (PFKM)",
                "Phosphofructokinase, platelet type (PFKP)",
                "Phosphoglucose isomerase (GPI)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate kinase 2 (PGK2)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Phosphoglycerate mutase 4 (PGAM4)",
                "Pyruvate kinase, liver and red blood cell isoform (PKLR)",
                "Pyruvate kinase, muscle isoform (PKM)",
                "Pyruvate kinase-related isoform (PKR)",
                "Triose phosphate isomerase 1 (TPI1)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Glucose-6-phosphate isomerase (GPI)",
            "Phosphofructokinase (PFK)",
            "Aldolase (ALDOA)",
            "Triosephosphate isomerase (TPI1)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK)",
            "Phosphoglycerate mutase (PGAM)",
            "Enolase (ENO)",
            "Pyruvate kinase (PK)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Phosphofructokinase (PFK)",
                "Aldolase (ALDOA)",
                "Triosephosphate isomerase (TPI1)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Enolase (ENO)",
                "Pyruvate kinase (PK)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase (ALDOA)",
                "Enolase (ENO)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase (HK)",
                "Phosphofructokinase (PFK)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Pyruvate kinase (PK)",
                "Triosephosphate isomerase (TPI1)"
            ],
            "TP": 10,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Glucose-6-phosphate isomerase (GPI)",
            "Phosphofructokinase (PFK)",
            "Fructose-bisphosphate aldolase (ALDO)",
            "Triosephosphate isomerase (TPI)",
            "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK)",
            "Phosphoglycerate mutase (PGAM)",
            "Enolase (ENO)",
            "Pyruvate kinase (PK)",
            "Lactate dehydrogenase (LDH)",
            "Glucose-6-phosphatase (G6PC)",
            "Fructose-1,6-bisphosphatase (FBP)",
            "Phosphoenolpyruvate carboxykinase (PCK)",
            "Pyruvate dehydrogenase complex (PDC)",
            "Dihydrolipoamide acetyltransferase (DLAT)",
            "Dihydrolipoamide dehydrogenase (DLD)",
            "Pyruvate dehydrogenase phosphatase (PDP)",
            "Pyruvate dehydrogenase kinase (PDK)",
            "Aldolase B (ALDOB)",
            "Phosphoglycerate mutase 2 (PGAM2)",
            "Enolase 3 (ENO3)",
            "Hexokinase domain containing 1 (HKDC1)",
            "Pyruvate kinase L/R (PKLR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Phosphofructokinase (PFK)",
                "Fructose-bisphosphate aldolase (ALDO)",
                "Triosephosphate isomerase (TPI)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Enolase (ENO)",
                "Pyruvate kinase (PK)",
                "Lactate dehydrogenase (LDH)",
                "Glucose-6-phosphatase (G6PC)",
                "Fructose-1,6-bisphosphatase (FBP)",
                "Phosphoenolpyruvate carboxykinase (PCK)",
                "Pyruvate dehydrogenase complex (PDC)",
                "Dihydrolipoamide acetyltransferase (DLAT)",
                "Dihydrolipoamide dehydrogenase (DLD)",
                "Pyruvate dehydrogenase phosphatase (PDP)",
                "Pyruvate dehydrogenase kinase (PDK)",
                "Aldolase B (ALDOB)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Enolase 3 (ENO3)",
                "Hexokinase domain containing 1 (HKDC1)",
                "Pyruvate kinase L/R (PKLR)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase B (ALDOB)",
                "Dihydrolipoamide acetyltransferase (DLAT)",
                "Dihydrolipoamide dehydrogenase (DLD)",
                "Enolase (ENO)",
                "Enolase 3 (ENO3)",
                "Fructose-1,6-bisphosphatase (FBP)",
                "Fructose-bisphosphate aldolase (ALDO)",
                "Glucose-6-phosphatase (G6PC)",
                "Glucose-6-phosphate isomerase (GPI)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase (HK)",
                "Hexokinase domain containing 1 (HKDC1)",
                "Lactate dehydrogenase (LDH)",
                "Phosphoenolpyruvate carboxykinase (PCK)",
                "Phosphofructokinase (PFK)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Pyruvate dehydrogenase complex (PDC)",
                "Pyruvate dehydrogenase kinase (PDK)",
                "Pyruvate dehydrogenase phosphatase (PDP)",
                "Pyruvate kinase (PK)",
                "Pyruvate kinase L/R (PKLR)",
                "Triosephosphate isomerase (TPI)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "hexokinase gene (HK)",
            "glucose-6-phosphate isomerase gene (GPI)",
            "phosphofructokinase gene (PFK)",
            "fructose-bisphosphate aldolase gene (ALDO)",
            "triosephosphate isomerase gene (TPI)",
            "glyceraldehyde-3-phosphate dehydrogenase gene (GAPDH)",
            "phosphoglycerate kinase gene (PGK)",
            "phosphoglycerate mutase gene (PGM)",
            "enolase gene (ENO)",
            "pyruvate kinase gene (PK)",
            "glucose-6-phosphate dehydrogenase gene (G6PD)",
            "6-phosphogluconate dehydrogenase gene (6PGD)",
            "ribose-5-phosphate isomerase gene (RPI)",
            "ribulose-5-phosphate epimerase gene (RPE)",
            "transketolase gene (TKT)",
            "transaldolase gene (TALDO)",
            "phosphoglucose isomerase gene (PGI)",
            "phosphoglucomutase gene (PGM)",
            "UDP-glucose pyrophosphorylase gene (UGP)",
            "phosphoglucosamine mutase gene (PNGM)",
            "N-acetylglucosamine-6-phosphate deacetylase gene (NAGD)",
            "glucosamine-6-phosphate deaminase gene (GNPDA)",
            "phosphoacetylglucosamine mutase gene (PAGM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "hexokinase gene (HK)",
                "glucose-6-phosphate isomerase gene (GPI)",
                "phosphofructokinase gene (PFK)",
                "fructose-bisphosphate aldolase gene (ALDO)",
                "triosephosphate isomerase gene (TPI)",
                "glyceraldehyde-3-phosphate dehydrogenase gene (GAPDH)",
                "phosphoglycerate kinase gene (PGK)",
                "phosphoglycerate mutase gene (PGM)",
                "enolase gene (ENO)",
                "pyruvate kinase gene (PK)",
                "glucose-6-phosphate dehydrogenase gene (G6PD)",
                "6-phosphogluconate dehydrogenase gene (6PGD)",
                "ribose-5-phosphate isomerase gene (RPI)",
                "ribulose-5-phosphate epimerase gene (RPE)",
                "transketolase gene (TKT)",
                "transaldolase gene (TALDO)",
                "phosphoglucose isomerase gene (PGI)",
                "phosphoglucomutase gene (PGM)",
                "UDP-glucose pyrophosphorylase gene (UGP)",
                "phosphoglucosamine mutase gene (PNGM)",
                "N-acetylglucosamine-6-phosphate deacetylase gene (NAGD)",
                "glucosamine-6-phosphate deaminase gene (GNPDA)",
                "phosphoacetylglucosamine mutase gene (PAGM)"
            ],
            "mismatches": [],
            "true_referents": [
                "6-phosphogluconate dehydrogenase gene (6PGD)",
                "N-acetylglucosamine-6-phosphate deacetylase gene (NAGD)",
                "UDP-glucose pyrophosphorylase gene (UGP)",
                "enolase gene (ENO)",
                "fructose-bisphosphate aldolase gene (ALDO)",
                "glucosamine-6-phosphate deaminase gene (GNPDA)",
                "glucose-6-phosphate dehydrogenase gene (G6PD)",
                "glucose-6-phosphate isomerase gene (GPI)",
                "glyceraldehyde-3-phosphate dehydrogenase gene (GAPDH)",
                "hexokinase gene (HK)",
                "phosphoacetylglucosamine mutase gene (PAGM)",
                "phosphofructokinase gene (PFK)",
                "phosphoglucomutase gene (PGM)",
                "phosphoglucosamine mutase gene (PNGM)",
                "phosphoglucose isomerase gene (PGI)",
                "phosphoglycerate kinase gene (PGK)",
                "phosphoglycerate mutase gene (PGM)",
                "pyruvate kinase gene (PK)",
                "ribose-5-phosphate isomerase gene (RPI)",
                "ribulose-5-phosphate epimerase gene (RPE)",
                "transaldolase gene (TALDO)",
                "transketolase gene (TKT)",
                "triosephosphate isomerase gene (TPI)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase (HK)",
            "Phosphoglucose isomerase (PGI)",
            "Phosphofructokinase (PFK)",
            "Aldolase (ALDO)",
            "Triosephosphate isomerase (TPI)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK)",
            "Phosphoglycerate mutase (PGAM)",
            "Enolase (ENO)",
            "Pyruvate kinase (PK)",
            "Glucose-6-phosphate dehydrogenase (G6PD)",
            "6-phosphogluconate dehydrogenase (6PGD)",
            "Transketolase (TKT)",
            "Transaldolase (TALDO)",
            "Acetyl-CoA carboxylase (ACC)",
            "Fatty acid synthase (FASN)",
            "Pyruvate dehydrogenase (PDH)",
            "Citrate synthase (CS)",
            "Aconitase (ACO)",
            "Isocitrate dehydrogenase (IDH)",
            "Alpha-ketoglutarate dehydrogenase (OGDH)",
            "Succinyl-CoA synthetase (SUCLG)",
            "Fumarase (FH)",
            "Malate dehydrogenase (MDH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase (HK)",
                "Phosphoglucose isomerase (PGI)",
                "Phosphofructokinase (PFK)",
                "Aldolase (ALDO)",
                "Triosephosphate isomerase (TPI)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Enolase (ENO)",
                "Pyruvate kinase (PK)",
                "Glucose-6-phosphate dehydrogenase (G6PD)",
                "6-phosphogluconate dehydrogenase (6PGD)",
                "Transketolase (TKT)",
                "Transaldolase (TALDO)",
                "Acetyl-CoA carboxylase (ACC)",
                "Fatty acid synthase (FASN)",
                "Pyruvate dehydrogenase (PDH)",
                "Citrate synthase (CS)",
                "Aconitase (ACO)",
                "Isocitrate dehydrogenase (IDH)",
                "Alpha-ketoglutarate dehydrogenase (OGDH)",
                "Succinyl-CoA synthetase (SUCLG)",
                "Fumarase (FH)",
                "Malate dehydrogenase (MDH)"
            ],
            "mismatches": [],
            "true_referents": [
                "6-phosphogluconate dehydrogenase (6PGD)",
                "Acetyl-CoA carboxylase (ACC)",
                "Aconitase (ACO)",
                "Aldolase (ALDO)",
                "Alpha-ketoglutarate dehydrogenase (OGDH)",
                "Citrate synthase (CS)",
                "Enolase (ENO)",
                "Fatty acid synthase (FASN)",
                "Fumarase (FH)",
                "Glucose-6-phosphate dehydrogenase (G6PD)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase (HK)",
                "Isocitrate dehydrogenase (IDH)",
                "Malate dehydrogenase (MDH)",
                "Phosphofructokinase (PFK)",
                "Phosphoglucose isomerase (PGI)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGAM)",
                "Pyruvate dehydrogenase (PDH)",
                "Pyruvate kinase (PK)",
                "Succinyl-CoA synthetase (SUCLG)",
                "Transaldolase (TALDO)",
                "Transketolase (TKT)",
                "Triosephosphate isomerase (TPI)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Glucose-6-phosphatase (G6Pase) (G6Pase)",
            "Phosphoglucomutase (PGM)",
            "Phosphoglucose isomerase (PGI)",
            "Phosphofructokinase (PFK)",
            "Fructose-1,6-bisphosphatase (FBPase)",
            "Triosephosphate isomerase (TPI)",
            "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase (PGK)",
            "Phosphoglycerate mutase (PGM)",
            "Enolase (ENO)",
            "Lactate dehydrogenase (LDH)",
            "Pyruvate kinase (PK)",
            "Pyruvate carboxylase (PC)",
            "Malate dehydrogenase (MDH)",
            "Isocitrate dehydrogenase (IDH)",
            "Alpha-ketoglutarate dehydrogenase (AKGDH)",
            "Succinyl-CoA synthetase (SCS)",
            "Succinate dehydrogenase (SDH)",
            "Fumarase (FUM)",
            "MALATE SYNTHASE (MLS)",
            "Malic enzyme (ME)",
            "Aconitase (ACO)",
            "Isocitrate lyase (ICL)",
            "Phosphoenolpyruvate carboxykinase (PEPCK)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glucose-6-phosphatase (G6Pase) (G6Pase)",
                "Phosphoglucomutase (PGM)",
                "Phosphoglucose isomerase (PGI)",
                "Phosphofructokinase (PFK)",
                "Fructose-1,6-bisphosphatase (FBPase)",
                "Triosephosphate isomerase (TPI)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGM)",
                "Enolase (ENO)",
                "Lactate dehydrogenase (LDH)",
                "Pyruvate kinase (PK)",
                "Pyruvate carboxylase (PC)",
                "Malate dehydrogenase (MDH)",
                "Isocitrate dehydrogenase (IDH)",
                "Alpha-ketoglutarate dehydrogenase (AKGDH)",
                "Succinyl-CoA synthetase (SCS)",
                "Succinate dehydrogenase (SDH)",
                "Fumarase (FUM)",
                "MALATE SYNTHASE (MLS)",
                "Malic enzyme (ME)",
                "Aconitase (ACO)",
                "Isocitrate lyase (ICL)",
                "Phosphoenolpyruvate carboxykinase (PEPCK)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aconitase (ACO)",
                "Alpha-ketoglutarate dehydrogenase (AKGDH)",
                "Enolase (ENO)",
                "Fructose-1,6-bisphosphatase (FBPase)",
                "Fumarase (FUM)",
                "Glucose-6-phosphatase (G6Pase) (G6Pase)",
                "Glyceraldehyde 3-phosphate dehydrogenase (GAPDH)",
                "Isocitrate dehydrogenase (IDH)",
                "Isocitrate lyase (ICL)",
                "Lactate dehydrogenase (LDH)",
                "MALATE SYNTHASE (MLS)",
                "Malate dehydrogenase (MDH)",
                "Malic enzyme (ME)",
                "Phosphoenolpyruvate carboxykinase (PEPCK)",
                "Phosphofructokinase (PFK)",
                "Phosphoglucomutase (PGM)",
                "Phosphoglucose isomerase (PGI)",
                "Phosphoglycerate kinase (PGK)",
                "Phosphoglycerate mutase (PGM)",
                "Pyruvate carboxylase (PC)",
                "Pyruvate kinase (PK)",
                "Succinate dehydrogenase (SDH)",
                "Succinyl-CoA synthetase (SCS)",
                "Triosephosphate isomerase (TPI)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": [
            "Hexokinase 1 (HK1)",
            "Hexokinase 2 (HK2)",
            "Hexokinase 3 (HK3)",
            "Glucokinase (GCK)",
            "Glucose-6-phosphatase (G6PC)",
            "Fructose-1,6-bisphosphatase 1 (FBP1)",
            "Fructose-1,6-bisphosphatase 2 (FBP2)",
            "Aldolase A (ALDOA)",
            "Aldolase B (ALDOB)",
            "Aldolase C (ALDOC)",
            "Triosephosphate isomerase 1 (TPI1)",
            "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
            "Phosphoglycerate kinase 1 (PGK1)",
            "Phosphoglycerate kinase 2 (PGK2)",
            "Phosphoglycerate mutase 1 (PGAM1)",
            "Phosphoglycerate mutase 2 (PGAM2)",
            "Enolase 1 (ENO1)",
            "Enolase 2 (ENO2)",
            "Enolase 3 (ENO3)",
            "Pyruvate kinase M1 (PKM)",
            "Pyruvate kinase M2 (PKM2)",
            "Pyruvate kinase L (PKL)",
            "Pyruvate kinase R (PKR)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Glucokinase (GCK)",
                "Glucose-6-phosphatase (G6PC)",
                "Fructose-1,6-bisphosphatase 1 (FBP1)",
                "Fructose-1,6-bisphosphatase 2 (FBP2)",
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Triosephosphate isomerase 1 (TPI1)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate kinase 2 (PGK2)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Pyruvate kinase M1 (PKM)",
                "Pyruvate kinase M2 (PKM2)",
                "Pyruvate kinase L (PKL)",
                "Pyruvate kinase R (PKR)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aldolase A (ALDOA)",
                "Aldolase B (ALDOB)",
                "Aldolase C (ALDOC)",
                "Enolase 1 (ENO1)",
                "Enolase 2 (ENO2)",
                "Enolase 3 (ENO3)",
                "Fructose-1,6-bisphosphatase 1 (FBP1)",
                "Fructose-1,6-bisphosphatase 2 (FBP2)",
                "Glucokinase (GCK)",
                "Glucose-6-phosphatase (G6PC)",
                "Glyceraldehyde-3-phosphate dehydrogenase (GAPDH)",
                "Hexokinase 1 (HK1)",
                "Hexokinase 2 (HK2)",
                "Hexokinase 3 (HK3)",
                "Phosphoglycerate kinase 1 (PGK1)",
                "Phosphoglycerate kinase 2 (PGK2)",
                "Phosphoglycerate mutase 1 (PGAM1)",
                "Phosphoglycerate mutase 2 (PGAM2)",
                "Pyruvate kinase L (PKL)",
                "Pyruvate kinase M1 (PKM)",
                "Pyruvate kinase M2 (PKM2)",
                "Pyruvate kinase R (PKR)",
                "Triosephosphate isomerase 1 (TPI1)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "glycolysis pathway gene",
        "domain": "biology",
        "response": "{\n    \"Referent\": \"Glucose-6-phosphate dehydrogenase\",\n    \"Canonical name\": \"G6PDH\"\n},\n{\n    \"Referent\": \"Phosphoglycerate kinase\",\n    \"Canonical name\": \"PGK\"\n},\n{\n    \"Referent\": \"Pyruvate kinase\",\n    \"Canonical name\": \"PK\"\n},\n{\n    \"Referent\": \"Triosephosphate isomerase\",\n    \"Canonical name\": \"TPI\"\n}",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]