[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Cystinuria",
            "Alkaptonuria",
            "Argininemia",
            "Citrullinemia",
            "Methylmalonic acidemia",
            "Propionic acidemia",
            "Isovaleric acidemia",
            "Holocarboxylase synthetase deficiency",
            "3-Methylcrotonyl-CoA carboxylase deficiency",
            "Glutaric aciduria type I",
            "Tyrosinosis",
            "Cystathionine beta-synthase deficiency",
            "Succinyl-CoA:3-oxoacid CoA transferase deficiency",
            "Hypermethioninemia",
            "Homocystinuria due to cobalamin C defect",
            "Homocystinuria due to cobalamin F defect",
            "Homocystinuria due to cobalamin E defect",
            "Homocystinuria due to cobalamin D defect",
            "Homocystinuria due to methylenetetrahydrofolate reductase deficiency",
            "Homocystinuria due to transcobalamin II deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Cystinuria",
                "Alkaptonuria",
                "Argininemia",
                "Citrullinemia",
                "Methylmalonic acidemia",
                "Propionic acidemia",
                "Isovaleric acidemia",
                "Holocarboxylase synthetase deficiency",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Glutaric aciduria type I",
                "Tyrosinosis",
                "Cystathionine beta-synthase deficiency",
                "Succinyl-CoA:3-oxoacid CoA transferase deficiency",
                "Hypermethioninemia",
                "Homocystinuria due to cobalamin C defect",
                "Homocystinuria due to cobalamin F defect",
                "Homocystinuria due to cobalamin E defect",
                "Homocystinuria due to cobalamin D defect",
                "Homocystinuria due to methylenetetrahydrofolate reductase deficiency",
                "Homocystinuria due to transcobalamin II deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Alkaptonuria",
                "Argininemia",
                "Citrullinemia",
                "Cystathionine beta-synthase deficiency",
                "Cystinuria",
                "Glutaric aciduria type I",
                "Holocarboxylase synthetase deficiency",
                "Homocystinuria",
                "Homocystinuria due to cobalamin C defect",
                "Homocystinuria due to cobalamin D defect",
                "Homocystinuria due to cobalamin E defect",
                "Homocystinuria due to cobalamin F defect",
                "Homocystinuria due to methylenetetrahydrofolate reductase deficiency",
                "Homocystinuria due to transcobalamin II deficiency",
                "Hypermethioninemia",
                "Isovaleric acidemia",
                "Maple syrup urine disease (MSUD)",
                "Methylmalonic acidemia",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "Succinyl-CoA:3-oxoacid CoA transferase deficiency",
                "Tyrosinemia",
                "Tyrosinosis"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Alkaptonuria",
            "Cystinuria",
            "Methylmalonic Aciduria",
            "Propionic Acidemia",
            "Isovaleric Acidemia",
            "Glutaric Acidemia Type I",
            "Hyperlysinemia",
            "Glycine Encephalopathy (Nonketotic Hyperglycinemia, NKH)",
            "Argininosuccinic Aciduria",
            "Citrullinemia",
            "Orotic Aciduria",
            "Carnitine Deficiency",
            "Hyperprolinemia",
            "Hyperornithinemia",
            "Hyperglycinemia",
            "Glycogen Storage Disease Type III (Cori Disease)",
            "Galactosemia",
            "Fibroblast Growth Factor 21 Deficiency",
            "Cystathionine Beta-Synthase Deficiency",
            "Succinyl-CoA:3-Ketoacid CoA Transferase Deficiency (SCOT Deficiency)",
            "4-Hydroxyphenylpyruvate Dioxygenase Deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Alkaptonuria",
                "Cystinuria",
                "Methylmalonic Aciduria",
                "Propionic Acidemia",
                "Isovaleric Acidemia",
                "Glutaric Acidemia Type I",
                "Hyperlysinemia",
                "Glycine Encephalopathy (Nonketotic Hyperglycinemia, NKH)",
                "Argininosuccinic Aciduria",
                "Citrullinemia",
                "Orotic Aciduria",
                "Carnitine Deficiency",
                "Hyperprolinemia",
                "Hyperornithinemia",
                "Hyperglycinemia",
                "Glycogen Storage Disease Type III (Cori Disease)",
                "Galactosemia",
                "Fibroblast Growth Factor 21 Deficiency",
                "Cystathionine Beta-Synthase Deficiency",
                "Succinyl-CoA:3-Ketoacid CoA Transferase Deficiency (SCOT Deficiency)"
            ],
            "mismatches": [
                "4-Hydroxyphenylpyruvate Dioxygenase Deficiency"
            ],
            "true_referents": [
                "Alkaptonuria",
                "Argininosuccinic Aciduria",
                "Carnitine Deficiency",
                "Citrullinemia",
                "Cystathionine Beta-Synthase Deficiency",
                "Cystinuria",
                "Fibroblast Growth Factor 21 Deficiency",
                "Galactosemia",
                "Glutaric Acidemia Type I",
                "Glycine Encephalopathy (Nonketotic Hyperglycinemia, NKH)",
                "Glycogen Storage Disease Type III (Cori Disease)",
                "Homocystinuria",
                "Hyperglycinemia",
                "Hyperlysinemia",
                "Hyperornithinemia",
                "Hyperprolinemia",
                "Isovaleric Acidemia",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Aciduria",
                "Orotic Aciduria",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Succinyl-CoA:3-Ketoacid CoA Transferase Deficiency (SCOT Deficiency)",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Alkaptonuria",
            "Cystinuria",
            "Hartnup disease",
            "Cystathioninuria",
            "Argininosuccinic aciduria",
            "Arginemia",
            "Citrullinemia",
            "Hyperornithinemia-Homocitrullinuria-Hyperammonemia Syndrome (HHH Syndrome)",
            "Isovaleric acidemia",
            "Methylmalonic acidemia",
            "Propionic acidemia",
            "Glutaric acidemia type I",
            "2-Methylbutyryl-CoA dehydrogenase deficiency",
            "3-Methylcrotonyl-CoA carboxylase deficiency",
            "Beta-ketothiolase deficiency",
            "Dibasic aminoaciduria",
            "Glycine encephalopathy (Nonketotic hyperglycinemia)",
            "Hyperprolinemia",
            "Hyperlysinemia",
            "Hypermethioninemia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Alkaptonuria",
                "Cystinuria",
                "Hartnup disease",
                "Cystathioninuria",
                "Argininosuccinic aciduria",
                "Arginemia",
                "Citrullinemia",
                "Hyperornithinemia-Homocitrullinuria-Hyperammonemia Syndrome (HHH Syndrome)",
                "Isovaleric acidemia",
                "Methylmalonic acidemia",
                "Propionic acidemia",
                "Glutaric acidemia type I",
                "2-Methylbutyryl-CoA dehydrogenase deficiency",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Beta-ketothiolase deficiency",
                "Dibasic aminoaciduria",
                "Glycine encephalopathy (Nonketotic hyperglycinemia)",
                "Hyperprolinemia",
                "Hyperlysinemia",
                "Hypermethioninemia"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Methylbutyryl-CoA dehydrogenase deficiency",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Alkaptonuria",
                "Arginemia",
                "Argininosuccinic aciduria",
                "Beta-ketothiolase deficiency",
                "Citrullinemia",
                "Cystathioninuria",
                "Cystinuria",
                "Dibasic aminoaciduria",
                "Glutaric acidemia type I",
                "Glycine encephalopathy (Nonketotic hyperglycinemia)",
                "Hartnup disease",
                "Homocystinuria",
                "Hyperlysinemia",
                "Hypermethioninemia",
                "Hyperornithinemia-Homocitrullinuria-Hyperammonemia Syndrome (HHH Syndrome)",
                "Hyperprolinemia",
                "Isovaleric acidemia",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic acidemia",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria (HCU)",
            "Alkaptonuria (AKU)",
            "Cystinuria (CYST)",
            "Hartnup Disease (HND)",
            "Tyrosinemia Type I (TYR1)",
            "Tyrosinemia Type II (TYR2)",
            "Tyrosinemia Type III (TYR3)",
            "Galactosialidosis (GS)",
            "Hyperlysinemia (HLY)",
            "Hyperargininemia (HAR)",
            "Ornithine Transcarbamylase Deficiency (OTCD)",
            "Citrullinemia Type I (CIT1)",
            "Argininemia (ARG)",
            "Proline Dehydrogenase Deficiency (PDHD)",
            "Glycine Encephalopathy (GNE)",
            "Histidinemia (HIS)",
            "Iminoglycinuria (IGU)",
            "Xanthurenic Aciduria (XAU)",
            "Hypermethioninemia (HMT)",
            "Threoninemia (THR)",
            "Alanine Aminotransferase Deficiency (AATD)",
            "Glutamate Formyltransferase Deficiency (GFTD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria (HCU)",
                "Alkaptonuria (AKU)",
                "Cystinuria (CYST)",
                "Hartnup Disease (HND)",
                "Tyrosinemia Type I (TYR1)",
                "Tyrosinemia Type II (TYR2)",
                "Tyrosinemia Type III (TYR3)",
                "Galactosialidosis (GS)",
                "Hyperlysinemia (HLY)",
                "Hyperargininemia (HAR)",
                "Ornithine Transcarbamylase Deficiency (OTCD)",
                "Citrullinemia Type I (CIT1)",
                "Argininemia (ARG)",
                "Proline Dehydrogenase Deficiency (PDHD)",
                "Glycine Encephalopathy (GNE)",
                "Histidinemia (HIS)",
                "Iminoglycinuria (IGU)",
                "Xanthurenic Aciduria (XAU)",
                "Hypermethioninemia (HMT)",
                "Threoninemia (THR)",
                "Alanine Aminotransferase Deficiency (AATD)",
                "Glutamate Formyltransferase Deficiency (GFTD)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alanine Aminotransferase Deficiency (AATD)",
                "Alkaptonuria (AKU)",
                "Argininemia (ARG)",
                "Citrullinemia Type I (CIT1)",
                "Cystinuria (CYST)",
                "Galactosialidosis (GS)",
                "Glutamate Formyltransferase Deficiency (GFTD)",
                "Glycine Encephalopathy (GNE)",
                "Hartnup Disease (HND)",
                "Histidinemia (HIS)",
                "Homocystinuria (HCU)",
                "Hyperargininemia (HAR)",
                "Hyperlysinemia (HLY)",
                "Hypermethioninemia (HMT)",
                "Iminoglycinuria (IGU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Ornithine Transcarbamylase Deficiency (OTCD)",
                "Phenylketonuria (PKU)",
                "Proline Dehydrogenase Deficiency (PDHD)",
                "Threoninemia (THR)",
                "Tyrosinemia Type I (TYR1)",
                "Tyrosinemia Type II (TYR2)",
                "Tyrosinemia Type III (TYR3)",
                "Xanthurenic Aciduria (XAU)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Alkaptonuria",
            "Tyrosinemia Type I",
            "Tyrosinemia Type II",
            "Tyrosinemia Type III",
            "Cystinuria",
            "Hartnup Disease",
            "Non-Ketotic Hyperglycinemia",
            "Isovaleric Acidemia",
            "Methylmalonic Acidemia",
            "Propionic Acidemia",
            "Ornithine Transcarbamylase Deficiency (OTC)",
            "Argininosuccinic Aciduria",
            "Citrullinemia",
            "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
            "Glycine Encephalopathy",
            "Histidinemia",
            "Hyperprolinemia Type I",
            "Hyperprolinemia Type II",
            "Argininemia",
            "Cystathionine Beta-Synthase Deficiency",
            "Gyrate Atrophy of the Choroid and Retina"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Alkaptonuria",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III",
                "Cystinuria",
                "Hartnup Disease",
                "Non-Ketotic Hyperglycinemia",
                "Isovaleric Acidemia",
                "Methylmalonic Acidemia",
                "Propionic Acidemia",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Argininosuccinic Aciduria",
                "Citrullinemia",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
                "Glycine Encephalopathy",
                "Histidinemia",
                "Hyperprolinemia Type I",
                "Hyperprolinemia Type II",
                "Argininemia",
                "Cystathionine Beta-Synthase Deficiency",
                "Gyrate Atrophy of the Choroid and Retina"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaptonuria",
                "Argininemia",
                "Argininosuccinic Aciduria",
                "Citrullinemia",
                "Cystathionine Beta-Synthase Deficiency",
                "Cystinuria",
                "Glycine Encephalopathy",
                "Gyrate Atrophy of the Choroid and Retina",
                "Hartnup Disease",
                "Histidinemia",
                "Homocystinuria",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
                "Hyperprolinemia Type I",
                "Hyperprolinemia Type II",
                "Isovaleric Acidemia",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Non-Ketotic Hyperglycinemia",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Cystinuria",
            "Tyrosinemia Type I",
            "Argininemia",
            "Homocystinuria",
            "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
            "Aminoacidopathy due to Glutamate Dehydrogenase Deficiency",
            "Citrullinemia",
            "Glycogen Storage Disease Type I (Von Gierke Disease)",
            "Sodium-dependent neutral amino acid transporter defect",
            "Hartnup Disorder",
            "Lysinuric Protein Intolerance (LPI)",
            "Beta-ketothiolase Deficiency",
            "Aminoaciduria due to renal tubular dysfunction",
            "Acidosis due to Cystathionine Beta-Synthase Deficiency",
            "Glutaric Acidemia Type I",
            "Arginine Vasopressin Nephrogenic Diabetes Insipidus",
            "Dicarboxylic Aciduria",
            "Urea Cycle Disorders",
            "Short Bowel Syndrome with Amino Acid Malabsorption",
            "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome",
            "L-2-Hydroxyglutaric Aciduria",
            "Methylmalonic Acidemia",
            "Isovaleric Acidemia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Cystinuria",
                "Tyrosinemia Type I",
                "Argininemia",
                "Homocystinuria",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Aminoacidopathy due to Glutamate Dehydrogenase Deficiency",
                "Citrullinemia",
                "Glycogen Storage Disease Type I (Von Gierke Disease)",
                "Sodium-dependent neutral amino acid transporter defect",
                "Hartnup Disorder",
                "Lysinuric Protein Intolerance (LPI)",
                "Beta-ketothiolase Deficiency",
                "Aminoaciduria due to renal tubular dysfunction",
                "Acidosis due to Cystathionine Beta-Synthase Deficiency",
                "Glutaric Acidemia Type I",
                "Arginine Vasopressin Nephrogenic Diabetes Insipidus",
                "Dicarboxylic Aciduria",
                "Urea Cycle Disorders",
                "Short Bowel Syndrome with Amino Acid Malabsorption",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome",
                "L-2-Hydroxyglutaric Aciduria",
                "Methylmalonic Acidemia"
            ],
            "mismatches": [
                "Isovaleric Acidemia"
            ],
            "true_referents": [
                "Acidosis due to Cystathionine Beta-Synthase Deficiency",
                "Aminoacidopathy due to Glutamate Dehydrogenase Deficiency",
                "Aminoaciduria due to renal tubular dysfunction",
                "Arginine Vasopressin Nephrogenic Diabetes Insipidus",
                "Argininemia",
                "Beta-ketothiolase Deficiency",
                "Citrullinemia",
                "Cystinuria",
                "Dicarboxylic Aciduria",
                "Glutaric Acidemia Type I",
                "Glycogen Storage Disease Type I (Von Gierke Disease)",
                "Hartnup Disorder",
                "Homocystinuria",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome",
                "L-2-Hydroxyglutaric Aciduria",
                "Lysinuric Protein Intolerance (LPI)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Phenylketonuria (PKU)",
                "Short Bowel Syndrome with Amino Acid Malabsorption",
                "Sodium-dependent neutral amino acid transporter defect",
                "Tyrosinemia Type I",
                "Urea Cycle Disorders"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Cystinuria (CYSTIN)",
            "Homocystinuria (HCU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Phenylketonuria (PKU)",
            "Tyrosinemia (TYR)",
            "Alkaptonuria (AKU)",
            "Argininosuccinic Aciduria (ASA)",
            "Citrullinemia (CITR)",
            "Hyperammonemia (HA)",
            "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
            "Hyperphenylalaninemia (HPA)",
            "Ornithine Transcarbamylase Deficiency (OTC)",
            "Propionic Acidemia (PA)",
            "Methylmalonic Acidemia (MMA)",
            "Glutaric Acidemia Type 1 (GA1)",
            "Glutaric Acidemia Type 2 (GA2)",
            "Isovaleric Acidemia (IVA)",
            "3-Methylcrotonyl-CoA Carboxylase Deficiency (3-MCC)",
            "Oculocutaneous Albinism (OCA)",
            "Hartnup Disease (HND)",
            "Glycine Encephalopathy (GE)",
            "Hyperprolinemia Type 1 (HP1)",
            "Hyperprolinemia Type 2 (HP2)",
            "Nonketotic Hyperglycinemia (NKH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystinuria (CYSTIN)",
                "Homocystinuria (HCU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Phenylketonuria (PKU)",
                "Tyrosinemia (TYR)",
                "Alkaptonuria (AKU)",
                "Argininosuccinic Aciduria (ASA)",
                "Citrullinemia (CITR)",
                "Hyperammonemia (HA)",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
                "Hyperphenylalaninemia (HPA)",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Propionic Acidemia (PA)",
                "Methylmalonic Acidemia (MMA)",
                "Glutaric Acidemia Type 1 (GA1)",
                "Glutaric Acidemia Type 2 (GA2)",
                "Isovaleric Acidemia (IVA)",
                "3-Methylcrotonyl-CoA Carboxylase Deficiency (3-MCC)",
                "Oculocutaneous Albinism (OCA)",
                "Hartnup Disease (HND)",
                "Glycine Encephalopathy (GE)",
                "Hyperprolinemia Type 1 (HP1)",
                "Hyperprolinemia Type 2 (HP2)",
                "Nonketotic Hyperglycinemia (NKH)"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA Carboxylase Deficiency (3-MCC)",
                "Alkaptonuria (AKU)",
                "Argininosuccinic Aciduria (ASA)",
                "Citrullinemia (CITR)",
                "Cystinuria (CYSTIN)",
                "Glutaric Acidemia Type 1 (GA1)",
                "Glutaric Acidemia Type 2 (GA2)",
                "Glycine Encephalopathy (GE)",
                "Hartnup Disease (HND)",
                "Homocystinuria (HCU)",
                "Hyperammonemia (HA)",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
                "Hyperphenylalaninemia (HPA)",
                "Hyperprolinemia Type 1 (HP1)",
                "Hyperprolinemia Type 2 (HP2)",
                "Isovaleric Acidemia (IVA)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia (MMA)",
                "Nonketotic Hyperglycinemia (NKH)",
                "Oculocutaneous Albinism (OCA)",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia (PA)",
                "Tyrosinemia (TYR)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Homocystinuria",
            "Cystinuria",
            "Hartnup disease",
            "Tyrosinemia",
            "Alkaptonuria",
            "Argininosuccinic aciduria",
            "Citrullinemia",
            "Histidinemia",
            "Isoleucine hydroxylase deficiency",
            "Leucinosis",
            "Lysinuric protein intolerance",
            "Methylmalonic acidemia",
            "Ornithinemia",
            "Phenylalanine hydroxylase deficiency",
            "Propionic acidemia",
            "Tryptophanuria",
            "Tyrosine aminotransferase deficiency",
            "Tyrosinemia type I",
            "Tyrosinemia type II",
            "Tyrosinemia type III",
            "Tyrosinemia type IV",
            "Urea cycle disorders",
            "Valinemia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Homocystinuria",
                "Cystinuria",
                "Hartnup disease",
                "Tyrosinemia",
                "Alkaptonuria",
                "Argininosuccinic aciduria",
                "Citrullinemia",
                "Histidinemia",
                "Isoleucine hydroxylase deficiency",
                "Leucinosis",
                "Lysinuric protein intolerance",
                "Methylmalonic acidemia",
                "Ornithinemia",
                "Phenylalanine hydroxylase deficiency",
                "Propionic acidemia",
                "Tryptophanuria",
                "Tyrosine aminotransferase deficiency",
                "Tyrosinemia type I",
                "Tyrosinemia type II",
                "Tyrosinemia type III",
                "Tyrosinemia type IV",
                "Urea cycle disorders"
            ],
            "mismatches": [
                "Valinemia"
            ],
            "true_referents": [
                "Alkaptonuria",
                "Argininosuccinic aciduria",
                "Citrullinemia",
                "Cystinuria",
                "Hartnup disease",
                "Histidinemia",
                "Homocystinuria",
                "Isoleucine hydroxylase deficiency",
                "Leucinosis",
                "Lysinuric protein intolerance",
                "Maple syrup urine disease (MSUD)",
                "Methylmalonic acidemia",
                "Ornithinemia",
                "Phenylalanine hydroxylase deficiency",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "Tryptophanuria",
                "Tyrosine aminotransferase deficiency",
                "Tyrosinemia",
                "Tyrosinemia type I",
                "Tyrosinemia type II",
                "Tyrosinemia type III",
                "Tyrosinemia type IV",
                "Urea cycle disorders"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia Type I",
            "Tyrosinemia Type II",
            "Tyrosinemia Type III",
            "Citrullinemia",
            "Alkaptonuria",
            "Hartnup Disease",
            "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
            "Cystinuria",
            "Lysinuric Protein Intolerance",
            "Glutaric Acidemia Type I",
            "Methylmalonic Acidemia (MMA)",
            "Propionic Acidemia",
            "Isovaleric Acidemia",
            "Argininosuccinic Aciduria",
            "Serine Deficiency Disorders",
            "Hyperphenylalanemia",
            "Tetrahydropteridine Deficiency",
            "SLC7A9-related Cystinuria",
            "Branched-Chain Alpha-Ketoacid Dehydrogenase Complex Deficiency",
            "Nonketotic Hyperglycinemia",
            "Pyruvoyl-tetrahydropterin Synthase Deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III",
                "Citrullinemia",
                "Alkaptonuria",
                "Hartnup Disease",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Cystinuria",
                "Lysinuric Protein Intolerance",
                "Glutaric Acidemia Type I",
                "Methylmalonic Acidemia (MMA)",
                "Propionic Acidemia",
                "Isovaleric Acidemia",
                "Argininosuccinic Aciduria",
                "Serine Deficiency Disorders",
                "Hyperphenylalanemia",
                "Tetrahydropteridine Deficiency",
                "SLC7A9-related Cystinuria",
                "Branched-Chain Alpha-Ketoacid Dehydrogenase Complex Deficiency",
                "Nonketotic Hyperglycinemia",
                "Pyruvoyl-tetrahydropterin Synthase Deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaptonuria",
                "Argininosuccinic Aciduria",
                "Branched-Chain Alpha-Ketoacid Dehydrogenase Complex Deficiency",
                "Citrullinemia",
                "Cystinuria",
                "Glutaric Acidemia Type I",
                "Hartnup Disease",
                "Homocystinuria",
                "Hyperphenylalanemia",
                "Isovaleric Acidemia",
                "Lysinuric Protein Intolerance",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia (MMA)",
                "Nonketotic Hyperglycinemia",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Pyruvoyl-tetrahydropterin Synthase Deficiency",
                "SLC7A9-related Cystinuria",
                "Serine Deficiency Disorders",
                "Tetrahydropteridine Deficiency",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Alkaptonuria",
            "Argininemia",
            "Citrullinemia",
            "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
            "Carbamoyl Phosphate Synthetase I Deficiency (CPS I Deficiency)",
            "Arginase Deficiency",
            "Hyperornithinemia-Hyperammonemia-Homocitrullinuria (HHH) Syndrome",
            "Isovaleric Acidemia",
            "Propionic Acidemia",
            "Methylmalonic Acidemia",
            "Glutaric Acidemia Type I",
            "Lysinuric Protein Intolerance (LPI)",
            "Hartnup Disease",
            "Cystinuria",
            "Histidinemia",
            "Non-Ketotic Hyperglycinemia (NKH)",
            "Hyperprolinemia",
            "Hyperlysinemia",
            "Dicarboxylic Aminoaciduria",
            "Iminoglycinuria"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Alkaptonuria",
                "Argininemia",
                "Citrullinemia",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Carbamoyl Phosphate Synthetase I Deficiency (CPS I Deficiency)",
                "Arginase Deficiency",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria (HHH) Syndrome",
                "Isovaleric Acidemia",
                "Propionic Acidemia",
                "Methylmalonic Acidemia",
                "Glutaric Acidemia Type I",
                "Lysinuric Protein Intolerance (LPI)",
                "Hartnup Disease",
                "Cystinuria",
                "Histidinemia",
                "Non-Ketotic Hyperglycinemia (NKH)",
                "Hyperprolinemia",
                "Hyperlysinemia",
                "Dicarboxylic Aminoaciduria",
                "Iminoglycinuria"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaptonuria",
                "Arginase Deficiency",
                "Argininemia",
                "Carbamoyl Phosphate Synthetase I Deficiency (CPS I Deficiency)",
                "Citrullinemia",
                "Cystinuria",
                "Dicarboxylic Aminoaciduria",
                "Glutaric Acidemia Type I",
                "Hartnup Disease",
                "Histidinemia",
                "Homocystinuria",
                "Hyperlysinemia",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria (HHH) Syndrome",
                "Hyperprolinemia",
                "Iminoglycinuria",
                "Isovaleric Acidemia",
                "Lysinuric Protein Intolerance (LPI)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Non-Ketotic Hyperglycinemia (NKH)",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia type I",
            "Alkaptonuria",
            "Argininosuccinic aciduria",
            "Citrullinemia type I",
            "Hartnup disease",
            "Cystinuria",
            "Glycine encephalopathy",
            "Propionic acidemia",
            "Methylmalonic acidemia",
            "Isovaleric acidemia",
            "Glutaric acidemia type I",
            "3-Methylcrotonyl-CoA carboxylase deficiency (3-MCC)",
            "Argininemia",
            "Hyperlysinemia",
            "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
            "Lysinuric protein intolerance",
            "Dihydrolipoamide dehydrogenase (E3) deficiency",
            "Branched-chain ketoacid dehydrogenase kinase (BCKDK) deficiency",
            "Hyperprolinemia type I",
            "Hyperprolinemia type II",
            "Sarcosinemia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia type I",
                "Alkaptonuria",
                "Argininosuccinic aciduria",
                "Citrullinemia type I",
                "Hartnup disease",
                "Cystinuria",
                "Glycine encephalopathy",
                "Propionic acidemia",
                "Methylmalonic acidemia",
                "Isovaleric acidemia",
                "Glutaric acidemia type I",
                "3-Methylcrotonyl-CoA carboxylase deficiency (3-MCC)",
                "Argininemia",
                "Hyperlysinemia",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "Lysinuric protein intolerance",
                "Dihydrolipoamide dehydrogenase (E3) deficiency",
                "Branched-chain ketoacid dehydrogenase kinase (BCKDK) deficiency",
                "Hyperprolinemia type I",
                "Hyperprolinemia type II",
                "Sarcosinemia"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA carboxylase deficiency (3-MCC)",
                "Alkaptonuria",
                "Argininemia",
                "Argininosuccinic aciduria",
                "Branched-chain ketoacid dehydrogenase kinase (BCKDK) deficiency",
                "Citrullinemia type I",
                "Cystinuria",
                "Dihydrolipoamide dehydrogenase (E3) deficiency",
                "Glutaric acidemia type I",
                "Glycine encephalopathy",
                "Hartnup disease",
                "Homocystinuria",
                "Hyperlysinemia",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "Hyperprolinemia type I",
                "Hyperprolinemia type II",
                "Isovaleric acidemia",
                "Lysinuric protein intolerance",
                "Maple syrup urine disease (MSUD)",
                "Methylmalonic acidemia",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "Sarcosinemia",
                "Tyrosinemia type I"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Homocystinuria",
            "Alkaptonuria",
            "Tyrosinemia",
            "Glycine encephalopathy",
            "Cystinuria",
            "Methylmalonic acidemia",
            "Propionic acidemia",
            "Isovaleric acidemia",
            "Glutaric aciduria type 1",
            "Argininemia",
            "Citrullinemia",
            "Ornithine transcarbamylase deficiency",
            "Carbamoyl phosphate synthetase I deficiency",
            "Lysinuric protein intolerance",
            "Hartnup disease",
            "Cystathionine beta-synthase deficiency",
            "Iminoglycinuria",
            "Hyperornithinemia-hyperammonemia-homocitrullinuria syndrome",
            "Glutamine synthetase deficiency",
            "Glutaric aciduria type 2",
            "3-Methylcrotonyl-CoA carboxylase deficiency",
            "Branched-chain ketoacid dehydrogenase deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Homocystinuria",
                "Alkaptonuria",
                "Tyrosinemia",
                "Glycine encephalopathy",
                "Cystinuria",
                "Methylmalonic acidemia",
                "Propionic acidemia",
                "Isovaleric acidemia",
                "Glutaric aciduria type 1",
                "Argininemia",
                "Citrullinemia",
                "Ornithine transcarbamylase deficiency",
                "Carbamoyl phosphate synthetase I deficiency",
                "Lysinuric protein intolerance",
                "Hartnup disease",
                "Cystathionine beta-synthase deficiency",
                "Iminoglycinuria",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria syndrome",
                "Glutamine synthetase deficiency",
                "Glutaric aciduria type 2",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Branched-chain ketoacid dehydrogenase deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Alkaptonuria",
                "Argininemia",
                "Branched-chain ketoacid dehydrogenase deficiency",
                "Carbamoyl phosphate synthetase I deficiency",
                "Citrullinemia",
                "Cystathionine beta-synthase deficiency",
                "Cystinuria",
                "Glutamine synthetase deficiency",
                "Glutaric aciduria type 1",
                "Glutaric aciduria type 2",
                "Glycine encephalopathy",
                "Hartnup disease",
                "Homocystinuria",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria syndrome",
                "Iminoglycinuria",
                "Isovaleric acidemia",
                "Lysinuric protein intolerance",
                "Maple syrup urine disease (MSUD)",
                "Methylmalonic acidemia",
                "Ornithine transcarbamylase deficiency",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Alkaptonuria",
            "Cystinuria",
            "Argininemia",
            "Argininosuccinic Aciduria",
            "Citrullinemia",
            "Hyperammonemia",
            "Hyperprolinemia",
            "Hyperlysinemia",
            "Hypervalinemia",
            "Hyperleucine-Isoleucinemia",
            "Hypermethioninemia",
            "Hyperornithinemia",
            "Hyperargininemia",
            "Glutaric Acidemia Type I",
            "Isovaleric Acidemia",
            "3-Methylcrotonyl-CoA Carboxylase Deficiency",
            "Propionic Acidemia",
            "Methylmalonic Acidemia",
            "Holocarboxylase Synthetase Deficiency",
            "Biotinidase Deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Alkaptonuria",
                "Cystinuria",
                "Argininemia",
                "Argininosuccinic Aciduria",
                "Citrullinemia",
                "Hyperammonemia",
                "Hyperprolinemia",
                "Hyperlysinemia",
                "Hypervalinemia",
                "Hyperleucine-Isoleucinemia",
                "Hypermethioninemia",
                "Hyperornithinemia",
                "Hyperargininemia",
                "Glutaric Acidemia Type I",
                "Isovaleric Acidemia",
                "3-Methylcrotonyl-CoA Carboxylase Deficiency",
                "Propionic Acidemia",
                "Methylmalonic Acidemia",
                "Holocarboxylase Synthetase Deficiency",
                "Biotinidase Deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA Carboxylase Deficiency",
                "Alkaptonuria",
                "Argininemia",
                "Argininosuccinic Aciduria",
                "Biotinidase Deficiency",
                "Citrullinemia",
                "Cystinuria",
                "Glutaric Acidemia Type I",
                "Holocarboxylase Synthetase Deficiency",
                "Homocystinuria",
                "Hyperammonemia",
                "Hyperargininemia",
                "Hyperleucine-Isoleucinemia",
                "Hyperlysinemia",
                "Hypermethioninemia",
                "Hyperornithinemia",
                "Hyperprolinemia",
                "Hypervalinemia",
                "Isovaleric Acidemia",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Aminoacyl-tRNA Synthetase Deficiency (AARS Deficiency)",
            "Aminoaciduria",
            "Aminoacidurias",
            "Aromatic Amino Acid Decarboxylase Deficiency",
            "Aromatic Amino Acid Hydroxylase Deficiency",
            "Branched-Chain Ketoacid Dehydrogenase Deficiency",
            "Citrullinemia",
            "Citrullinemia Type I",
            "Citrullinemia Type II",
            "Cystinuria",
            "Cystinuria Type I",
            "Cystinuria Type II",
            "Cystinuria Type III",
            "Glutamic Acidemia Type I",
            "Glutamic Acidemia Type II",
            "Glutamic Acidemia Type III",
            "Glutathione Synthetase Deficiency",
            "Hartnup Disorder",
            "Hartnup Disease",
            "Hartnup Syndrome",
            "Hyperammonemia",
            "Hyperammonemia Type I",
            "Hyperammonemia Type II",
            "Hyperammonemia Type III",
            "Maple Syrup Urine Disease",
            "Maple Syrup Urine Disease Type I",
            "Maple Syrup Urine Disease Type II",
            "Maple Syrup Urine Disease Type III",
            "Maple Syrup Urine Disease Type IV",
            "Tyrosinemia Type I",
            "Tyrosinemia Type II",
            "Tyrosinemia Type III"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aminoacyl-tRNA Synthetase Deficiency (AARS Deficiency)",
                "Aminoaciduria",
                "Aminoacidurias",
                "Aromatic Amino Acid Decarboxylase Deficiency",
                "Aromatic Amino Acid Hydroxylase Deficiency",
                "Branched-Chain Ketoacid Dehydrogenase Deficiency",
                "Citrullinemia",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Cystinuria",
                "Cystinuria Type I",
                "Cystinuria Type II",
                "Cystinuria Type III",
                "Glutamic Acidemia Type I",
                "Glutamic Acidemia Type II",
                "Glutamic Acidemia Type III",
                "Glutathione Synthetase Deficiency",
                "Hartnup Disorder",
                "Hartnup Disease",
                "Hartnup Syndrome",
                "Hyperammonemia",
                "Hyperammonemia Type I",
                "Hyperammonemia Type II",
                "Hyperammonemia Type III"
            ],
            "mismatches": [
                "Maple Syrup Urine Disease",
                "Maple Syrup Urine Disease Type I",
                "Maple Syrup Urine Disease Type II",
                "Maple Syrup Urine Disease Type III",
                "Maple Syrup Urine Disease Type IV",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III"
            ],
            "true_referents": [
                "Aminoaciduria",
                "Aminoacidurias",
                "Aminoacyl-tRNA Synthetase Deficiency (AARS Deficiency)",
                "Aromatic Amino Acid Decarboxylase Deficiency",
                "Aromatic Amino Acid Hydroxylase Deficiency",
                "Branched-Chain Ketoacid Dehydrogenase Deficiency",
                "Citrullinemia",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Cystinuria",
                "Cystinuria Type I",
                "Cystinuria Type II",
                "Cystinuria Type III",
                "Glutamic Acidemia Type I",
                "Glutamic Acidemia Type II",
                "Glutamic Acidemia Type III",
                "Glutathione Synthetase Deficiency",
                "Hartnup Disease",
                "Hartnup Disorder",
                "Hartnup Syndrome",
                "Hyperammonemia",
                "Hyperammonemia Type I",
                "Hyperammonemia Type II",
                "Hyperammonemia Type III"
            ],
            "TP": 24,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Tyrosinemia",
            "Homocystinuria",
            "Argininosuccinic aciduria (ASA)",
            "Citrullinemia",
            "Argininemia",
            "Hyperargininemia",
            "Hypermethioninemia",
            "Methionine adenosyltransferase deficiency",
            "S-adenosylhomocysteine hydrolase deficiency",
            "Propionic acidemia",
            "Methylmalonic acidemia",
            "Isobutyryl-CoA dehydrogenase deficiency",
            "2-Methylbutyryl-CoA dehydrogenase deficiency",
            "Isovaleric acidemia",
            "3-Methylcrotonyl-CoA carboxylase deficiency",
            "Methylmalonyl-CoA mutase deficiency",
            "Cobalamin C, D, F, and J disease",
            "Glutaric acidemia type I",
            "Glutaric acidemia type II",
            "2-Aminoadipic semialdehyde synthase deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Tyrosinemia",
                "Homocystinuria",
                "Argininosuccinic aciduria (ASA)",
                "Citrullinemia",
                "Argininemia",
                "Hyperargininemia",
                "Hypermethioninemia",
                "Methionine adenosyltransferase deficiency",
                "S-adenosylhomocysteine hydrolase deficiency",
                "Propionic acidemia",
                "Methylmalonic acidemia",
                "Isobutyryl-CoA dehydrogenase deficiency",
                "2-Methylbutyryl-CoA dehydrogenase deficiency",
                "Isovaleric acidemia",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Methylmalonyl-CoA mutase deficiency",
                "Cobalamin C, D, F, and J disease",
                "Glutaric acidemia type I",
                "Glutaric acidemia type II",
                "2-Aminoadipic semialdehyde synthase deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Aminoadipic semialdehyde synthase deficiency",
                "2-Methylbutyryl-CoA dehydrogenase deficiency",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Argininemia",
                "Argininosuccinic aciduria (ASA)",
                "Citrullinemia",
                "Cobalamin C, D, F, and J disease",
                "Glutaric acidemia type I",
                "Glutaric acidemia type II",
                "Homocystinuria",
                "Hyperargininemia",
                "Hypermethioninemia",
                "Isobutyryl-CoA dehydrogenase deficiency",
                "Isovaleric acidemia",
                "Maple syrup urine disease (MSUD)",
                "Methionine adenosyltransferase deficiency",
                "Methylmalonic acidemia",
                "Methylmalonyl-CoA mutase deficiency",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "S-adenosylhomocysteine hydrolase deficiency",
                "Tyrosinemia"
            ],
            "TP": 22,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": "{\n    \"referent\": \"Albinism\"\n},\n{\n    \"referent\": \"Argininosuccinate lyase deficiency\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type II\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type III\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type IV\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type V\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type VI\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type VII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type VIII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type IX\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type X\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XI\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XIII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XIV\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XV\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XVI\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XVII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XVIII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XIX\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XX\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXI\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXIII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXIV\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXV\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXVI\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXVII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXVIII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXIX\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXX\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXI\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXIII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXIV\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXV\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXVI\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXVII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXVIII\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XXXIX\"\n},\n{\n    \"referent\": \"Argininosuccinic aciduria type XL\"\n}",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]