[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Methylmalonic acidemia",
            "Propionic acidemia",
            "Isovaleric acidemia",
            "Glutaric acidemia type I",
            "Citrullinemia",
            "Argininosuccinic aciduria",
            "Hyperammonemia",
            "Urea cycle disorders",
            "Cystinuria",
            "Alkaptonuria",
            "Histidinemia",
            "Hypermethioninemia",
            "Succinyl-CoA:3-oxoacid CoA transferase deficiency",
            "3-Methylcrotonyl-CoA carboxylase deficiency",
            "2-Methylbutyryl-CoA dehydrogenase deficiency",
            "Beta-ketothiolase deficiency",
            "3-Hydroxyisobutyric aciduria",
            "4-Hydroxybutyric aciduria",
            "3-Methylglutaconic aciduria"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Methylmalonic acidemia",
                "Propionic acidemia",
                "Isovaleric acidemia",
                "Glutaric acidemia type I",
                "Citrullinemia",
                "Argininosuccinic aciduria",
                "Hyperammonemia",
                "Urea cycle disorders",
                "Cystinuria",
                "Alkaptonuria",
                "Histidinemia",
                "Hypermethioninemia",
                "Succinyl-CoA:3-oxoacid CoA transferase deficiency",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "2-Methylbutyryl-CoA dehydrogenase deficiency",
                "Beta-ketothiolase deficiency",
                "3-Hydroxyisobutyric aciduria",
                "4-Hydroxybutyric aciduria",
                "3-Methylglutaconic aciduria"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Methylbutyryl-CoA dehydrogenase deficiency",
                "3-Hydroxyisobutyric aciduria",
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "3-Methylglutaconic aciduria",
                "4-Hydroxybutyric aciduria",
                "Alkaptonuria",
                "Argininosuccinic aciduria",
                "Beta-ketothiolase deficiency",
                "Citrullinemia",
                "Cystinuria",
                "Glutaric acidemia type I",
                "Histidinemia",
                "Homocystinuria",
                "Hyperammonemia",
                "Hypermethioninemia",
                "Isovaleric acidemia",
                "Maple syrup urine disease (MSUD)",
                "Methylmalonic acidemia",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "Succinyl-CoA:3-oxoacid CoA transferase deficiency",
                "Tyrosinemia",
                "Urea cycle disorders"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Methionine Adenosyltransferase Deficiency",
            "Histidinemia",
            "Argininosuccinic Aciduria",
            "Citrullinemia",
            "Ornithine Transcarbamylase Deficiency",
            "Nonketotic Hyperglycinemia",
            "Glycine Encephalopathy",
            "Propionic Acidemia",
            "Methylmalonic Acidemia",
            "Methylmalonic Acidemia with Homocystinuria",
            "Glutaric Acidemia Type I",
            "Glutaric Acidemia Type II",
            "3-Methylcrotonyl-CoA Carboxylase Deficiency",
            "3-Hydroxyisobutyric Aciduria",
            "Beta-Ketothiolase Deficiency",
            "Arginase Deficiency",
            "Cystinuria",
            "Cystinosis",
            "Glycine Decarboxylase Deficiency",
            "Branched-Chain Ketoacid Dehydrogenase Deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Methionine Adenosyltransferase Deficiency",
                "Histidinemia",
                "Argininosuccinic Aciduria",
                "Citrullinemia",
                "Ornithine Transcarbamylase Deficiency",
                "Nonketotic Hyperglycinemia",
                "Glycine Encephalopathy",
                "Propionic Acidemia",
                "Methylmalonic Acidemia",
                "Methylmalonic Acidemia with Homocystinuria",
                "Glutaric Acidemia Type I",
                "Glutaric Acidemia Type II",
                "3-Methylcrotonyl-CoA Carboxylase Deficiency",
                "3-Hydroxyisobutyric Aciduria",
                "Beta-Ketothiolase Deficiency",
                "Arginase Deficiency",
                "Cystinuria",
                "Cystinosis",
                "Glycine Decarboxylase Deficiency",
                "Branched-Chain Ketoacid Dehydrogenase Deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Hydroxyisobutyric Aciduria",
                "3-Methylcrotonyl-CoA Carboxylase Deficiency",
                "Arginase Deficiency",
                "Argininosuccinic Aciduria",
                "Beta-Ketothiolase Deficiency",
                "Branched-Chain Ketoacid Dehydrogenase Deficiency",
                "Citrullinemia",
                "Cystinosis",
                "Cystinuria",
                "Glutaric Acidemia Type I",
                "Glutaric Acidemia Type II",
                "Glycine Decarboxylase Deficiency",
                "Glycine Encephalopathy",
                "Histidinemia",
                "Homocystinuria",
                "Maple Syrup Urine Disease (MSUD)",
                "Methionine Adenosyltransferase Deficiency",
                "Methylmalonic Acidemia",
                "Methylmalonic Acidemia with Homocystinuria",
                "Nonketotic Hyperglycinemia",
                "Ornithine Transcarbamylase Deficiency",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Alkaptonuria (AKU)",
            "Argininosuccinic Aciduria (ASA)",
            "Argininemia",
            "Citrullinemia",
            "Cystathioninuria",
            "Cystinosis",
            "Cystinuria",
            "Glutaric Aciduria Type I (GA-I)",
            "Homocystinuria",
            "Hypophosphatasia",
            "Isovaleric Acidemia (IVA)",
            "Maple Syrup Urine Disease (MSUD)",
            "Methionine Adenosyltransferase I/III Deficiency (MAT I/III)",
            "Methylmalonic Acidemia (MMA)",
            "Nonketotic Hyperglycinemia (NKH)",
            "Ornithine Transcarbamylase Deficiency (OTC)",
            "Phenylketonuria (PKU)",
            "Propionic Acidemia (PA)",
            "Tyrosinemia Type I (TYR I)",
            "Tyrosinemia Type II (TYR II)",
            "Tyrosinemia Type III (TYR III)",
            "Urea Cycle Disorders",
            "Xanthinuria",
            "Hartnup Disease"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alkaptonuria (AKU)",
                "Argininosuccinic Aciduria (ASA)",
                "Argininemia",
                "Citrullinemia",
                "Cystathioninuria",
                "Cystinosis",
                "Cystinuria",
                "Glutaric Aciduria Type I (GA-I)",
                "Homocystinuria",
                "Hypophosphatasia",
                "Isovaleric Acidemia (IVA)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methionine Adenosyltransferase I/III Deficiency (MAT I/III)",
                "Methylmalonic Acidemia (MMA)",
                "Nonketotic Hyperglycinemia (NKH)",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia (PA)",
                "Tyrosinemia Type I (TYR I)",
                "Tyrosinemia Type II (TYR II)",
                "Tyrosinemia Type III (TYR III)",
                "Urea Cycle Disorders",
                "Xanthinuria",
                "Hartnup Disease"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaptonuria (AKU)",
                "Argininemia",
                "Argininosuccinic Aciduria (ASA)",
                "Citrullinemia",
                "Cystathioninuria",
                "Cystinosis",
                "Cystinuria",
                "Glutaric Aciduria Type I (GA-I)",
                "Hartnup Disease",
                "Homocystinuria",
                "Hypophosphatasia",
                "Isovaleric Acidemia (IVA)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methionine Adenosyltransferase I/III Deficiency (MAT I/III)",
                "Methylmalonic Acidemia (MMA)",
                "Nonketotic Hyperglycinemia (NKH)",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia (PA)",
                "Tyrosinemia Type I (TYR I)",
                "Tyrosinemia Type II (TYR II)",
                "Tyrosinemia Type III (TYR III)",
                "Urea Cycle Disorders",
                "Xanthinuria"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Galactosemia (GAL)",
            "Homocystinuria (HCU)",
            "Tyrosinemia Type I (TYR-I)",
            "Tyrosinemia Type II (TYR-II)",
            "Tyrosinemia Type III (TYR-III)",
            "Cystathioninuria (CSU)",
            "Argininemia (ARG)",
            "Ornithine Transcarbamylase Deficiency (OTCD)",
            "Citrullinemia Type I (CIT-I)",
            "Citrullinemia Type II (CIT-II)",
            "Carbamoyl Phosphate Synthetase I Deficiency (CPS-I)",
            "Propionic Acidemia (PA)",
            "Methylmalonic Acidemia (MMA)",
            "Isovaleric Acidemia (IVA)",
            "3-Methylcrotonyl-CoA Carboxylase Deficiency (3-MCCD)",
            "Multiple Carboxylase Deficiency (MCD)",
            "Glutaric Acidemia Type I (GA-I)",
            "Glutaric Acidemia Type II (GA-II)",
            "Hyperlysinemia (HLY)",
            "Hyperargininemia (HAR)",
            "Non-ketotic Hyperglycinemia (NKH)",
            "Riboflavin-responsive Multiple Carboxylase Deficiency (RR-MCD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Galactosemia (GAL)",
                "Homocystinuria (HCU)",
                "Tyrosinemia Type I (TYR-I)",
                "Tyrosinemia Type II (TYR-II)",
                "Tyrosinemia Type III (TYR-III)",
                "Cystathioninuria (CSU)",
                "Argininemia (ARG)",
                "Ornithine Transcarbamylase Deficiency (OTCD)",
                "Citrullinemia Type I (CIT-I)",
                "Citrullinemia Type II (CIT-II)",
                "Carbamoyl Phosphate Synthetase I Deficiency (CPS-I)",
                "Propionic Acidemia (PA)",
                "Methylmalonic Acidemia (MMA)",
                "Isovaleric Acidemia (IVA)",
                "3-Methylcrotonyl-CoA Carboxylase Deficiency (3-MCCD)",
                "Multiple Carboxylase Deficiency (MCD)",
                "Glutaric Acidemia Type I (GA-I)",
                "Glutaric Acidemia Type II (GA-II)",
                "Hyperlysinemia (HLY)",
                "Hyperargininemia (HAR)",
                "Non-ketotic Hyperglycinemia (NKH)",
                "Riboflavin-responsive Multiple Carboxylase Deficiency (RR-MCD)"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA Carboxylase Deficiency (3-MCCD)",
                "Argininemia (ARG)",
                "Carbamoyl Phosphate Synthetase I Deficiency (CPS-I)",
                "Citrullinemia Type I (CIT-I)",
                "Citrullinemia Type II (CIT-II)",
                "Cystathioninuria (CSU)",
                "Galactosemia (GAL)",
                "Glutaric Acidemia Type I (GA-I)",
                "Glutaric Acidemia Type II (GA-II)",
                "Homocystinuria (HCU)",
                "Hyperargininemia (HAR)",
                "Hyperlysinemia (HLY)",
                "Isovaleric Acidemia (IVA)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia (MMA)",
                "Multiple Carboxylase Deficiency (MCD)",
                "Non-ketotic Hyperglycinemia (NKH)",
                "Ornithine Transcarbamylase Deficiency (OTCD)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia (PA)",
                "Riboflavin-responsive Multiple Carboxylase Deficiency (RR-MCD)",
                "Tyrosinemia Type I (TYR-I)",
                "Tyrosinemia Type II (TYR-II)",
                "Tyrosinemia Type III (TYR-III)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia Type I",
            "Tyrosinemia Type II",
            "Tyrosinemia Type III",
            "Alkaptonuria",
            "Argininosuccinic Aciduria",
            "Citrullinemia Type I",
            "Citrullinemia Type II",
            "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
            "Nonketotic Hyperglycinemia",
            "Glycine Encephalopathy",
            "Histidinemia",
            "Isovaleric Acidemia",
            "Methylmalonic Acidemia",
            "Propionic Acidemia",
            "3-Methylcrotonyl-CoA Carboxylase Deficiency",
            "Ornithine Transcarbamylase Deficiency (OTC)",
            "Carbamoyl Phosphate Synthetase I Deficiency",
            "Argininemia",
            "Hypermethioninemia",
            "Cystathioninuria",
            "Primary Hyperoxaluria"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III",
                "Alkaptonuria",
                "Argininosuccinic Aciduria",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
                "Nonketotic Hyperglycinemia",
                "Glycine Encephalopathy",
                "Histidinemia",
                "Isovaleric Acidemia",
                "Methylmalonic Acidemia",
                "Propionic Acidemia",
                "3-Methylcrotonyl-CoA Carboxylase Deficiency",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Carbamoyl Phosphate Synthetase I Deficiency",
                "Argininemia",
                "Hypermethioninemia",
                "Cystathioninuria",
                "Primary Hyperoxaluria"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA Carboxylase Deficiency",
                "Alkaptonuria",
                "Argininemia",
                "Argininosuccinic Aciduria",
                "Carbamoyl Phosphate Synthetase I Deficiency",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Cystathioninuria",
                "Glycine Encephalopathy",
                "Histidinemia",
                "Homocystinuria",
                "Hypermethioninemia",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH)",
                "Isovaleric Acidemia",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Nonketotic Hyperglycinemia",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Phenylketonuria (PKU)",
                "Primary Hyperoxaluria",
                "Propionic Acidemia",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Tyrosinemia Type I",
            "Tyrosinemia Type II",
            "Tyrosinemia Type III",
            "Homocystinuria",
            "Citrullinemia",
            "Argininosuccinic Aciduria",
            "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
            "Urea Cycle Disorders",
            "Glutaric Acidemia Type I",
            "Isovaleric Acidemia",
            "Methylmalonic Acidemia (MMA)",
            "Propionic Acidemia",
            "Acyl-CoA Dehydrogenase Deficiency",
            "Carnitine Palmitoyltransferase I Deficiency (CPT I Deficiency)",
            "Carnitine Palmitoyltransferase II Deficiency (CPT II Deficiency)",
            "Short-Chain Acyl-CoA Dehydrogenase Deficiency (SCAD Deficiency)",
            "Long-Chain 3-Hydroxyacyl-CoA Dehydrogenase Deficiency (LCHAD Deficiency)",
            "Medium-Chain Acyl-CoA Dehydrogenase Deficiency (MCAD Deficiency)",
            "Beta-Ketothiolase Deficiency",
            "3-Hydroxy-3-Methylglutaryl-CoA Lyase Deficiency",
            "Glutamate Dehydrogenase Deficiency",
            "Arginase Deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III",
                "Homocystinuria",
                "Citrullinemia",
                "Argininosuccinic Aciduria",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Urea Cycle Disorders",
                "Glutaric Acidemia Type I",
                "Isovaleric Acidemia",
                "Methylmalonic Acidemia (MMA)",
                "Propionic Acidemia",
                "Acyl-CoA Dehydrogenase Deficiency",
                "Carnitine Palmitoyltransferase I Deficiency (CPT I Deficiency)",
                "Carnitine Palmitoyltransferase II Deficiency (CPT II Deficiency)",
                "Short-Chain Acyl-CoA Dehydrogenase Deficiency (SCAD Deficiency)",
                "Long-Chain 3-Hydroxyacyl-CoA Dehydrogenase Deficiency (LCHAD Deficiency)",
                "Medium-Chain Acyl-CoA Dehydrogenase Deficiency (MCAD Deficiency)",
                "Beta-Ketothiolase Deficiency",
                "3-Hydroxy-3-Methylglutaryl-CoA Lyase Deficiency",
                "Glutamate Dehydrogenase Deficiency",
                "Arginase Deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Hydroxy-3-Methylglutaryl-CoA Lyase Deficiency",
                "Acyl-CoA Dehydrogenase Deficiency",
                "Arginase Deficiency",
                "Argininosuccinic Aciduria",
                "Beta-Ketothiolase Deficiency",
                "Carnitine Palmitoyltransferase I Deficiency (CPT I Deficiency)",
                "Carnitine Palmitoyltransferase II Deficiency (CPT II Deficiency)",
                "Citrullinemia",
                "Glutamate Dehydrogenase Deficiency",
                "Glutaric Acidemia Type I",
                "Homocystinuria",
                "Isovaleric Acidemia",
                "Long-Chain 3-Hydroxyacyl-CoA Dehydrogenase Deficiency (LCHAD Deficiency)",
                "Maple Syrup Urine Disease (MSUD)",
                "Medium-Chain Acyl-CoA Dehydrogenase Deficiency (MCAD Deficiency)",
                "Methylmalonic Acidemia (MMA)",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Short-Chain Acyl-CoA Dehydrogenase Deficiency (SCAD Deficiency)",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III",
                "Urea Cycle Disorders"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Homocystinuria",
            "Maple Syrup Urine Disease (MSUD)",
            "Tyrosinemia",
            "Alkaptonuria",
            "Argininosuccinic Aciduria",
            "Citrullinemia",
            "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH Syndrome)",
            "Isovaleric Acidemia",
            "Methylmalonic Acidemia",
            "Propionic Acidemia",
            "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
            "Albinism",
            "Cystinuria",
            "Glutaric Acidemia Type 1",
            "Hyperphenylalaninemia",
            "Oculocutaneous Albinism Type 1 (OCA1)",
            "Oculocutaneous Albinism Type 2 (OCA2)",
            "Oculocutaneous Albinism Type 3 (OCA3)",
            "Oculocutaneous Albinism Type 4 (OCA4)",
            "Hartnup Disease",
            "Cystinosis",
            "Nonketotic Hyperglycinemia (NKH)",
            "Tyrosinemia Type II"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Homocystinuria",
                "Maple Syrup Urine Disease (MSUD)",
                "Tyrosinemia",
                "Alkaptonuria",
                "Argininosuccinic Aciduria",
                "Citrullinemia",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH Syndrome)",
                "Isovaleric Acidemia",
                "Methylmalonic Acidemia",
                "Propionic Acidemia",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Albinism",
                "Cystinuria",
                "Glutaric Acidemia Type 1",
                "Hyperphenylalaninemia",
                "Oculocutaneous Albinism Type 1 (OCA1)",
                "Oculocutaneous Albinism Type 2 (OCA2)",
                "Oculocutaneous Albinism Type 3 (OCA3)",
                "Oculocutaneous Albinism Type 4 (OCA4)",
                "Hartnup Disease",
                "Cystinosis",
                "Nonketotic Hyperglycinemia (NKH)",
                "Tyrosinemia Type II"
            ],
            "mismatches": [],
            "true_referents": [
                "Albinism",
                "Alkaptonuria",
                "Argininosuccinic Aciduria",
                "Citrullinemia",
                "Cystinosis",
                "Cystinuria",
                "Glutaric Acidemia Type 1",
                "Hartnup Disease",
                "Homocystinuria",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria Syndrome (HHH Syndrome)",
                "Hyperphenylalaninemia",
                "Isovaleric Acidemia",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Nonketotic Hyperglycinemia (NKH)",
                "Oculocutaneous Albinism Type 1 (OCA1)",
                "Oculocutaneous Albinism Type 2 (OCA2)",
                "Oculocutaneous Albinism Type 3 (OCA3)",
                "Oculocutaneous Albinism Type 4 (OCA4)",
                "Ornithine Transcarbamylase Deficiency (OTC Deficiency)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Tyrosinemia",
                "Tyrosinemia Type II"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Alkaptonuria",
            "Citrullinemia",
            "Argininosuccinic aciduria",
            "Cystinuria",
            "Hartnup disease",
            "Histidinemia",
            "Isovaleric acidemia",
            "Methylmalonic acidemia",
            "Propionic acidemia",
            "Cystathioninuria",
            "Glutaric aciduria",
            "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
            "Nonketotic hyperglycinemia",
            "Ornithine transcarbamylase deficiency (OTC deficiency)",
            "Urea cycle disorders",
            "Glycine encephalopathy",
            "Phosphoserine phosphatase deficiency",
            "Sarcosinemia",
            "Serine deficiency",
            "Tryptophanemia",
            "Tyrosinemia type I"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Alkaptonuria",
                "Citrullinemia",
                "Argininosuccinic aciduria",
                "Cystinuria",
                "Hartnup disease",
                "Histidinemia",
                "Isovaleric acidemia",
                "Methylmalonic acidemia",
                "Propionic acidemia",
                "Cystathioninuria",
                "Glutaric aciduria",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "Nonketotic hyperglycinemia",
                "Ornithine transcarbamylase deficiency (OTC deficiency)",
                "Urea cycle disorders",
                "Glycine encephalopathy",
                "Phosphoserine phosphatase deficiency",
                "Sarcosinemia",
                "Serine deficiency",
                "Tryptophanemia"
            ],
            "mismatches": [
                "Tyrosinemia type I"
            ],
            "true_referents": [
                "Alkaptonuria",
                "Argininosuccinic aciduria",
                "Citrullinemia",
                "Cystathioninuria",
                "Cystinuria",
                "Glutaric aciduria",
                "Glycine encephalopathy",
                "Hartnup disease",
                "Histidinemia",
                "Homocystinuria",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "Isovaleric acidemia",
                "Maple syrup urine disease (MSUD)",
                "Methylmalonic acidemia",
                "Nonketotic hyperglycinemia",
                "Ornithine transcarbamylase deficiency (OTC deficiency)",
                "Phenylketonuria (PKU)",
                "Phosphoserine phosphatase deficiency",
                "Propionic acidemia",
                "Sarcosinemia",
                "Serine deficiency",
                "Tryptophanemia",
                "Tyrosinemia",
                "Urea cycle disorders"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria (HCU)",
            "Tyrosinemia Type I (TYR I)",
            "Tyrosinemia Type II (TYR II)",
            "Tyrosinemia Type III (TYR III)",
            "Alkaptonuria (AKU)",
            "Adenylosuccinate Lyase Deficiency",
            "Cystinuria",
            "Hyperhomocysteinemia",
            "Hypermethioninemia",
            "Hyperprolinemia",
            "Hyperornithinemia",
            "Lysinuric Protein Intolerance (LPI)",
            "Isovaleric Acidemia",
            "Propionic Acidemia",
            "Methylmalonic Acidemia",
            "Cystathionine Beta-Synthase Deficiency",
            "Serine Deficiency Disorders",
            "Argininosuccinic Aciduria",
            "Ornithine Transcarbamylase Deficiency",
            "Citrullinemia",
            "Nonketotic Hyperglycinemia",
            "Aspartate Aminotransferase Deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria (HCU)",
                "Tyrosinemia Type I (TYR I)",
                "Tyrosinemia Type II (TYR II)",
                "Tyrosinemia Type III (TYR III)",
                "Alkaptonuria (AKU)",
                "Adenylosuccinate Lyase Deficiency",
                "Cystinuria",
                "Hyperhomocysteinemia",
                "Hypermethioninemia",
                "Hyperprolinemia",
                "Hyperornithinemia",
                "Lysinuric Protein Intolerance (LPI)",
                "Isovaleric Acidemia",
                "Propionic Acidemia",
                "Methylmalonic Acidemia",
                "Cystathionine Beta-Synthase Deficiency",
                "Serine Deficiency Disorders",
                "Argininosuccinic Aciduria",
                "Ornithine Transcarbamylase Deficiency",
                "Citrullinemia",
                "Nonketotic Hyperglycinemia",
                "Aspartate Aminotransferase Deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "Adenylosuccinate Lyase Deficiency",
                "Alkaptonuria (AKU)",
                "Argininosuccinic Aciduria",
                "Aspartate Aminotransferase Deficiency",
                "Citrullinemia",
                "Cystathionine Beta-Synthase Deficiency",
                "Cystinuria",
                "Homocystinuria (HCU)",
                "Hyperhomocysteinemia",
                "Hypermethioninemia",
                "Hyperornithinemia",
                "Hyperprolinemia",
                "Isovaleric Acidemia",
                "Lysinuric Protein Intolerance (LPI)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Nonketotic Hyperglycinemia",
                "Ornithine Transcarbamylase Deficiency",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Serine Deficiency Disorders",
                "Tyrosinemia Type I (TYR I)",
                "Tyrosinemia Type II (TYR II)",
                "Tyrosinemia Type III (TYR III)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Argininosuccinic aciduria",
            "Citrullinemia",
            "Ornithine transcarbamylase deficiency (OTC deficiency)",
            "Carbamoyl phosphate synthetase I deficiency (CPS I deficiency)",
            "Arginase deficiency",
            "Isovaleric acidemia",
            "Propionic acidemia",
            "Methylmalonic acidemia",
            "Glutaric acidemia type I",
            "Nonketotic hyperglycinemia (NKH)",
            "Alkaptonuria",
            "Histidinemia",
            "Hyperlysinemia",
            "Hyperprolinemia",
            "Sarcosinemia",
            "Hartnup disease",
            "Cystinuria",
            "Lysinuric protein intolerance (LPI)",
            "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
            "3-Methylcrotonyl-CoA carboxylase deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Argininosuccinic aciduria",
                "Citrullinemia",
                "Ornithine transcarbamylase deficiency (OTC deficiency)",
                "Carbamoyl phosphate synthetase I deficiency (CPS I deficiency)",
                "Arginase deficiency",
                "Isovaleric acidemia",
                "Propionic acidemia",
                "Methylmalonic acidemia",
                "Glutaric acidemia type I",
                "Nonketotic hyperglycinemia (NKH)",
                "Alkaptonuria",
                "Histidinemia",
                "Hyperlysinemia",
                "Hyperprolinemia",
                "Sarcosinemia",
                "Hartnup disease",
                "Cystinuria",
                "Lysinuric protein intolerance (LPI)",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "3-Methylcrotonyl-CoA carboxylase deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA carboxylase deficiency",
                "Alkaptonuria",
                "Arginase deficiency",
                "Argininosuccinic aciduria",
                "Carbamoyl phosphate synthetase I deficiency (CPS I deficiency)",
                "Citrullinemia",
                "Cystinuria",
                "Glutaric acidemia type I",
                "Hartnup disease",
                "Histidinemia",
                "Homocystinuria",
                "Hyperlysinemia",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "Hyperprolinemia",
                "Isovaleric acidemia",
                "Lysinuric protein intolerance (LPI)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic acidemia",
                "Nonketotic hyperglycinemia (NKH)",
                "Ornithine transcarbamylase deficiency (OTC deficiency)",
                "Phenylketonuria (PKU)",
                "Propionic acidemia",
                "Sarcosinemia",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple syrup urine disease (MSUD)",
            "Tyrosinemia type I",
            "Homocystinuria",
            "Citrullinemia type I (CTLN1)",
            "Argininosuccinic aciduria (ASA)",
            "Methylmalonic acidemia (MMA)",
            "Propionic acidemia (PA)",
            "Isovaleric acidemia (IVA)",
            "Glutaric acidemia type I (GA1)",
            "3-Methylcrotonyl-CoA carboxylase deficiency (3-MCC)",
            "3-Hydroxy-3-methylglutaryl-CoA lyase deficiency (HMG)",
            "Biotinidase deficiency",
            "Ornithine transcarbamylase deficiency (OTC deficiency)",
            "Carbamoyl phosphate synthetase I deficiency (CPS1 deficiency)",
            "N-acetylglutamate synthase deficiency (NAGS deficiency)",
            "Argininemia",
            "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
            "Lysinuric protein intolerance (LPI)",
            "Hartnup disease",
            "Cystinuria",
            "Iminoglycinuria",
            "Dicarboxylic aminoaciduria",
            "Hawkinsinuria"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple syrup urine disease (MSUD)",
                "Tyrosinemia type I",
                "Homocystinuria",
                "Citrullinemia type I (CTLN1)",
                "Argininosuccinic aciduria (ASA)",
                "Methylmalonic acidemia (MMA)",
                "Propionic acidemia (PA)",
                "Isovaleric acidemia (IVA)",
                "Glutaric acidemia type I (GA1)",
                "3-Methylcrotonyl-CoA carboxylase deficiency (3-MCC)",
                "3-Hydroxy-3-methylglutaryl-CoA lyase deficiency (HMG)",
                "Biotinidase deficiency",
                "Ornithine transcarbamylase deficiency (OTC deficiency)",
                "Carbamoyl phosphate synthetase I deficiency (CPS1 deficiency)",
                "N-acetylglutamate synthase deficiency (NAGS deficiency)",
                "Argininemia",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "Lysinuric protein intolerance (LPI)",
                "Hartnup disease",
                "Cystinuria",
                "Iminoglycinuria",
                "Dicarboxylic aminoaciduria",
                "Hawkinsinuria"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Hydroxy-3-methylglutaryl-CoA lyase deficiency (HMG)",
                "3-Methylcrotonyl-CoA carboxylase deficiency (3-MCC)",
                "Argininemia",
                "Argininosuccinic aciduria (ASA)",
                "Biotinidase deficiency",
                "Carbamoyl phosphate synthetase I deficiency (CPS1 deficiency)",
                "Citrullinemia type I (CTLN1)",
                "Cystinuria",
                "Dicarboxylic aminoaciduria",
                "Glutaric acidemia type I (GA1)",
                "Hartnup disease",
                "Hawkinsinuria",
                "Homocystinuria",
                "Hyperornithinemia-hyperammonemia-homocitrullinuria (HHH) syndrome",
                "Iminoglycinuria",
                "Isovaleric acidemia (IVA)",
                "Lysinuric protein intolerance (LPI)",
                "Maple syrup urine disease (MSUD)",
                "Methylmalonic acidemia (MMA)",
                "N-acetylglutamate synthase deficiency (NAGS deficiency)",
                "Ornithine transcarbamylase deficiency (OTC deficiency)",
                "Phenylketonuria (PKU)",
                "Propionic acidemia (PA)",
                "Tyrosinemia type I"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Alkaptonuria",
            "Tyrosinemia Type I",
            "Tyrosinemia Type II",
            "Tyrosinemia Type III",
            "Albinism",
            "Cystinuria",
            "Glycine Encephalopathy",
            "Isovaleric Acidemia",
            "Glutaric Aciduria Type I",
            "Methylmalonic Acidemia",
            "Propionic Acidemia",
            "Argininemia",
            "Citrullinemia Type I",
            "Citrullinemia Type II",
            "Lysinuric Protein Intolerance",
            "Non-Ketotic Hyperglycinemia",
            "Ornithine Transcarbamylase Deficiency",
            "Carbamoyl Phosphate Synthetase I Deficiency",
            "Hyperammonemia-Hyperornithinemia-Homocitrullinuria Syndrome",
            "Glutaric Aciduria Type II",
            "3-Methylcrotonyl-CoA Carboxylase Deficiency"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Alkaptonuria",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III",
                "Albinism",
                "Cystinuria",
                "Glycine Encephalopathy",
                "Isovaleric Acidemia",
                "Glutaric Aciduria Type I",
                "Methylmalonic Acidemia",
                "Propionic Acidemia",
                "Argininemia",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Lysinuric Protein Intolerance",
                "Non-Ketotic Hyperglycinemia",
                "Ornithine Transcarbamylase Deficiency",
                "Carbamoyl Phosphate Synthetase I Deficiency",
                "Hyperammonemia-Hyperornithinemia-Homocitrullinuria Syndrome",
                "Glutaric Aciduria Type II",
                "3-Methylcrotonyl-CoA Carboxylase Deficiency"
            ],
            "mismatches": [],
            "true_referents": [
                "3-Methylcrotonyl-CoA Carboxylase Deficiency",
                "Albinism",
                "Alkaptonuria",
                "Argininemia",
                "Carbamoyl Phosphate Synthetase I Deficiency",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Cystinuria",
                "Glutaric Aciduria Type I",
                "Glutaric Aciduria Type II",
                "Glycine Encephalopathy",
                "Homocystinuria",
                "Hyperammonemia-Hyperornithinemia-Homocitrullinuria Syndrome",
                "Isovaleric Acidemia",
                "Lysinuric Protein Intolerance",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia",
                "Non-Ketotic Hyperglycinemia",
                "Ornithine Transcarbamylase Deficiency",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Tyrosinemia",
            "Cystinuria",
            "Argininosuccinic Aciduria (ASA)",
            "Citrullinemia",
            "Hyperammonemia",
            "Glutaric Acidemia Type I (GA-I)",
            "Isovaleric Acidemia (IVA)",
            "Methylmalonic Acidemia (MMA)",
            "Propionic Acidemia (PA)",
            "Ornithine Transcarbamylase Deficiency (OTC)",
            "Nonketotic Hyperglycinemia (NKH)",
            "Biotin-Responsive Basal Ganglia Disease (BBGD)",
            "Histidinemia",
            "Hyperlysinemia",
            "Hyperprolinemia",
            "Hartnup Disease",
            "Lysinuric Protein Intolerance (LPI)",
            "Glycine Encephalopathy",
            "Hyperornithinemia-Hyperammonemia-Homocitrullinuria (HHH) Syndrome",
            "Carbamoyl Phosphate Synthetase I Deficiency (CPS-I)",
            "Hyperargininemia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Tyrosinemia",
                "Cystinuria",
                "Argininosuccinic Aciduria (ASA)",
                "Citrullinemia",
                "Hyperammonemia",
                "Glutaric Acidemia Type I (GA-I)",
                "Isovaleric Acidemia (IVA)",
                "Methylmalonic Acidemia (MMA)",
                "Propionic Acidemia (PA)",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Nonketotic Hyperglycinemia (NKH)",
                "Biotin-Responsive Basal Ganglia Disease (BBGD)",
                "Histidinemia",
                "Hyperlysinemia",
                "Hyperprolinemia",
                "Hartnup Disease",
                "Lysinuric Protein Intolerance (LPI)",
                "Glycine Encephalopathy",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria (HHH) Syndrome",
                "Carbamoyl Phosphate Synthetase I Deficiency (CPS-I)",
                "Hyperargininemia"
            ],
            "mismatches": [],
            "true_referents": [
                "Argininosuccinic Aciduria (ASA)",
                "Biotin-Responsive Basal Ganglia Disease (BBGD)",
                "Carbamoyl Phosphate Synthetase I Deficiency (CPS-I)",
                "Citrullinemia",
                "Cystinuria",
                "Glutaric Acidemia Type I (GA-I)",
                "Glycine Encephalopathy",
                "Hartnup Disease",
                "Histidinemia",
                "Homocystinuria",
                "Hyperammonemia",
                "Hyperargininemia",
                "Hyperlysinemia",
                "Hyperornithinemia-Hyperammonemia-Homocitrullinuria (HHH) Syndrome",
                "Hyperprolinemia",
                "Isovaleric Acidemia (IVA)",
                "Lysinuric Protein Intolerance (LPI)",
                "Maple Syrup Urine Disease (MSUD)",
                "Methylmalonic Acidemia (MMA)",
                "Nonketotic Hyperglycinemia (NKH)",
                "Ornithine Transcarbamylase Deficiency (OTC)",
                "Phenylketonuria (PKU)",
                "Propionic Acidemia (PA)",
                "Tyrosinemia"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": "[\n\"Tyrosinemia type I (TYR1)\" ,\n\"Phenylketonuria (PKU)\" ,\n\"Citrullinemia type I (CTLN1)\" ,\n\"Argininosuccinic aciduria (ASA)\" ,\n\"Hartnup disorder (HND)\" ,\n\"Methylmalonic acidemia (MMA)\" ,\n\"Isobutylmalonic aciduria (IBMA)\" ,\n\"Glutaric acidemia type I (GA1)\" ,\n\"Glutaryl-CoA dehydrogenase deficiency (GCDH)\" ,\n\"3-Methylglutaconic aciduria type I (MGA1)\" ,\n\"3-Hydroxy-3-methylglutaryl-CoA lyase deficiency (HMGCL)\" ,\n\"Propionic acidemia (PA)\" ,\n\"Methylmalonyl-CoA mutase deficiency (MUT)\" ,\n\"Propionic acidemia with methylcitrate excretion (PAMCE)\" ,\n\"Arginase deficiency (ARG1)\" ,\n\"Argininosuccinate lyase deficiency (ASL)\" ,\n\"Carbamyl phosphate synthetase I deficiency (CPS1)\" ,\n\"Orotiduria (OROT)\" ,\n\"Hyperammonemia due to ornithine transcarbamylase deficiency (OTCD)\" ,\n\"Hyperammonemia due to carbamoyl-phosphate synthetase deficiency (CPS2)\" ,\n\"Hyperammonemia due to ornithine transcarbamylase deficiency (OTCD)\" ,\n\"Hyperammonemia due to argininosuccinate synthetase deficiency (ASS)\" ,\n\"Hyperammonemia due to argininosuccinate lyase deficiency (ASL)\" ,\n\"Hyperammonemia due to carbamoyl-phosphate synthetase I deficiency (CPS1)\" ,\n\"Hyperammonemia due to ornithine translocase deficiency (OTL)\" ,\n\"Hyperammonemia due to glutamine synthetase deficiency (GS)\" ,\n\"Hyperammonemia due to glutamate dehydrogenase deficiency (GDH)\" ,\n\"Hyperammonemia due to aspartate aminotransferase deficiency (AAT)\" ,\n\"Hyperammonemia due to aspartate dehydrogenase deficiency (ADH)\" ,\n\"Hyperammonemia due to aspartate transaminase deficiency (AST)\" ,\n\"Hyperammonemia due to aspartate aminotransferase deficiency (AAT)\" ,\n\"Hyperammonemia due to aspartate dehydrogenase deficiency (ADH)\" ,\n\"Hyperammonemia due to aspartate transaminase deficiency (AST)\"",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            "Phenylketonuria (PKU)",
            "Maple Syrup Urine Disease (MSUD)",
            "Tyrosinemia Type I",
            "Tyrosinemia Type II",
            "Tyrosinemia Type III",
            "Alkaptonuria (AKU)",
            "Homocystinuria (HCY)",
            "Citrullinemia Type I",
            "Citrullinemia Type II",
            "Argininosuccinic Aciduria (ASA)",
            "Argininemia",
            "Hyperargininemia",
            "Hypermethioninemia",
            "Methionine Adenosyltransferase Deficiency",
            "S-Adenosylhomocysteine Hydrolase Deficiency",
            "Glutamic Acidemia Type I",
            "Glutamic Acidemia Type II",
            "2-Aminoadipic Semialdehyde Synthase Deficiency",
            "Prolidase Deficiency",
            "Hyperprolinemia Type I",
            "Hyperprolinemia Type II",
            "Aspartylglycosaminuria (AGU)",
            "Glutaric Acidemia Type I",
            "Glutaric Acidemia Type II"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Phenylketonuria (PKU)",
                "Maple Syrup Urine Disease (MSUD)",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III",
                "Alkaptonuria (AKU)",
                "Homocystinuria (HCY)",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Argininosuccinic Aciduria (ASA)",
                "Argininemia",
                "Hyperargininemia",
                "Hypermethioninemia",
                "Methionine Adenosyltransferase Deficiency",
                "S-Adenosylhomocysteine Hydrolase Deficiency",
                "Glutamic Acidemia Type I",
                "Glutamic Acidemia Type II",
                "2-Aminoadipic Semialdehyde Synthase Deficiency",
                "Prolidase Deficiency",
                "Hyperprolinemia Type I",
                "Hyperprolinemia Type II",
                "Aspartylglycosaminuria (AGU)",
                "Glutaric Acidemia Type I",
                "Glutaric Acidemia Type II"
            ],
            "mismatches": [],
            "true_referents": [
                "2-Aminoadipic Semialdehyde Synthase Deficiency",
                "Alkaptonuria (AKU)",
                "Argininemia",
                "Argininosuccinic Aciduria (ASA)",
                "Aspartylglycosaminuria (AGU)",
                "Citrullinemia Type I",
                "Citrullinemia Type II",
                "Glutamic Acidemia Type I",
                "Glutamic Acidemia Type II",
                "Glutaric Acidemia Type I",
                "Glutaric Acidemia Type II",
                "Homocystinuria (HCY)",
                "Hyperargininemia",
                "Hypermethioninemia",
                "Hyperprolinemia Type I",
                "Hyperprolinemia Type II",
                "Maple Syrup Urine Disease (MSUD)",
                "Methionine Adenosyltransferase Deficiency",
                "Phenylketonuria (PKU)",
                "Prolidase Deficiency",
                "S-Adenosylhomocysteine Hydrolase Deficiency",
                "Tyrosinemia Type I",
                "Tyrosinemia Type II",
                "Tyrosinemia Type III"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "congenital amino acid metabolism disorder",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Maple syrup urine disease",
                "Canonical name": "MSUD"
            },
            {
                "Referent": "Isovaleric acidemia",
                "Canonical name": "IVA"
            },
            {
                "Referent": "Propionic acidemia",
                "Canonical name": "PA"
            },
            {
                "Referent": "Methylmalonic acidemia",
                "Canonical name": "MMA"
            },
            {
                "Referent": "Homocystinuria",
                "Canonical name": "HCS"
            },
            {
                "Referent": "Glutaric aciduria type 1",
                "Canonical name": "GCD1"
            },
            {
                "Referent": "Glutaric aciduria type 2",
                "Canonical name": "GCD2"
            },
            {
                "Referent": "Maple syrup urine disease",
                "Canonical name": "MSUD"
            },
            {
                "Referent": "Isovaleric acidemia",
                "Canonical name": "IVA"
            },
            {
                "Referent": "Propionic acidemia",
                "Canonical name": "PA"
            },
            {
                "Referent": "Methylmalonic acidemia",
                "Canonical name": "MMA"
            },
            {
                "Referent": "Homocystinuria",
                "Canonical name": "HCS"
            },
            {
                "Referent": "Glutaric aciduria type 1",
                "Canonical name": "GCD1"
            },
            {
                "Referent": "Glutaric aciduria type 2",
                "Canonical name": "GCD2"
            },
            {
                "Referent": "Maple syrup urine disease",
                "Canonical name": "MSUD"
            },
            {
                "Referent": "Isovaleric acidemia",
                "Canonical name": "IVA"
            },
            {
                "Referent": "Propionic acidemia",
                "Canonical name": "PA"
            },
            {
                "Referent": "Methylmalonic acidemia",
                "Canonical name": "MMA"
            },
            {
                "Referent": "Homocystinuria",
                "Canonical name": "HCS"
            },
            {
                "Referent": "Glutaric aciduria type 1",
                "Canonical name": "GCD1"
            },
            {
                "Referent": "Glutaric aciduria type 2",
                "Canonical name": "GCD2"
            },
            {
                "Referent": "Maple syrup urine disease",
                "Canonical name": "MSUD"
            },
            {
                "Referent": "Isovaleric acidemia",
                "Canonical name": "IVA"
            },
            {
                "Referent": "Propionic acidemia",
                "Canonical name": "PA"
            },
            {
                "Referent": "Methylmalonic acidemia",
                "Canonical name": "MMA"
            },
            {
                "Referent": "Homocystinuria",
                "Canonical name": "HCS"
            },
            {
                "Referent": "Glutaric aciduria type 1",
                "Canonical name": "GCD1"
            },
            {
                "Referent": "Glutaric aciduria type 2",
                "Canonical name": "GCD2"
            },
            {
                "Referent": "Maple syrup urine disease",
                "Canonical name": "MSUD"
            },
            {
                "Referent": "Isovaleric acidemia",
                "Canonical name": "IVA"
            },
            {
                "Referent": "Propionic acidemia",
                "Canonical name": "PA"
            },
            {
                "Referent": "Methylmalonic acidemia",
                "Canonical name": "MMA"
            },
            {
                "Referent": "Homocystinuria",
                "Canonical name": "HCS"
            },
            {
                "Referent": "Glutaric aciduria type 1",
                "Canonical name": "GCD1"
            },
            {
                "Referent": "Glutaric aciduria type 2",
                "Canonical name": "GCD2"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Maple syrup urine disease",
                "Isovaleric acidemia",
                "Propionic acidemia",
                "Methylmalonic acidemia",
                "Homocystinuria",
                "Glutaric aciduria type 1",
                "Glutaric aciduria type 2"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"Referent\": \"Glutaric aciduria type 1\", \"Canonical name\": \"GCD1\"}",
                "{\"Referent\": \"Glutaric aciduria type 2\", \"Canonical name\": \"GCD2\"}",
                "{\"Referent\": \"Homocystinuria\", \"Canonical name\": \"HCS\"}",
                "{\"Referent\": \"Isovaleric acidemia\", \"Canonical name\": \"IVA\"}",
                "{\"Referent\": \"Maple syrup urine disease\", \"Canonical name\": \"MSUD\"}",
                "{\"Referent\": \"Methylmalonic acidemia\", \"Canonical name\": \"MMA\"}",
                "{\"Referent\": \"Propionic acidemia\", \"Canonical name\": \"PA\"}"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 0
        }
    }
]