[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Huntington's disease (HD)",
            "Tay-Sachs disease (TSD)",
            "Cystic fibrosis (CF)",
            "Sickle cell anemia (SCA)",
            "Hemochromatosis (HH)",
            "Duchenne muscular dystrophy (DMD)",
            "Lactose intolerance (LI)",
            "Phenylketonuria (PKU)",
            "Marfan syndrome (MFS)",
            "Hemophilia A (HA)",
            "Tuberous sclerosis (TS)",
            "Gaucher's disease (GD)",
            "Hypertrophic cardiomyopathy (HCM)",
            "Fragile X syndrome (FXS)",
            "Thalassemia (THAL)",
            "Albinism (ALB)",
            "Hurler syndrome (HS)",
            "Lesch-Nyhan syndrome (LNS)",
            "Fabry disease (FD)",
            "Acatalasia (ACAT)",
            "Retinitis pigmentosa (RP)",
            "Ataxia telangiectasia (AT)",
            "Hereditary fructose intolerance (HFI)",
            "Hereditary spherocytosis (HSph)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Huntington's disease (HD)",
                "Tay-Sachs disease (TSD)",
                "Cystic fibrosis (CF)",
                "Sickle cell anemia (SCA)",
                "Hemochromatosis (HH)",
                "Duchenne muscular dystrophy (DMD)",
                "Lactose intolerance (LI)",
                "Phenylketonuria (PKU)",
                "Marfan syndrome (MFS)",
                "Hemophilia A (HA)",
                "Tuberous sclerosis (TS)",
                "Gaucher's disease (GD)",
                "Hypertrophic cardiomyopathy (HCM)",
                "Fragile X syndrome (FXS)",
                "Thalassemia (THAL)",
                "Albinism (ALB)",
                "Hurler syndrome (HS)",
                "Lesch-Nyhan syndrome (LNS)",
                "Fabry disease (FD)",
                "Acatalasia (ACAT)",
                "Retinitis pigmentosa (RP)",
                "Ataxia telangiectasia (AT)",
                "Hereditary fructose intolerance (HFI)",
                "Hereditary spherocytosis (HSph)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acatalasia (ACAT)",
                "Albinism (ALB)",
                "Ataxia telangiectasia (AT)",
                "Cystic fibrosis (CF)",
                "Duchenne muscular dystrophy (DMD)",
                "Fabry disease (FD)",
                "Fragile X syndrome (FXS)",
                "Gaucher's disease (GD)",
                "Hemochromatosis (HH)",
                "Hemophilia A (HA)",
                "Hereditary fructose intolerance (HFI)",
                "Hereditary spherocytosis (HSph)",
                "Huntington's disease (HD)",
                "Hurler syndrome (HS)",
                "Hypertrophic cardiomyopathy (HCM)",
                "Lactose intolerance (LI)",
                "Lesch-Nyhan syndrome (LNS)",
                "Marfan syndrome (MFS)",
                "Phenylketonuria (PKU)",
                "Retinitis pigmentosa (RP)",
                "Sickle cell anemia (SCA)",
                "Tay-Sachs disease (TSD)",
                "Thalassemia (THAL)",
                "Tuberous sclerosis (TS)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic Fibrosis (CF)",
            "Sickle Cell Disease (SCD)",
            "Hemochromatosis (HFE)",
            "Huntington's Disease (HD)",
            "Familial Hypercholesterolemia (FH)",
            "Neurofibromatosis Type 1 (NF1)",
            "Marfan Syndrome (MFS)",
            "Tay-Sachs Disease (TSD)",
            "Duchenne Muscular Dystrophy (DMD)",
            "Familial Mediterranean Fever (FMF)",
            "Hereditary Hemochromatosis (HFE)",
            "Tuberous Sclerosis Complex (TSC)",
            "Familial Adenomatous Polyposis (FAP)",
            "Polycystic Kidney Disease (PKD)",
            "Wilson's Disease (WD)",
            "Alpha-1 Antitrypsin Deficiency (AATD)",
            "Phenylketonuria (PKU)",
            "Cystinuria",
            "G6PD Deficiency",
            "Osteogenesis Imperfecta (OI)",
            "Hereditary Angioedema (HAE)",
            "Dent's Disease",
            "Fabry Disease",
            "Oculocutaneous Albinism (OCA)",
            "Brugada Syndrome"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic Fibrosis (CF)",
                "Sickle Cell Disease (SCD)",
                "Hemochromatosis (HFE)",
                "Huntington's Disease (HD)",
                "Familial Hypercholesterolemia (FH)",
                "Neurofibromatosis Type 1 (NF1)",
                "Marfan Syndrome (MFS)",
                "Tay-Sachs Disease (TSD)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Familial Mediterranean Fever (FMF)",
                "Hereditary Hemochromatosis (HFE)",
                "Tuberous Sclerosis Complex (TSC)",
                "Familial Adenomatous Polyposis (FAP)",
                "Polycystic Kidney Disease (PKD)",
                "Wilson's Disease (WD)",
                "Alpha-1 Antitrypsin Deficiency (AATD)",
                "Phenylketonuria (PKU)",
                "Cystinuria",
                "G6PD Deficiency",
                "Osteogenesis Imperfecta (OI)",
                "Hereditary Angioedema (HAE)",
                "Dent's Disease",
                "Fabry Disease",
                "Oculocutaneous Albinism (OCA)"
            ],
            "mismatches": [
                "Brugada Syndrome"
            ],
            "true_referents": [
                "Alpha-1 Antitrypsin Deficiency (AATD)",
                "Cystic Fibrosis (CF)",
                "Cystinuria",
                "Dent's Disease",
                "Duchenne Muscular Dystrophy (DMD)",
                "Fabry Disease",
                "Familial Adenomatous Polyposis (FAP)",
                "Familial Hypercholesterolemia (FH)",
                "Familial Mediterranean Fever (FMF)",
                "G6PD Deficiency",
                "Hemochromatosis (HFE)",
                "Hereditary Angioedema (HAE)",
                "Hereditary Hemochromatosis (HFE)",
                "Huntington's Disease (HD)",
                "Marfan Syndrome (MFS)",
                "Neurofibromatosis Type 1 (NF1)",
                "Oculocutaneous Albinism (OCA)",
                "Osteogenesis Imperfecta (OI)",
                "Phenylketonuria (PKU)",
                "Polycystic Kidney Disease (PKD)",
                "Sickle Cell Disease (SCD)",
                "Tay-Sachs Disease (TSD)",
                "Tuberous Sclerosis Complex (TSC)",
                "Wilson's Disease (WD)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Sickle Cell Anemia (SCA)",
            "Cystic Fibrosis (CF)",
            "Phenylketonuria (PKU)",
            "Huntington's Disease (HD)",
            "Hemochromatosis",
            "Hereditary Spherocytosis (HS)",
            "Alpha-1 Antitrypsin Deficiency (A1AD)",
            "Gaucher Disease",
            "Fabry Disease",
            "Wilson Disease",
            "Porphyria",
            "Familial Hypercholesterolemia (FH)",
            "Ehlers-Danlos Syndrome (EDS)",
            "Marfan Syndrome",
            "Neurofibromatosis Type 1 (NF1)",
            "Tuberous Sclerosis Complex (TSC)",
            "Alzheimer's Disease (Early-Onset Familial)",
            "Hereditary Nonpolyposis Colorectal Cancer (HNPCC)",
            "Retinoblastoma",
            "Von Hippel-Lindau Disease (VHL)",
            "Familial Mediterranean Fever (FMF)",
            "Hereditary Hemochromatosis",
            "Hereditary Angioedema",
            "Hereditary Multiple Exostoses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sickle Cell Anemia (SCA)",
                "Cystic Fibrosis (CF)",
                "Phenylketonuria (PKU)",
                "Huntington's Disease (HD)",
                "Hemochromatosis",
                "Hereditary Spherocytosis (HS)",
                "Alpha-1 Antitrypsin Deficiency (A1AD)",
                "Gaucher Disease",
                "Fabry Disease",
                "Wilson Disease",
                "Porphyria",
                "Familial Hypercholesterolemia (FH)",
                "Ehlers-Danlos Syndrome (EDS)",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Tuberous Sclerosis Complex (TSC)",
                "Alzheimer's Disease (Early-Onset Familial)",
                "Hereditary Nonpolyposis Colorectal Cancer (HNPCC)",
                "Retinoblastoma",
                "Von Hippel-Lindau Disease (VHL)",
                "Familial Mediterranean Fever (FMF)",
                "Hereditary Hemochromatosis"
            ],
            "mismatches": [
                "Hereditary Angioedema",
                "Hereditary Multiple Exostoses"
            ],
            "true_referents": [
                "Alpha-1 Antitrypsin Deficiency (A1AD)",
                "Alzheimer's Disease (Early-Onset Familial)",
                "Cystic Fibrosis (CF)",
                "Ehlers-Danlos Syndrome (EDS)",
                "Fabry Disease",
                "Familial Hypercholesterolemia (FH)",
                "Familial Mediterranean Fever (FMF)",
                "Gaucher Disease",
                "Hemochromatosis",
                "Hereditary Angioedema",
                "Hereditary Hemochromatosis",
                "Hereditary Multiple Exostoses",
                "Hereditary Nonpolyposis Colorectal Cancer (HNPCC)",
                "Hereditary Spherocytosis (HS)",
                "Huntington's Disease (HD)",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Phenylketonuria (PKU)",
                "Porphyria",
                "Retinoblastoma",
                "Sickle Cell Anemia (SCA)",
                "Tuberous Sclerosis Complex (TSC)",
                "Von Hippel-Lindau Disease (VHL)",
                "Wilson Disease"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic Fibrosis (CF)",
            "Sickle Cell Anemia (SCA)",
            "Thalassemia (THAL)",
            "Phenylketonuria (PKU)",
            "Tay-Sachs Disease (TSD)",
            "Spinal Muscular Atrophy (SMA)",
            "Fragile X Syndrome (FXS)",
            "Huntington's Disease (HD)",
            "Hereditary Hemochromatosis (HHC)",
            "Alpha-1 Antitrypsin Deficiency (AATD)",
            "Galactosemia (GAL)",
            "Maple Syrup Urine Disease (MSUD)",
            "Gaucher Disease (GD)",
            "Niemann-Pick Disease (NPD)",
            "Fabry Disease (FD)",
            "Wilson Disease (WD)",
            "Retinitis Pigmentosa (RP)",
            "Familial Hypercholesterolemia (FH)",
            "Ehlers-Danlos Syndrome (EDS) - certain types",
            "Neurofibromatosis Type 1 (NF1)",
            "Marfan Syndrome (MFS)",
            "Bloom Syndrome (BS)",
            "Ataxia-Telangiectasia (AT)",
            "Congenital Adrenal Hyperplasia (CAH)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic Fibrosis (CF)",
                "Sickle Cell Anemia (SCA)",
                "Thalassemia (THAL)",
                "Phenylketonuria (PKU)",
                "Tay-Sachs Disease (TSD)",
                "Spinal Muscular Atrophy (SMA)",
                "Fragile X Syndrome (FXS)",
                "Huntington's Disease (HD)",
                "Hereditary Hemochromatosis (HHC)",
                "Alpha-1 Antitrypsin Deficiency (AATD)",
                "Galactosemia (GAL)",
                "Maple Syrup Urine Disease (MSUD)",
                "Gaucher Disease (GD)",
                "Niemann-Pick Disease (NPD)",
                "Fabry Disease (FD)",
                "Wilson Disease (WD)",
                "Retinitis Pigmentosa (RP)",
                "Familial Hypercholesterolemia (FH)",
                "Ehlers-Danlos Syndrome (EDS) - certain types",
                "Neurofibromatosis Type 1 (NF1)",
                "Marfan Syndrome (MFS)",
                "Bloom Syndrome (BS)",
                "Ataxia-Telangiectasia (AT)",
                "Congenital Adrenal Hyperplasia (CAH)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-1 Antitrypsin Deficiency (AATD)",
                "Ataxia-Telangiectasia (AT)",
                "Bloom Syndrome (BS)",
                "Congenital Adrenal Hyperplasia (CAH)",
                "Cystic Fibrosis (CF)",
                "Ehlers-Danlos Syndrome (EDS) - certain types",
                "Fabry Disease (FD)",
                "Familial Hypercholesterolemia (FH)",
                "Fragile X Syndrome (FXS)",
                "Galactosemia (GAL)",
                "Gaucher Disease (GD)",
                "Hereditary Hemochromatosis (HHC)",
                "Huntington's Disease (HD)",
                "Maple Syrup Urine Disease (MSUD)",
                "Marfan Syndrome (MFS)",
                "Neurofibromatosis Type 1 (NF1)",
                "Niemann-Pick Disease (NPD)",
                "Phenylketonuria (PKU)",
                "Retinitis Pigmentosa (RP)",
                "Sickle Cell Anemia (SCA)",
                "Spinal Muscular Atrophy (SMA)",
                "Tay-Sachs Disease (TSD)",
                "Thalassemia (THAL)",
                "Wilson Disease (WD)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic Fibrosis (CF)",
            "Sickle Cell Disease (SCD)",
            "Beta-Thalassemia",
            "Alpha-1 Antitrypsin Deficiency (AATD)",
            "Hemochromatosis",
            "Phenylketonuria (PKU)",
            "Tay-Sachs Disease",
            "Fragile X Syndrome",
            "Spinal Muscular Atrophy (SMA)",
            "Huntington's Disease",
            "Marfan Syndrome",
            "Neurofibromatosis Type 1 (NF1)",
            "Polycystic Kidney Disease (PKD)",
            "Duchenne Muscular Dystrophy (DMD)",
            "Familial Hypercholesterolemia (FH)",
            "Retinitis Pigmentosa",
            "Wilson's Disease",
            "Hereditary Angioedema (HAE)",
            "Alport Syndrome",
            "Congenital Adrenal Hyperplasia (CAH)",
            "Gaucher Disease",
            "Von Willebrand Disease (VWD)",
            "Achondroplasia",
            "Galactosemia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic Fibrosis (CF)",
                "Sickle Cell Disease (SCD)",
                "Beta-Thalassemia",
                "Alpha-1 Antitrypsin Deficiency (AATD)",
                "Hemochromatosis",
                "Phenylketonuria (PKU)",
                "Tay-Sachs Disease",
                "Fragile X Syndrome",
                "Spinal Muscular Atrophy (SMA)",
                "Huntington's Disease",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Polycystic Kidney Disease (PKD)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Familial Hypercholesterolemia (FH)",
                "Retinitis Pigmentosa",
                "Wilson's Disease",
                "Hereditary Angioedema (HAE)",
                "Alport Syndrome",
                "Congenital Adrenal Hyperplasia (CAH)",
                "Gaucher Disease",
                "Von Willebrand Disease (VWD)",
                "Achondroplasia",
                "Galactosemia"
            ],
            "mismatches": [],
            "true_referents": [
                "Achondroplasia",
                "Alpha-1 Antitrypsin Deficiency (AATD)",
                "Alport Syndrome",
                "Beta-Thalassemia",
                "Congenital Adrenal Hyperplasia (CAH)",
                "Cystic Fibrosis (CF)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Familial Hypercholesterolemia (FH)",
                "Fragile X Syndrome",
                "Galactosemia",
                "Gaucher Disease",
                "Hemochromatosis",
                "Hereditary Angioedema (HAE)",
                "Huntington's Disease",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Phenylketonuria (PKU)",
                "Polycystic Kidney Disease (PKD)",
                "Retinitis Pigmentosa",
                "Sickle Cell Disease (SCD)",
                "Spinal Muscular Atrophy (SMA)",
                "Tay-Sachs Disease",
                "Von Willebrand Disease (VWD)",
                "Wilson's Disease"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic Fibrosis (CF)",
            "Sickle Cell Disease (SCD)",
            "Phenylketonuria (PKU)",
            "Huntington's Disease (HD)",
            "Hemophilia A (HA)",
            "Tay-Sachs Disease (TSD)",
            "Duchenne Muscular Dystrophy (DMD)",
            "Beta-Thalassemia (\u03b2-thal)",
            "Marfan Syndrome (MFS)",
            "Neurofibromatosis Type 1 (NF1)",
            "Albinism (Oculocutaneous Albinism)",
            "Gaucher Disease (GD)",
            "Spinal Muscular Atrophy (SMA)",
            "Wilson's Disease (WD)",
            "Familial Hypercholesterolemia (FH)",
            "Achondroplasia (ACH)",
            "Myotonic Dystrophy (MD)",
            "Usher Syndrome (USH)",
            "Congenital Adrenal Hyperplasia (CAH)",
            "Osteogenesis Imperfecta (OI)",
            "Cystinuria (CYS)",
            "Long QT Syndrome (LQT)",
            "Hereditary Hemorrhagic Telangiectasia (HHT)",
            "Ehlers-Danlos Syndrome (EDS)",
            "Retinitis Pigmentosa (RP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic Fibrosis (CF)",
                "Sickle Cell Disease (SCD)",
                "Phenylketonuria (PKU)",
                "Huntington's Disease (HD)",
                "Hemophilia A (HA)",
                "Tay-Sachs Disease (TSD)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Beta-Thalassemia (\u03b2-thal)",
                "Marfan Syndrome (MFS)",
                "Neurofibromatosis Type 1 (NF1)",
                "Albinism (Oculocutaneous Albinism)",
                "Gaucher Disease (GD)",
                "Spinal Muscular Atrophy (SMA)",
                "Wilson's Disease (WD)",
                "Familial Hypercholesterolemia (FH)",
                "Achondroplasia (ACH)",
                "Myotonic Dystrophy (MD)",
                "Usher Syndrome (USH)",
                "Congenital Adrenal Hyperplasia (CAH)",
                "Osteogenesis Imperfecta (OI)",
                "Cystinuria (CYS)",
                "Long QT Syndrome (LQT)",
                "Hereditary Hemorrhagic Telangiectasia (HHT)",
                "Ehlers-Danlos Syndrome (EDS)"
            ],
            "mismatches": [
                "Retinitis Pigmentosa (RP)"
            ],
            "true_referents": [
                "Achondroplasia (ACH)",
                "Albinism (Oculocutaneous Albinism)",
                "Beta-Thalassemia (\u03b2-thal)",
                "Congenital Adrenal Hyperplasia (CAH)",
                "Cystic Fibrosis (CF)",
                "Cystinuria (CYS)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Ehlers-Danlos Syndrome (EDS)",
                "Familial Hypercholesterolemia (FH)",
                "Gaucher Disease (GD)",
                "Hemophilia A (HA)",
                "Hereditary Hemorrhagic Telangiectasia (HHT)",
                "Huntington's Disease (HD)",
                "Long QT Syndrome (LQT)",
                "Marfan Syndrome (MFS)",
                "Myotonic Dystrophy (MD)",
                "Neurofibromatosis Type 1 (NF1)",
                "Osteogenesis Imperfecta (OI)",
                "Phenylketonuria (PKU)",
                "Sickle Cell Disease (SCD)",
                "Spinal Muscular Atrophy (SMA)",
                "Tay-Sachs Disease (TSD)",
                "Usher Syndrome (USH)",
                "Wilson's Disease (WD)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic fibrosis (CF)",
            "Sickle cell disease (SCD)",
            "Huntington's disease (HD)",
            "Hemochromatosis (HFE)",
            "Phenylketonuria (PKU)",
            "Neurofibromatosis type 1 (NF1)",
            "Marfan syndrome (MFS)",
            "Polycystic kidney disease (PKD)",
            "Hereditary hemochromatosis (HH)",
            "Hereditary spherocytosis (HS)",
            "Alpha-1 antitrypsin deficiency (A1AD)",
            "Hereditary angioedema (HAE)",
            "Wilson's disease (WD)",
            "Galactosemia (GAL)",
            "Maple syrup urine disease (MSUD)",
            "Congenital adrenal hyperplasia (CAH)",
            "Familial hypercholesterolemia (FH)",
            "Tay-Sachs disease (TSD)",
            "Gaucher's disease (GD)",
            "Fragile X syndrome (FXS)",
            "Charcot-Marie-Tooth disease (CMT)",
            "Duchenne muscular dystrophy (DMD)",
            "Retinitis pigmentosa (RP)",
            "Thalassemia (THAL)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic fibrosis (CF)",
                "Sickle cell disease (SCD)",
                "Huntington's disease (HD)",
                "Hemochromatosis (HFE)",
                "Phenylketonuria (PKU)",
                "Neurofibromatosis type 1 (NF1)",
                "Marfan syndrome (MFS)",
                "Polycystic kidney disease (PKD)",
                "Hereditary hemochromatosis (HH)",
                "Hereditary spherocytosis (HS)",
                "Alpha-1 antitrypsin deficiency (A1AD)",
                "Hereditary angioedema (HAE)",
                "Wilson's disease (WD)",
                "Galactosemia (GAL)",
                "Maple syrup urine disease (MSUD)",
                "Congenital adrenal hyperplasia (CAH)",
                "Familial hypercholesterolemia (FH)",
                "Tay-Sachs disease (TSD)",
                "Gaucher's disease (GD)",
                "Fragile X syndrome (FXS)",
                "Charcot-Marie-Tooth disease (CMT)",
                "Duchenne muscular dystrophy (DMD)",
                "Retinitis pigmentosa (RP)",
                "Thalassemia (THAL)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-1 antitrypsin deficiency (A1AD)",
                "Charcot-Marie-Tooth disease (CMT)",
                "Congenital adrenal hyperplasia (CAH)",
                "Cystic fibrosis (CF)",
                "Duchenne muscular dystrophy (DMD)",
                "Familial hypercholesterolemia (FH)",
                "Fragile X syndrome (FXS)",
                "Galactosemia (GAL)",
                "Gaucher's disease (GD)",
                "Hemochromatosis (HFE)",
                "Hereditary angioedema (HAE)",
                "Hereditary hemochromatosis (HH)",
                "Hereditary spherocytosis (HS)",
                "Huntington's disease (HD)",
                "Maple syrup urine disease (MSUD)",
                "Marfan syndrome (MFS)",
                "Neurofibromatosis type 1 (NF1)",
                "Phenylketonuria (PKU)",
                "Polycystic kidney disease (PKD)",
                "Retinitis pigmentosa (RP)",
                "Sickle cell disease (SCD)",
                "Tay-Sachs disease (TSD)",
                "Thalassemia (THAL)",
                "Wilson's disease (WD)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic fibrosis (CF)",
            "Sickle cell disease (SCD)",
            "Huntington's disease (HD)",
            "Duchenne muscular dystrophy (DMD)",
            "Fragile X syndrome (FXS)",
            "Spinal muscular atrophy (SMA)",
            "Tay-Sachs disease",
            "Phenylketonuria (PKU)",
            "Hemochromatosis",
            "Polycystic kidney disease (PKD)",
            "Gaucher disease",
            "Wilson disease",
            "Hereditary spherocytosis",
            "Familial hypercholesterolemia (FH)",
            "Marfan syndrome",
            "Neurofibromatosis type 1 (NF1)",
            "Alpha-1 antitrypsin deficiency",
            "Myotonic dystrophy",
            "Familial Mediterranean fever (FMF)",
            "Familial adenomatous polyposis (FAP)",
            "Hereditary hemochromatosis",
            "Autosomal dominant polycystic kidney disease (ADPKD)",
            "Hereditary angioedema",
            "Familial hypertriglyceridemia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic fibrosis (CF)",
                "Sickle cell disease (SCD)",
                "Huntington's disease (HD)",
                "Duchenne muscular dystrophy (DMD)",
                "Fragile X syndrome (FXS)",
                "Spinal muscular atrophy (SMA)",
                "Tay-Sachs disease",
                "Phenylketonuria (PKU)",
                "Hemochromatosis",
                "Polycystic kidney disease (PKD)",
                "Gaucher disease",
                "Wilson disease",
                "Hereditary spherocytosis",
                "Familial hypercholesterolemia (FH)",
                "Marfan syndrome",
                "Neurofibromatosis type 1 (NF1)",
                "Alpha-1 antitrypsin deficiency",
                "Myotonic dystrophy",
                "Familial Mediterranean fever (FMF)",
                "Familial adenomatous polyposis (FAP)",
                "Hereditary hemochromatosis",
                "Autosomal dominant polycystic kidney disease (ADPKD)",
                "Hereditary angioedema",
                "Familial hypertriglyceridemia"
            ],
            "mismatches": [],
            "true_referents": [
                "Alpha-1 antitrypsin deficiency",
                "Autosomal dominant polycystic kidney disease (ADPKD)",
                "Cystic fibrosis (CF)",
                "Duchenne muscular dystrophy (DMD)",
                "Familial Mediterranean fever (FMF)",
                "Familial adenomatous polyposis (FAP)",
                "Familial hypercholesterolemia (FH)",
                "Familial hypertriglyceridemia",
                "Fragile X syndrome (FXS)",
                "Gaucher disease",
                "Hemochromatosis",
                "Hereditary angioedema",
                "Hereditary hemochromatosis",
                "Hereditary spherocytosis",
                "Huntington's disease (HD)",
                "Marfan syndrome",
                "Myotonic dystrophy",
                "Neurofibromatosis type 1 (NF1)",
                "Phenylketonuria (PKU)",
                "Polycystic kidney disease (PKD)",
                "Sickle cell disease (SCD)",
                "Spinal muscular atrophy (SMA)",
                "Tay-Sachs disease",
                "Wilson disease"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic Fibrosis (CF)",
            "Sickle Cell Disease (SCD)",
            "Hemophilia A",
            "Hemophilia B",
            "Phenylketonuria (PKU)",
            "Huntington Disease (HD)",
            "Familial Hypercholesterolemia (FH)",
            "Hereditary Hemochromatosis (HH)",
            "Marfan Syndrome",
            "Neurofibromatosis Type 1 (NF1)",
            "Tuberous Sclerosis Complex (TSC)",
            "Duchenne Muscular Dystrophy (DMD)",
            "Becker Muscular Dystrophy (BMD)",
            "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
            "Retinitis Pigmentosa",
            "Thalassemia",
            "Osteogenesis Imperfecta (OI)",
            "Albinism",
            "Ehlers-Danlos Syndrome (EDS)",
            "Fragile X Syndrome",
            "Spinocerebellar Ataxia",
            "Familial Mediterranean Fever (FMF)",
            "Wilson's Disease",
            "Pallister-Killian Syndrome"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic Fibrosis (CF)",
                "Sickle Cell Disease (SCD)",
                "Hemophilia A",
                "Hemophilia B",
                "Phenylketonuria (PKU)",
                "Huntington Disease (HD)",
                "Familial Hypercholesterolemia (FH)",
                "Hereditary Hemochromatosis (HH)",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Tuberous Sclerosis Complex (TSC)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Becker Muscular Dystrophy (BMD)",
                "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
                "Retinitis Pigmentosa",
                "Thalassemia",
                "Osteogenesis Imperfecta (OI)",
                "Albinism",
                "Ehlers-Danlos Syndrome (EDS)",
                "Fragile X Syndrome",
                "Spinocerebellar Ataxia",
                "Familial Mediterranean Fever (FMF)",
                "Wilson's Disease",
                "Pallister-Killian Syndrome"
            ],
            "mismatches": [],
            "true_referents": [
                "Albinism",
                "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
                "Becker Muscular Dystrophy (BMD)",
                "Cystic Fibrosis (CF)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Ehlers-Danlos Syndrome (EDS)",
                "Familial Hypercholesterolemia (FH)",
                "Familial Mediterranean Fever (FMF)",
                "Fragile X Syndrome",
                "Hemophilia A",
                "Hemophilia B",
                "Hereditary Hemochromatosis (HH)",
                "Huntington Disease (HD)",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Osteogenesis Imperfecta (OI)",
                "Pallister-Killian Syndrome",
                "Phenylketonuria (PKU)",
                "Retinitis Pigmentosa",
                "Sickle Cell Disease (SCD)",
                "Spinocerebellar Ataxia",
                "Thalassemia",
                "Tuberous Sclerosis Complex (TSC)",
                "Wilson's Disease"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Sickle Cell Disease (SCD)",
            "Cystic Fibrosis (CF)",
            "Huntington's Disease (HD)",
            "Duchenne Muscular Dystrophy (DMD)",
            "Hemophilia A",
            "Thalassemia",
            "Fragile X Syndrome (FXS)",
            "Neurofibromatosis Type 1 (NF1)",
            "Marfan Syndrome",
            "Familial Hypercholesterolemia (FH)",
            "Polycystic Kidney Disease (PKD)",
            "Hereditary Hemochromatosis",
            "Phenylketonuria (PKU)",
            "Spinal Muscular Atrophy (SMA)",
            "Myotonic Dystrophy",
            "Retinitis Pigmentosa",
            "Osteogenesis Imperfecta",
            "Gaucher Disease",
            "Hemophilia B",
            "Wilson's Disease",
            "Familial Mediterranean Fever (FMF)",
            "Friedreich's Ataxia",
            "Rett Syndrome",
            "Angelman Syndrome"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sickle Cell Disease (SCD)",
                "Cystic Fibrosis (CF)",
                "Huntington's Disease (HD)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Hemophilia A",
                "Thalassemia",
                "Fragile X Syndrome (FXS)",
                "Neurofibromatosis Type 1 (NF1)",
                "Marfan Syndrome",
                "Familial Hypercholesterolemia (FH)",
                "Polycystic Kidney Disease (PKD)",
                "Hereditary Hemochromatosis",
                "Phenylketonuria (PKU)",
                "Spinal Muscular Atrophy (SMA)",
                "Myotonic Dystrophy",
                "Retinitis Pigmentosa",
                "Osteogenesis Imperfecta",
                "Gaucher Disease",
                "Hemophilia B",
                "Wilson's Disease",
                "Familial Mediterranean Fever (FMF)",
                "Friedreich's Ataxia",
                "Rett Syndrome",
                "Angelman Syndrome"
            ],
            "mismatches": [],
            "true_referents": [
                "Angelman Syndrome",
                "Cystic Fibrosis (CF)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Familial Hypercholesterolemia (FH)",
                "Familial Mediterranean Fever (FMF)",
                "Fragile X Syndrome (FXS)",
                "Friedreich's Ataxia",
                "Gaucher Disease",
                "Hemophilia A",
                "Hemophilia B",
                "Hereditary Hemochromatosis",
                "Huntington's Disease (HD)",
                "Marfan Syndrome",
                "Myotonic Dystrophy",
                "Neurofibromatosis Type 1 (NF1)",
                "Osteogenesis Imperfecta",
                "Phenylketonuria (PKU)",
                "Polycystic Kidney Disease (PKD)",
                "Retinitis Pigmentosa",
                "Rett Syndrome",
                "Sickle Cell Disease (SCD)",
                "Spinal Muscular Atrophy (SMA)",
                "Thalassemia",
                "Wilson's Disease"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Sickle cell anemia (SCA)",
            "Cystic fibrosis (CF)",
            "Thalassemia",
            "Huntington's disease (HD)",
            "Duchenne muscular dystrophy (DMD)",
            "Fragile X syndrome (FXS)",
            "Neurofibromatosis type 1 (NF1)",
            "Marfan syndrome",
            "Hemochromatosis",
            "Phenylketonuria (PKU)",
            "Polycystic kidney disease (PKD)",
            "Tay-Sachs disease",
            "Spinal muscular atrophy (SMA)",
            "Myotonic dystrophy (DM)",
            "Achondroplasia",
            "Osteogenesis imperfecta (OI)",
            "Familial hypercholesterolemia (FH)",
            "Hereditary spherocytosis",
            "Wilson's disease",
            "Retinitis pigmentosa (RP)",
            "Hemophilia A",
            "Gaucher disease",
            "Fabry disease",
            "Pompe disease"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Sickle cell anemia (SCA)",
                "Cystic fibrosis (CF)",
                "Thalassemia",
                "Huntington's disease (HD)",
                "Duchenne muscular dystrophy (DMD)",
                "Fragile X syndrome (FXS)",
                "Neurofibromatosis type 1 (NF1)",
                "Marfan syndrome",
                "Hemochromatosis",
                "Phenylketonuria (PKU)",
                "Polycystic kidney disease (PKD)",
                "Tay-Sachs disease",
                "Spinal muscular atrophy (SMA)",
                "Myotonic dystrophy (DM)",
                "Achondroplasia",
                "Osteogenesis imperfecta (OI)",
                "Familial hypercholesterolemia (FH)",
                "Hereditary spherocytosis",
                "Wilson's disease",
                "Retinitis pigmentosa (RP)",
                "Hemophilia A",
                "Gaucher disease",
                "Fabry disease",
                "Pompe disease"
            ],
            "mismatches": [],
            "true_referents": [
                "Achondroplasia",
                "Cystic fibrosis (CF)",
                "Duchenne muscular dystrophy (DMD)",
                "Fabry disease",
                "Familial hypercholesterolemia (FH)",
                "Fragile X syndrome (FXS)",
                "Gaucher disease",
                "Hemochromatosis",
                "Hemophilia A",
                "Hereditary spherocytosis",
                "Huntington's disease (HD)",
                "Marfan syndrome",
                "Myotonic dystrophy (DM)",
                "Neurofibromatosis type 1 (NF1)",
                "Osteogenesis imperfecta (OI)",
                "Phenylketonuria (PKU)",
                "Polycystic kidney disease (PKD)",
                "Pompe disease",
                "Retinitis pigmentosa (RP)",
                "Sickle cell anemia (SCA)",
                "Spinal muscular atrophy (SMA)",
                "Tay-Sachs disease",
                "Thalassemia",
                "Wilson's disease"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic Fibrosis (CF)",
            "Sickle Cell Disease (SCD)",
            "Phenylketonuria (PKU)",
            "Hemochromatosis (HH)",
            "Familial Hypercholesterolemia (FH)",
            "Alpha-1 Antitrypsin Deficiency (A1AD)",
            "Galactosemia",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria",
            "Gaucher Disease (GD)",
            "Niemann-Pick Disease (NPD)",
            "Tay-Sachs Disease (TSD)",
            "Wilson's Disease (WD)",
            "Fabry Disease",
            "Marfan Syndrome",
            "Neurofibromatosis Type 1 (NF1)",
            "Huntington's Disease (HD)",
            "Duchenne Muscular Dystrophy (DMD)",
            "Fragile X Syndrome (FXS)",
            "Polycystic Kidney Disease (PKD)",
            "Familial Mediterranean Fever (FMF)",
            "Hereditary Hemochromatosis (HH)",
            "Familial Amyloid Polyneuropathy (FAP)",
            "Alkaptonuria"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic Fibrosis (CF)",
                "Sickle Cell Disease (SCD)",
                "Phenylketonuria (PKU)",
                "Hemochromatosis (HH)",
                "Familial Hypercholesterolemia (FH)",
                "Alpha-1 Antitrypsin Deficiency (A1AD)",
                "Galactosemia",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria",
                "Gaucher Disease (GD)",
                "Niemann-Pick Disease (NPD)",
                "Tay-Sachs Disease (TSD)",
                "Wilson's Disease (WD)",
                "Fabry Disease",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Huntington's Disease (HD)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Fragile X Syndrome (FXS)",
                "Polycystic Kidney Disease (PKD)",
                "Familial Mediterranean Fever (FMF)",
                "Hereditary Hemochromatosis (HH)",
                "Familial Amyloid Polyneuropathy (FAP)",
                "Alkaptonuria"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaptonuria",
                "Alpha-1 Antitrypsin Deficiency (A1AD)",
                "Cystic Fibrosis (CF)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Fabry Disease",
                "Familial Amyloid Polyneuropathy (FAP)",
                "Familial Hypercholesterolemia (FH)",
                "Familial Mediterranean Fever (FMF)",
                "Fragile X Syndrome (FXS)",
                "Galactosemia",
                "Gaucher Disease (GD)",
                "Hemochromatosis (HH)",
                "Hereditary Hemochromatosis (HH)",
                "Homocystinuria",
                "Huntington's Disease (HD)",
                "Maple Syrup Urine Disease (MSUD)",
                "Marfan Syndrome",
                "Neurofibromatosis Type 1 (NF1)",
                "Niemann-Pick Disease (NPD)",
                "Phenylketonuria (PKU)",
                "Polycystic Kidney Disease (PKD)",
                "Sickle Cell Disease (SCD)",
                "Tay-Sachs Disease (TSD)",
                "Wilson's Disease (WD)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Cystic Fibrosis (CF)",
            "Sickle Cell Disease (SCD)",
            "Phenylketonuria (PKU)",
            "Huntington's Disease (HD)",
            "Familial Hypercholesterolemia (FH)",
            "Duchenne Muscular Dystrophy (DMD)",
            "Maple Syrup Urine Disease (MSUD)",
            "Homocystinuria (HCU)",
            "Galactosemia (GAL)",
            "Niemann-Pick Disease (NPD)",
            "Fabry Disease (FD)",
            "Pompe Disease (PD)",
            "Gaucher Disease (GD)",
            "Alkaptonuria (AKU)",
            "Lesch-Nyhan Syndrome (LNS)",
            "Wilson's Disease (WD)",
            "Tyrosinemia Type I (TYR1)",
            "Familial Mediterranean Fever (FMF)",
            "Familial Amyloid Polyneuropathy (FAP)",
            "Familial Hypercalcemia (FHC)",
            "Familial Dysautonomia (FD)",
            "Familial Hyperinsulinism (FHI)",
            "Familial Hypobetalipoproteinemia (FHBL)",
            "Familial Hypercholesterolemia Type III (FH3)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Cystic Fibrosis (CF)",
                "Sickle Cell Disease (SCD)",
                "Phenylketonuria (PKU)",
                "Huntington's Disease (HD)",
                "Familial Hypercholesterolemia (FH)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Maple Syrup Urine Disease (MSUD)",
                "Homocystinuria (HCU)",
                "Galactosemia (GAL)",
                "Niemann-Pick Disease (NPD)",
                "Fabry Disease (FD)",
                "Pompe Disease (PD)",
                "Gaucher Disease (GD)",
                "Alkaptonuria (AKU)",
                "Lesch-Nyhan Syndrome (LNS)",
                "Wilson's Disease (WD)",
                "Tyrosinemia Type I (TYR1)",
                "Familial Mediterranean Fever (FMF)",
                "Familial Amyloid Polyneuropathy (FAP)",
                "Familial Hypercalcemia (FHC)",
                "Familial Dysautonomia (FD)",
                "Familial Hyperinsulinism (FHI)",
                "Familial Hypobetalipoproteinemia (FHBL)",
                "Familial Hypercholesterolemia Type III (FH3)"
            ],
            "mismatches": [],
            "true_referents": [
                "Alkaptonuria (AKU)",
                "Cystic Fibrosis (CF)",
                "Duchenne Muscular Dystrophy (DMD)",
                "Fabry Disease (FD)",
                "Familial Amyloid Polyneuropathy (FAP)",
                "Familial Dysautonomia (FD)",
                "Familial Hypercalcemia (FHC)",
                "Familial Hypercholesterolemia (FH)",
                "Familial Hypercholesterolemia Type III (FH3)",
                "Familial Hyperinsulinism (FHI)",
                "Familial Hypobetalipoproteinemia (FHBL)",
                "Familial Mediterranean Fever (FMF)",
                "Galactosemia (GAL)",
                "Gaucher Disease (GD)",
                "Homocystinuria (HCU)",
                "Huntington's Disease (HD)",
                "Lesch-Nyhan Syndrome (LNS)",
                "Maple Syrup Urine Disease (MSUD)",
                "Niemann-Pick Disease (NPD)",
                "Phenylketonuria (PKU)",
                "Pompe Disease (PD)",
                "Sickle Cell Disease (SCD)",
                "Tyrosinemia Type I (TYR1)",
                "Wilson's Disease (WD)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
            "Familial Hypercholesterolemia (FH)",
            "Hereditary Hemochromatosis (HH)",
            "Cystic Fibrosis (CF)",
            "Huntington's Disease (HD)",
            "Familial Mediterranean Fever (FMF)",
            "Sickle Cell Anemia (SCA)",
            "Thalassemia Major",
            "Tay-Sachs Disease",
            "Gaucher Disease",
            "Crigler-Najjar Syndrome",
            "Gilbert's Syndrome",
            "Porphyria",
            "Fabry Disease",
            "Mucolipidosis IV",
            "Mucopolysaccharidosis Type VII",
            "Neurofibromatosis Type 1 (NF1)",
            "Von Willebrand Disease",
            "Ehlers-Danlos Syndrome Type IV",
            "Marfan Syndrome",
            "Osteogenesis Imperfecta Type I",
            "Erythromelalgia",
            "MEN1 (Multiple Endocrine Neoplasia Type 1)",
            "Familial Adenomatous Polyposis (FAP)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
                "Familial Hypercholesterolemia (FH)",
                "Hereditary Hemochromatosis (HH)",
                "Cystic Fibrosis (CF)",
                "Huntington's Disease (HD)",
                "Familial Mediterranean Fever (FMF)",
                "Sickle Cell Anemia (SCA)",
                "Thalassemia Major",
                "Tay-Sachs Disease",
                "Gaucher Disease",
                "Crigler-Najjar Syndrome",
                "Gilbert's Syndrome",
                "Porphyria",
                "Fabry Disease",
                "Mucolipidosis IV",
                "Mucopolysaccharidosis Type VII",
                "Neurofibromatosis Type 1 (NF1)",
                "Von Willebrand Disease",
                "Ehlers-Danlos Syndrome Type IV",
                "Marfan Syndrome",
                "Osteogenesis Imperfecta Type I",
                "Erythromelalgia",
                "MEN1 (Multiple Endocrine Neoplasia Type 1)",
                "Familial Adenomatous Polyposis (FAP)"
            ],
            "mismatches": [],
            "true_referents": [
                "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
                "Crigler-Najjar Syndrome",
                "Cystic Fibrosis (CF)",
                "Ehlers-Danlos Syndrome Type IV",
                "Erythromelalgia",
                "Fabry Disease",
                "Familial Adenomatous Polyposis (FAP)",
                "Familial Hypercholesterolemia (FH)",
                "Familial Mediterranean Fever (FMF)",
                "Gaucher Disease",
                "Gilbert's Syndrome",
                "Hereditary Hemochromatosis (HH)",
                "Huntington's Disease (HD)",
                "MEN1 (Multiple Endocrine Neoplasia Type 1)",
                "Marfan Syndrome",
                "Mucolipidosis IV",
                "Mucopolysaccharidosis Type VII",
                "Neurofibromatosis Type 1 (NF1)",
                "Osteogenesis Imperfecta Type I",
                "Porphyria",
                "Sickle Cell Anemia (SCA)",
                "Tay-Sachs Disease",
                "Thalassemia Major",
                "Von Willebrand Disease"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": [
            "Lactose Intolerance (LCT)",
            "Cystic Fibrosis (CF)",
            "Phenylketonuria (PKU)",
            "Sickle Cell Anemia (SCA)",
            "Thalassemia",
            "Alpha-1 Antitrypsin Deficiency (A1AD)",
            "Hereditary Hemochromatosis (HFE)",
            "Celiac Disease (CD)",
            "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
            "Familial Hypercholesterolemia (FH)",
            "Marfan Syndrome (MFS)",
            "Ehlers-Danlos Syndrome (EDS)",
            "Osteogenesis Imperfecta (OI)",
            "Mitochondrial Myopathies (MM)",
            "Long-Chain 3-Hydroxyacyl-CoA Dehydrogenase Deficiency (LCHAD)",
            "Medium-Chain Acyl-CoA Dehydrogenase Deficiency (MCAD)",
            "Very Long-Chain Acyl-CoA Dehydrogenase Deficiency (VLCAD)",
            "Glucose-6-Phosphate Dehydrogenase Deficiency (G6PDD)",
            "Alpha-Thalassemia (\u03b1-thal)",
            "beta-Thalassemia (\u03b2-thal)",
            "Sj\u00f6gren-Larsson Syndrome (SLS)",
            "Mevalonate Kinase Deficiency (MKD)",
            "Abetalipoproteinemia (ABL)",
            "Familial Dysautonomia (FD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Lactose Intolerance (LCT)",
                "Cystic Fibrosis (CF)",
                "Phenylketonuria (PKU)",
                "Sickle Cell Anemia (SCA)",
                "Thalassemia",
                "Alpha-1 Antitrypsin Deficiency (A1AD)",
                "Hereditary Hemochromatosis (HFE)",
                "Celiac Disease (CD)",
                "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
                "Familial Hypercholesterolemia (FH)",
                "Marfan Syndrome (MFS)",
                "Ehlers-Danlos Syndrome (EDS)",
                "Osteogenesis Imperfecta (OI)",
                "Mitochondrial Myopathies (MM)",
                "Long-Chain 3-Hydroxyacyl-CoA Dehydrogenase Deficiency (LCHAD)",
                "Medium-Chain Acyl-CoA Dehydrogenase Deficiency (MCAD)",
                "Very Long-Chain Acyl-CoA Dehydrogenase Deficiency (VLCAD)",
                "Glucose-6-Phosphate Dehydrogenase Deficiency (G6PDD)",
                "Alpha-Thalassemia (\u03b1-thal)",
                "beta-Thalassemia (\u03b2-thal)",
                "Sj\u00f6gren-Larsson Syndrome (SLS)",
                "Mevalonate Kinase Deficiency (MKD)",
                "Abetalipoproteinemia (ABL)",
                "Familial Dysautonomia (FD)"
            ],
            "mismatches": [],
            "true_referents": [
                "Abetalipoproteinemia (ABL)",
                "Alpha-1 Antitrypsin Deficiency (A1AD)",
                "Alpha-Thalassemia (\u03b1-thal)",
                "Autosomal Dominant Polycystic Kidney Disease (ADPKD)",
                "Celiac Disease (CD)",
                "Cystic Fibrosis (CF)",
                "Ehlers-Danlos Syndrome (EDS)",
                "Familial Dysautonomia (FD)",
                "Familial Hypercholesterolemia (FH)",
                "Glucose-6-Phosphate Dehydrogenase Deficiency (G6PDD)",
                "Hereditary Hemochromatosis (HFE)",
                "Lactose Intolerance (LCT)",
                "Long-Chain 3-Hydroxyacyl-CoA Dehydrogenase Deficiency (LCHAD)",
                "Marfan Syndrome (MFS)",
                "Medium-Chain Acyl-CoA Dehydrogenase Deficiency (MCAD)",
                "Mevalonate Kinase Deficiency (MKD)",
                "Mitochondrial Myopathies (MM)",
                "Osteogenesis Imperfecta (OI)",
                "Phenylketonuria (PKU)",
                "Sickle Cell Anemia (SCA)",
                "Sj\u00f6gren-Larsson Syndrome (SLS)",
                "Thalassemia",
                "Very Long-Chain Acyl-CoA Dehydrogenase Deficiency (VLCAD)",
                "beta-Thalassemia (\u03b2-thal)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "monogenic disease with high (1>20000) prevalence in human",
        "domain": "medicine",
        "response": "[\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        },\n        {\n            \"Referent\": \"Huntington's disease\",\n            \"Canonical name\": \"Huntington's disease\" (HD)\n        }\n    ]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]