[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Staphylococcus aureus (S. aureus)",
            "Escherichia coli (E. coli)",
            "Klebsiella pneumoniae (K. pneumoniae)",
            "Pseudomonas aeruginosa (P. aeruginosa)",
            "Enterococcus faecalis (E. faecalis)",
            "Bacillus anthracis (B. anthracis)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Bordetella pertussis (B. pertussis)",
            "Neisseria meningitidis (N. meningitidis)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Clostridium difficile (C. difficile)",
            "Campylobacter jejuni (C. jejuni)",
            "Salmonella enterica (S. enterica)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Chlamydia trachomatis (C. trachomatis)",
            "Mycoplasma pneumoniae (M. pneumoniae)",
            "Treponema pallidum (T. pallidum)",
            "Brucella abortus (B. abortus)",
            "Francisella tularensis (F. tularensis)",
            "Yersinia pestis (Y. pestis)",
            "Haemophilus influenzae (H. influenzae)",
            "Vibrio cholerae (V. cholerae)",
            "Borrelia burgdorferi (B. burgdorferi)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bacillus anthracis (B. anthracis)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Streptococcus pyogenes (S. pyogenes)"
            ],
            "mismatches": [
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Staphylococcus aureus (S. aureus)",
                "Escherichia coli (E. coli)",
                "Klebsiella pneumoniae (K. pneumoniae)",
                "Pseudomonas aeruginosa (P. aeruginosa)",
                "Enterococcus faecalis (E. faecalis)",
                "Bordetella pertussis (B. pertussis)",
                "Neisseria meningitidis (N. meningitidis)",
                "Clostridium difficile (C. difficile)",
                "Campylobacter jejuni (C. jejuni)",
                "Salmonella enterica (S. enterica)",
                "Chlamydia trachomatis (C. trachomatis)",
                "Mycoplasma pneumoniae (M. pneumoniae)",
                "Treponema pallidum (T. pallidum)",
                "Brucella abortus (B. abortus)",
                "Francisella tularensis (F. tularensis)",
                "Yersinia pestis (Y. pestis)",
                "Haemophilus influenzae (H. influenzae)",
                "Vibrio cholerae (V. cholerae)",
                "Borrelia burgdorferi (B. burgdorferi)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bovine Tuberculosis",
                "Diphtheria",
                "Enterotoxemia",
                "Female Genital Tuberculosis",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Paratuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Scarlet Fever",
                "Silicotuberculosis",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (pneumococcus)",
            "Staphylococcus aureus (golden staph)",
            "Streptococcus pyogenes (group A strep)",
            "Enterococcus faecalis",
            "Staphylococcus epidermidis",
            "Streptococcus agalactiae (group B strep)",
            "Streptococcus mutans",
            "Streptococcus sanguinis",
            "Staphylococcus saprophyticus",
            "Enterococcus faecium",
            "Streptococcus bovis",
            "Streptococcus dysgalactiae (group C and G strep)",
            "Streptococcus salivarius",
            "Streptococcus anginosus (group F strep)",
            "Streptococcus mitis",
            "Streptococcus oralis",
            "Streptococcus gordonii",
            "Streptococcus dysgalactiae subspecies equisimilis",
            "Streptococcus pyogenes subspecies zooepidemicus",
            "Streptococcus suis",
            "Streptococcus equi",
            "Streptococcus canis",
            "Streptococcus suis subspecies suis",
            "Streptococcus equi subspecies equi"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Streptococcus pneumoniae (pneumococcus)",
                "Staphylococcus aureus (golden staph)",
                "Streptococcus pyogenes (group A strep)",
                "Enterococcus faecalis",
                "Staphylococcus epidermidis",
                "Streptococcus agalactiae (group B strep)",
                "Streptococcus mutans",
                "Streptococcus sanguinis",
                "Staphylococcus saprophyticus",
                "Enterococcus faecium",
                "Streptococcus bovis",
                "Streptococcus dysgalactiae (group C and G strep)",
                "Streptococcus salivarius",
                "Streptococcus anginosus (group F strep)",
                "Streptococcus mitis",
                "Streptococcus oralis",
                "Streptococcus gordonii",
                "Streptococcus dysgalactiae subspecies equisimilis",
                "Streptococcus pyogenes subspecies zooepidemicus",
                "Streptococcus suis",
                "Streptococcus equi",
                "Streptococcus canis",
                "Streptococcus suis subspecies suis",
                "Streptococcus equi subspecies equi"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Impetigo",
                "Latent Tuberculosis",
                "Listeriosis",
                "Oral Tuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Rheumatic Fever",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Swine Erysipelas"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pyogenes (Group A Streptococcus)",
            "Streptococcus pneumoniae (Pneumococcus)",
            "Enterococcus faecalis",
            "Listeria monocytogenes",
            "Clostridium difficile",
            "Bacillus anthracis",
            "Staphylococcus epidermidis",
            "Streptococcus agalactiae (Group B Streptococcus)",
            "Streptococcus mutans",
            "Streptococcus salivarius",
            "Streptococcus sanguinis",
            "Streptococcus mitis",
            "Streptococcus oralis",
            "Streptococcus constellatus",
            "Streptococcus anginosus",
            "Streptococcus intermedius",
            "Streptococcus bovis",
            "Streptococcus equinus",
            "Streptococcus parasanguinis",
            "Streptococcus gordonii",
            "Streptococcus vestibularis",
            "Streptococcus vestibularis",
            "Streptococcus australis",
            "Streptococcus cristatus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Staphylococcus aureus (S. aureus)",
                "Streptococcus pyogenes (Group A Streptococcus)",
                "Streptococcus pneumoniae (Pneumococcus)",
                "Enterococcus faecalis",
                "Listeria monocytogenes",
                "Clostridium difficile",
                "Bacillus anthracis",
                "Staphylococcus epidermidis",
                "Streptococcus agalactiae (Group B Streptococcus)",
                "Streptococcus mutans",
                "Streptococcus salivarius",
                "Streptococcus sanguinis",
                "Streptococcus mitis",
                "Streptococcus oralis",
                "Streptococcus constellatus",
                "Streptococcus anginosus",
                "Streptococcus intermedius",
                "Streptococcus bovis",
                "Streptococcus equinus",
                "Streptococcus parasanguinis",
                "Streptococcus gordonii",
                "Streptococcus vestibularis",
                "Streptococcus australis",
                "Streptococcus cristatus"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Diphtheria",
                "Impetigo",
                "Listeria Meningitis",
                "Listeriosis",
                "Oral Tuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tetanus"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Bacillus anthracis (B. anthracis)",
            "Clostridium perfringens (C. perfringens)",
            "Clostridium botulinum (C. botulinum)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Enterococcus faecalis (E. faecalis)",
            "Enterococcus faecium (E. faecium)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Mycobacterium leprae (M. leprae)",
            "Nocardia asteroides (N. asteroides)",
            "Actinomyces israelii (A. israelii)",
            "Propionibacterium acnes (P. acnes)",
            "Gardnerella vaginalis (G. vaginalis)",
            "Peptostreptococcus anaerobius (P. anaerobius)",
            "Cutibacterium acnes (C. acnes)",
            "Streptococcus agalactiae (S. agalactiae)",
            "Bacillus cereus (B. cereus)",
            "Clostridium tetani (C. tetani)",
            "Staphylococcus epidermidis (S. epidermidis)",
            "Streptococcus mutans (S. mutans)",
            "Lactobacillus acidophilus (L. acidophilus)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Staphylococcus aureus (S. aureus)",
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Streptococcus pyogenes (S. pyogenes)",
                "Bacillus anthracis (B. anthracis)",
                "Clostridium perfringens (C. perfringens)",
                "Clostridium botulinum (C. botulinum)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Enterococcus faecalis (E. faecalis)",
                "Enterococcus faecium (E. faecium)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Mycobacterium leprae (M. leprae)",
                "Nocardia asteroides (N. asteroides)",
                "Actinomyces israelii (A. israelii)",
                "Propionibacterium acnes (P. acnes)",
                "Gardnerella vaginalis (G. vaginalis)",
                "Peptostreptococcus anaerobius (P. anaerobius)",
                "Cutibacterium acnes (C. acnes)",
                "Streptococcus agalactiae (S. agalactiae)",
                "Bacillus cereus (B. cereus)",
                "Clostridium tetani (C. tetani)",
                "Staphylococcus epidermidis (S. epidermidis)",
                "Streptococcus mutans (S. mutans)",
                "Lactobacillus acidophilus (L. acidophilus)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Female Genital Tuberculosis",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycetoma",
                "Nocardia Infections",
                "Paratuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Staphylococcus aureus (S. aureus)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Enterococcus faecalis (E. faecalis)",
            "Bacillus anthracis (B. anthracis)",
            "Clostridium botulinum (C. botulinum)",
            "Clostridium difficile (C. difficile)",
            "Clostridium tetani (C. tetani)",
            "Clostridium perfringens (C. perfringens)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Actinomyces israelii (A. israelii)",
            "Nocardia asteroides (N. asteroides)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Mycobacterium leprae (M. leprae)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Streptococcus agalactiae (S. agalactiae)",
            "Streptococcus mutans (S. mutans)",
            "Streptococcus viridans (S. viridans)",
            "Staphylococcus epidermidis (S. epidermidis)",
            "Staphylococcus saprophyticus (S. saprophyticus)",
            "Bacillus cereus (B. cereus)",
            "Lactobacillus acidophilus (L. acidophilus)",
            "Streptococcus bovis (S. bovis)",
            "Streptococcus suis (S. suis)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Staphylococcus aureus (S. aureus)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Bacillus anthracis (B. anthracis)",
                "Clostridium botulinum (C. botulinum)",
                "Clostridium tetani (C. tetani)",
                "Clostridium perfringens (C. perfringens)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Actinomyces israelii (A. israelii)",
                "Nocardia asteroides (N. asteroides)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Mycobacterium leprae (M. leprae)",
                "Streptococcus pyogenes (S. pyogenes)"
            ],
            "mismatches": [
                "Enterococcus faecalis (E. faecalis)",
                "Clostridium difficile (C. difficile)",
                "Streptococcus agalactiae (S. agalactiae)",
                "Streptococcus mutans (S. mutans)",
                "Streptococcus viridans (S. viridans)",
                "Staphylococcus epidermidis (S. epidermidis)",
                "Staphylococcus saprophyticus (S. saprophyticus)",
                "Bacillus cereus (B. cereus)",
                "Lactobacillus acidophilus (L. acidophilus)",
                "Streptococcus bovis (S. bovis)",
                "Streptococcus suis (S. suis)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Botulism",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycetoma",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (SA)",
            "Streptococcus pneumoniae (SP)",
            "Bacillus anthracis (BA)",
            "Listeria monocytogenes (LM)",
            "Clostridium tetani (CT)",
            "Clostridium botulinum (CB)",
            "Enterococcus faecalis (EF)",
            "Streptococcus pyogenes (SPy)",
            "Staphylococcus epidermidis (SE)",
            "Bacillus cereus (BC)",
            "Mycobacterium tuberculosis (MTB)",
            "Nocardia asteroides (NA)",
            "Corynebacterium diphtheriae (CD)",
            "Peptostreptococcus anaerobius (PA)",
            "Rhodococcus equi (RE)",
            "Actinomyces israelii (AI)",
            "Bacillus subtilis (BS)",
            "Micrococcus luteus (ML)",
            "Enterobacter faecium (EFm)",
            "Staphylococcus saprophyticus (SS)",
            "Streptococcus agalactiae (SA)",
            "Lactobacillus acidophilus (LA)",
            "Propionibacterium acnes (PAc)",
            "Arthrobacter species (AS)",
            "Corynebacterium urealyticum (CU)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nocardia asteroides (NA)",
                "Corynebacterium diphtheriae (CD)",
                "Actinomyces israelii (AI)",
                "Mycobacterium tuberculosis (MTB)"
            ],
            "mismatches": [
                "Staphylococcus aureus (SA)",
                "Streptococcus pneumoniae (SP)",
                "Bacillus anthracis (BA)",
                "Listeria monocytogenes (LM)",
                "Clostridium tetani (CT)",
                "Clostridium botulinum (CB)",
                "Enterococcus faecalis (EF)",
                "Streptococcus pyogenes (SPy)",
                "Staphylococcus epidermidis (SE)",
                "Bacillus cereus (BC)",
                "Peptostreptococcus anaerobius (PA)",
                "Rhodococcus equi (RE)",
                "Bacillus subtilis (BS)",
                "Micrococcus luteus (ML)",
                "Enterobacter faecium (EFm)",
                "Staphylococcus saprophyticus (SS)",
                "Streptococcus agalactiae (SA)",
                "Lactobacillus acidophilus (LA)",
                "Propionibacterium acnes (PAc)",
                "Arthrobacter species (AS)",
                "Corynebacterium urealyticum (CU)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Botulism",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Extensively Drug-Resistant Tuberculosis",
                "Impetigo",
                "Latent Tuberculosis",
                "Listeria Meningitis",
                "Listeriosis",
                "Multibacillary Leprosy",
                "Mycetoma",
                "Nocardia Infections",
                "Paucibacillary Leprosy",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pseudomembranous Enterocolitis",
                "Silicotuberculosis",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tetanus",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 4,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus Aureus Infection (SAI)",
            "Streptococcus Pneumoniae Infection (SPI)",
            "Streptococcus Pyogenes Infection (SPI)",
            "Enterococcus Faecalis Infection (EFI)",
            "Enterococcus Faecium Infection (EFMI)",
            "Listeria Monocytogenes Infection (LMI)",
            "Bacillus Anthracis Infection (BAI)",
            "Bacillus Cereus Infection (BCI)",
            "Staphylococcus Epidermidis Infection (SEI)",
            "Staphylococcus Saprophyticus Infection (SSI)",
            "Streptococcus Agalactiae Infection (SAI)",
            "Streptococcus Mutans Infection (SMI)",
            "Streptococcus Salivarius Infection (SSI)",
            "Streptococcus Sanguinis Infection (SSI)",
            "Streptococcus Sobrinus Infection (SSI)",
            "Streptococcus Thermophilus Infection (STI)",
            "Streptococcus Uberis Infection (SUI)",
            "Streptococcus Equi Infection (SEI)",
            "Streptococcus Canis Infection (SCI)",
            "Streptococcus Gallolyticus Infection (SGI)",
            "Streptococcus Gordonii Infection (SGI)",
            "Streptococcus Mitis Infection (SMI)",
            "Streptococcus Oralis Infection (SOI)",
            "Streptococcus Pneumoniae Infection (SPI)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Staphylococcus Aureus Infection (SAI)",
                "Streptococcus Pneumoniae Infection (SPI)",
                "Streptococcus Pyogenes Infection (SPI)",
                "Enterococcus Faecalis Infection (EFI)",
                "Enterococcus Faecium Infection (EFMI)",
                "Listeria Monocytogenes Infection (LMI)",
                "Bacillus Anthracis Infection (BAI)",
                "Bacillus Cereus Infection (BCI)",
                "Staphylococcus Epidermidis Infection (SEI)",
                "Staphylococcus Saprophyticus Infection (SSI)",
                "Streptococcus Agalactiae Infection (SAI)",
                "Streptococcus Mutans Infection (SMI)",
                "Streptococcus Salivarius Infection (SSI)",
                "Streptococcus Sanguinis Infection (SSI)",
                "Streptococcus Sobrinus Infection (SSI)",
                "Streptococcus Thermophilus Infection (STI)",
                "Streptococcus Uberis Infection (SUI)",
                "Streptococcus Equi Infection (SEI)",
                "Streptococcus Canis Infection (SCI)",
                "Streptococcus Gallolyticus Infection (SGI)",
                "Streptococcus Gordonii Infection (SGI)",
                "Streptococcus Mitis Infection (SMI)",
                "Streptococcus Oralis Infection (SOI)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Cervicofacial Actinomycosis",
                "Furunculosis",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycobacterium avium-intracellulare Infection",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pseudomembranous Enterocolitis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis",
                "Tuberculosis"
            ],
            "TP": 0,
            "FP": 23,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcal infection (Staphylococcus aureus)",
            "Streptococcal infection (Streptococcus pyogenes)",
            "Pneumococcal infection (Streptococcus pneumoniae)",
            "Tuberculosis (Mycobacterium tuberculosis)",
            "Gonorrhea (Neisseria gonorrhoeae)",
            "Meningitis (Neisseria meningitidis)",
            "Lyme disease (Borrelia burgdorferi)",
            "Cholera (Vibrio cholerae)",
            "Diphtheria (Corynebacterium diphtheriae)",
            "Tetanus (Clostridium tetani)",
            "Botulism (Clostridium botulinum)",
            "Pneumonia (Klebsiella pneumoniae)",
            "Legionnaires' disease (Legionella pneumophila)",
            "Salmonellosis (Salmonella enterica)",
            "Shigellosis (Shigella species)",
            "Tularemia (Francisella tularensis)",
            "Anthrax (Bacillus anthracis)",
            "Listeriosis (Listeria monocytogenes)",
            "Gastroenteritis (Campylobacter jejuni)",
            "Peptic ulcer (Helicobacter pylori)",
            "Syphilis (Treponema pallidum)",
            "Chlamydia infection (Chlamydia trachomatis)",
            "Legionella infection (Legionella species)",
            "Leptospirosis (Leptospira interrogans)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcal Infections",
                "Streptococcal Infections",
                "Pneumococcal Infections",
                "Tuberculosis",
                "Diphtheria",
                "Tetanus",
                "Botulism",
                "Anthrax",
                "Listeriosis"
            ],
            "mismatches": [
                "Gonorrhea (Neisseria gonorrhoeae)",
                "Meningitis (Neisseria meningitidis)",
                "Lyme disease (Borrelia burgdorferi)",
                "Cholera (Vibrio cholerae)",
                "Pneumonia (Klebsiella pneumoniae)",
                "Legionnaires' disease (Legionella pneumophila)",
                "Salmonellosis (Salmonella enterica)",
                "Shigellosis (Shigella species)",
                "Tularemia (Francisella tularensis)",
                "Gastroenteritis (Campylobacter jejuni)",
                "Peptic ulcer (Helicobacter pylori)",
                "Syphilis (Treponema pallidum)",
                "Chlamydia infection (Chlamydia trachomatis)",
                "Legionella infection (Legionella species)",
                "Leptospirosis (Leptospira interrogans)"
            ],
            "true_referents": [
                "Anthrax",
                "Bacillaceae Infections",
                "Botulism",
                "Buruli Ulcer",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Female Genital Tuberculosis",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Paratuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Silicotuberculosis",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tetanus",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcal skin infection",
            "Staphylococcal pneumonia",
            "Staphylococcal endocarditis",
            "Streptococcal pharyngitis (Strep throat)",
            "Streptococcal pneumonia",
            "Streptococcal meningitis",
            "Streptococcal cellulitis",
            "Streptococcal toxic shock syndrome",
            "Group B streptococcal neonatal sepsis",
            "Enterococcal endocarditis",
            "Listeriosis",
            "Diphtheria",
            "Tetanus",
            "Botulism",
            "Gas gangrene (Clostridial myonecrosis)",
            "Anthrax (Bacillus anthracis infection)",
            "Actinomycosis",
            "Peptostreptococcal abscess",
            "Clostridioides difficile infection (C. difficile infection)",
            "Bacillus cereus food poisoning",
            "Corynebacterial diphtheria",
            "Rhodococcal pneumonia (Rhodococcus equi infection)",
            "Methicillin-resistant Staphylococcus aureus infection (MRSA)",
            "Vancomycin-resistant Enterococci infection (VRE)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcal pneumonia",
                "Listeriosis",
                "Diphtheria",
                "Tetanus",
                "Botulism",
                "Gas gangrene (Clostridial myonecrosis)",
                "Anthrax (Bacillus anthracis infection)",
                "Actinomycosis"
            ],
            "mismatches": [
                "Staphylococcal skin infection",
                "Staphylococcal endocarditis",
                "Streptococcal pharyngitis (Strep throat)",
                "Streptococcal pneumonia",
                "Streptococcal meningitis",
                "Streptococcal cellulitis",
                "Streptococcal toxic shock syndrome",
                "Group B streptococcal neonatal sepsis",
                "Enterococcal endocarditis",
                "Peptostreptococcal abscess",
                "Clostridioides difficile infection (C. difficile infection)",
                "Bacillus cereus food poisoning",
                "Corynebacterial diphtheria",
                "Rhodococcal pneumonia (Rhodococcus equi infection)",
                "Methicillin-resistant Staphylococcus aureus infection (MRSA)",
                "Vancomycin-resistant Enterococci infection (VRE)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bifidobacteriales Infections",
                "Botulism",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Gas Gangrene",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycetoma",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis",
                "Tetanus"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcal pharyngitis (Strep throat)",
            "Pneumococcal pneumonia",
            "Staphylococcus aureus infection",
            "Listeriosis",
            "Clostridium difficile infection (C. diff)",
            "Bacillus anthracis infection (Anthrax)",
            "Mycobacterium tuberculosis infection (TB)",
            "Corynebacterium diphtheriae infection (Diphtheria)",
            "Group B Streptococcus infection",
            "Enterococcus faecalis infection",
            "Actinomyces infection",
            "Propionibacterium acnes infection",
            "Lactobacillus infection",
            "Bifidobacterium infection",
            "Nocardia infection",
            "Rhodococcus infection",
            "Erysipelothrix rhusiopathiae infection",
            "Gardnerella vaginalis infection",
            "Peptostreptococcus infection",
            "Clostridium perfringens infection",
            "Bacillus cereus infection",
            "Streptococcus pneumoniae infection",
            "Streptococcus pyogenes infection",
            "Mycobacterium leprae infection (Leprosy)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Pneumococcal pneumonia",
                "Listeriosis",
                "Clostridium difficile infection (C. diff)",
                "Bacillus anthracis infection (Anthrax)",
                "Corynebacterium diphtheriae infection (Diphtheria)",
                "Nocardia infection",
                "Erysipelothrix rhusiopathiae infection",
                "Mycobacterium leprae infection (Leprosy)"
            ],
            "mismatches": [
                "Streptococcal pharyngitis (Strep throat)",
                "Staphylococcus aureus infection",
                "Mycobacterium tuberculosis infection (TB)",
                "Group B Streptococcus infection",
                "Enterococcus faecalis infection",
                "Actinomyces infection",
                "Propionibacterium acnes infection",
                "Lactobacillus infection",
                "Bifidobacterium infection",
                "Rhodococcus infection",
                "Gardnerella vaginalis infection",
                "Peptostreptococcus infection",
                "Clostridium perfringens infection",
                "Bacillus cereus infection",
                "Streptococcus pneumoniae infection",
                "Streptococcus pyogenes infection"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Erysipelas",
                "Erysipelothrix Infections",
                "Female Genital Tuberculosis",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycobacterium Infections",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pseudomembranous Enterocolitis",
                "Pulmonary Tuberculosis",
                "Rheumatic Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tuberculoid Leprosy",
                "Tuberculosis",
                "Whipple Disease"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus infection (S. aureus)",
            "Streptococcus pyogenes infection (S. pyogenes)",
            "Listeria monocytogenes infection",
            "Bacillus anthracis infection",
            "Clostridium botulinum infection",
            "Clostridium tetani infection",
            "Clostridium difficile infection (C. diff)",
            "Corynebacterium diphtheriae infection",
            "Mycobacterium tuberculosis infection (TB)",
            "Mycobacterium leprae infection",
            "Nocardia asteroides infection",
            "Actinomyces israelii infection",
            "Propionibacterium acnes infection",
            "Enterococcus faecalis infection (E. faecalis)",
            "Streptococcus pneumoniae infection (S. pneumoniae)",
            "Group B Streptococcus infection (GBS)",
            "Peptostreptococcus infection",
            "Gardnerella vaginalis infection",
            "Bacillus cereus infection",
            "Lactobacillus acidophilus infection",
            "Actinomyces bovis infection",
            "Mycoplasma pneumoniae infection",
            "Ureaplasma urealyticum infection",
            "Erysipelothrix rhusiopathiae infection"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Listeriosis",
                "Mycobacterium Infections",
                "Nocardia Infections",
                "Staphylococcal Infections",
                "Streptococcal Infections",
                "Erysipelothrix Infections"
            ],
            "mismatches": [
                "Staphylococcus aureus infection (S. aureus)",
                "Streptococcus pyogenes infection (S. pyogenes)",
                "Listeria monocytogenes infection",
                "Bacillus anthracis infection",
                "Clostridium botulinum infection",
                "Clostridium tetani infection",
                "Clostridium difficile infection (C. diff)",
                "Corynebacterium diphtheriae infection",
                "Mycobacterium tuberculosis infection (TB)",
                "Mycobacterium leprae infection",
                "Actinomyces israelii infection",
                "Propionibacterium acnes infection",
                "Enterococcus faecalis infection (E. faecalis)",
                "Streptococcus pneumoniae infection (S. pneumoniae)",
                "Group B Streptococcus infection (GBS)",
                "Peptostreptococcus infection",
                "Gardnerella vaginalis infection",
                "Bacillus cereus infection",
                "Lactobacillus acidophilus infection",
                "Actinomyces bovis infection",
                "Mycoplasma pneumoniae infection",
                "Ureaplasma urealyticum infection"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Buruli Ulcer",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Erysipelas",
                "Erysipelothrix Infections",
                "Female Genital Tuberculosis",
                "Furunculosis",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycobacterium Infections",
                "Mycobacterium avium-intracellulare Infection",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculosis",
                "Urogenital Tuberculosis",
                "Whipple Disease"
            ],
            "TP": 8,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcal infections (e.g. Staphylococcus aureus)",
            "Streptococcal infections (e.g. Streptococcus pyogenes)",
            "Enterococcal infections (e.g. Enterococcus faecalis)",
            "Listeriosis (Listeria monocytogenes)",
            "Anthrax (Bacillus anthracis)",
            "Actinomycosis (Actinomyces spp.)",
            "Nocardiosis (Nocardia spp.)",
            "Clostridial infections (e.g. Clostridium difficile)",
            "Tuberculosis (Mycobacterium tuberculosis)",
            "Leprosy (Mycobacterium leprae)",
            "Diphtheria (Corynebacterium diphtheriae)",
            "Erysipeloid (Erysipelothrix rhusiopathiae)",
            "Tetanus (Clostridium tetani)",
            "Botulism (Clostridium botulinum)",
            "Gas gangrene (Clostridium perfringens)",
            "Pneumococcal infections (Streptococcus pneumoniae)",
            "Meningococcal infections (Neisseria meningitidis)",
            "Gonorrhea (Neisseria gonorrhoeae)",
            "Syphilis (Treponema pallidum)",
            "Leptospirosis (Leptospira spp.)",
            "Borreliosis (Borrelia spp.)",
            "Brucellosis (Brucella spp.)",
            "Chlamydial infections (Chlamydia spp.)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcal infections (e.g. Staphylococcus aureus)",
                "Streptococcal infections (e.g. Streptococcus pyogenes)",
                "Listeriosis (Listeria monocytogenes)",
                "Anthrax (Bacillus anthracis)",
                "Actinomycosis (Actinomyces spp.)",
                "Tuberculosis (Mycobacterium tuberculosis)",
                "Leprosy (Mycobacterium leprae)",
                "Diphtheria (Corynebacterium diphtheriae)",
                "Erysipeloid (Erysipelothrix rhusiopathiae)",
                "Tetanus (Clostridium tetani)",
                "Botulism (Clostridium botulinum)",
                "Gas gangrene (Clostridium perfringens)",
                "Pneumococcal infections (Streptococcus pneumoniae)"
            ],
            "mismatches": [
                "Enterococcal infections (e.g. Enterococcus faecalis)",
                "Nocardiosis (Nocardia spp.)",
                "Clostridial infections (e.g. Clostridium difficile)",
                "Meningococcal infections (Neisseria meningitidis)",
                "Gonorrhea (Neisseria gonorrhoeae)",
                "Syphilis (Treponema pallidum)",
                "Leptospirosis (Leptospira spp.)",
                "Borreliosis (Borrelia spp.)",
                "Brucellosis (Brucella spp.)",
                "Chlamydial infections (Chlamydia spp.)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Botulism",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Erysipelas",
                "Erysipeloid",
                "Female Genital Tuberculosis",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Nocardia Infections",
                "Paratuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 13,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Escherichia coli (E. coli)",
            "Klebsiella pneumoniae (K. pneumoniae)",
            "Pseudomonas aeruginosa (P. aeruginosa)",
            "Clostridium difficile (C. diff)",
            "Bacillus anthracis (B. anthracis)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Haemophilus influenzae (H. influenzae)",
            "Neisseria gonorrhoeae (N. gonorrhoeae)",
            "Salmonella enterica (S. enterica)",
            "Shigella flexneri (S. flexneri)",
            "Vibrio cholerae (V. cholerae)",
            "Borrelia burgdorferi (B. burgdorferi)",
            "Treponema pallidum (T. pallidum)",
            "Chlamydia trachomatis (C. trachomatis)",
            "Legionella pneumophila (L. pneumophila)",
            "Streptococcus agalactiae (S. agalactiae)",
            "Enterococcus faecalis (E. faecalis)",
            "Acinetobacter baumannii (A. baumannii)",
            "Campylobacter jejuni (C. jejuni)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Clostridium difficile (C. diff)",
                "Bacillus anthracis (B. anthracis)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Listeria monocytogenes (L. monocytogenes)"
            ],
            "mismatches": [
                "Staphylococcus aureus (S. aureus)",
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Escherichia coli (E. coli)",
                "Klebsiella pneumoniae (K. pneumoniae)",
                "Pseudomonas aeruginosa (P. aeruginosa)",
                "Streptococcus pyogenes (S. pyogenes)",
                "Haemophilus influenzae (H. influenzae)",
                "Neisseria gonorrhoeae (N. gonorrhoeae)",
                "Salmonella enterica (S. enterica)",
                "Shigella flexneri (S. flexneri)",
                "Vibrio cholerae (V. cholerae)",
                "Borrelia burgdorferi (B. burgdorferi)",
                "Treponema pallidum (T. pallidum)",
                "Chlamydia trachomatis (C. trachomatis)",
                "Legionella pneumophila (L. pneumophila)",
                "Streptococcus agalactiae (S. agalactiae)",
                "Enterococcus faecalis (E. faecalis)",
                "Acinetobacter baumannii (A. baumannii)",
                "Campylobacter jejuni (C. jejuni)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bovine Tuberculosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Female Genital Tuberculosis",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Paratuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Scarlet Fever",
                "Silicotuberculosis",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Streptococcal Infections",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 5,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Meningitis (Gram-positive meningitis)",
            "Endocarditis (Gram-positive endocarditis)",
            "Pneumonia (Gram-positive pneumonia)",
            "Sepsis (Gram-positive sepsis)",
            "Urinary Tract Infection (UTI) (Gram-positive UTI)",
            "Skin Infection (Gram-positive skin infection)",
            "Bone and Joint Infection (Gram-positive bone and joint infection)",
            "Osteomyelitis (Gram-positive osteomyelitis)",
            "Arthritis (Gram-positive arthritis)",
            "Bacteremia (Gram-positive bacteremia)",
            "Endophthalmitis (Gram-positive endophthalmitis)",
            "Keratitis (Gram-positive keratitis)",
            "Conjunctivitis (Gram-positive conjunctivitis)",
            "Otitis Media (Gram-positive otitis media)",
            "Otitis Externa (Gram-positive otitis externa)",
            "Tonsillitis (Gram-positive tonsillitis)",
            "Pharyngitis (Gram-positive pharyngitis)",
            "Laryngitis (Gram-positive laryngitis)",
            "Bronchitis (Gram-positive bronchitis)",
            "Pleurisy (Gram-positive pleurisy)",
            "Pericarditis (Gram-positive pericarditis)",
            "Abscess (Gram-positive abscess)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Meningitis (Gram-positive meningitis)",
                "Endocarditis (Gram-positive endocarditis)",
                "Pneumonia (Gram-positive pneumonia)",
                "Sepsis (Gram-positive sepsis)",
                "Urinary Tract Infection (UTI) (Gram-positive UTI)",
                "Skin Infection (Gram-positive skin infection)",
                "Bone and Joint Infection (Gram-positive bone and joint infection)",
                "Osteomyelitis (Gram-positive osteomyelitis)",
                "Arthritis (Gram-positive arthritis)",
                "Bacteremia (Gram-positive bacteremia)",
                "Endophthalmitis (Gram-positive endophthalmitis)",
                "Keratitis (Gram-positive keratitis)",
                "Conjunctivitis (Gram-positive conjunctivitis)",
                "Otitis Media (Gram-positive otitis media)",
                "Otitis Externa (Gram-positive otitis externa)",
                "Tonsillitis (Gram-positive tonsillitis)",
                "Pharyngitis (Gram-positive pharyngitis)",
                "Laryngitis (Gram-positive laryngitis)",
                "Bronchitis (Gram-positive bronchitis)",
                "Pleurisy (Gram-positive pleurisy)",
                "Pericarditis (Gram-positive pericarditis)",
                "Abscess (Gram-positive abscess)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Diphtheria",
                "Enterotoxemia",
                "Impetigo",
                "Laryngeal Tuberculosis",
                "Listeria Meningitis",
                "Listeriosis",
                "Osteoarticular Tuberculosis",
                "Pleural Tuberculosis",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Rheumatic Fever",
                "Rheumatic Heart Disease",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis",
                "Tuberculosis",
                "Tuberculous Pericarditis",
                "Urogenital Tuberculosis"
            ],
            "TP": 0,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Anthrax (Bacillus anthracis)",
            "Listeria (Listeria monocytogenes)",
            "Streptococcal pneumonia (Streptococcus pneumoniae)",
            "Tuberculosis (Mycobacterium tuberculosis)",
            "Diphtheria (Corynebacterium diphtheriae)",
            "Legionnaires' disease (Legionella pneumophila)",
            "Bacillary angiomatosis (Bartonella henselae)",
            "Cat-scratch disease (Bartonella henselae)",
            "Carcinoid tumor (Bacteroides fragilis)",
            "Cholera (Vibrio cholerae)",
            "Erysipelas (Streptococcus pyogenes)",
            "Food poisoning (Bacillus cereus)",
            "Glanders (Burkholderia mallei)",
            "Melioidosis (Burkholderia pseudomallei)",
            "Meningitis (Streptococcus pneumoniae)",
            "Nocardiosis (Nocardia asteroides)",
            "Ophthalmic infection (Corynebacterium macginleyi)",
            "Pneumococcal infection (Streptococcus pneumoniae)",
            "Pseudomonas aeruginosa infection (Pseudomonas aeruginosa)",
            "Q fever (Coxiella burnetii)",
            "Rat-bite fever (Streptobacillus moniliformis)",
            "Scarlet fever (Streptococcus pyogenes)",
            "Trachoma (Chlamydia trachomatis)",
            "Tularemia (Francisella tularensis)",
            "Urinary tract infection (Staphylococcus saprophyticus)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Anthrax",
                "Diphtheria",
                "Erysipelas",
                "Listeriosis",
                "Nocardia Infections",
                "Scarlet Fever",
                "Streptococcal Infections",
                "Tuberculosis"
            ],
            "mismatches": [
                "Streptococcal pneumonia (Streptococcus pneumoniae)",
                "Legionnaires' disease (Legionella pneumophila)",
                "Bacillary angiomatosis (Bartonella henselae)",
                "Cat-scratch disease (Bartonella henselae)",
                "Carcinoid tumor (Bacteroides fragilis)",
                "Cholera (Vibrio cholerae)",
                "Food poisoning (Bacillus cereus)",
                "Glanders (Burkholderia mallei)",
                "Melioidosis (Burkholderia pseudomallei)",
                "Meningitis (Streptococcus pneumoniae)",
                "Ophthalmic infection (Corynebacterium macginleyi)",
                "Pneumococcal infection (Streptococcus pneumoniae)",
                "Pseudomonas aeruginosa infection (Pseudomonas aeruginosa)",
                "Q fever (Coxiella burnetii)",
                "Rat-bite fever (Streptobacillus moniliformis)",
                "Trachoma (Chlamydia trachomatis)",
                "Tularemia (Francisella tularensis)",
                "Urinary tract infection (Staphylococcus saprophyticus)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Botulism",
                "Bovine Tuberculosis",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Erysipelas",
                "Erysipeloid",
                "Female Genital Tuberculosis",
                "Furunculosis",
                "Gastrointestinal Tuberculosis",
                "Latent Tuberculosis",
                "Listeria Meningitis",
                "Listeriosis",
                "Male Genital Tuberculosis",
                "Mycobacterium avium-intracellulare Infection",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tetanus",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 8,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Chlamydia trachomatis"
            },
            {
                "Referent": "Haemophilus influenzae"
            },
            {
                "Referent": "Borrelia burgdorferi"
            },
            {
                "Referent": "Treponema pallidum"
            },
            {
                "Referent": "Mycobacterium tuberculosis"
            },
            {
                "Referent": "Mycobacterium leprae"
            },
            {
                "Referent": "Salmonella enterica"
            },
            {
                "Referent": "Neisseria gonorrhoeae"
            },
            {
                "Referent": "Brucella melitensis"
            },
            {
                "Referent": "Francisella tularensis"
            },
            {
                "Referent": "Listeria monocytogenes"
            },
            {
                "Referent": "Yersinia pestis"
            },
            {
                "Referent": "Campylobacter jejuni"
            },
            {
                "Referent": "Shigella dysenteriae"
            },
            {
                "Referent": "Pseudomonas aeruginosa"
            },
            {
                "Referent": "Pasteurella multocida"
            },
            {
                "Referent": "Bordetella pertussis"
            },
            {
                "Referent": "Chlamydophila pneumoniae"
            },
            {
                "Referent": "Legionella pneumophila"
            },
            {
                "Referent": "Mycoplasma pneumoniae"
            },
            {
                "Referent": "Ureaplasma urealyticum"
            },
            {
                "Referent": "Gardnerella vaginalis"
            },
            {
                "Referent": "Atopobium vaginae"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Mycobacterium tuberculosis",
                "Mycobacterium leprae",
                "Listeria monocytogenes"
            ],
            "mismatches": [
                "Chlamydia trachomatis",
                "Haemophilus influenzae",
                "Borrelia burgdorferi",
                "Treponema pallidum",
                "Salmonella enterica",
                "Neisseria gonorrhoeae",
                "Brucella melitensis",
                "Francisella tularensis",
                "Yersinia pestis",
                "Campylobacter jejuni",
                "Shigella dysenteriae",
                "Pseudomonas aeruginosa",
                "Pasteurella multocida",
                "Bordetella pertussis",
                "Chlamydophila pneumoniae",
                "Legionella pneumophila",
                "Mycoplasma pneumoniae",
                "Ureaplasma urealyticum",
                "Gardnerella vaginalis",
                "Atopobium vaginae"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bovine Tuberculosis",
                "Diphtheria",
                "Enterotoxemia",
                "Extensively Drug-Resistant Tuberculosis",
                "Female Genital Tuberculosis",
                "Furunculosis",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Male Genital Tuberculosis",
                "Mycetoma",
                "Pulmonary Tuberculosis",
                "Scarlet Fever",
                "Silicotuberculosis",
                "Swine Erysipelas",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis",
                "Urogenital Tuberculosis"
            ],
            "TP": 3,
            "FP": 20,
            "FN": 0
        }
    }
]