[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Enterococcus faecalis (E. faecalis)",
            "Bacillus anthracis (B. anthracis)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Clostridium difficile (C. difficile)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Enterococcus faecium (E. faecium)",
            "Staphylococcus epidermidis (S. epidermidis)",
            "Streptococcus agalactiae (S. agalactiae)",
            "Bacillus cereus (B. cereus)",
            "Streptococcus mutans (S. mutans)",
            "Streptococcus suis (S. suis)",
            "Enterococcus faecalis (E. faecalis)",
            "Streptococcus mitis (S. mitis)",
            "Streptococcus anginosus (S. anginosus)",
            "Staphylococcus saprophyticus (S. saprophyticus)",
            "Streptococcus bovis (S. bovis)",
            "Streptococcus equi (S. equi)",
            "Streptococcus salivarius (S. salivarius)",
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Streptococcus sanguinis (S. sanguinis)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bacillus anthracis (B. anthracis)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Streptococcus pneumoniae (S. pneumoniae)"
            ],
            "mismatches": [
                "Staphylococcus aureus (S. aureus)",
                "Enterococcus faecalis (E. faecalis)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Clostridium difficile (C. difficile)",
                "Streptococcus pyogenes (S. pyogenes)",
                "Enterococcus faecium (E. faecium)",
                "Staphylococcus epidermidis (S. epidermidis)",
                "Streptococcus agalactiae (S. agalactiae)",
                "Bacillus cereus (B. cereus)",
                "Streptococcus mutans (S. mutans)",
                "Streptococcus suis (S. suis)",
                "Streptococcus mitis (S. mitis)",
                "Streptococcus anginosus (S. anginosus)",
                "Staphylococcus saprophyticus (S. saprophyticus)",
                "Streptococcus bovis (S. bovis)",
                "Streptococcus equi (S. equi)",
                "Streptococcus salivarius (S. salivarius)",
                "Streptococcus sanguinis (S. sanguinis)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Impetigo",
                "Listeria Meningitis",
                "Listeriosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 4,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus infection (S. aureus)",
            "Streptococcus pyogenes infection (S. pyogenes)",
            "Enterococcus faecalis infection (E. faecalis)",
            "Listeria monocytogenes infection (L. monocytogenes)",
            "Streptococcus pneumoniae infection (S. pneumoniae)",
            "Clostridium difficile infection (C. difficile)",
            "Clostridium perfringens infection (C. perfringens)",
            "Bacillus anthracis infection (B. anthracis)",
            "Corynebacterium diphtheriae infection (C. diphtheriae)",
            "Streptococcus agalactiae infection (Group B Streptococcus)",
            "Streptococcus mutans infection (S. mutans)",
            "Streptococcus suis infection (S. suis)",
            "Streptococcus bovis infection (S. bovis)",
            "Streptococcus dysgalactiae infection (S. dysgalactiae)",
            "Staphylococcus epidermidis infection (S. epidermidis)",
            "Staphylococcus saprophyticus infection (S. saprophyticus)",
            "Enterococcus faecium infection (E. faecium)",
            "Listeria ivanovii infection (L. ivanovii)",
            "Listeria innocua infection (L. innocua)",
            "Clostridium botulinum infection (C. botulinum)",
            "Clostridium tetani infection (C. tetani)",
            "Bacillus cereus infection (B. cereus)",
            "Corynebacterium jeikeium infection (C. jeikeium)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcal Infections",
                "Streptococcal Infections",
                "Listeriosis",
                "Pneumococcal Infections",
                "Clostridium Infections",
                "Anthrax",
                "Diphtheria",
                "Tetanus",
                "Botulism"
            ],
            "mismatches": [
                "Enterococcus faecalis infection (E. faecalis)",
                "Streptococcus agalactiae infection (Group B Streptococcus)",
                "Streptococcus mutans infection (S. mutans)",
                "Streptococcus suis infection (S. suis)",
                "Streptococcus bovis infection (S. bovis)",
                "Streptococcus dysgalactiae infection (S. dysgalactiae)",
                "Staphylococcus epidermidis infection (S. epidermidis)",
                "Staphylococcus saprophyticus infection (S. saprophyticus)",
                "Enterococcus faecium infection (E. faecium)",
                "Listeria ivanovii infection (L. ivanovii)",
                "Listeria innocua infection (L. innocua)",
                "Bacillus cereus infection (B. cereus)",
                "Corynebacterium jeikeium infection (C. jeikeium)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Bovine Tuberculosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Gas Gangrene",
                "Listeria Meningitis",
                "Listeriosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tetanus",
                "Tuberculosis"
            ],
            "TP": 9,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus infection (S. aureus)",
            "Streptococcus pneumoniae infection (S. pneumoniae)",
            "Enterococcus faecalis infection (E. faecalis)",
            "Clostridioides difficile infection (C. difficile)",
            "Listeria monocytogenes infection (L. monocytogenes)",
            "Bacillus anthracis infection (B. anthracis)",
            "Staphylococcus epidermidis infection (S. epidermidis)",
            "Streptococcus pyogenes infection (S. pyogenes)",
            "Streptococcus agalactiae infection (S. agalactiae)",
            "Streptococcus mutans infection (S. mutans)",
            "Staphylococcus saprophyticus infection (S. saprophyticus)",
            "Streptococcus anginosus infection (S. anginosus)",
            "Clostridium perfringens infection (C. perfringens)",
            "Streptococcus mitis infection (S. mitis)",
            "Streptococcus salivarius infection (S. salivarius)",
            "Streptococcus sanguinis infection (S. sanguinis)",
            "Streptococcus vestibularis infection (S. vestibularis)",
            "Streptococcus gordonii infection (S. gordonii)",
            "Streptococcus constellatus infection (S. constellatus)",
            "Streptococcus intermedius infection (S. intermedius)",
            "Streptococcus parasanguinis infection (S. parasanguinis)",
            "Streptococcus oralis infection (S. oralis)",
            "Streptococcus uberis infection (S. uberis)",
            "Streptococcus suis infection (S. suis)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcus aureus infection (S. aureus)",
                "Streptococcus pneumoniae infection (S. pneumoniae)",
                "Clostridium perfringens infection (C. perfringens)",
                "Listeria monocytogenes infection (L. monocytogenes)",
                "Bacillus anthracis infection (B. anthracis)",
                "Streptococcus pyogenes infection (S. pyogenes)"
            ],
            "mismatches": [
                "Enterococcus faecalis infection (E. faecalis)",
                "Clostridioides difficile infection (C. difficile)",
                "Staphylococcus epidermidis infection (S. epidermidis)",
                "Streptococcus agalactiae infection (S. agalactiae)",
                "Streptococcus mutans infection (S. mutans)",
                "Staphylococcus saprophyticus infection (S. saprophyticus)",
                "Streptococcus anginosus infection (S. anginosus)",
                "Streptococcus mitis infection (S. mitis)",
                "Streptococcus salivarius infection (S. salivarius)",
                "Streptococcus sanguinis infection (S. sanguinis)",
                "Streptococcus vestibularis infection (S. vestibularis)",
                "Streptococcus gordonii infection (S. gordonii)",
                "Streptococcus constellatus infection (S. constellatus)",
                "Streptococcus intermedius infection (S. intermedius)",
                "Streptococcus parasanguinis infection (S. parasanguinis)",
                "Streptococcus oralis infection (S. oralis)",
                "Streptococcus uberis infection (S. uberis)",
                "Streptococcus suis infection (S. suis)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Diphtheria",
                "Furunculosis",
                "Gas Gangrene",
                "Listeria Meningitis",
                "Listeriosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis",
                "Tuberculosis"
            ],
            "TP": 6,
            "FP": 18,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus skin infection (S. aureus)",
            "Streptococcal pharyngitis ('Strep throat', S. pyogenes)",
            "Methicillin-resistant Staphylococcus aureus (MRSA) pneumonia",
            "Clostridium difficile infection (C. diff)",
            "Bacillus anthracis cutaneous anthrax",
            "Listeria monocytogenes meningitis",
            "Enterococcus faecalis urinary tract infection (E. faecalis)",
            "Streptococcus pneumoniae pneumonia",
            "Clostridium perfringens gas gangrene",
            "Staphylococcus epidermidis prosthetic joint infection (S. epidermidis)",
            "Bacillus cereus food poisoning",
            "Corynebacterium diphtheriae diphtheria",
            "Streptococcus agalactiae neonatal sepsis (S. agalactiae)",
            "Clostridium tetani tetanus",
            "Staphylococcus saprophyticus UTI (S. saprophyticus)",
            "Actinomyces israelii actinomycosis",
            "Listeria monocytogenes bacteremia",
            "Streptococcus bovis endocarditis (S. bovis)",
            "Bacillus subtilis bloodstream infection",
            "Clostridium botulinum botulism",
            "Streptococcus mutans dental caries",
            "Geobacillus stearothermophilus food spoilage",
            "Paenibacillus larvae American foulbrood (bee disease)",
            "Turicella novaehollandiae pink disease (poultry)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcus aureus skin infection (S. aureus)",
                "Streptococcal pharyngitis ('Strep throat', S. pyogenes)",
                "Methicillin-resistant Staphylococcus aureus (MRSA) pneumonia",
                "Clostridium difficile infection (C. diff)",
                "Bacillus anthracis cutaneous anthrax",
                "Listeria monocytogenes meningitis",
                "Streptococcus pneumoniae pneumonia",
                "Clostridium perfringens gas gangrene",
                "Corynebacterium diphtheriae diphtheria",
                "Clostridium tetani tetanus",
                "Actinomyces israelii actinomycosis",
                "Clostridium botulinum botulism"
            ],
            "mismatches": [
                "Enterococcus faecalis urinary tract infection (E. faecalis)",
                "Staphylococcus epidermidis prosthetic joint infection (S. epidermidis)",
                "Bacillus cereus food poisoning",
                "Streptococcus agalactiae neonatal sepsis (S. agalactiae)",
                "Staphylococcus saprophyticus UTI (S. saprophyticus)",
                "Listeria monocytogenes bacteremia",
                "Streptococcus bovis endocarditis (S. bovis)",
                "Bacillus subtilis bloodstream infection",
                "Streptococcus mutans dental caries",
                "Geobacillus stearothermophilus food spoilage",
                "Paenibacillus larvae American foulbrood (bee disease)",
                "Turicella novaehollandiae pink disease (poultry)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Avian Tuberculosis",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Cutaneous Tuberculosis",
                "Diphtheria",
                "Enterotoxemia",
                "Furunculosis",
                "Gas Gangrene",
                "Impetigo",
                "Listeria Meningitis",
                "Listeriosis",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Silicotuberculosis",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis",
                "Tetanus"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Enterococcus faecalis (E. faecalis)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Bacillus anthracis (B. anthracis)",
            "Clostridium difficile (C. difficile)",
            "Clostridium botulinum (C. botulinum)",
            "Clostridium tetani (C. tetani)",
            "Clostridium perfringens (C. perfringens)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Mycobacterium leprae (M. leprae)",
            "Actinomyces israelii (A. israelii)",
            "Nocardia asteroides (N. asteroides)",
            "Propionibacterium acnes (P. acnes)",
            "Streptococcus agalactiae (S. agalactiae)",
            "Streptococcus mutans (S. mutans)",
            "Streptococcus viridans (S. viridans)",
            "Streptococcus bovis (S. bovis)",
            "Streptococcus sanguinis (S. sanguinis)",
            "Streptococcus mitis (S. mitis)",
            "Streptococcus salivarius (S. salivarius)",
            "Streptococcus anginosus (S. anginosus)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Staphylococcus aureus (S. aureus)",
                "Streptococcus pyogenes (S. pyogenes)",
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Enterococcus faecalis (E. faecalis)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Bacillus anthracis (B. anthracis)",
                "Clostridium difficile (C. difficile)",
                "Clostridium botulinum (C. botulinum)",
                "Clostridium tetani (C. tetani)",
                "Clostridium perfringens (C. perfringens)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Mycobacterium leprae (M. leprae)",
                "Actinomyces israelii (A. israelii)",
                "Nocardia asteroides (N. asteroides)",
                "Propionibacterium acnes (P. acnes)",
                "Streptococcus agalactiae (S. agalactiae)",
                "Streptococcus mutans (S. mutans)",
                "Streptococcus viridans (S. viridans)",
                "Streptococcus bovis (S. bovis)",
                "Streptococcus sanguinis (S. sanguinis)",
                "Streptococcus mitis (S. mitis)",
                "Streptococcus salivarius (S. salivarius)",
                "Streptococcus anginosus (S. anginosus)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Botulism",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycetoma",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Streptococcal Infections",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (MRSA)",
            "Streptococcus pneumoniae (SP)",
            "Streptococcus pyogenes (Group A Strep)",
            "Enterococcus faecalis (E. faecalis)",
            "Bacillus anthracis (Anthrax)",
            "Clostridium perfringens (Gas Gangrene)",
            "Listeria monocytogenes (Listeriosis)",
            "Streptococcus agalactiae (Group B Strep)",
            "Clostridium tetani (Tetanus)",
            "Staphylococcus epidermidis (Coagulase-negative Staph)",
            "Corynebacterium diphtheriae (Diphtheria)",
            "Nocardia asteroides (Nocardiosis)",
            "Rhodococcus equi (Rhodococcosis)",
            "Actinomyces israelii (Actinomycosis)",
            "Mycobacterium leprae (Leprosy)",
            "Peptostreptococcus spp. (Anaerobic Streptococci)",
            "Bacillus cereus (Food Poisoning)",
            "Micrococcus luteus (Micrococcal Infection)",
            "Streptococcus mitis (Endocarditis)",
            "Enterococcus faecium (E. faecium)",
            "Staphylococcus saprophyticus (Urinary Tract Infection)",
            "Clostridium difficile (C. diff Infection)",
            "Lactococcus lactis (Lactic Infections)",
            "Propionibacterium acnes (Acne Vulgaris)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Anthrax",
                "Gas Gangrene",
                "Listeriosis",
                "Tetanus",
                "Diphtheria",
                "Actinomycosis",
                "Leprosy"
            ],
            "mismatches": [
                "Staphylococcus aureus (MRSA)",
                "Streptococcus pneumoniae (SP)",
                "Streptococcus pyogenes (Group A Strep)",
                "Enterococcus faecalis (E. faecalis)",
                "Streptococcus agalactiae (Group B Strep)",
                "Staphylococcus epidermidis (Coagulase-negative Staph)",
                "Nocardia asteroides (Nocardiosis)",
                "Rhodococcus equi (Rhodococcosis)",
                "Peptostreptococcus spp. (Anaerobic Streptococci)",
                "Bacillus cereus (Food Poisoning)",
                "Micrococcus luteus (Micrococcal Infection)",
                "Streptococcus mitis (Endocarditis)",
                "Enterococcus faecium (E. faecium)",
                "Staphylococcus saprophyticus (Urinary Tract Infection)",
                "Clostridium difficile (C. diff Infection)",
                "Lactococcus lactis (Lactic Infections)",
                "Propionibacterium acnes (Acne Vulgaris)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bifidobacteriales Infections",
                "Botulism",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis",
                "Tetanus",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus infection (SA)",
            "Streptococcus pneumoniae infection (SP)",
            "Listeria monocytogenes infection (LM)",
            "Enterococcus faecium infection (EF)",
            "Streptococcus pyogenes infection (GAS)",
            "Bacillus anthracis infection (BA)",
            "Clostridium perfringens infection (CP)",
            "Streptococcus agalactiae infection (GBS)",
            "Clostridium difficile infection (CDI)",
            "Staphylococcus epidermidis infection (SE)",
            "Streptococcus mutans infection (SM)",
            "Enterococcus faecalis infection (EFS)",
            "Clostridium tetani infection (CT)",
            "Streptococcus suis infection (SS)",
            "Bacillus cereus infection (BC)",
            "Clostridium botulinum infection (CB)",
            "Staphylococcus saprophyticus infection (SSP)",
            "Streptococcus equi infection (SEQ)",
            "Streptococcus canis infection (SC)",
            "Bacillus subtilis infection (BS)",
            "Enterococcus gallinarum infection (EG)",
            "Clostridium novyi infection (CN)",
            "Staphylococcus lugdunensis infection (SL)",
            "Streptococcus sanguinis infection (SSG)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcal Infections",
                "Streptococcal Infections",
                "Listeriosis",
                "Anthrax",
                "Clostridium Infections",
                "Tetanus",
                "Botulism"
            ],
            "mismatches": [
                "Staphylococcus aureus infection (SA)",
                "Streptococcus pneumoniae infection (SP)",
                "Listeria monocytogenes infection (LM)",
                "Enterococcus faecium infection (EF)",
                "Streptococcus pyogenes infection (GAS)",
                "Bacillus anthracis infection (BA)",
                "Clostridium perfringens infection (CP)",
                "Streptococcus agalactiae infection (GBS)",
                "Clostridium difficile infection (CDI)",
                "Staphylococcus epidermidis infection (SE)",
                "Streptococcus mutans infection (SM)",
                "Enterococcus faecalis infection (EFS)",
                "Clostridium tetani infection (CT)",
                "Streptococcus suis infection (SS)",
                "Bacillus cereus infection (BC)",
                "Clostridium botulinum infection (CB)",
                "Staphylococcus saprophyticus infection (SSP)",
                "Streptococcus equi infection (SEQ)",
                "Streptococcus canis infection (SC)",
                "Bacillus subtilis infection (BS)",
                "Enterococcus gallinarum infection (EG)",
                "Clostridium novyi infection (CN)",
                "Staphylococcus lugdunensis infection (SL)",
                "Streptococcus sanguinis infection (SSG)"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Gas Gangrene",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycobacterium avium-intracellulare Infection",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pseudomembranous Enterocolitis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Silicotuberculosis",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tetanus",
                "Tuberculosis"
            ],
            "TP": 7,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus infection",
            "Streptococcal infection",
            "Enterococcal infection",
            "Listeria monocytogenes infection",
            "Clostridium difficile infection",
            "Bacillus anthracis infection",
            "Corynebacterium diphtheriae infection",
            "Lactobacillus infection",
            "Actinomyces infection",
            "Nocardia infection",
            "Mycobacterium tuberculosis infection",
            "Propionibacterium acnes infection",
            "Peptostreptococcus infection",
            "Erysipelothrix rhusiopathiae infection",
            "Rhodococcus equi infection",
            "Bifidobacterium infection",
            "Gardnerella vaginalis infection",
            "Micrococcus luteus infection",
            "Arcanobacterium haemolyticum infection",
            "Lactococcus lactis infection",
            "Streptomyces infection",
            "Corynebacterium jeikeium infection",
            "Bifidobacterium adolescentis infection",
            "Mycobacterium leprae infection"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nocardia infection",
                "Erysipelothrix rhusiopathiae infection",
                "Mycobacterium leprae infection"
            ],
            "mismatches": [
                "Staphylococcus aureus infection",
                "Streptococcal infection",
                "Enterococcal infection",
                "Listeria monocytogenes infection",
                "Clostridium difficile infection",
                "Bacillus anthracis infection",
                "Corynebacterium diphtheriae infection",
                "Lactobacillus infection",
                "Actinomyces infection",
                "Mycobacterium tuberculosis infection",
                "Propionibacterium acnes infection",
                "Peptostreptococcus infection",
                "Rhodococcus equi infection",
                "Bifidobacterium infection",
                "Gardnerella vaginalis infection",
                "Micrococcus luteus infection",
                "Arcanobacterium haemolyticum infection",
                "Lactococcus lactis infection",
                "Streptomyces infection",
                "Corynebacterium jeikeium infection",
                "Bifidobacterium adolescentis infection"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Erysipelas",
                "Erysipelothrix Infections",
                "Female Genital Tuberculosis",
                "Furunculosis",
                "Impetigo",
                "Latent Tuberculosis",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycobacterium Infections",
                "Mycobacterium avium-intracellulare Infection",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pulmonary Tuberculosis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tuberculoid Leprosy",
                "Tuberculosis",
                "Whipple Disease"
            ],
            "TP": 3,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Skin and soft tissue infection (Staphylococcus aureus)",
            "Bacteremia (S. aureus)",
            "Endocarditis (S. aureus)",
            "Pharyngitis (Group A Streptococcal infection - GAS)",
            "Scarlet fever (GAS)",
            "Rheumatic fever (GAS)",
            "Neonatal sepsis (Group B Streptococcal infection - GBS)",
            "Pneumonia (Streptococcus pneumoniae)",
            "Meningitis (S. pneumoniae)",
            "Otitis media (S. pneumoniae)",
            "Urinary tract infection (Enterococcus faecalis)",
            "Endocarditis (E. faecalis)",
            "Clostridium difficile infection (C. difficile)",
            "Listeriosis (Listeria monocytogenes - L. monocytogenes)",
            "Anthrax (Bacillus anthracis - B. anthracis)",
            "Gas gangrene (Clostridium perfringens - C. perfringens)",
            "Diphtheria (Corynebacterium diphtheriae)",
            "Acne vulgaris (Cutibacterium acnes - C. acnes)",
            "Actinomycosis (Actinomyces israelii - A. israelii)",
            "Food poisoning (Bacillus cereus - B. cereus)",
            "Dental caries (Streptococcus mutans - S. mutans)",
            "Staphylococcal endocarditis (Staphylococcus epidermidis)",
            "Viridans streptococcal endocarditis (Streptococcus viridans)",
            "Staphylococcus lugdunensis infection (S. lugdunensis)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Scarlet fever (GAS)",
                "Rheumatic fever (GAS)",
                "Pneumonia (Streptococcus pneumoniae)",
                "Meningitis (S. pneumoniae)",
                "Listeriosis (Listeria monocytogenes - L. monocytogenes)",
                "Anthrax (Bacillus anthracis - B. anthracis)",
                "Gas gangrene (Clostridium perfringens - C. perfringens)",
                "Diphtheria (Corynebacterium diphtheriae)",
                "Actinomycosis (Actinomyces israelii - A. israelii)"
            ],
            "mismatches": [
                "Skin and soft tissue infection (Staphylococcus aureus)",
                "Bacteremia (S. aureus)",
                "Endocarditis (S. aureus)",
                "Pharyngitis (Group A Streptococcal infection - GAS)",
                "Neonatal sepsis (Group B Streptococcal infection - GBS)",
                "Otitis media (S. pneumoniae)",
                "Urinary tract infection (Enterococcus faecalis)",
                "Endocarditis (E. faecalis)",
                "Clostridium difficile infection (C. difficile)",
                "Acne vulgaris (Cutibacterium acnes - C. acnes)",
                "Food poisoning (Bacillus cereus - B. cereus)",
                "Dental caries (Streptococcus mutans - S. mutans)",
                "Staphylococcal endocarditis (Staphylococcus epidermidis)",
                "Viridans streptococcal endocarditis (Streptococcus viridans)",
                "Staphylococcus lugdunensis infection (S. lugdunensis)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bifidobacteriales Infections",
                "Botulism",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Furunculosis",
                "Gas Gangrene",
                "Impetigo",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Rheumatic Fever",
                "Rheumatic Heart Disease",
                "Scarlet Fever",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis",
                "Tetanus",
                "Urogenital Tuberculosis"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (S. pneumoniae) infection",
            "Staphylococcus aureus (S. aureus) infection",
            "Clostridium difficile (C. difficile) infection",
            "Bacillus anthracis (B. anthracis) infection",
            "Listeria monocytogenes (L. monocytogenes) infection",
            "Enterococcus faecalis (E. faecalis) infection",
            "Group A Streptococcus (GAS) infection",
            "Group B Streptococcus (GBS) infection",
            "Corynebacterium diphtheriae (C. diphtheriae) infection",
            "Mycobacterium tuberculosis (M. tuberculosis) infection",
            "Actinomyces israelii (A. israelii) infection",
            "Clostridium tetani (C. tetani) infection",
            "Bacillus cereus (B. cereus) infection",
            "Propionibacterium acnes (P. acnes) infection",
            "Streptococcus mutans (S. mutans) infection",
            "Enterococcus faecium (E. faecium) infection",
            "Staphylococcus epidermidis (S. epidermidis) infection",
            "Clostridium perfringens (C. perfringens) infection",
            "Streptococcus pyogenes (S. pyogenes) infection",
            "Lactobacillus acidophilus (L. acidophilus) infection",
            "Mycobacterium leprae (M. leprae) infection",
            "Nocardia asteroides (N. asteroides) infection",
            "Streptococcus agalactiae (S. agalactiae) infection",
            "Bacillus subtilis (B. subtilis) infection"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Streptococcus pneumoniae (S. pneumoniae) infection",
                "Staphylococcus aureus (S. aureus) infection",
                "Clostridium difficile (C. difficile) infection",
                "Bacillus anthracis (B. anthracis) infection",
                "Listeria monocytogenes (L. monocytogenes) infection",
                "Corynebacterium diphtheriae (C. diphtheriae) infection",
                "Mycobacterium tuberculosis (M. tuberculosis) infection",
                "Actinomyces israelii (A. israelii) infection",
                "Clostridium perfringens (C. perfringens) infection",
                "Streptococcus pyogenes (S. pyogenes) infection",
                "Mycobacterium leprae (M. leprae) infection",
                "Nocardia asteroides (N. asteroides) infection"
            ],
            "mismatches": [
                "Enterococcus faecalis (E. faecalis) infection",
                "Group A Streptococcus (GAS) infection",
                "Group B Streptococcus (GBS) infection",
                "Clostridium tetani (C. tetani) infection",
                "Bacillus cereus (B. cereus) infection",
                "Propionibacterium acnes (P. acnes) infection",
                "Streptococcus mutans (S. mutans) infection",
                "Enterococcus faecium (E. faecium) infection",
                "Staphylococcus epidermidis (S. epidermidis) infection",
                "Lactobacillus acidophilus (L. acidophilus) infection",
                "Streptococcus agalactiae (S. agalactiae) infection",
                "Bacillus subtilis (B. subtilis) infection"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycobacterium Infections",
                "Mycobacterium avium-intracellulare Infection",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pseudomembranous Enterocolitis",
                "Pulmonary Tuberculosis",
                "Scarlet Fever",
                "Silicotuberculosis",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tuberculoid Leprosy",
                "Tuberculosis"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus) infection",
            "Streptococcus pyogenes (S. pyogenes) infection",
            "Streptococcus pneumoniae (S. pneumoniae) infection",
            "Enterococcus faecalis (E. faecalis) infection",
            "Enterococcus faecium (E. faecium) infection",
            "Listeria monocytogenes (L. monocytogenes) infection",
            "Clostridium difficile (C. difficile) infection",
            "Clostridium perfringens (C. perfringens) infection",
            "Clostridium tetani (C. tetani) infection",
            "Clostridium botulinum (C. botulinum) infection",
            "Bacillus anthracis (B. anthracis) infection",
            "Bacillus cereus (B. cereus) infection",
            "Corynebacterium diphtheriae (C. diphtheriae) infection",
            "Propionibacterium acnes (P. acnes) infection",
            "Actinomyces israelii (A. israelii) infection",
            "Nocardia asteroides (N. asteroides) infection",
            "Mycobacterium tuberculosis (M. tuberculosis) infection",
            "Mycobacterium leprae (M. leprae) infection",
            "Peptostreptococcus species infection",
            "Peptococcus species infection",
            "Finegoldia magna (F. magna) infection",
            "Parvimonas micra (P. micra) infection",
            "Gardnerella vaginalis (G. vaginalis) infection",
            "Erysipelothrix rhusiopathiae (E. rhusiopathiae) infection"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcal Infections",
                "Streptococcal Infections",
                "Pneumococcal Infections",
                "Listeriosis",
                "Clostridium Infections",
                "Botulism",
                "Anthrax",
                "Diphtheria",
                "Actinomycosis",
                "Nocardia Infections",
                "Tuberculosis",
                "Leprosy",
                "Erysipelothrix Infections"
            ],
            "mismatches": [
                "Enterococcus faecalis (E. faecalis) infection",
                "Enterococcus faecium (E. faecium) infection",
                "Bacillus cereus (B. cereus) infection",
                "Propionibacterium acnes (P. acnes) infection",
                "Peptostreptococcus species infection",
                "Peptococcus species infection",
                "Finegoldia magna (F. magna) infection",
                "Parvimonas micra (P. micra) infection",
                "Gardnerella vaginalis (G. vaginalis) infection"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Botulism",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Erysipelas",
                "Erysipelothrix Infections",
                "Female Genital Tuberculosis",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycobacterium Infections",
                "Mycobacterium avium-intracellulare Infection",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pseudomembranous Enterocolitis",
                "Pulmonary Tuberculosis",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tuberculoid Leprosy",
                "Tuberculosis"
            ],
            "TP": 13,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Clostridium difficile (C. difficile)",
            "Enterococcus faecalis (E. faecalis)",
            "Bacillus anthracis (B. anthracis)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Propionibacterium acnes (P. acnes)",
            "Streptococcus agalactiae (S. agalactiae)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Streptococcus mutans (S. mutans)",
            "Actinomyces israelii (A. israelii)",
            "Nocardia asteroides (N. asteroides)",
            "Streptococcus sanguinis (S. sanguinis)",
            "Bacillus cereus (B. cereus)",
            "Erysipelothrix rhusiopathiae (E. rhusiopathiae)",
            "Streptococcus bovis (S. bovis)",
            "Lactobacillus acidophilus (L. acidophilus)",
            "Streptococcus salivarius (S. salivarius)",
            "Micrococcus luteus (M. luteus)",
            "Streptococcus mitis (S. mitis)",
            "Streptococcus anginosus (S. anginosus)",
            "Streptococcus gordonii (S. gordonii)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bacillus anthracis (B. anthracis)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Actinomyces israelii (A. israelii)",
                "Nocardia asteroides (N. asteroides)",
                "Erysipelothrix rhusiopathiae (E. rhusiopathiae)"
            ],
            "mismatches": [
                "Staphylococcus aureus (S. aureus)",
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Clostridium difficile (C. difficile)",
                "Enterococcus faecalis (E. faecalis)",
                "Streptococcus pyogenes (S. pyogenes)",
                "Propionibacterium acnes (P. acnes)",
                "Streptococcus agalactiae (S. agalactiae)",
                "Streptococcus mutans (S. mutans)",
                "Bacillus cereus (B. cereus)",
                "Streptococcus bovis (S. bovis)",
                "Lactobacillus acidophilus (L. acidophilus)",
                "Streptococcus salivarius (S. salivarius)",
                "Micrococcus luteus (M. luteus)",
                "Streptococcus mitis (S. mitis)",
                "Streptococcus anginosus (S. anginosus)",
                "Streptococcus gordonii (S. gordonii)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Corynebacterium Infections",
                "Diphtheria",
                "Erysipelas",
                "Erysipelothrix Infections",
                "Impetigo",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycetoma",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 7,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Enterococcus faecalis (E. faecalis)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Bacillus cereus (B. cereus)",
            "Clostridium difficile (C. difficile)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Streptococcus agalactiae (S. agalactiae)",
            "Actinomyces israelii (A. israelii)",
            "Erysipelothrix rhusiopathiae (E. rhusiopathiae)",
            "Propionibacterium acnes (P. acnes)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Mycobacterium leprae (M. leprae)",
            "Nocardia asteroides (N. asteroides)",
            "Rhodococcus equi (R. equi)",
            "Arcanobacterium haemolyticum (A. haemolyticum)",
            "Lactobacillus species (Lactobacillus spp.)",
            "Bifidobacterium species (Bifidobacterium spp.)",
            "Staphylococcus epidermidis (S. epidermidis)",
            "Streptococcus mutans (S. mutans)",
            "Streptococcus viridans (S. viridans)",
            "Enterococcus faecium (E. faecium)",
            "Clostridium perfringens (C. perfringens)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Clostridium difficile (C. difficile)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Mycobacterium leprae (M. leprae)",
                "Nocardia asteroides (N. asteroides)",
                "Erysipelothrix rhusiopathiae (E. rhusiopathiae)",
                "Actinomyces israelii (A. israelii)"
            ],
            "mismatches": [
                "Staphylococcus aureus (S. aureus)",
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Enterococcus faecalis (E. faecalis)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Bacillus cereus (B. cereus)",
                "Streptococcus pyogenes (S. pyogenes)",
                "Streptococcus agalactiae (S. agalactiae)",
                "Propionibacterium acnes (P. acnes)",
                "Rhodococcus equi (R. equi)",
                "Arcanobacterium haemolyticum (A. haemolyticum)",
                "Lactobacillus species (Lactobacillus spp.)",
                "Bifidobacterium species (Bifidobacterium spp.)",
                "Staphylococcus epidermidis (S. epidermidis)",
                "Streptococcus mutans (S. mutans)",
                "Streptococcus viridans (S. viridans)",
                "Enterococcus faecium (E. faecium)",
                "Clostridium perfringens (C. perfringens)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Bovine Tuberculosis",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Erysipelas",
                "Erysipelothrix Infections",
                "Gas Gangrene",
                "Impetigo",
                "Lepromatous Leprosy",
                "Leprosy",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycetoma",
                "Nocardia Infections",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Pulmonary Tuberculosis",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tuberculoid Leprosy",
                "Tuberculoma",
                "Tuberculosis"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcus aureus infection (MRSA) (S. aureus)",
            "Streptococcus pyogenes infection (S. pyogenes)",
            "Streptococcus pneumoniae infection (S. pneumoniae)",
            "Enterococcus faecalis infection (E. faecalis)",
            "Clostridium difficile infection (C. difficile)",
            "Bacillus subtilis infection (B. subtilis)",
            "Listeria monocytogenes infection (L. monocytogenes)",
            "Staphylococcus epidermidis infection (S. epidermidis)",
            "Staphylococcus saprophyticus infection (S. saprophyticus)",
            "Streptococcus agalactiae infection (S. agalactiae)",
            "Streptococcus suis infection (S. suis)",
            "Streptococcus equi infection (S. equi)",
            "Streptococcus mutans infection (S. mutans)",
            "Streptococcus salivarius infection (S. salivarius)",
            "Bacillus anthracis infection (B. anthracis)",
            "Corynebacterium diphtheriae infection (C. diphtheriae)",
            "Actinomyces israelii infection (A. israelii)",
            "Actinomyces gerencseriae infection (A. gerencseriae)",
            "Actinomyces odontolyticus infection (A. odontolyticus)",
            "Actinomyces viscosus infection (A. viscosus)",
            "Actinomyces naeslundii infection (A. naeslundii)",
            "Actinomyces meyeri infection (A. meyeri)",
            "Actinomyces europaeus infection (A. europaeus)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Clostridium difficile infection (C. difficile)",
                "Bacillus anthracis infection (B. anthracis)",
                "Corynebacterium diphtheriae infection (C. diphtheriae)",
                "Actinomyces israelii infection (A. israelii)"
            ],
            "mismatches": [
                "Staphylococcus aureus infection (MRSA) (S. aureus)",
                "Streptococcus pyogenes infection (S. pyogenes)",
                "Streptococcus pneumoniae infection (S. pneumoniae)",
                "Enterococcus faecalis infection (E. faecalis)",
                "Bacillus subtilis infection (B. subtilis)",
                "Listeria monocytogenes infection (L. monocytogenes)",
                "Staphylococcus epidermidis infection (S. epidermidis)",
                "Staphylococcus saprophyticus infection (S. saprophyticus)",
                "Streptococcus agalactiae infection (S. agalactiae)",
                "Streptococcus suis infection (S. suis)",
                "Streptococcus equi infection (S. equi)",
                "Streptococcus mutans infection (S. mutans)",
                "Streptococcus salivarius infection (S. salivarius)",
                "Actinomyces gerencseriae infection (A. gerencseriae)",
                "Actinomyces odontolyticus infection (A. odontolyticus)",
                "Actinomyces viscosus infection (A. viscosus)",
                "Actinomyces naeslundii infection (A. naeslundii)",
                "Actinomyces meyeri infection (A. meyeri)",
                "Actinomyces europaeus infection (A. europaeus)"
            ],
            "true_referents": [
                "Actinomycetales Infections",
                "Actinomycosis",
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Cervicofacial Actinomycosis",
                "Clostridium Infections",
                "Corynebacterium Infections",
                "Diphtheria",
                "Listeria Meningitis",
                "Listeriosis",
                "Pneumococcal Infections",
                "Pneumococcal Pneumonia",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Tuberculosis"
            ],
            "TP": 4,
            "FP": 19,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            "Staphylococcal scalded skin syndrome (SSSS)",
            "Streptococcal toxic shock syndrome",
            "Enterococcal endocarditis",
            "Bacillus cereus infection",
            "Methicillin-resistant Staphylococcus aureus (MRSA) infection",
            "Listeria monocytogenes meningitis",
            "Clostridioides difficile infection (CDI)",
            "Streptococcal pharyngitis",
            "Staphylococcal food poisoning",
            "Streptococcal pneumonia",
            "Enterococcal bacteremia",
            "Staphylococcal osteomyelitis",
            "Streptococcal cellulitis",
            "Vancomycin-resistant Enterococcus (VRE) infection",
            "Staphylococcal endocarditis",
            "Streptococcal sepsis",
            "Staphylococcal conjunctivitis",
            "Streptococcal meningitis",
            "Enterococcal urinary tract infection",
            "Staphylococcal wound infection",
            "Streptococcal sinusitis",
            "Staphylococcal septic arthritis",
            "Streptococcal otitis media"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Staphylococcal scalded skin syndrome (SSSS)",
                "Staphylococcal food poisoning"
            ],
            "mismatches": [
                "Streptococcal toxic shock syndrome",
                "Enterococcal endocarditis",
                "Bacillus cereus infection",
                "Methicillin-resistant Staphylococcus aureus (MRSA) infection",
                "Listeria monocytogenes meningitis",
                "Clostridioides difficile infection (CDI)",
                "Streptococcal pharyngitis",
                "Streptococcal pneumonia",
                "Enterococcal bacteremia",
                "Staphylococcal osteomyelitis",
                "Streptococcal cellulitis",
                "Vancomycin-resistant Enterococcus (VRE) infection",
                "Staphylococcal endocarditis",
                "Streptococcal sepsis",
                "Staphylococcal conjunctivitis",
                "Streptococcal meningitis",
                "Enterococcal urinary tract infection",
                "Staphylococcal wound infection",
                "Streptococcal sinusitis",
                "Staphylococcal septic arthritis",
                "Streptococcal otitis media"
            ],
            "true_referents": [
                "Anthrax",
                "Bacillaceae Infections",
                "Bifidobacteriales Infections",
                "Clostridium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Listeria Meningitis",
                "Listeriosis",
                "Pneumococcal Infections",
                "Pneumococcal Meningitis",
                "Pneumococcal Pneumonia",
                "Pseudomembranous Enterocolitis",
                "Rheumatic Fever",
                "Scarlet Fever",
                "Staphylococcal Food Poisoning",
                "Staphylococcal Infections",
                "Staphylococcal Pneumonia",
                "Staphylococcal Scalded Skin Syndrome",
                "Staphylococcal Skin Infections",
                "Streptococcal Infections",
                "Subacute Bacterial Endocarditis"
            ],
            "TP": 2,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "Gram-positive bacterial infection",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Actinomyces",
                "Canonical name": "(Actinomyces)"
            },
            {
                "Referent": "Bacillus",
                "Canonical name": "(Bacillus)"
            },
            {
                "Referent": "Clostridium",
                "Canonical name": "(Clostridium)"
            },
            {
                "Referent": "Corynebacterium",
                "Canonical name": "(Corynebacterium)"
            },
            {
                "Referent": "Enterococcus",
                "Canonical name": "(Enterococcus)"
            },
            {
                "Referent": "Listeria",
                "Canonical name": "(Listeria)"
            },
            {
                "Referent": "Propionibacterium",
                "Canonical name": "(Propionibacterium)"
            },
            {
                "Referent": "Staphylococcus",
                "Canonical name": "(Staphylococcus)"
            },
            {
                "Referent": "Streptococcus",
                "Canonical name": "(Streptococcus)"
            },
            {
                "Referent": "Tetragenococcus",
                "Canonical name": "(Tetragenococcus)"
            },
            {
                "Referent": "Veillonella",
                "Canonical name": "(Veillonella)"
            },
            {
                "Referent": "Bifidobacterium",
                "Canonical name": "(Bifidobacterium)"
            },
            {
                "Referent": "Lactobacillus",
                "Canonical name": "(Lactobacillus)"
            },
            {
                "Referent": "Streptococcus pneumoniae",
                "Canonical name": "(Streptococcus pneumoniae)"
            },
            {
                "Referent": "Enterobacter",
                "Canonical name": "(Enterobacter)"
            },
            {
                "Referent": "Escherichia",
                "Canonical name": "(Escherichia)"
            },
            {
                "Referent": "Klebsiella",
                "Canonical name": "(Klebsiella)"
            },
            {
                "Referent": "Pseudomonas",
                "Canonical name": "(Pseudomonas)"
            },
            {
                "Referent": "Salmonella",
                "Canonical name": "(Salmonella)"
            },
            {
                "Referent": "Shigella",
                "Canonical name": "(Shigella)"
            },
            {
                "Referent": "Yersinia",
                "Canonical name": "(Yersinia)"
            },
            {
                "Referent": "Enterobacteriaceae",
                "Canonical name": "(Enterobacteriaceae)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            },
            {
                "Referent": "Enterobacteriaceae family",
                "Canonical name": "(Enterobacteriaceae family)"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Actinomyces",
                "Bacillus",
                "Clostridium",
                "Corynebacterium",
                "Enterococcus",
                "Listeria",
                "Propionibacterium",
                "Staphylococcus",
                "Streptococcus",
                "Tetragenococcus",
                "Veillonella",
                "Bifidobacterium",
                "Lactobacillus",
                "Streptococcus pneumoniae",
                "Enterobacter",
                "Escherichia",
                "Klebsiella",
                "Pseudomonas",
                "Salmonella",
                "Shigella",
                "Yersinia",
                "Enterobacteriaceae",
                "Enterobacteriaceae family"
            ],
            "true_referents": [
                "Actinomycosis",
                "Anthrax",
                "Bifidobacteriales Infections",
                "Bovine Tuberculosis",
                "Corynebacterium Infections",
                "Diphtheria",
                "Enterotoxemia",
                "Gas Gangrene",
                "Listeria Meningitis",
                "Listeriosis",
                "Mycetoma",
                "Pneumococcal Pneumonia",
                "Silicotuberculosis",
                "Staphylococcal Food Poisoning",
                "Streptococcal Infections",
                "Swine Erysipelas",
                "Tetanus",
                "Tuberculosis"
            ],
            "TP": 0,
            "FP": 23,
            "FN": 0
        }
    }
]