[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Neisseria gonorrhoeae (N. gonorrhoeae)",
            "Staphylococcus aureus (S. aureus)",
            "Escherichia coli (E. coli)",
            "Klebsiella pneumoniae (K. pneumoniae)",
            "Pseudomonas aeruginosa (P. aeruginosa)",
            "Acinetobacter baumannii (A. baumannii)",
            "Salmonella enterica (S. enterica)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Haemophilus influenzae (H. influenzae)",
            "Bordetella pertussis (B. pertussis)",
            "Campylobacter jejuni (C. jejuni)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Chlamydia trachomatis (C. trachomatis)",
            "Francisella tularensis (F. tularensis)",
            "Brucella abortus (B. abortus)",
            "Yersinia pestis (Y. pestis)",
            "Bacillus anthracis (B. anthracis)",
            "Legionella pneumophila (L. pneumophila)",
            "Vibrio cholerae (V. cholerae)",
            "Mycoplasma pneumoniae (M. pneumoniae)",
            "Methicillin-resistant Staphylococcus aureus (MRSA)",
            "Clostridium difficile (C. difficile)",
            "Enterococcus faecalis (E. faecalis)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Klebsiella pneumoniae (K. pneumoniae)",
                "Pseudomonas aeruginosa (P. aeruginosa)",
                "Acinetobacter baumannii (A. baumannii)",
                "Salmonella enterica (S. enterica)",
                "Haemophilus influenzae (H. influenzae)",
                "Bordetella pertussis (B. pertussis)",
                "Campylobacter jejuni (C. jejuni)",
                "Chlamydia trachomatis (C. trachomatis)",
                "Francisella tularensis (F. tularensis)",
                "Brucella abortus (B. abortus)",
                "Yersinia pestis (Y. pestis)",
                "Legionella pneumophila (L. pneumophila)",
                "Vibrio cholerae (V. cholerae)",
                "Mycoplasma pneumoniae (M. pneumoniae)"
            ],
            "mismatches": [
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Neisseria gonorrhoeae (N. gonorrhoeae)",
                "Staphylococcus aureus (S. aureus)",
                "Escherichia coli (E. coli)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Bacillus anthracis (B. anthracis)",
                "Methicillin-resistant Staphylococcus aureus (MRSA)",
                "Clostridium difficile (C. difficile)",
                "Enterococcus faecalis (E. faecalis)"
            ],
            "true_referents": [
                "Acinetobacter Infections",
                "Actinobacillosis",
                "Actinobacillus Infections",
                "Bordetella Infections",
                "Bovine Brucellosis",
                "Brucellosis",
                "Campylobacter Infections",
                "Cat-Scratch Disease",
                "Chlamydial Pneumonia",
                "Cholera",
                "Enterobacteriaceae Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Klebsiella Infections",
                "Legionellosis",
                "Legionnaires' Disease",
                "Leptospirosis",
                "Melioidosis",
                "Mycoplasma Pneumonia",
                "Neisseriaceae Infections",
                "Plague",
                "Pseudomonas Infections",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Scrub Typhus",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever",
                "Vibrio Infections",
                "Whooping Cough",
                "Yersinia Infections"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Neisseria gonorrhoeae (N. gonorrhoeae)",
            "Neisseria meningitidis (N. meningitidis)",
            "Haemophilus influenzae (H. influenzae)",
            "Helicobacter pylori (H. pylori)",
            "Escherichia coli (E. coli) (certain strains)",
            "Proteus mirabilis (P. mirabilis)",
            "Klebsiella pneumoniae (K. pneumoniae)",
            "Moraxella catarrhalis (M. catarrhalis)",
            "Haemophilus ducreyi (H. ducreyi)",
            "Campylobacter jejuni (C. jejuni)",
            "Helicobacter hepaticus (H. hepaticus)",
            "Helicobacter bilis (H. bilis)",
            "Helicobacter felis (H. felis)",
            "Helicobacter mustelae (H. mustelae)",
            "Helicobacter pullorum (H. pullorum)",
            "Helicobacter salomonis (H. salomonis)",
            "Helicobacter typhlonius (H. typhlonius)",
            "Helicobacter bizzozeronii (H. bizzozeronii)",
            "Helicobacter canis (H. canis)",
            "Helicobacter heilmannii (H. heilmannii)",
            "Helicobacter cetorum (H. cetorum)",
            "Helicobacter cetorum (H. cetorum) subsp. cetorum",
            "Helicobacter cetorum (H. cetorum) subsp. cholecystus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Neisseria gonorrhoeae (N. gonorrhoeae)",
                "Neisseria meningitidis (N. meningitidis)",
                "Haemophilus influenzae (H. influenzae)",
                "Helicobacter pylori (H. pylori)",
                "Proteus mirabilis (P. mirabilis)",
                "Klebsiella pneumoniae (K. pneumoniae)",
                "Moraxella catarrhalis (M. catarrhalis)",
                "Haemophilus ducreyi (H. ducreyi)",
                "Campylobacter jejuni (C. jejuni)"
            ],
            "mismatches": [
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Escherichia coli (E. coli) (certain strains)",
                "Helicobacter hepaticus (H. hepaticus)",
                "Helicobacter bilis (H. bilis)",
                "Helicobacter felis (H. felis)",
                "Helicobacter mustelae (H. mustelae)",
                "Helicobacter pullorum (H. pullorum)",
                "Helicobacter salomonis (H. salomonis)",
                "Helicobacter typhlonius (H. typhlonius)",
                "Helicobacter bizzozeronii (H. bizzozeronii)",
                "Helicobacter canis (H. canis)",
                "Helicobacter heilmannii (H. heilmannii)",
                "Helicobacter cetorum (H. cetorum)",
                "Helicobacter cetorum (H. cetorum) subsp. cetorum",
                "Helicobacter cetorum (H. cetorum) subsp. cholecystus"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Campylobacter Infections",
                "Chancroid",
                "Chlamydial Pneumonia",
                "Enterobacteriaceae Infections",
                "Fusobacteriaceae Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Helicobacter Infections",
                "Klebsiella Infections",
                "Leptospirosis",
                "Meningococcal Infections",
                "Meningococcal Meningitis",
                "Moraxellaceae Infections",
                "Mycoplasma Pneumonia",
                "Neisseriaceae Infections",
                "Proteus Infections",
                "Syphilis",
                "Tularemia"
            ],
            "TP": 9,
            "FP": 15,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcal pharyngitis (Strep throat)",
            "Staphylococcus aureus infection",
            "Streptococcus pneumoniae pneumonia",
            "Escherichia coli urinary tract infection",
            "Streptococcus pyogenes impetigo",
            "Staphylococcus epidermidis infection",
            "Enterococcus faecalis infection",
            "Streptococcus agalactiae infection",
            "Streptococcus pneumoniae meningitis",
            "Staphylococcus aureus skin infection",
            "Streptococcus pyogenes cellulitis",
            "Staphylococcus aureus osteomyelitis",
            "Streptococcus pneumoniae otitis media",
            "Staphylococcus aureus endocarditis",
            "Streptococcus agalactiae bacteremia",
            "Streptococcus pneumoniae septicemia",
            "Staphylococcus aureus pneumonia",
            "Streptococcus pyogenes necrotizing fasciitis",
            "Staphylococcus aureus toxic shock syndrome",
            "Streptococcus pneumoniae bronchitis",
            "Staphylococcus aureus sepsis",
            "Streptococcus pyogenes scarlet fever",
            "Staphylococcus aureus abscess",
            "Streptococcus pneumoniae sinusitis",
            "Staphylococcus aureus conjunctivitis"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Streptococcal pharyngitis (Strep throat)",
                "Staphylococcus aureus infection",
                "Streptococcus pneumoniae pneumonia",
                "Escherichia coli urinary tract infection",
                "Streptococcus pyogenes impetigo",
                "Staphylococcus epidermidis infection",
                "Enterococcus faecalis infection",
                "Streptococcus agalactiae infection",
                "Streptococcus pneumoniae meningitis",
                "Staphylococcus aureus skin infection",
                "Streptococcus pyogenes cellulitis",
                "Staphylococcus aureus osteomyelitis",
                "Streptococcus pneumoniae otitis media",
                "Staphylococcus aureus endocarditis",
                "Streptococcus agalactiae bacteremia",
                "Streptococcus pneumoniae septicemia",
                "Staphylococcus aureus pneumonia",
                "Streptococcus pyogenes necrotizing fasciitis",
                "Staphylococcus aureus toxic shock syndrome",
                "Streptococcus pneumoniae bronchitis",
                "Staphylococcus aureus sepsis",
                "Streptococcus pyogenes scarlet fever",
                "Staphylococcus aureus abscess",
                "Streptococcus pneumoniae sinusitis",
                "Staphylococcus aureus conjunctivitis"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Actinobacillus Infections",
                "Bacteroides Infections",
                "Cardiovascular Syphilis",
                "Chancre",
                "Chlamydia Infections",
                "Chlamydial Pneumonia",
                "Cutaneous Syphilis",
                "Enterobacteriaceae Infections",
                "Escherichia coli Infections",
                "Escherichia coli Meningitis",
                "Fusobacterium Infections",
                "Gonorrhea",
                "Haemophilus Meningitis",
                "Hemorrhagic Septicemia",
                "Lemierre Syndrome",
                "Leptospirosis",
                "Meningococcal Meningitis",
                "Mycoplasma Pneumonia",
                "Necrotizing Ulcerative Gingivitis",
                "Pneumonic Pasteurellosis",
                "Q Fever",
                "Salmonella Infections",
                "Syphilis",
                "Tularemia"
            ],
            "TP": 0,
            "FP": 25,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcal Pharyngitis (Strep Throat)",
            "Staphylococcus aureus Skin Infection",
            "Escherichia coli Urinary Tract Infection (UTI)",
            "Klebsiella pneumoniae Pneumonia",
            "Proteus mirabilis Wound Infection",
            "Haemophilus influenzae Meningitis",
            "Neisseria gonorrhoeae Gonorrhea",
            "Neisseria meningitidis Meningitis",
            "Chlamydia trachomatis Conjunctivitis",
            "Mycoplasma pneumoniae Pneumonia",
            "Legionella pneumophila Legionnaires' Disease",
            "Salmonella enterica Gastroenteritis",
            "Shigella dysenteriae Dysentery",
            "Campylobacter jejuni Gastroenteritis",
            "Vibrio cholerae Cholera",
            "Yersinia pestis Plague",
            "Bordetella pertussis Whooping Cough",
            "Clostridium perfringens Gas Gangrene",
            "Clostridium tetani Tetanus",
            "Enterococcus faecalis Bacteremia",
            "Serratia marcescens Pneumonia",
            "Citrobacter freundii Sepsis",
            "Morganella morganii Wound Infection",
            "Providencia rettgeri Pneumonia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Escherichia coli Infections",
                "Klebsiella Infections",
                "Proteus Infections",
                "Haemophilus Meningitis",
                "Gonorrhea",
                "Meningococcal Meningitis",
                "Mycoplasma Pneumonia",
                "Legionnaires' Disease",
                "Salmonella Infections",
                "Bacillary Dysentery",
                "Campylobacter Infections",
                "Cholera",
                "Plague",
                "Whooping Cough",
                "Serratia Infections"
            ],
            "mismatches": [
                "Streptococcal Pharyngitis (Strep Throat)",
                "Staphylococcus aureus Skin Infection",
                "Escherichia coli Urinary Tract Infection (UTI)",
                "Klebsiella pneumoniae Pneumonia",
                "Proteus mirabilis Wound Infection",
                "Neisseria gonorrhoeae Gonorrhea",
                "Neisseria meningitidis Meningitis",
                "Chlamydia trachomatis Conjunctivitis",
                "Legionella pneumophila Legionnaires' Disease",
                "Salmonella enterica Gastroenteritis",
                "Shigella dysenteriae Dysentery",
                "Campylobacter jejuni Gastroenteritis",
                "Vibrio cholerae Cholera",
                "Yersinia pestis Plague",
                "Bordetella pertussis Whooping Cough",
                "Clostridium perfringens Gas Gangrene",
                "Clostridium tetani Tetanus",
                "Enterococcus faecalis Bacteremia",
                "Serratia marcescens Pneumonia",
                "Citrobacter freundii Sepsis",
                "Morganella morganii Wound Infection",
                "Providencia rettgeri Pneumonia"
            ],
            "true_referents": [
                "Acinetobacter Infections",
                "Actinobacillosis",
                "Bacillary Dysentery",
                "Bordetella Infections",
                "Burkholderia Infections",
                "Campylobacter Infections",
                "Cat-Scratch Disease",
                "Chancre",
                "Chlamydia Infections",
                "Chlamydiaceae Infections",
                "Chlamydial Pneumonia",
                "Chlamydophila Infections",
                "Cholera",
                "Cutaneous Syphilis",
                "Enterobacteriaceae Infections",
                "Escherichia coli Infections",
                "Escherichia coli Meningitis",
                "Flavobacteriaceae Infections",
                "Fusobacteriaceae Infections",
                "Fusobacterium Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Helicobacter Infections",
                "Klebsiella Infections",
                "Legionellosis",
                "Legionnaires' Disease",
                "Lemierre Syndrome",
                "Lymphogranuloma Venereum",
                "Melioidosis",
                "Meningococcal Infections",
                "Meningococcal Meningitis",
                "Moraxellaceae Infections",
                "Mycoplasma Infections",
                "Mycoplasma Pneumonia",
                "Neisseriaceae Infections",
                "Plague",
                "Pneumonic Pasteurellosis",
                "Proteus Infections",
                "Rickettsial Pneumonia",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Serratia Infections",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever",
                "Vibrio Infections",
                "Whooping Cough",
                "Yersinia Infections",
                "Yersinia pseudotuberculosis Infections"
            ],
            "TP": 15,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Escherichia coli infection (E. coli)",
            "Salmonella infection",
            "Shigella infection",
            "Helicobacter pylori infection (H. pylori)",
            "Vibrio cholerae infection",
            "Neisseria meningitidis infection",
            "Neisseria gonorrhoeae infection",
            "Haemophilus influenzae infection",
            "Bordetella pertussis infection",
            "Legionella pneumophila infection",
            "Campylobacter jejuni infection",
            "Pseudomonas aeruginosa infection",
            "Klebsiella pneumoniae infection",
            "Yersinia pestis infection",
            "Brucella infection",
            "Francisella tularensis infection",
            "Bacteroides fragilis infection",
            "Prevotella infection",
            "Fusobacterium infection",
            "Acinetobacter baumannii infection",
            "Pasteurella multocida infection",
            "Rickettsia rickettsii infection",
            "Bartonella henselae infection",
            "Chlamydia trachomatis infection"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Escherichia coli infection (E. coli)",
                "Salmonella infection",
                "Helicobacter pylori infection (H. pylori)",
                "Vibrio cholerae infection",
                "Neisseria meningitidis infection",
                "Neisseria gonorrhoeae infection",
                "Haemophilus influenzae infection",
                "Bordetella pertussis infection",
                "Legionella pneumophila infection",
                "Campylobacter jejuni infection",
                "Pseudomonas aeruginosa infection",
                "Klebsiella pneumoniae infection",
                "Yersinia pestis infection",
                "Brucella infection",
                "Bacteroides fragilis infection",
                "Fusobacterium infection",
                "Acinetobacter baumannii infection",
                "Pasteurella multocida infection",
                "Rickettsia rickettsii infection",
                "Bartonella henselae infection",
                "Chlamydia trachomatis infection"
            ],
            "mismatches": [
                "Shigella infection",
                "Francisella tularensis infection",
                "Prevotella infection"
            ],
            "true_referents": [
                "Acinetobacter Infections",
                "Actinobacillus Infections",
                "Animal Salmonella Infections",
                "Bacteroidaceae Infections",
                "Bacteroides Infections",
                "Bartonella Infections",
                "Bartonellaceae Infections",
                "Bordetella Infections",
                "Bovine Brucellosis",
                "Brucellosis",
                "Burkholderia Infections",
                "Campylobacter Infections",
                "Chlamydia Infections",
                "Chlamydial Pneumonia",
                "Chlamydophila Infections",
                "Cholera",
                "Enterobacteriaceae Infections",
                "Escherichia coli Infections",
                "Escherichia coli Meningitis",
                "Flavobacteriaceae Infections",
                "Fusobacteriaceae Infections",
                "Fusobacterium Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Helicobacter Infections",
                "Klebsiella Infections",
                "Legionellosis",
                "Legionnaires' Disease",
                "Meningococcal Infections",
                "Meningococcal Meningitis",
                "Moraxellaceae Infections",
                "Neisseriaceae Infections",
                "Pasteurella Infections",
                "Pasteurellaceae Infections",
                "Plague",
                "Pneumonic Pasteurellosis",
                "Pseudomonas Infections",
                "Rickettsia Infections",
                "Rickettsiaceae Infections",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Spotted Fever Group Rickettsiosis",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever",
                "Vibrio Infections",
                "Whooping Cough",
                "Yersinia Infections",
                "Yersinia pseudotuberculosis Infections"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Escherichia coli (E. coli)",
            "Klebsiella pneumoniae (K. pneumoniae)",
            "Salmonella enterica (S. enterica)",
            "Shigella dysenteriae (S. dysenteriae)",
            "Proteus mirabilis (P. mirabilis)",
            "Enterobacter cloacae (E. cloacae)",
            "Serratia marcescens (S. marcescens)",
            "Haemophilus influenzae (H. influenzae)",
            "Neisseria gonorrhoeae (N. gonorrhoeae)",
            "Neisseria meningitidis (N. meningitidis)",
            "Campylobacter jejuni (C. jejuni)",
            "Vibrio cholerae (V. cholerae)",
            "Yersinia enterocolitica (Y. enterocolitica)",
            "Bordetella pertussis (B. pertussis)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Francisella tularensis (F. tularensis)",
            "Clostridium perfringens (C. perfringens)",
            "Brucella abortus (B. abortus)",
            "Rickettsia rickettsii (R. rickettsii)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Legionella pneumophila (L. pneumophila)",
            "Pseudomonas aeruginosa (P. aeruginosa)",
            "Staphylococcus aureus (S. aureus)",
            "Streptococcus pneumoniae (S. pneumoniae)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Klebsiella Infections",
                "Salmonella Infections",
                "Proteus Infections",
                "Serratia Infections",
                "Haemophilus Infections",
                "Gonorrhea",
                "Meningococcal Infections",
                "Campylobacter Infections",
                "Cholera",
                "Yersinia Infections",
                "Bordetella Infections",
                "Tularemia",
                "Brucellosis",
                "Rickettsia Infections",
                "Legionellosis",
                "Pseudomonas Infections"
            ],
            "mismatches": [
                "Escherichia coli (E. coli)",
                "Enterobacter cloacae (E. cloacae)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Clostridium perfringens (C. perfringens)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Staphylococcus aureus (S. aureus)",
                "Streptococcus pneumoniae (S. pneumoniae)"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Bacillary Dysentery",
                "Bordetella Infections",
                "Bovine Brucellosis",
                "Brucellosis",
                "Campylobacter Infections",
                "Chancroid",
                "Chlamydial Pneumonia",
                "Cholera",
                "Enterobacteriaceae Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Klebsiella Infections",
                "Legionellosis",
                "Legionnaires' Disease",
                "Leptospirosis",
                "Melioidosis",
                "Meningococcal Infections",
                "Meningococcal Meningitis",
                "Neisseriaceae Infections",
                "Proteus Infections",
                "Pseudomonas Infections",
                "Q Fever",
                "Rickettsia Infections",
                "Rickettsiaceae Infections",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Scrub Typhus",
                "Serratia Infections",
                "Spotted Fever Group Rickettsiosis",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever",
                "Vibrio Infections",
                "Whooping Cough",
                "Yersinia Infections",
                "Yersinia pseudotuberculosis Infections"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Tuberculosis (TB)",
            "Streptococcal Infection (SI)",
            "Staphylococcal Infections (SIs)",
            "Pneumococcal Pneumonia (PP)",
            "Haemophilus Influenzae Infection (HII)",
            "Enterococcal Infections (EI)",
            "Escherichia Coli Infections (ECI)",
            "Legionnaires' Disease (LD)",
            "Salmonellosis (S)",
            "Shigellosis (Sg)",
            "Cholera (C)",
            "Syphilis (Sy)",
            "Gonorrhea (G)",
            "Yersiniosis (Y)",
            "Campylobacteriosis (Cp)",
            "Helicobacter Pylori Infection (HPI)",
            "Pertussis (P)",
            "Diphtheria (D)",
            "Meningococcal Disease (MD)",
            "Pseudomonas Aeruginosa Infection (PAI)",
            "Listeriosis (L)",
            "Leptospirosis (Le)",
            "Anthrax (A)",
            "Plague (P)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Escherichia Coli Infections (ECI)",
                "Legionnaires' Disease (LD)",
                "Cholera (C)",
                "Syphilis (Sy)",
                "Gonorrhea (G)",
                "Leptospirosis (Le)",
                "Plague (P)"
            ],
            "mismatches": [
                "Tuberculosis (TB)",
                "Streptococcal Infection (SI)",
                "Staphylococcal Infections (SIs)",
                "Pneumococcal Pneumonia (PP)",
                "Haemophilus Influenzae Infection (HII)",
                "Enterococcal Infections (EI)",
                "Salmonellosis (S)",
                "Shigellosis (Sg)",
                "Yersiniosis (Y)",
                "Campylobacteriosis (Cp)",
                "Helicobacter Pylori Infection (HPI)",
                "Pertussis (P)",
                "Diphtheria (D)",
                "Meningococcal Disease (MD)",
                "Pseudomonas Aeruginosa Infection (PAI)",
                "Listeriosis (L)",
                "Anthrax (A)"
            ],
            "true_referents": [
                "Acinetobacter Infections",
                "Actinobacillosis",
                "Burkholderia Infections",
                "Campylobacter Infections",
                "Cat-Scratch Disease",
                "Chlamydial Pneumonia",
                "Cholera",
                "Enterobacteriaceae Infections",
                "Escherichia coli Infections",
                "Escherichia coli Meningitis",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Helicobacter Infections",
                "Latent Syphilis",
                "Legionellosis",
                "Legionnaires' Disease",
                "Lemierre Syndrome",
                "Leptospirosis",
                "Melioidosis",
                "Meningococcal Infections",
                "Meningococcal Meningitis",
                "Mycoplasma Pneumonia",
                "Plague",
                "Pneumonic Pasteurellosis",
                "Pseudomonas Infections",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Scrub Typhus",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever",
                "Whooping Cough",
                "Yersinia Infections",
                "Yersinia pseudotuberculosis Infections"
            ],
            "TP": 7,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcal pharyngitis (Strep throat)",
            "Tuberculosis (TB)",
            "Salmonella infection",
            "Pneumonia",
            "Meningitis",
            "Gonorrhea",
            "Syphilis",
            "Chlamydia",
            "Lyme disease",
            "Urinary tract infection (UTI)",
            "Gastritis",
            "Impetigo",
            "Cellulitis",
            "Otitis media",
            "Sinusitis",
            "Endocarditis",
            "Bronchitis",
            "Dental caries",
            "Gingivitis",
            "Peptic ulcer disease",
            "Bacterial vaginosis",
            "Clostridium difficile infection",
            "Mycobacterium leprae infection (Leprosy)",
            "Anthrax"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Salmonella infection",
                "Gonorrhea",
                "Syphilis",
                "Lyme disease"
            ],
            "mismatches": [
                "Streptococcal pharyngitis (Strep throat)",
                "Tuberculosis (TB)",
                "Pneumonia",
                "Meningitis",
                "Chlamydia",
                "Urinary tract infection (UTI)",
                "Gastritis",
                "Impetigo",
                "Cellulitis",
                "Otitis media",
                "Sinusitis",
                "Endocarditis",
                "Bronchitis",
                "Dental caries",
                "Gingivitis",
                "Peptic ulcer disease",
                "Bacterial vaginosis",
                "Clostridium difficile infection",
                "Mycobacterium leprae infection (Leprosy)",
                "Anthrax"
            ],
            "true_referents": [
                "Animal Salmonella Infections",
                "Bacteroides Infections",
                "Campylobacter Infections",
                "Cardiovascular Syphilis",
                "Chancre",
                "Chlamydia Infections",
                "Chlamydial Pneumonia",
                "Cholera",
                "Gonorrhea",
                "Haemophilus Meningitis",
                "Helicobacter Infections",
                "Latent Syphilis",
                "Lemierre Syndrome",
                "Leptospirosis",
                "Lyme Disease",
                "Meningococcal Meningitis",
                "Mycoplasma Pneumonia",
                "Necrotizing Ulcerative Gingivitis",
                "Plague",
                "Post-Lyme Disease Syndrome",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Scrub Typhus",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever",
                "Vibrio Infections",
                "Whooping Cough"
            ],
            "TP": 4,
            "FP": 20,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Urinary Tract Infection (UTI)",
            "Pneumonia",
            "Meningitis",
            "Gastroenteritis",
            "Septicemia",
            "Typhoid Fever (TF)",
            "Shigellosis",
            "Gonorrhea",
            "Legionnaires' Disease",
            "Cholera",
            "Plague",
            "Tularemia",
            "Whooping Cough (Pertussis)",
            "Lyme Disease",
            "Enterobacter Infection",
            "Serratia marcescens Infection",
            "Proteus Infection",
            "Morganella morganii Infection",
            "Providencia Infection",
            "Acinetobacter baumannii Infection",
            "Pseudomonas Wound Infection",
            "Pseudomonas Otitis Externa",
            "Pseudomonas Urinary Tract Infection",
            "Haemophilus ducreyi Infection"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Typhoid Fever",
                "Gonorrhea",
                "Legionnaires' Disease",
                "Cholera",
                "Plague",
                "Tularemia",
                "Whooping Cough",
                "Lyme Disease",
                "Proteus Infection"
            ],
            "mismatches": [
                "Urinary Tract Infection (UTI)",
                "Pneumonia",
                "Meningitis",
                "Gastroenteritis",
                "Septicemia",
                "Typhoid Fever (TF)",
                "Shigellosis",
                "Enterobacter Infection",
                "Serratia marcescens Infection",
                "Morganella morganii Infection",
                "Providencia Infection",
                "Acinetobacter baumannii Infection",
                "Pseudomonas Wound Infection",
                "Pseudomonas Otitis Externa",
                "Pseudomonas Urinary Tract Infection",
                "Haemophilus ducreyi Infection"
            ],
            "true_referents": [
                "Acinetobacter Infections",
                "Actinobacillus Infections",
                "Bordetella Infections",
                "Brucellosis",
                "Burkholderia Infections",
                "Campylobacter Infections",
                "Chancroid",
                "Chlamydia Infections",
                "Chlamydial Pneumonia",
                "Cholera",
                "Enterobacteriaceae Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Helicobacter Infections",
                "Hemorrhagic Septicemia",
                "Legionellosis",
                "Legionnaires' Disease",
                "Leptospirosis",
                "Lyme Disease",
                "Melioidosis",
                "Meningococcal Meningitis",
                "Moraxellaceae Infections",
                "Mycoplasma Pneumonia",
                "Paratyphoid Fever",
                "Pasteurella Infections",
                "Pasteurellaceae Infections",
                "Plague",
                "Post-Lyme Disease Syndrome",
                "Proteus Infections",
                "Pseudomonas Infections",
                "Q Fever",
                "Salmonella Food Poisoning",
                "Serratia Infections",
                "Syphilis",
                "Trench Fever",
                "Tularemia",
                "Typhoid Fever",
                "Whooping Cough"
            ],
            "TP": 9,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Escherichia coli (E. coli)",
            "Klebsiella pneumoniae",
            "Proteus mirabilis",
            "Salmonella enterica",
            "Shigella dysenteriae",
            "Pseudomonas aeruginosa",
            "Vibrio cholerae",
            "Legionella pneumophila",
            "Haemophilus influenzae",
            "Neisseria gonorrhoeae",
            "Neisseria meningitidis",
            "Moraxella catarrhalis",
            "Helicobacter pylori",
            "Campylobacter jejuni",
            "Bordetella pertussis",
            "Brucella melitensis",
            "Francisella tularensis",
            "Yersinia pestis",
            "Acinetobacter baumannii",
            "Enterobacter cloacae",
            "Serratia marcescens",
            "Citrobacter freundii",
            "Burkholderia cepacia",
            "Stenotrophomonas maltophilia"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Klebsiella pneumoniae",
                "Proteus mirabilis",
                "Salmonella enterica",
                "Pseudomonas aeruginosa",
                "Vibrio cholerae",
                "Legionella pneumophila",
                "Haemophilus influenzae",
                "Neisseria gonorrhoeae",
                "Neisseria meningitidis",
                "Moraxella catarrhalis",
                "Helicobacter pylori",
                "Campylobacter jejuni",
                "Bordetella pertussis",
                "Brucella melitensis",
                "Yersinia pestis",
                "Acinetobacter baumannii",
                "Serratia marcescens",
                "Burkholderia cepacia"
            ],
            "mismatches": [
                "Escherichia coli (E. coli)",
                "Shigella dysenteriae",
                "Francisella tularensis",
                "Enterobacter cloacae",
                "Citrobacter freundii",
                "Stenotrophomonas maltophilia"
            ],
            "true_referents": [
                "Acinetobacter Infections",
                "Actinobacillosis",
                "Bacillary Dysentery",
                "Bordetella Infections",
                "Bovine Brucellosis",
                "Brucellosis",
                "Burkholderia Infections",
                "Campylobacter Infections",
                "Chlamydial Pneumonia",
                "Cholera",
                "Enterobacteriaceae Infections",
                "Fusobacteriaceae Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Helicobacter Infections",
                "Klebsiella Infections",
                "Legionellosis",
                "Legionnaires' Disease",
                "Leptospirosis",
                "Melioidosis",
                "Meningococcal Infections",
                "Meningococcal Meningitis",
                "Moraxellaceae Infections",
                "Neisseriaceae Infections",
                "Plague",
                "Proteus Infections",
                "Pseudomonas Infections",
                "Q Fever",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Scrub Typhus",
                "Serratia Infections",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever",
                "Vibrio Infections",
                "Whooping Cough",
                "Yersinia Infections"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pyogenes (Group A streptococcus)",
            "Staphylococcus aureus",
            "Listeria monocytogenes",
            "Enterococcus faecalis",
            "Bacillus anthracis",
            "Corynebacterium diphtheriae",
            "Actinomyces israelii",
            "Clostridium perfringens",
            "Clostridium tetani",
            "Clostridium botulinum",
            "Erysipelothrix rhusiopathiae",
            "Mycobacterium tuberculosis",
            "Nocardia asteroides",
            "Streptococcus agalactiae (Group B streptococcus)",
            "Streptococcus pneumoniae",
            "Enterococcus faecium",
            "Bacillus cereus",
            "Lactobacillus acidophilus",
            "Gardnerella vaginalis",
            "Micrococcus luteus",
            "Mycoplasma pneumoniae",
            "Ureaplasma urealyticum",
            "Aerococcus viridans",
            "Gemella morbillorum"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Mycoplasma pneumoniae",
                "Ureaplasma urealyticum"
            ],
            "mismatches": [
                "Streptococcus pyogenes (Group A streptococcus)",
                "Staphylococcus aureus",
                "Listeria monocytogenes",
                "Enterococcus faecalis",
                "Bacillus anthracis",
                "Corynebacterium diphtheriae",
                "Actinomyces israelii",
                "Clostridium perfringens",
                "Clostridium tetani",
                "Clostridium botulinum",
                "Erysipelothrix rhusiopathiae",
                "Mycobacterium tuberculosis",
                "Nocardia asteroides",
                "Streptococcus agalactiae (Group B streptococcus)",
                "Streptococcus pneumoniae",
                "Enterococcus faecium",
                "Bacillus cereus",
                "Lactobacillus acidophilus",
                "Gardnerella vaginalis",
                "Micrococcus luteus",
                "Aerococcus viridans",
                "Gemella morbillorum"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Actinobacillus Infections",
                "Chlamydia Infections",
                "Chlamydial Pneumonia",
                "Gonorrhea",
                "Leptospirosis",
                "Lymphogranuloma Venereum",
                "Melioidosis",
                "Moraxellaceae Infections",
                "Mycoplasma Pneumonia",
                "Psittacosis",
                "Rocky Mountain Spotted Fever",
                "Salmonella Food Poisoning",
                "Scrub Typhus",
                "Syphilis",
                "Tularemia",
                "Ureaplasma Infections"
            ],
            "TP": 2,
            "FP": 22,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcal pharyngitis (strep throat)",
            "Staphylococcal skin infections",
            "Bacterial pneumonia",
            "Bacterial meningitis",
            "Bacterial endocarditis",
            "Bacterial sepsis",
            "Bacterial gastroenteritis",
            "Bacterial vaginosis",
            "Bacterial urinary tract infections",
            "Bacterial osteomyelitis",
            "Bacterial cellulitis",
            "Bacterial peritonitis",
            "Bacterial conjunctivitis",
            "Bacterial sinusitis",
            "Bacterial otitis media",
            "Bacterial cholecystitis",
            "Bacterial prostatitis",
            "Bacterial pyelonephritis",
            "Bacterial mastitis",
            "Bacterial arthritis",
            "Bacterial endophthalmitis",
            "Bacterial keratitis",
            "Bacterial peritonsillar abscess",
            "Bacterial brain abscess"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Streptococcal pharyngitis (strep throat)",
                "Staphylococcal skin infections",
                "Bacterial pneumonia",
                "Bacterial meningitis",
                "Bacterial endocarditis",
                "Bacterial sepsis",
                "Bacterial gastroenteritis",
                "Bacterial vaginosis",
                "Bacterial urinary tract infections",
                "Bacterial osteomyelitis",
                "Bacterial cellulitis",
                "Bacterial peritonitis",
                "Bacterial conjunctivitis",
                "Bacterial sinusitis",
                "Bacterial otitis media",
                "Bacterial cholecystitis",
                "Bacterial prostatitis",
                "Bacterial pyelonephritis",
                "Bacterial mastitis",
                "Bacterial arthritis",
                "Bacterial endophthalmitis",
                "Bacterial keratitis",
                "Bacterial peritonsillar abscess",
                "Bacterial brain abscess"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Bacillary Angiomatosis",
                "Bacillary Dysentery",
                "Bovine Brucellosis",
                "Brucellosis",
                "Burkholderia Infections",
                "Cardiovascular Syphilis",
                "Chlamydial Pneumonia",
                "Cholera",
                "Cutaneous Syphilis",
                "Enterobacteriaceae Infections",
                "Escherichia coli Infections",
                "Escherichia coli Meningitis",
                "Gonorrhea",
                "Haemophilus Meningitis",
                "Hemorrhagic Septicemia",
                "Inclusion Conjunctivitis",
                "Klebsiella Infections",
                "Legionnaires' Disease",
                "Leptospirosis",
                "Melioidosis",
                "Meningococcal Infections",
                "Meningococcal Meningitis",
                "Mycoplasma Pneumonia",
                "Ophthalmia Neonatorum",
                "Pneumonic Pasteurellosis",
                "Q Fever",
                "Syphilis",
                "Tularemia",
                "Typhoid Fever"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (S. pneumoniae)",
            "Staphylococcus aureus (S. aureus)",
            "Escherichia coli (E. coli)",
            "Klebsiella pneumoniae (K. pneumoniae)",
            "Pseudomonas aeruginosa (P. aeruginosa)",
            "Salmonella enterica (S. enterica)",
            "Shigella flexneri (S. flexneri)",
            "Neisseria gonorrhoeae (N. gonorrhoeae)",
            "Haemophilus influenzae (H. influenzae)",
            "Listeria monocytogenes (L. monocytogenes)",
            "Bacillus anthracis (B. anthracis)",
            "Clostridium tetani (C. tetani)",
            "Mycobacterium tuberculosis (M. tuberculosis)",
            "Vibrio cholerae (V. cholerae)",
            "Corynebacterium diphtheriae (C. diphtheriae)",
            "Bordetella pertussis (B. pertussis)",
            "Legionella pneumophila (L. pneumophila)",
            "Streptococcus pyogenes (S. pyogenes)",
            "Borrelia burgdorferi (B. burgdorferi)",
            "Treponema pallidum (T. pallidum)",
            "Chlamydia trachomatis (C. trachomatis)",
            "Rickettsia rickettsii (R. rickettsii)",
            "Yersinia pestis (Y. pestis)",
            "Francisella tularensis (F. tularensis)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Klebsiella pneumoniae (K. pneumoniae)",
                "Pseudomonas aeruginosa (P. aeruginosa)",
                "Salmonella enterica (S. enterica)",
                "Neisseria gonorrhoeae (N. gonorrhoeae)",
                "Haemophilus influenzae (H. influenzae)",
                "Vibrio cholerae (V. cholerae)",
                "Bordetella pertussis (B. pertussis)",
                "Legionella pneumophila (L. pneumophila)",
                "Borrelia burgdorferi (B. burgdorferi)",
                "Treponema pallidum (T. pallidum)",
                "Chlamydia trachomatis (C. trachomatis)",
                "Rickettsia rickettsii (R. rickettsii)",
                "Yersinia pestis (Y. pestis)",
                "Francisella tularensis (F. tularensis)"
            ],
            "mismatches": [
                "Streptococcus pneumoniae (S. pneumoniae)",
                "Staphylococcus aureus (S. aureus)",
                "Escherichia coli (E. coli)",
                "Shigella flexneri (S. flexneri)",
                "Listeria monocytogenes (L. monocytogenes)",
                "Bacillus anthracis (B. anthracis)",
                "Clostridium tetani (C. tetani)",
                "Mycobacterium tuberculosis (M. tuberculosis)",
                "Corynebacterium diphtheriae (C. diphtheriae)",
                "Streptococcus pyogenes (S. pyogenes)"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Bordetella Infections",
                "Borrelia Infections",
                "Chlamydial Pneumonia",
                "Cholera",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Klebsiella Infections",
                "Legionellosis",
                "Legionnaires' Disease",
                "Leptospirosis",
                "Lyme Disease",
                "Melioidosis",
                "Mycoplasma Pneumonia",
                "Neisseriaceae Infections",
                "Plague",
                "Pseudomonas Infections",
                "Rickettsia Infections",
                "Rickettsiaceae Infections",
                "Salmonella Food Poisoning",
                "Salmonella Infections",
                "Scrub Typhus",
                "Spotted Fever Group Rickettsiosis",
                "Syphilis",
                "Treponemal Infections",
                "Tularemia",
                "Typhoid Fever",
                "Vibrio Infections",
                "Whooping Cough",
                "Yersinia Infections"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcal pharyngitis (Strep Throat) (Strep)",
            "Streptococcal pneumonia (Pneumonia) (Pneum)",
            "Streptococcal skin infections (Impetigo) (Imp)",
            "Streptococcal tonsillitis (Tonsillitis) (Tons)",
            "Staphylococcal aureus infections (MRSA) (MRSA)",
            "Staphylococcal epidermidis infections (SSEp) (SSEp)",
            "Staphylococcal saprophyticus infections (SSa) (SSa)",
            "Staphylococcal hominis infections (SHo) (SHo)",
            "Streptococcus pyogenes infections (Group A Streptococcus) (GAS)",
            "Streptococcus agalactiae infections (Group B Streptococcus) (GBS)",
            "Streptococcus pneumoniae infections (Pneumococcus) (Pn)",
            "Streptococcus suis infections (Swine Streptococcus) (SS)",
            "Streptococcus equi infections (Equine Streptococcus) (SE)",
            "Streptococcus equi subsp. zooepidemicus infections (Zoo Streptococcus) (ZS)",
            "Streptococcus agalactiae infections (Group B Streptococcus) (GBS)",
            "Streptococcus iniae infections (Fish Streptococcus) (SI)",
            "Streptococcus mutans infections (Mutans Streptococcus) (SM)",
            "Streptococcus sobrinus infections (Sobrinus Streptococcus) (SSo)",
            "Streptococcus sanguinis infections (Sanguinis Streptococcus) (SSa)",
            "Streptococcus salivarius infections (Salivarius Streptococcus) (SSa)",
            "Streptococcus thermophilus infections (Thermophilus Streptococcus) (STh)",
            "Streptococcus vestibularis infections (Vestibularis Streptococcus) (SV)",
            "Staphylococcus aureus infections (MRSA) (MRSA)",
            "Staphylococcus epidermidis infections (SSEp) (SSEp)",
            "Staphylococcus saprophyticus infections (SSa) (SSa)",
            "Staphylococcus hominis infections (SHo) (SHo)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Streptococcal pharyngitis (Strep Throat) (Strep)",
                "Streptococcal pneumonia (Pneumonia) (Pneum)",
                "Streptococcal skin infections (Impetigo) (Imp)",
                "Streptococcal tonsillitis (Tonsillitis) (Tons)",
                "Staphylococcal aureus infections (MRSA) (MRSA)",
                "Staphylococcal epidermidis infections (SSEp) (SSEp)",
                "Staphylococcal saprophyticus infections (SSa) (SSa)",
                "Staphylococcal hominis infections (SHo) (SHo)",
                "Streptococcus pyogenes infections (Group A Streptococcus) (GAS)",
                "Streptococcus agalactiae infections (Group B Streptococcus) (GBS)",
                "Streptococcus pneumoniae infections (Pneumococcus) (Pn)",
                "Streptococcus suis infections (Swine Streptococcus) (SS)",
                "Streptococcus equi infections (Equine Streptococcus) (SE)",
                "Streptococcus equi subsp. zooepidemicus infections (Zoo Streptococcus) (ZS)",
                "Streptococcus iniae infections (Fish Streptococcus) (SI)",
                "Streptococcus mutans infections (Mutans Streptococcus) (SM)",
                "Streptococcus sobrinus infections (Sobrinus Streptococcus) (SSo)",
                "Streptococcus sanguinis infections (Sanguinis Streptococcus) (SSa)",
                "Streptococcus salivarius infections (Salivarius Streptococcus) (SSa)",
                "Streptococcus thermophilus infections (Thermophilus Streptococcus) (STh)",
                "Streptococcus vestibularis infections (Vestibularis Streptococcus) (SV)"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Actinobacillus Infections",
                "Bacteroidaceae Infections",
                "Bacteroides Infections",
                "Chlamydial Pneumonia",
                "Desulfovibrionaceae Infections",
                "Enterobacteriaceae Infections",
                "Gonorrhea",
                "Haemophilus Infections",
                "Haemophilus Meningitis",
                "Lemierre Syndrome",
                "Leptospirosis",
                "Lyme Disease",
                "Melioidosis",
                "Meningococcal Infections",
                "Mycoplasma Pneumonia",
                "Neisseriaceae Infections",
                "Neurosyphilis",
                "Piscirickettsiaceae Infections",
                "Pneumonic Pasteurellosis",
                "Proteus Infections",
                "Pseudomonas Infections",
                "Q Fever",
                "Rocky Mountain Spotted Fever",
                "Serratia Infections",
                "Syphilis",
                "Treponemal Infections",
                "Vibrio Infections"
            ],
            "TP": 0,
            "FP": 21,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": [
            "Streptococcus pneumoniae (pneumococcus)",
            "Staphylococcus aureus (S. aureus)",
            "Staphylococcus epidermidis",
            "Streptococcus agalactiae",
            "Streptococcus pyogenes",
            "Enterococcus faecalis",
            "Enterococcus faecium",
            "Bacillus subtilis",
            "Bacillus anthracis",
            "Listeria monocytogenes",
            "Erysipelothrix rhusiopathiae",
            "Clostridium difficile",
            "Clostridium perfringens",
            "Peptostreptococcus magnus",
            "Rothia mucilaginosa",
            "Gemella morbillorum",
            "Abiotrophia defectiva",
            "Granulicatella adiacens",
            "Granulicatella elegans",
            "Leuconostoc lactis",
            "Leuconostoc mesenteroides",
            "Lactobacillus acidophilus",
            "Lactobacillus casei",
            "Lactobacillus plantarum"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Streptococcus pneumoniae (pneumococcus)",
                "Staphylococcus aureus (S. aureus)",
                "Staphylococcus epidermidis",
                "Streptococcus agalactiae",
                "Streptococcus pyogenes",
                "Enterococcus faecalis",
                "Enterococcus faecium",
                "Bacillus subtilis",
                "Bacillus anthracis",
                "Listeria monocytogenes",
                "Erysipelothrix rhusiopathiae",
                "Clostridium difficile",
                "Clostridium perfringens",
                "Peptostreptococcus magnus",
                "Rothia mucilaginosa",
                "Gemella morbillorum",
                "Abiotrophia defectiva",
                "Granulicatella adiacens",
                "Granulicatella elegans",
                "Leuconostoc lactis",
                "Leuconostoc mesenteroides",
                "Lactobacillus acidophilus",
                "Lactobacillus casei",
                "Lactobacillus plantarum"
            ],
            "true_referents": [
                "Actinobacillosis",
                "Aliivibrio Infections",
                "Bacteroides Infections",
                "Chancroid",
                "Chlamydial Pneumonia",
                "Cholera",
                "Desulfovibrionaceae Infections",
                "Gonorrhea",
                "Granuloma Inguinale",
                "Lemierre Syndrome",
                "Leptospirosis",
                "Lymphogranuloma Venereum",
                "Melioidosis",
                "Moraxellaceae Infections",
                "Mycoplasma Pneumonia",
                "Pasteurella Infections",
                "Pinta",
                "Piscirickettsiaceae Infections",
                "Pneumonic Pasteurellosis",
                "Proteus Infections",
                "Rocky Mountain Spotted Fever",
                "Salmonella Food Poisoning",
                "Scrub Typhus",
                "Syphilis",
                "Tabes Dorsalis",
                "Trachoma",
                "Tularemia"
            ],
            "TP": 0,
            "FP": 24,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "Gram-negative bacterial infection",
        "domain": "medicine",
        "response": "[\n        {\n            \"Referent\": \"Streptococcus pneumoniae\" (SPN)\n        },\n        {\n            \"Referent\": \"Haemophilus influenzae\" (HIF)\n        },\n        {\n            \"Referent\": \"Chlamydia pneumoniae\" (CPN)\n        },\n        {\n            \"Referent\": \"Mycoplasma pneumoniae\" (MPN)\n        },\n        {\n            \"Referent\": \"Legionella pneumophila\" (LPN)\n        },\n        {\n            \"Referent\": \"Pasteurella multocida\" (PM)\n        },\n        {\n            \"Referent\": \"Bordetella pertussis\" (BP)\n        },\n        {\n            \"Referent\": \"Moraxella catarrhalis\" (MC)\n        },\n        {\n            \"Referent\": \"Pseudomonas aeruginosa\" (PA)\n        },\n        {\n            \"Referent\": \"Klebsiella pneumoniae\" (KP)\n        },\n        {\n            \"Referent\": \"Acinetobacter baumannii\" (AB)\n        },\n        {\n            \"Referent\": \"Escherichia coli\" (EC)\n        },\n        {\n            \"Referent\": \"Proteus mirabilis\" (PM)\n        },\n        {\n            \"Referent\": \"Enterobacter cloacae\" (ECL)\n        },\n        {\n            \"Referent\": \"Serratia marcescens\" (SM)\n        },\n        {\n            \"Referent\": \"Citrobacter freundii\" (CF)\n        },\n        {\n            \"Referent\": \"Enterobacter aerogenes\" (EA)\n        },\n        {\n            \"Referent\": \"Salmonella enterica\" (SE)\n        },\n        {\n            \"Referent\": \"Shigella flexneri\" (SF)\n        },\n        {\n            \"Referent\": \"Yersinia enterocolitica\" (YE)\n        },\n        {\n            \"Referent\": \"Campylobacter jejuni\" (CJ)\n        },\n        {\n            \"Referent\": \"Helicobacter pylori\" (HP)\n        },\n        {\n            \"Referent\": \"Ureaplasma urealyticum\" (UU)\n        },\n        {\n            \"Referent\": \"Mycoplasma hominis\" (MH)\n        },\n        {\n            \"Referent\": \"Ureaplasma parvum\" (UP)\n        }\n    ]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]