[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Human Immunodeficiency Virus (HIV)",
            "Rabies virus (RABV)",
            "Coronavirus (CoV)",
            "Ebola virus (EBOV)",
            "SARS-CoV-2",
            "Hepatitis C virus (HCV)",
            "Rotavirus (RV)",
            "West Nile virus (WNV)",
            "Yellow fever virus (YFV)",
            "Zika virus (ZIKV)",
            "Hantavirus (HTNV)",
            "Dengue virus (DENV)",
            "Marburg virus (MARV)",
            "Respiratory syncytial virus (RSV)",
            "Measles virus (MeV)",
            "Mumps virus (MuV)",
            "Poliovirus (PV)",
            "Cytomegalovirus (CMV)",
            "Herpes simplex virus (HSV)",
            "Adenovirus (AdV)",
            "Rift Valley fever virus (RVFV)",
            "Eastern equine encephalitis virus (EEEV)",
            "Chikungunya virus (CHIKV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "HIV",
                "Rabies virus",
                "Coronavirus",
                "SARS-CoV-2",
                "Rotavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus",
                "Dengue Virus",
                "Marburgvirus",
                "Measles virus",
                "Mumps virus",
                "Poliovirus",
                "Rift Valley fever virus",
                "Eastern Equine Encephalitis Virus",
                "Chikungunya virus"
            ],
            "mismatches": [
                "Ebola virus (EBOV)",
                "Hepatitis C virus (HCV)",
                "Hantavirus (HTNV)",
                "Respiratory syncytial virus (RSV)",
                "Cytomegalovirus (CMV)",
                "Herpes simplex virus (HSV)",
                "Adenovirus (AdV)"
            ],
            "true_referents": [
                "Betacoronavirus 1",
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Deltacoronavirus",
                "Dengue Virus",
                "Eastern Equine Encephalitis Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Marburgvirus",
                "Measles virus",
                "Morbillivirus",
                "Mumps virus",
                "Norovirus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Sapovirus",
                "West Nile virus",
                "Western Equine Encephalitis Virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Influenza B virus (IBV)",
            "Influenza C virus (ICV)",
            "Human immunodeficiency virus type 1 (HIV-1)",
            "Human immunodeficiency virus type 2 (HIV-2)",
            "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
            "Middle East respiratory syndrome coronavirus (MERS-CoV)",
            "Human coronavirus 229E (HCoV-229E)",
            "Human coronavirus NL63 (HCoV-NL63)",
            "Human coronavirus OC43 (HCoV-OC43)",
            "Human coronavirus HKU1 (HCoV-HKU1)",
            "Rabies virus (RABV)",
            "Ebola virus (EBOV)",
            "Marburg virus (MARV)",
            "Zika virus (ZIKV)",
            "Dengue virus (DENV)",
            "Yellow fever virus (YFV)",
            "West Nile virus (WNV)",
            "Hepatitis C virus (HCV)",
            "Hepatitis E virus (HEV)",
            "Crimean-Congo hemorrhagic fever virus (CCHFV)",
            "Lassa virus (LASV)",
            "Rift Valley fever virus (RVFV)",
            "Japanese encephalitis virus (JEV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "Influenza B virus",
                "HIV-1",
                "HIV-2",
                "SARS-CoV-2",
                "Middle East Respiratory Syndrome Coronavirus",
                "Human Coronavirus 229E",
                "Human Coronavirus NL63",
                "Human Coronavirus OC43",
                "Rabies virus",
                "Marburgvirus",
                "Zika Virus",
                "Dengue Virus",
                "Yellow fever virus",
                "West Nile virus",
                "Hepatitis E virus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Lassa virus",
                "Rift Valley fever virus",
                "Japanese Encephalitis Virus"
            ],
            "mismatches": [
                "Influenza C virus (ICV)",
                "Human coronavirus HKU1 (HCoV-HKU1)",
                "Ebola virus (EBOV)",
                "Hepatitis C virus (HCV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Coronavirus",
                "Canine Coronavirus",
                "Chikungunya virus",
                "Colorado tick fever virus",
                "Coronaviridae",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Erbovirus",
                "Feline Coronavirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Coronavirus 229E",
                "Human Coronavirus NL63",
                "Human Coronavirus OC43",
                "Human Enterovirus C",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Rabies virus",
                "Rift Valley fever virus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (FLU)",
            "Ebola virus (EBOV)",
            "Zika virus (ZIKV)",
            "Dengue virus (DENV)",
            "West Nile virus (WNV)",
            "Hepatitis C virus (HCV)",
            "Human immunodeficiency virus (HIV) (though HIV is a retrovirus, it uses RNA as genetic material)",
            "Rabies virus (RABV)",
            "Measles virus (MV)",
            "Mumps virus (MuV)",
            "Rubella virus (RRV)",
            "Polio virus (PV)",
            "Rotavirus (RV)",
            "Norovirus (NoV)",
            "SARS-CoV-2 (though SARS-CoV-2 is a coronavirus, it uses RNA as genetic material)",
            "Hepatitis A virus (HAV)",
            "Hepatitis E virus (HEV)",
            "Yellow fever virus (YFV)",
            "Japanese encephalitis virus (JEV)",
            "Tick-borne encephalitis virus (TBEV)",
            "Foot-and-mouth disease virus (FMDV)",
            "Coxsackievirus (CV)",
            "Enterovirus (EV)",
            "Respiratory syncytial virus (RSV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Zika virus (ZIKV)",
                "Dengue virus (DENV)",
                "West Nile virus (WNV)",
                "Human immunodeficiency virus (HIV) (though HIV is a retrovirus, it uses RNA as genetic material)",
                "Rabies virus (RABV)",
                "Measles virus (MV)",
                "Mumps virus (MuV)",
                "Rotavirus (RV)",
                "Norovirus (NoV)",
                "SARS-CoV-2 (though SARS-CoV-2 is a coronavirus, it uses RNA as genetic material)",
                "Hepatitis A virus (HAV)",
                "Hepatitis E virus (HEV)",
                "Yellow fever virus (YFV)",
                "Japanese encephalitis virus (JEV)",
                "Tick-borne encephalitis virus (TBEV)",
                "Foot-and-mouth disease virus (FMDV)",
                "Enterovirus (EV)",
                "Respiratory syncytial virus (RSV)"
            ],
            "mismatches": [
                "Influenza virus (FLU)",
                "Ebola virus (EBOV)",
                "Hepatitis C virus (HCV)",
                "Rubella virus (RRV)",
                "Polio virus (PV)",
                "Coxsackievirus (CV)"
            ],
            "true_referents": [
                "Bovine Respiratory Syncytial Virus",
                "Cardiovirus",
                "Comovirus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Fabavirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Enterovirus B",
                "Human Hepatitis A Virus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lyssavirus",
                "Mamastrovirus",
                "Measles virus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Pestivirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Tick-Borne Encephalitis Viruses",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Human Immunodeficiency Virus (HIV)",
            "Influenza Virus A (Influenza A)",
            "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2)",
            "Hepatitis C Virus (HCV)",
            "West Nile Virus (WNV)",
            "Zika Virus (ZIKV)",
            "Ebola Virus (EBOV)",
            "Rabies Virus (RABV)",
            "Norovirus (NoV)",
            "Rotavirus (Rotavirus)",
            "Measles Virus (MeV)",
            "Mumps Virus (MuV)",
            "Rubella Virus (RuV)",
            "Polio Virus (PV)",
            "Venezuelan Equine Encephalitis Virus (VEEV)",
            "Japanese Encephalitis Virus (JEV)",
            "Dengue Virus (DENV)",
            "Chikungunya Virus (CHIKV)",
            "Yellow Fever Virus (YFV)",
            "Human Respiratory Syncytial Virus (HRSV)",
            "Human Metapneumovirus (hMPV)",
            "Human Parechovirus (HPeV)",
            "Marburg Virus (MARV)",
            "Lassa Virus (LASV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Human Immunodeficiency Virus (HIV)",
                "Influenza Virus A (Influenza A)",
                "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2)",
                "West Nile Virus (WNV)",
                "Zika Virus (ZIKV)",
                "Ebola Virus (EBOV)",
                "Rabies Virus (RABV)",
                "Norovirus (NoV)",
                "Rotavirus (Rotavirus)",
                "Measles Virus (MeV)",
                "Mumps Virus (MuV)",
                "Rubella Virus (RuV)",
                "Polio Virus (PV)",
                "Venezuelan Equine Encephalitis Virus (VEEV)",
                "Japanese Encephalitis Virus (JEV)",
                "Dengue Virus (DENV)",
                "Chikungunya Virus (CHIKV)",
                "Yellow Fever Virus (YFV)",
                "Human Respiratory Syncytial Virus (HRSV)"
            ],
            "mismatches": [
                "Hepatitis C Virus (HCV)",
                "Human Metapneumovirus (hMPV)",
                "Human Parechovirus (HPeV)",
                "Marburg Virus (MARV)",
                "Lassa Virus (LASV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Eastern Equine Encephalitis Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "Giardiavirus",
                "H1N1 Subtype Influenza A Virus",
                "H1N2 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hendra Virus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Measles virus",
                "Metapneumovirus",
                "Morbillivirus",
                "Mumps virus",
                "Murine pneumonia virus",
                "Norovirus",
                "Norwalk virus",
                "Parechovirus",
                "Picornaviridae",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Transmissible gastroenteritis virus",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Hepatitis C virus (HCV)",
            "Human Immunodeficiency Virus (HIV)",
            "SARS-CoV-2",
            "Ebola virus",
            "Zika virus",
            "Dengue virus (DENV)",
            "West Nile virus (WNV)",
            "Rabies virus",
            "Measles virus",
            "Mumps virus",
            "Rubella virus",
            "Poliovirus",
            "Rhinovirus",
            "Yellow fever virus",
            "Norovirus",
            "Hepatitis E virus (HEV)",
            "Hepatitis A virus (HAV)",
            "Respiratory Syncytial Virus (RSV)",
            "Chikungunya virus",
            "Lassa virus",
            "Crimean-Congo Hemorrhagic Fever virus (CCHFV)",
            "Marburg virus",
            "Nipah virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "SARS-CoV-2",
                "Zika virus",
                "Dengue virus (DENV)",
                "West Nile virus (WNV)",
                "Rabies virus",
                "Measles virus",
                "Mumps virus",
                "Rubella virus",
                "Poliovirus",
                "Rhinovirus",
                "Yellow fever virus",
                "Norovirus",
                "Hepatitis E virus (HEV)",
                "Hepatitis A virus (HAV)",
                "Chikungunya virus",
                "Lassa virus",
                "Crimean-Congo Hemorrhagic Fever virus (CCHFV)",
                "Nipah virus"
            ],
            "mismatches": [
                "Influenza A virus (IAV)",
                "Hepatitis C virus (HCV)",
                "Human Immunodeficiency Virus (HIV)",
                "Ebola virus",
                "Respiratory Syncytial Virus (RSV)",
                "Marburg virus"
            ],
            "true_referents": [
                "Arenavirus",
                "Betacoronavirus 1",
                "Bovine Respiratory Syncytial Virus",
                "Cardiovirus",
                "Chikungunya virus",
                "Colorado tick fever virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hendra Virus",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Hepatitis A Virus",
                "Human Respiratory Syncytial Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Lagovirus",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Measles virus",
                "Morbillivirus",
                "Mumps virus",
                "Nipah Virus",
                "Norovirus",
                "Norwalk virus",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "HIV (Human Immunodeficiency Virus)",
            "Influenza Virus (IAV)",
            "SARS-CoV-2 (Severe Acute Respiratory Syndrome Coronavirus 2)",
            "Ebola Virus (EBOV)",
            "Zika Virus (ZIKV)",
            "Dengue Virus (DENV)",
            "West Nile Virus (WNV)",
            "Hepatitis C Virus (HCV)",
            "Measles Virus (MV)",
            "Mumps Virus (MuV)",
            "Rabies Virus (RABV)",
            "Poliovirus (PV)",
            "Togavirus (Togaviridae)",
            "Newcastle Disease Virus (NDV)",
            "Avian Influenza Virus (AIV)",
            "Lassa Fever Virus (LASV)",
            "Yellow Fever Virus (YFV)",
            "Chikungunya Virus (CHIKV)",
            "Sindbis Virus (SINV)",
            "Borna Disease Virus (BDV)",
            "Human Respiratory Syncytial Virus (HRSV)",
            "Feline Immunodeficiency Virus (FIV)",
            "Canine Parvovirus (CPV)",
            "Rift Valley Fever Virus (RVFV)",
            "Hantavirus (Sin Nombre Virus)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV (Human Immunodeficiency Virus)",
                "SARS-CoV-2 (Severe Acute Respiratory Syndrome Coronavirus 2)",
                "Ebola Virus (EBOV)",
                "Zika Virus (ZIKV)",
                "Dengue Virus (DENV)",
                "West Nile Virus (WNV)",
                "Measles Virus (MV)",
                "Mumps Virus (MuV)",
                "Rabies Virus (RABV)",
                "Poliovirus (PV)",
                "Newcastle Disease Virus (NDV)",
                "Yellow Fever Virus (YFV)",
                "Chikungunya Virus (CHIKV)",
                "Sindbis Virus (SINV)",
                "Borna Disease Virus (BDV)",
                "Human Respiratory Syncytial Virus (HRSV)",
                "Feline Immunodeficiency Virus (FIV)",
                "Rift Valley Fever Virus (RVFV)"
            ],
            "mismatches": [
                "Influenza Virus (IAV)",
                "Hepatitis C Virus (HCV)",
                "Togavirus (Togaviridae)",
                "Avian Influenza Virus (AIV)",
                "Lassa Fever Virus (LASV)",
                "Canine Parvovirus (CPV)",
                "Hantavirus (Sin Nombre Virus)"
            ],
            "true_referents": [
                "Arenavirus",
                "Avian Encephalomyelitis Virus",
                "Bluetongue virus",
                "Borna disease virus",
                "Bornaviridae",
                "Bovine Immunodeficiency Virus",
                "Bovine Respiratory Syncytial Virus",
                "Canine Distemper Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Feline Immunodeficiency Virus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Isavirus",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Measles virus",
                "Mumps virus",
                "Newcastle disease virus",
                "Nodaviridae",
                "Parechovirus",
                "Pestivirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rift Valley fever virus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Sindbis Virus",
                "Tobamovirus",
                "Togaviridae",
                "Totivirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (IV)",
            "Hepatitis C virus (HCV)",
            "Human Immunodeficiency Virus (HIV)",
            "Zika virus (ZIKV)",
            "Dengue virus (DENV)",
            "Rabies virus (RABV)",
            "Measles virus (MV)",
            "Mumps virus (MuV)",
            "Poliovirus (PV)",
            "Rotavirus (RV)",
            "Marburg virus (MARV)",
            "Ebola virus (EBOV)",
            "West Nile virus (WNV)",
            "Yellow Fever virus (YFV)",
            "Respiratory Syncytial Virus (RSV)",
            "Severe Acute Respiratory Syndrome Coronavirus (SARS-CoV)",
            "Middle East Respiratory Syndrome Coronavirus (MERS-CoV)",
            "Human coronavirus 229E (HCoV-229E)",
            "Human coronavirus OC43 (HCoV-OC43)",
            "Human coronavirus NL63 (HCoV-NL63)",
            "Human coronavirus HKU1 (HCoV-HKU1)",
            "Norovirus (NoV)",
            "Foot-and-mouth disease virus (FMDV)",
            "Newcastle disease virus (NDV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV",
                "Zika Virus",
                "Dengue Virus",
                "Rabies virus",
                "Measles virus",
                "Mumps virus",
                "Poliovirus",
                "Rotavirus",
                "Marburgvirus",
                "West Nile virus",
                "Yellow fever virus",
                "Human Coronavirus 229E",
                "Human Coronavirus NL63",
                "Human Coronavirus OC43",
                "Middle East Respiratory Syndrome Coronavirus",
                "Foot-and-Mouth Disease Virus",
                "Newcastle disease virus"
            ],
            "mismatches": [
                "Influenza virus (IV)",
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Respiratory Syncytial Virus (RSV)",
                "Severe Acute Respiratory Syndrome Coronavirus (SARS-CoV)",
                "Human coronavirus HKU1 (HCoV-HKU1)",
                "Norovirus (NoV)"
            ],
            "true_referents": [
                "Bovine Coronavirus",
                "Bovine Respiratory Syncytial Virus",
                "Canine Coronavirus",
                "Coronaviridae",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Fabavirus",
                "Feline Coronavirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Coronavirus 229E",
                "Human Coronavirus NL63",
                "Human Coronavirus OC43",
                "Human Parainfluenza Virus 4",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Marburgvirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Mumps virus",
                "Newcastle disease virus",
                "Nodaviridae",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Pestivirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus",
            "HIV",
            "Ebola virus",
            "Zika virus",
            "SARS-CoV-2",
            "Hepatitis C virus",
            "Measles virus",
            "Rabies virus",
            "Poliovirus",
            "Mumps virus",
            "Dengue virus",
            "West Nile virus",
            "Yellow fever virus",
            "Hantavirus",
            "Norovirus",
            "Rhinovirus",
            "Coronavirus",
            "Hepatitis A virus",
            "Rotavirus",
            "Japanese encephalitis virus",
            "Chikungunya virus",
            "Lassa virus",
            "Marburg virus",
            "Rift Valley fever virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV",
                "Zika virus",
                "SARS-CoV-2",
                "Measles virus",
                "Rabies virus",
                "Poliovirus",
                "Mumps virus",
                "Dengue virus",
                "West Nile virus",
                "Yellow fever virus",
                "Norovirus",
                "Rhinovirus",
                "Coronavirus",
                "Hepatitis A virus",
                "Rotavirus",
                "Chikungunya virus",
                "Lassa virus",
                "Rift Valley fever virus"
            ],
            "mismatches": [
                "Influenza virus",
                "Ebola virus",
                "Hepatitis C virus",
                "Hantavirus",
                "Japanese encephalitis virus",
                "Marburg virus"
            ],
            "true_referents": [
                "Arenavirus",
                "Betacoronavirus 1",
                "Cardiovirus",
                "Chikungunya virus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Feline Coronavirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "Giardiavirus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Human Hepatitis A Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lagovirus",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Measles virus",
                "Morbillivirus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "SARS-CoV-2",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Influenza B virus",
            "Influenza C virus",
            "Human Immunodeficiency Virus 1 (HIV-1)",
            "Human Immunodeficiency Virus 2 (HIV-2)",
            "SARS-CoV-2",
            "SARS-CoV",
            "MERS-CoV",
            "Ebola virus",
            "Marburg virus",
            "Rabies virus",
            "Hepatitis C virus (HCV)",
            "Hepatitis E virus (HEV)",
            "Zika virus",
            "Dengue virus",
            "Chikungunya virus",
            "West Nile virus (WNV)",
            "Norovirus",
            "Rotavirus",
            "Poliovirus",
            "Rhinovirus",
            "Respiratory Syncytial Virus (RSV)",
            "Measles virus",
            "Mumps virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza B virus",
                "HIV-1",
                "HIV-2",
                "SARS-CoV-2",
                "Marburgvirus",
                "Rabies virus",
                "Hepatitis E virus",
                "Zika Virus",
                "Dengue Virus",
                "Chikungunya virus",
                "Norovirus",
                "Rotavirus",
                "Poliovirus",
                "Rhinovirus",
                "Measles virus",
                "Mumps virus"
            ],
            "mismatches": [
                "Influenza A virus (IAV)",
                "Influenza C virus",
                "SARS-CoV",
                "MERS-CoV",
                "Ebola virus",
                "Hepatitis C virus (HCV)",
                "West Nile virus (WNV)",
                "Respiratory Syncytial Virus (RSV)"
            ],
            "true_referents": [
                "Betacoronavirus 1",
                "Bovine Respiratory Syncytial Virus",
                "Cardiovirus",
                "Chikungunya virus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Filoviridae",
                "Flavivirus",
                "GB virus B",
                "GB virus C",
                "Giardiavirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus C",
                "Human Respiratory Syncytial Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lyssavirus",
                "Marburgvirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rotavirus",
                "Rubella virus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Zika Virus"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Human immunodeficiency virus (HIV)",
            "Influenza virus",
            "Ebola virus",
            "Zika virus",
            "Measles virus",
            "Rabies virus",
            "Hepatitis C virus (HCV)",
            "West Nile virus",
            "Dengue virus",
            "Yellow fever virus",
            "Poliovirus",
            "Rhinovirus",
            "Coronavirus (including SARS-CoV-2)",
            "Rotavirus",
            "Norovirus",
            "Hantavirus",
            "Chikungunya virus",
            "Mumps virus",
            "Rubella virus",
            "Respiratory syncytial virus (RSV)",
            "Enterovirus",
            "Nipah virus",
            "Rift Valley fever virus",
            "Lassa virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Measles virus",
                "Rabies virus",
                "West Nile virus",
                "Dengue virus",
                "Yellow fever virus",
                "Poliovirus",
                "Rhinovirus",
                "Rotavirus",
                "Norovirus",
                "Chikungunya virus",
                "Mumps virus",
                "Rubella virus",
                "Enterovirus",
                "Nipah virus",
                "Rift Valley fever virus",
                "Lassa virus"
            ],
            "mismatches": [
                "Human immunodeficiency virus (HIV)",
                "Influenza virus",
                "Ebola virus",
                "Hepatitis C virus (HCV)",
                "Coronavirus (including SARS-CoV-2)",
                "Hantavirus",
                "Respiratory syncytial virus (RSV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Respiratory Syncytial Virus",
                "Cardiovirus",
                "Chikungunya virus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "Giardiavirus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Enterovirus B",
                "Human Respiratory Syncytial Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lagovirus",
                "Lassa virus",
                "Lyssavirus",
                "Measles virus",
                "Mumps virus",
                "Nipah Virus",
                "Norovirus",
                "Norwalk virus",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 16,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Human immunodeficiency virus (HIV)",
            "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
            "Ebola virus (EBOV)",
            "Measles morbillivirus (MeV)",
            "Rabies lyssavirus (RABV)",
            "Hepatitis C virus (HCV)",
            "Zika virus (ZIKV)",
            "West Nile virus (WNV)",
            "Dengue virus (DENV)",
            "Yellow fever virus (YFV)",
            "Poliovirus (PV)",
            "Rhinovirus (RV)",
            "Norovirus (NoV)",
            "Respiratory syncytial virus (RSV)",
            "Rotavirus (RV)",
            "Rubella virus (RuV)",
            "Mumps orthorubulavirus (MuV)",
            "Chikungunya virus (CHIKV)",
            "Venezuelan equine encephalitis virus (VEEV)",
            "Lassa mammarenavirus (LASV)",
            "Crimean-Congo hemorrhagic fever orthonairovirus (CCHFV)",
            "Nipah henipavirus (NiV)",
            "Hendra henipavirus (HeV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "HIV",
                "SARS-CoV-2",
                "Measles virus",
                "Rabies virus",
                "Zika Virus",
                "West Nile virus",
                "Dengue Virus",
                "Yellow fever virus",
                "Poliovirus",
                "Rhinovirus",
                "Norovirus",
                "Rotavirus",
                "Rubella virus",
                "Mumps virus",
                "Chikungunya virus",
                "Venezuelan Equine Encephalitis Virus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Nipah Virus",
                "Hendra Virus"
            ],
            "mismatches": [
                "Ebola virus (EBOV)",
                "Hepatitis C virus (HCV)",
                "Respiratory syncytial virus (RSV)",
                "Lassa mammarenavirus (LASV)"
            ],
            "true_referents": [
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Colorado tick fever virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Eastern Equine Encephalitis Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hendra Virus",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Mammalian Orthoreovirus",
                "Measles virus",
                "Morbillivirus",
                "Mumps virus",
                "Nipah Virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Orthomyxoviridae",
                "Ovine-Caprine Lentiviruses",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Venezuelan Equine Encephalitis Virus",
                "Vesiculovirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (e.g. H1N1, H3N2)",
            "Hepatitis C virus (HCV)",
            "Zika virus (ZIKV)",
            "Ebola virus (EBOV)",
            "Human immunodeficiency virus (HIV)",
            "Measles virus (MeV)",
            "Rabies virus (RABV)",
            "Poliovirus (PV)",
            "Dengue virus (DENV)",
            "Chikungunya virus (CHIKV)",
            "West Nile virus (WNV)",
            "Yellow fever virus (YFV)",
            "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
            "Middle East respiratory syndrome coronavirus (MERS-CoV)",
            "Rhinovirus (RV)",
            "Hepatitis E virus (HEV)",
            "Norovirus (NoV)",
            "Enterovirus (EV)",
            "Rotavirus (RV)",
            "Respiratory syncytial virus (RSV)",
            "Hantavirus (HTNV)",
            "Coronavirus disease 2019 (COVID-19)",
            "Hepatitis A virus (HAV)",
            "Hepatitis B virus (HBV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Zika virus (ZIKV)",
                "Measles virus (MeV)",
                "Rabies virus (RABV)",
                "Poliovirus (PV)",
                "Dengue virus (DENV)",
                "Chikungunya virus (CHIKV)",
                "West Nile virus (WNV)",
                "Yellow fever virus (YFV)",
                "Rhinovirus (RV)",
                "Hepatitis E virus (HEV)",
                "Norovirus (NoV)",
                "Enterovirus (EV)",
                "Rotavirus (RV)",
                "Hepatitis A virus (HAV)"
            ],
            "mismatches": [
                "Influenza virus (e.g. H1N1, H3N2)",
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Human immunodeficiency virus (HIV)",
                "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
                "Middle East respiratory syndrome coronavirus (MERS-CoV)",
                "Respiratory syncytial virus (RSV)",
                "Hantavirus (HTNV)",
                "Coronavirus disease 2019 (COVID-19)",
                "Hepatitis B virus (HBV)"
            ],
            "true_referents": [
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Feline Coronavirus",
                "Filoviridae",
                "Flavivirus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Enterovirus B",
                "Human Hepatitis A Virus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lassa virus",
                "Lyssavirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (Flu)",
            "Hepatitis C virus (HCV)",
            "Human immunodeficiency virus (HIV)",
            "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
            "Zika virus (ZIKV)",
            "Ebola virus (EBOV)",
            "Measles virus (MeV)",
            "Poliovirus (PV)",
            "Dengue virus (DENV)",
            "Rabies virus (RABV)",
            "Coronavirus (CoV)",
            "Rhinovirus (RV)",
            "Norovirus (NoV)",
            "Rotavirus (RoV)",
            "Respiratory syncytial virus (RSV)",
            "Enterovirus (EV)",
            "Adenovirus (AdV)",
            "Parvovirus (PPV)",
            "Papillomavirus (HPV)",
            "Hantavirus (HTNV)",
            "Chikungunya virus (CHIKV)",
            "Hepatitis A virus (HAV)",
            "Hepatitis E virus (HEV)",
            "Rubella virus (RUBV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Human immunodeficiency virus (HIV)",
                "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
                "Zika virus (ZIKV)",
                "Measles virus (MeV)",
                "Poliovirus (PV)",
                "Dengue virus (DENV)",
                "Rabies virus (RABV)",
                "Coronavirus (CoV)",
                "Rhinovirus (RV)",
                "Norovirus (NoV)",
                "Rotavirus (RoV)",
                "Respiratory syncytial virus (RSV)",
                "Enterovirus (EV)",
                "Chikungunya virus (CHIKV)",
                "Hepatitis A virus (HAV)",
                "Hepatitis E virus (HEV)",
                "Rubella virus (RUBV)"
            ],
            "mismatches": [
                "Influenza virus (Flu)",
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Adenovirus (AdV)",
                "Parvovirus (PPV)",
                "Papillomavirus (HPV)",
                "Hantavirus (HTNV)"
            ],
            "true_referents": [
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Deltacoronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "Gammaretrovirus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Enterovirus B",
                "Human Hepatitis A Virus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lassa virus",
                "Lyssavirus",
                "Measles virus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Parechovirus",
                "Pestivirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Sapovirus",
                "Severe acute respiratory syndrome-related coronavirus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "HIV (Human Immunodeficiency Virus)",
            "SARS-CoV (Severe Acute Respiratory Syndrome Coronavirus)",
            "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
            "Ebola virus",
            "Marburg virus",
            "Lassa virus",
            "Rabies virus",
            "Hantavirus",
            "Nipah virus",
            "Henipavirus",
            "Coronavirus",
            "SARS-CoV-2 (COVID-19 virus)",
            "Influenza virus",
            "Rotavirus",
            "Norovirus",
            "Enterovirus",
            "Reovirus",
            "Poliovirus",
            "Retrovirus",
            "HTLV (Human T-lymphotropic virus)",
            "HTLV-1",
            "HTLV-2",
            "HTLV-3",
            "HTLV-4",
            "HTLV-5",
            "HTLV-6"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV (Human Immunodeficiency Virus)",
                "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
                "Lassa virus",
                "Rabies virus",
                "Nipah virus",
                "Henipavirus",
                "Coronavirus",
                "SARS-CoV-2 (COVID-19 virus)",
                "Rotavirus",
                "Norovirus",
                "Enterovirus",
                "Poliovirus",
                "HTLV-1",
                "HTLV-2",
                "HTLV-3"
            ],
            "mismatches": [
                "SARS-CoV (Severe Acute Respiratory Syndrome Coronavirus)",
                "Ebola virus",
                "Marburg virus",
                "Hantavirus",
                "Influenza virus",
                "Reovirus",
                "Retrovirus",
                "HTLV (Human T-lymphotropic virus)",
                "HTLV-4",
                "HTLV-5",
                "HTLV-6"
            ],
            "true_referents": [
                "Arenavirus",
                "Cardiovirus",
                "Coronavirus",
                "Ebolavirus",
                "Endogenous Retroviruses",
                "Enterovirus",
                "Feline Coronavirus",
                "Filoviridae",
                "Foot-and-Mouth Disease Virus",
                "Giardiavirus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Henipavirus",
                "Human Enterovirus A",
                "Human Enterovirus B",
                "Human T-lymphotropic virus 1",
                "Human T-lymphotropic virus 2",
                "Human T-lymphotropic virus 3",
                "Influenza A virus",
                "Influenza B virus",
                "Lagovirus",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Nipah Virus",
                "Norovirus",
                "Norwalk virus",
                "Poliovirus",
                "Primate T-lymphotropic virus 1",
                "Primate T-lymphotropic virus 2",
                "Primate T-lymphotropic virus 3",
                "Rabies virus",
                "Reoviridae",
                "Respirovirus",
                "Retroviridae",
                "Rotavirus",
                "Rubella virus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Zika Virus"
            ],
            "TP": 15,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Hepatitis A virus (HAV)",
            "Hepatitis C virus (HCV)",
            "Hepatitis E virus (HEV)",
            "Poliovirus (PV)",
            "Rhinovirus (RV)",
            "Coronavirus (CoV)",
            "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
            "Middle East respiratory syndrome coronavirus (MERS-CoV)",
            "Human immunodeficiency virus 1 (HIV-1)",
            "Human immunodeficiency virus 2 (HIV-2)",
            "Simian immunodeficiency virus (SIV)",
            "Feline immunodeficiency virus (FIV)",
            "Influenza A virus",
            "Influenza B virus",
            "Influenza C virus",
            "Rotavirus (RV)",
            "Norovirus (NoV)",
            "Dengue virus (DENV)",
            "Yellow fever virus (YFV)",
            "Japanese encephalitis virus (JEV)",
            "West Nile virus (WNV)",
            "Togaviridae (e.g. Rubella virus)",
            "Astrovirus (HAstV)",
            "Enterovirus (EV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hepatitis A virus (HAV)",
                "Hepatitis E virus (HEV)",
                "Poliovirus (PV)",
                "Rhinovirus (RV)",
                "Coronavirus (CoV)",
                "Middle East respiratory syndrome coronavirus (MERS-CoV)",
                "Human immunodeficiency virus 1 (HIV-1)",
                "Human immunodeficiency virus 2 (HIV-2)",
                "Simian immunodeficiency virus (SIV)",
                "Feline immunodeficiency virus (FIV)",
                "Influenza A virus",
                "Influenza B virus",
                "Rotavirus (RV)",
                "Norovirus (NoV)",
                "Dengue virus (DENV)",
                "Yellow fever virus (YFV)",
                "Japanese encephalitis virus (JEV)",
                "West Nile virus (WNV)",
                "Togaviridae (e.g. Rubella virus)",
                "Enterovirus (EV)"
            ],
            "mismatches": [
                "Hepatitis C virus (HCV)",
                "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
                "Influenza C virus",
                "Astrovirus (HAstV)"
            ],
            "true_referents": [
                "Astroviridae",
                "Bovine Immunodeficiency Virus",
                "Coronavirus",
                "Deltacoronavirus",
                "Dengue Virus",
                "Enterovirus",
                "Feline Immunodeficiency Virus",
                "Flavivirus",
                "GB virus B",
                "GB virus C",
                "H1N1 Subtype Influenza A Virus",
                "H1N2 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Enterovirus B",
                "Human Enterovirus C",
                "Human Hepatitis A Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Middle East Respiratory Syndrome Coronavirus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Simian Immunodeficiency Virus",
                "Simian T-lymphotropic virus 1",
                "Togaviridae",
                "West Nile virus",
                "Yellow fever virus"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            {
                "Referent": "HIV",
                "Canonical Name": "human immunodeficiency virus"
            },
            {
                "Referent": "Influenza",
                "Canonical Name": "influenza virus"
            },
            {
                "Referent": "Coronavirus",
                "Canonical Name": "coronavirus"
            },
            {
                "Referent": "Rotavirus",
                "Canonical Name": "rotavirus"
            },
            {
                "Referent": "Hepatitis C virus",
                "Canonical Name": "hepatitis c virus"
            },
            {
                "Referent": "Rabies virus",
                "Canonical Name": "rabies virus"
            },
            {
                "Referent": "Papillomavirus",
                "Canonical Name": "papillomavirus"
            },
            {
                "Referent": "Hepatitis B virus",
                "Canonical Name": "hepatitis b virus"
            },
            {
                "Referent": "Hepatitis A virus",
                "Canonical Name": "hepatitis a virus"
            },
            {
                "Referent": "Ebola virus",
                "Canonical Name": "ebola virus"
            },
            {
                "Referent": "Coronavirus NL63",
                "Canonical Name": "coronavirus nl63"
            },
            {
                "Referent": "Coronavirus 229E",
                "Canonical Name": "coronavirus 229e"
            },
            {
                "Referent": "Coronavirus OC43",
                "Canonical Name": "coronavirus oc43"
            },
            {
                "Referent": "Coronavirus HKU1",
                "Canonical Name": "coronavirus hku1"
            },
            {
                "Referent": "Coronavirus MERS-CoV",
                "Canonical Name": "coronavirus mers-cov"
            },
            {
                "Referent": "Coronavirus SARS-CoV",
                "Canonical Name": "coronavirus sars-cov"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            },
            {
                "Referent": "Coronavirus SARS-CoV-2",
                "Canonical Name": "coronavirus sars-cov-2"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV",
                "Coronavirus",
                "Rotavirus",
                "Rabies virus",
                "Hepatitis A virus",
                "Human Coronavirus NL63",
                "Human Coronavirus 229E",
                "Human Coronavirus OC43",
                "SARS-CoV-2"
            ],
            "mismatches": [
                "Influenza",
                "Hepatitis C virus",
                "Papillomavirus",
                "Hepatitis B virus",
                "Ebola virus",
                "Coronavirus HKU1",
                "Coronavirus MERS-CoV",
                "Coronavirus SARS-CoV"
            ],
            "true_referents": [
                "Betacoronavirus 1",
                "Comovirus",
                "Coronaviridae",
                "Coronavirus",
                "Deltacoronavirus",
                "Ebolavirus",
                "Filoviridae",
                "Flavivirus",
                "Gammacoronavirus",
                "Giardiavirus",
                "H1N1 Subtype Influenza A Virus",
                "H7N7 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Human Coronavirus 229E",
                "Human Coronavirus NL63",
                "Human Coronavirus OC43",
                "Human Hepatitis A Virus",
                "Influenza A virus",
                "Lyssavirus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Nodaviridae",
                "Picornaviridae",
                "Rabies virus",
                "Rotavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Yellow fever virus"
            ],
            "TP": 9,
            "FP": 8,
            "FN": 0
        }
    }
]