[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (Flu)",
            "Hepatitis C virus (HCV)",
            "Human immunodeficiency virus (HIV)",
            "Coronavirus (SARS-CoV-2)",
            "Rabies virus (RABV)",
            "Ebola virus (EBOV)",
            "Hepatitis B virus (HBV)",
            "Rotavirus (RV)",
            "Measles virus (MV)",
            "Mumps virus (MuV)",
            "Yellow fever virus (YFV)",
            "Sindbis virus (SINV)",
            "Rift Valley fever virus (RVFV)",
            "Hantavirus (HTNV)",
            "Lassa virus (LASV)",
            "West Nile virus (WNV)",
            "Japanese encephalitis virus (JEV)",
            "Chikungunya virus (CHIKV)",
            "Dengue virus (DENV)",
            "Poliovirus (PV)",
            "Coxsackievirus (CV)",
            "Echovirus (EchV)",
            "Norovirus (NoV)",
            "Respiratory syncytial virus (RSV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Human immunodeficiency virus (HIV)",
                "Coronavirus (SARS-CoV-2)",
                "Rabies virus (RABV)",
                "Rotavirus (RV)",
                "Measles virus (MV)",
                "Mumps virus (MuV)",
                "Yellow fever virus (YFV)",
                "Sindbis virus (SINV)",
                "Rift Valley fever virus (RVFV)",
                "Lassa virus (LASV)",
                "West Nile virus (WNV)",
                "Japanese encephalitis virus (JEV)",
                "Chikungunya virus (CHIKV)",
                "Dengue virus (DENV)",
                "Poliovirus (PV)",
                "Norovirus (NoV)"
            ],
            "mismatches": [
                "Influenza virus (Flu)",
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Hepatitis B virus (HBV)",
                "Hantavirus (HTNV)",
                "Coxsackievirus (CV)",
                "Echovirus (EchV)",
                "Respiratory syncytial virus (RSV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Respiratory Syncytial Virus",
                "Cardiovirus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Echovirus 9",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Echovirus 6",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Isavirus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Measles virus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Sindbis Virus",
                "West Nile virus",
                "Yellow fever virus"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Influenza B virus (IBV)",
            "Influenza C virus (ICV)",
            "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
            "Middle East respiratory syndrome coronavirus (MERS-CoV)",
            "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
            "Hepatitis C virus (HCV)",
            "Hepatitis E virus (HEV)",
            "Norovirus",
            "Rotavirus",
            "Rhinovirus",
            "Coronaviridae (family)",
            "Picornaviridae (family)",
            "Retroviridae (family)",
            "Flaviviridae (family)",
            "Togaviridae (family)",
            "Orthomyxoviridae (family)",
            "Rhabdoviridae (family)",
            "Paramyxoviridae (family)",
            "Caliciviridae (family)",
            "Astroviridae (family)",
            "Bunyavirales (order)",
            "Filoviridae (family)",
            "Orthoreovirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "Influenza B virus",
                "SARS-CoV-2",
                "Middle East Respiratory Syndrome Coronavirus",
                "Hepatitis E virus",
                "Norovirus",
                "Rotavirus",
                "Rhinovirus",
                "Coronaviridae",
                "Picornaviridae",
                "Retroviridae",
                "Flaviviridae",
                "Togaviridae",
                "Orthomyxoviridae",
                "Rhabdoviridae",
                "Paramyxoviridae",
                "Caliciviridae",
                "Astroviridae",
                "Filoviridae",
                "Orthoreovirus"
            ],
            "mismatches": [
                "Influenza C virus (ICV)",
                "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
                "Hepatitis C virus (HCV)",
                "Bunyavirales (order)"
            ],
            "true_referents": [
                "Astroviridae",
                "Avian Orthoreovirus",
                "Bunyaviridae",
                "Caliciviridae",
                "Closteroviridae",
                "Coronaviridae",
                "Coronavirus",
                "Enterovirus",
                "Filoviridae",
                "Flaviviridae",
                "Flavivirus",
                "Flexiviridae",
                "Giardiavirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus C",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Mammalian Orthoreovirus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Nodaviridae",
                "Norovirus",
                "Norwalk virus",
                "Orthomyxoviridae",
                "Orthoreovirus",
                "Paramyxoviridae",
                "Paramyxovirinae",
                "Picornaviridae",
                "Pneumovirus",
                "Reoviridae",
                "Retroviridae",
                "Rhabdoviridae",
                "Rhinovirus",
                "Rotavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Togaviridae",
                "Totiviridae"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Influenza B virus (IBV)",
            "Influenza C virus (ICV)",
            "Respiratory syncytial virus (RSV)",
            "Measles virus (MeV)",
            "Mumps virus (MuV)",
            "Rubella virus (RuV)",
            "Polio virus (PV)",
            "Coxsackievirus",
            "Echovirus",
            "Norovirus",
            "Rotavirus",
            "Dengue virus (DENV)",
            "Zika virus (ZIKV)",
            "West Nile virus (WNV)",
            "Yellow fever virus (YFV)",
            "Hepatitis C virus (HCV)",
            "Rabies virus (RABV)",
            "Ebola virus (EBOV)",
            "SARS-CoV-2",
            "SARS-CoV",
            "MERS-CoV",
            "Hepatitis E virus (HEV)",
            "Human immunodeficiency virus (HIV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "Influenza B virus",
                "Measles virus",
                "Mumps virus",
                "Rubella virus",
                "Poliovirus",
                "Norovirus",
                "Rotavirus",
                "Dengue virus",
                "Zika Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Rabies virus",
                "SARS-CoV-2",
                "Hepatitis E virus",
                "HIV"
            ],
            "mismatches": [
                "Influenza C virus (ICV)",
                "Respiratory syncytial virus (RSV)",
                "Polio virus (PV)",
                "Coxsackievirus",
                "Echovirus",
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "SARS-CoV",
                "MERS-CoV",
                "Human immunodeficiency virus (HIV)"
            ],
            "true_referents": [
                "Betacoronavirus 1",
                "Bovine Respiratory Syncytial Virus",
                "Cardiovirus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Echovirus 9",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "Giardiavirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Echovirus 6",
                "Human Enterovirus C",
                "Human Respiratory Syncytial Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lyssavirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 16,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Human Immunodeficiency Virus (HIV)",
            "Influenza A virus (IAV)",
            "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2)",
            "Hepatitis C virus (HCV)",
            "West Nile virus (WNV)",
            "Zika virus (ZIKV)",
            "Ebola virus (EBOV)",
            "Measles virus (MeV)",
            "Mumps virus (MuV)",
            "Rubella virus (RuV)",
            "Polio virus (PV)",
            "Norovirus (NoV)",
            "Rotavirus (RotV)",
            "Venezuelan equine encephalitis virus (VEEV)",
            "Japanese encephalitis virus (JEV)",
            "Dengue virus (DENV)",
            "Yellow fever virus (YFV)",
            "Chikungunya virus (CHIKV)",
            "Rabies virus (RABV)",
            "Human respiratory syncytial virus (HRSV)",
            "Human metapneumovirus (HMPV)",
            "Foot-and-mouth disease virus (FMDV)",
            "Tomato spotted wilt virus (TSWV)",
            "Turnip mosaic virus (TuMV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Human Immunodeficiency Virus (HIV)",
                "Influenza A virus (IAV)",
                "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2)",
                "West Nile virus (WNV)",
                "Zika virus (ZIKV)",
                "Ebola virus (EBOV)",
                "Measles virus (MeV)",
                "Mumps virus (MuV)",
                "Rubella virus (RuV)",
                "Norovirus (NoV)",
                "Rotavirus (RotV)",
                "Venezuelan equine encephalitis virus (VEEV)",
                "Japanese encephalitis virus (JEV)",
                "Dengue virus (DENV)",
                "Yellow fever virus (YFV)",
                "Chikungunya virus (CHIKV)",
                "Rabies virus (RABV)",
                "Human respiratory syncytial virus (HRSV)",
                "Foot-and-mouth disease virus (FMDV)"
            ],
            "mismatches": [
                "Hepatitis C virus (HCV)",
                "Polio virus (PV)",
                "Human metapneumovirus (HMPV)",
                "Tomato spotted wilt virus (TSWV)",
                "Turnip mosaic virus (TuMV)"
            ],
            "true_referents": [
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Eastern Equine Encephalitis Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Fabavirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "Giardiavirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lyssavirus",
                "Measles virus",
                "Metapneumovirus",
                "Mumps virus",
                "Murine pneumonia virus",
                "Norovirus",
                "Norwalk virus",
                "Pestivirus",
                "Picornaviridae",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Sequivirus",
                "Severe acute respiratory syndrome-related coronavirus",
                "Thogotovirus",
                "Tobacco Mosaic Virus",
                "Tobamovirus",
                "Tymovirus",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Human Immunodeficiency Virus (HIV)",
            "Hepatitis C virus (HCV)",
            "Hepatitis E virus (HEV)",
            "Rabies virus (RV)",
            "Ebola virus (EBOV)",
            "Zika virus (ZIKV)",
            "Dengue virus (DENV)",
            "Norovirus (NoV)",
            "Rotavirus (RV)",
            "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2)",
            "Middle East Respiratory Syndrome Coronavirus (MERS-CoV)",
            "Rhinovirus (RV)",
            "Poliovirus (PV)",
            "Measles virus (MV)",
            "Rubella virus (RuV)",
            "Yellow fever virus (YFV)",
            "West Nile virus (WNV)",
            "Chikungunya virus (CHIKV)",
            "Respiratory Syncytial Virus (RSV)",
            "Hantavirus (HTNV)",
            "Crimean-Congo Hemorrhagic Fever Virus (CCHFV)",
            "Lassa virus (LASV)",
            "Marburg virus (MARV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "HIV",
                "Hepatitis E virus",
                "Rabies virus",
                "Zika Virus",
                "Dengue Virus",
                "Norovirus",
                "Rotavirus",
                "SARS-CoV-2",
                "Middle East Respiratory Syndrome Coronavirus",
                "Rhinovirus",
                "Poliovirus",
                "Measles virus",
                "Rubella virus",
                "Yellow fever virus",
                "West Nile virus",
                "Chikungunya virus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Lassa virus"
            ],
            "mismatches": [
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Respiratory Syncytial Virus (RSV)",
                "Hantavirus (HTNV)",
                "Marburg virus (MARV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Colorado tick fever virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Marburgvirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (IAV)",
            "Human Immunodeficiency Virus (HIV)",
            "Hepatitis C virus (HCV)",
            "SARS-CoV-2 (SARS2)",
            "Ebola virus (EBOV)",
            "Dengue virus (DENV)",
            "Zika virus (ZIKV)",
            "Rabies virus (RABV)",
            "Measles virus (MV)",
            "Mumps virus (MuV)",
            "Poliovirus (PV)",
            "West Nile virus (WNV)",
            "Yellow fever virus (YFV)",
            "Togaviridae family (e.g., Chikungunya virus)",
            "Norovirus (NoV)",
            "Foot-and-mouth disease virus (FMDV)",
            "Newcastle disease virus (NDV)",
            "Sindbis virus (SINDV)",
            "Hantavirus (HV)",
            "Lassa fever virus (LASV)",
            "Crimean-Congo hemorrhagic fever virus (CCHFV)",
            "Coxsackievirus (CV)",
            "Borna disease virus (BDV)",
            "Avian influenza virus (AIV)",
            "Human coronavirus (HCoV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV",
                "SARS-CoV-2",
                "Dengue Virus",
                "Zika Virus",
                "Rabies virus",
                "Measles virus",
                "Mumps virus",
                "Poliovirus",
                "West Nile virus",
                "Yellow fever virus",
                "Togaviridae",
                "Norovirus",
                "Foot-and-Mouth Disease Virus",
                "Newcastle disease virus",
                "Sindbis Virus",
                "Borna disease virus",
                "Crimean-Congo Hemorrhagic Fever Virus"
            ],
            "mismatches": [
                "Influenza virus (IAV)",
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Hantavirus (HV)",
                "Lassa fever virus (LASV)",
                "Coxsackievirus (CV)",
                "Avian influenza virus (AIV)",
                "Human coronavirus (HCoV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Avian Encephalomyelitis Virus",
                "Bluetongue virus",
                "Borna disease virus",
                "Bornaviridae",
                "Bunyaviridae",
                "Cardiovirus",
                "Chikungunya virus",
                "Colorado tick fever virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Fabavirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "H5N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Influenza A virus",
                "Influenza B virus",
                "Isavirus",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Measles virus",
                "Mumps virus",
                "Newcastle disease virus",
                "Nodaviridae",
                "Norovirus",
                "Norwalk virus",
                "Pestivirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Rubella virus",
                "SARS-CoV-2",
                "Sindbis Virus",
                "Togaviridae",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 17,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (IV)",
            "Human Immunodeficiency Virus (HIV)",
            "Hepatitis C Virus (HCV)",
            "Zika Virus (ZIKV)",
            "Ebola Virus (EBOV)",
            "Rabies Virus (RABV)",
            "Measles Virus (MV)",
            "Mumps Virus (MuV)",
            "Respiratory Syncytial Virus (RSV)",
            "Poliovirus (PV)",
            "Norovirus (NoV)",
            "Dengue Virus (DENV)",
            "West Nile Virus (WNV)",
            "Yellow Fever Virus (YFV)",
            "Marburg Virus (MARV)",
            "Lassa Fever Virus (LFV)",
            "Ross River Virus (RRV)",
            "Hepatitis E Virus (HEV)",
            "Hantavirus (HV)",
            "SARS-CoV-2 (COVID-19)",
            "Middle East Respiratory Syndrome Coronavirus (MERS-CoV)",
            "Severe Acute Respiratory Syndrome Coronavirus (SARS-CoV)",
            "Human Metapneumovirus (hMPV)",
            "Rotavirus (RV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV",
                "Zika Virus",
                "Rabies virus",
                "Measles virus",
                "Mumps virus",
                "Poliovirus",
                "Norovirus",
                "Dengue Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Marburgvirus",
                "Ross River virus",
                "Hepatitis E virus",
                "SARS-CoV-2",
                "Middle East Respiratory Syndrome Coronavirus",
                "Rotavirus"
            ],
            "mismatches": [
                "Influenza virus (IV)",
                "Hepatitis C Virus (HCV)",
                "Ebola Virus (EBOV)",
                "Respiratory Syncytial Virus (RSV)",
                "Lassa Fever Virus (LFV)",
                "Hantavirus (HV)",
                "Severe Acute Respiratory Syndrome Coronavirus (SARS-CoV)",
                "Human Metapneumovirus (hMPV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Respiratory Syncytial Virus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Parainfluenza Virus 4",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Marburgvirus",
                "Measles virus",
                "Metapneumovirus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Mumps virus",
                "Murine pneumonia virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rift Valley fever virus",
                "Ross River virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (Flu virus)",
            "HIV (Human Immunodeficiency Virus)",
            "SARS-CoV-2 (Severe Acute Respiratory Syndrome Coronavirus 2)",
            "Ebola virus",
            "Zika virus",
            "Hepatitis C virus (HCV)",
            "Dengue virus",
            "Measles virus",
            "Poliovirus",
            "Rabies virus",
            "Hantavirus",
            "West Nile virus",
            "Yellow fever virus",
            "Hepatitis E virus (HEV)",
            "Norovirus",
            "Mumps virus",
            "Rubella virus (German measles)",
            "Rhinovirus",
            "Rotavirus",
            "Coronavirus",
            "Hepatitis A virus (HAV)",
            "Japanese encephalitis virus",
            "Lassa virus",
            "Chikungunya virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV (Human Immunodeficiency Virus)",
                "SARS-CoV-2 (Severe Acute Respiratory Syndrome Coronavirus 2)",
                "Dengue virus",
                "Measles virus",
                "Poliovirus",
                "Rabies virus",
                "West Nile virus",
                "Yellow fever virus",
                "Hepatitis E virus (HEV)",
                "Norovirus",
                "Mumps virus",
                "Rhinovirus",
                "Rotavirus",
                "Coronavirus",
                "Hepatitis A virus (HAV)",
                "Japanese encephalitis virus",
                "Lassa virus",
                "Chikungunya virus"
            ],
            "mismatches": [
                "Influenza virus (Flu virus)",
                "Ebola virus",
                "Hepatitis C virus (HCV)",
                "Hantavirus",
                "Rubella virus (German measles)"
            ],
            "true_referents": [
                "Arenavirus",
                "Cardiovirus",
                "Chikungunya virus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Feline Coronavirus",
                "Filoviridae",
                "Flavivirus",
                "Foot-and-Mouth Disease Virus",
                "Giardiavirus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Hepatitis A Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lagovirus",
                "Lassa virus",
                "Lyssavirus",
                "Measles virus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Rhinovirus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2)",
            "Influenza A virus",
            "Hepatitis C virus (HCV)",
            "Ebola virus",
            "Zika virus",
            "Rabies virus",
            "Poliovirus",
            "Measles virus",
            "Respiratory Syncytial Virus (RSV)",
            "Dengue virus",
            "Norovirus",
            "Marburg virus",
            "Tick-borne encephalitis virus (TBEV)",
            "Hantavirus",
            "Chikungunya virus",
            "Rhinovirus",
            "Enterovirus D68",
            "West Nile virus",
            "Middle East Respiratory Syndrome Coronavirus (MERS-CoV)",
            "Lassa virus",
            "Crimean-Congo hemorrhagic fever virus",
            "Junin virus",
            "Rift Valley fever virus",
            "La Crosse virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "SARS-CoV-2",
                "Influenza A virus",
                "Zika virus",
                "Rabies virus",
                "Poliovirus",
                "Measles virus",
                "Dengue virus",
                "Norovirus",
                "Marburgvirus",
                "Rhinovirus",
                "West Nile virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Lassa virus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Junin virus",
                "Rift Valley fever virus",
                "La Crosse virus",
                "Chikungunya virus"
            ],
            "mismatches": [
                "Hepatitis C virus (HCV)",
                "Ebola virus",
                "Respiratory Syncytial Virus (RSV)",
                "Tick-borne encephalitis virus (TBEV)",
                "Hantavirus",
                "Enterovirus D68"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Enterovirus",
                "Bovine Respiratory Syncytial Virus",
                "Cardiovirus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Filoviridae",
                "Flavivirus",
                "H1N1 Subtype Influenza A Virus",
                "H1N2 Subtype Influenza A Virus",
                "Hantaan virus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Respiratory Syncytial Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Japanese Encephalitis Virus",
                "Junin virus",
                "La Crosse virus",
                "Lagovirus",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rubella virus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Tick-Borne Encephalitis Viruses",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Human immunodeficiency virus (HIV)",
            "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
            "Ebola virus (EBOV)",
            "Zika virus (ZIKV)",
            "Hepatitis C virus (HCV)",
            "Measles virus (MeV)",
            "Rabies virus (RABV)",
            "Dengue virus (DENV)",
            "West Nile virus (WNV)",
            "Yellow fever virus (YFV)",
            "Rotavirus (RV)",
            "Norovirus (NoV)",
            "Poliovirus (PV)",
            "Rhinovirus (RV)",
            "Respiratory syncytial virus (RSV)",
            "Mumps virus (MuV)",
            "Chikungunya virus (CHIKV)",
            "Hantavirus (HTNV)",
            "Rift Valley fever virus (RVFV)",
            "Vesicular stomatitis virus (VSV)",
            "Rubella virus (RuV)",
            "Sindbis virus (SINV)",
            "Borna disease virus (BoDV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "HIV",
                "SARS-CoV-2",
                "Measles virus",
                "Rabies virus",
                "Dengue virus",
                "West Nile virus",
                "Yellow fever virus",
                "Rotavirus",
                "Norovirus",
                "Poliovirus",
                "Rhinovirus",
                "Mumps virus",
                "Chikungunya virus",
                "Rift Valley fever virus",
                "Rubella virus",
                "Sindbis Virus",
                "Borna disease virus"
            ],
            "mismatches": [
                "Ebola virus (EBOV)",
                "Hepatitis C virus (HCV)",
                "Respiratory syncytial virus (RSV)",
                "Hantavirus (HTNV)",
                "Vesicular stomatitis virus (VSV)"
            ],
            "true_referents": [
                "Borna disease virus",
                "Bornaviridae",
                "Bovine Respiratory Syncytial Virus",
                "Bovine Viral Diarrhea Virus 1",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Isavirus",
                "Lassa virus",
                "Lyssavirus",
                "Measles virus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Sindbis Virus",
                "Vesicular stomatitis Indiana virus",
                "Vesicular stomatitis New Jersey virus",
                "Vesiculovirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Human immunodeficiency virus 1 (HIV-1)",
            "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
            "Ebola virus (EBOV)",
            "Zika virus (ZIKV)",
            "Dengue virus (DENV)",
            "West Nile virus (WNV)",
            "Measles virus (MeV)",
            "Mumps virus (MuV)",
            "Rabies virus (RABV)",
            "Respiratory syncytial virus (RSV)",
            "Hepatitis C virus (HCV)",
            "Hepatitis E virus (HEV)",
            "Norwalk virus (NV)",
            "Poliovirus (PV)",
            "Rhinovirus (RV)",
            "Yellow fever virus (YFV)",
            "Chikungunya virus (CHIKV)",
            "Venezuelan equine encephalitis virus (VEEV)",
            "Lassa virus (LASV)",
            "Nipah virus (NiV)",
            "Hantavirus (HTNV)",
            "Rotavirus (RV)",
            "Rubella virus (RuV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus (IAV)",
                "Human immunodeficiency virus 1 (HIV-1)",
                "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
                "Zika virus (ZIKV)",
                "Dengue virus (DENV)",
                "West Nile virus (WNV)",
                "Measles virus (MeV)",
                "Mumps virus (MuV)",
                "Rabies virus (RABV)",
                "Hepatitis E virus (HEV)",
                "Norwalk virus (NV)",
                "Poliovirus (PV)",
                "Rhinovirus (RV)",
                "Yellow fever virus (YFV)",
                "Chikungunya virus (CHIKV)",
                "Venezuelan equine encephalitis virus (VEEV)",
                "Lassa virus (LASV)",
                "Nipah virus (NiV)",
                "Rotavirus (RV)",
                "Rubella virus (RuV)"
            ],
            "mismatches": [
                "Ebola virus (EBOV)",
                "Respiratory syncytial virus (RSV)",
                "Hepatitis C virus (HCV)",
                "Hantavirus (HTNV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Dengue Virus",
                "Eastern Equine Encephalitis Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Influenza A virus",
                "Lassa virus",
                "Lyssavirus",
                "Measles virus",
                "Mumps virus",
                "Newcastle disease virus",
                "Nipah Virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza virus (Influenza A virus, Influenza B virus)",
            "Human immunodeficiency virus (HIV)",
            "Hepatitis C virus (HCV)",
            "Ebola virus (EBOV)",
            "Zika virus (ZIKV)",
            "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
            "Middle East respiratory syndrome coronavirus (MERS-CoV)",
            "Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)",
            "Measles virus (MeV)",
            "Rabies virus (RABV)",
            "Polio virus (PV)",
            "Dengue virus (DENV)",
            "West Nile virus (WNV)",
            "Yellow fever virus (YFV)",
            "Hepatitis E virus (HEV)",
            "Human T-lymphotropic virus (HTLV)",
            "Respiratory syncytial virus (RSV)",
            "Norovirus (NoV)",
            "Rotavirus (RV)",
            "Enterovirus (EV)",
            "Rhinovirus (RV)",
            "Hepatitis D virus (HDV)",
            "Coronavirus (CoV)",
            "Retrovirus (RV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "Influenza B virus",
                "HIV",
                "Zika Virus",
                "SARS-CoV-2",
                "Measles virus",
                "Rabies virus",
                "Dengue Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Hepatitis E virus",
                "Human T-lymphotropic virus 1",
                "Human T-lymphotropic virus 2",
                "Human T-lymphotropic virus 3",
                "Respiratory Syncytial Viruses",
                "Norovirus",
                "Rotavirus",
                "Enterovirus",
                "Rhinovirus",
                "Coronavirus"
            ],
            "mismatches": [
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Severe acute respiratory syndrome coronavirus (SARS-CoV)",
                "Middle East respiratory syndrome coronavirus (MERS-CoV)",
                "Polio virus (PV)",
                "Hepatitis D virus (HDV)",
                "Retrovirus (RV)"
            ],
            "true_referents": [
                "Bovine Respiratory Syncytial Virus",
                "Coronavirus",
                "Deltacoronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Endogenous Retroviruses",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "Gammaretrovirus",
                "H1N2 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis Delta Virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Enterovirus B",
                "Human Respiratory Syncytial Virus",
                "Human T-lymphotropic virus 1",
                "Human T-lymphotropic virus 2",
                "Human T-lymphotropic virus 3",
                "Influenza A virus",
                "Influenza B virus",
                "Lyssavirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Oleavirus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Retroviridae",
                "Rhinovirus",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 20,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Influenza A virus (IAV)",
            "Influenza B virus (IBV)",
            "Influenza C virus (ICV)",
            "Severe Acute Respiratory Syndrome Coronavirus 2 (SARS-CoV-2)",
            "Human Immunodeficiency Virus (HIV)",
            "Hepatitis C virus (HCV)",
            "Zika virus (ZIKV)",
            "Ebola virus (EBOV)",
            "Dengue virus (DENV)",
            "Chikungunya virus (CHIKV)",
            "Measles virus (MeV)",
            "Poliovirus (PV)",
            "Rabies virus (RABV)",
            "Respiratory Syncytial Virus (RSV)",
            "Hepatitis E virus (HEV)",
            "Norovirus (NoV)",
            "Enterovirus D68 (EV-D68)",
            "Coronavirus (CoV)",
            "Rubella virus (RUBV)",
            "Hantavirus (HTNV)",
            "Lassa virus (LASV)",
            "Rift Valley Fever virus (RVFV)",
            "Nipah virus (NiV)",
            "Hendra virus (HeV)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Influenza A virus",
                "Influenza B virus",
                "SARS-CoV-2",
                "HIV",
                "Zika Virus",
                "Dengue Virus",
                "Chikungunya virus",
                "Measles virus",
                "Poliovirus",
                "Rabies virus",
                "Hepatitis E virus",
                "Norovirus",
                "Coronavirus",
                "Rubella virus",
                "Lassa virus",
                "Rift Valley fever virus",
                "Nipah Virus",
                "Hendra Virus"
            ],
            "mismatches": [
                "Influenza C virus (ICV)",
                "Hepatitis C virus (HCV)",
                "Ebola virus (EBOV)",
                "Respiratory Syncytial Virus (RSV)",
                "Enterovirus D68 (EV-D68)",
                "Hantavirus (HTNV)"
            ],
            "true_referents": [
                "Arenavirus",
                "Bovine Enterovirus",
                "Bovine Respiratory Syncytial Virus",
                "Chikungunya virus",
                "Coronavirus",
                "Crimean-Congo Hemorrhagic Fever Virus",
                "Deltacoronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Erbovirus",
                "Filoviridae",
                "Flavivirus",
                "H5N1 Subtype Influenza A Virus",
                "H7N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hendra Virus",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus A",
                "Human Enterovirus C",
                "Human Respiratory Syncytial Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Lassa virus",
                "Lyssavirus",
                "Measles virus",
                "Mumps virus",
                "Nipah Virus",
                "Norovirus",
                "Norwalk virus",
                "Picornaviridae",
                "Poliovirus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rift Valley fever virus",
                "Rubella virus",
                "Rubivirus",
                "Rubulavirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Human Immunodeficiency Virus (HIV) (HIV-1)",
            "SARS-CoV-2",
            "Influenza A virus",
            "Hepatitis C virus (HCV)",
            "Ebola virus",
            "Marburg virus",
            "Rabies virus",
            "Hantavirus",
            "Lassa fever virus",
            "Coronavirus (SARS-CoV)",
            "Middle East Respiratory Syndrome Coronavirus (MERS-CoV)",
            "Norovirus",
            "Rotavirus",
            "Respiratory Syncytial Virus (RSV)",
            "Human T-lymphotropic virus (HTLV-1)",
            "Epstein-Barr virus (EBV)",
            "Herpes simplex virus (HSV-1)",
            "Cytomegalovirus (CMV)",
            "Hepatitis B virus (HBV)",
            "West Nile virus",
            "Dengue virus",
            "Zika virus",
            "Yellow fever virus",
            "Rift Valley fever virus",
            "Chikungunya virus",
            "Japanese encephalitis virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "SARS-CoV-2",
                "Influenza A virus",
                "Norovirus",
                "Rotavirus",
                "West Nile virus",
                "Dengue virus",
                "Zika virus",
                "Yellow fever virus",
                "Rift Valley fever virus"
            ],
            "mismatches": [
                "Human Immunodeficiency Virus (HIV) (HIV-1)",
                "Hepatitis C virus (HCV)",
                "Ebola virus",
                "Marburg virus",
                "Rabies virus",
                "Hantavirus",
                "Lassa fever virus",
                "Coronavirus (SARS-CoV)",
                "Middle East Respiratory Syndrome Coronavirus (MERS-CoV)",
                "Respiratory Syncytial Virus (RSV)",
                "Human T-lymphotropic virus (HTLV-1)",
                "Epstein-Barr virus (EBV)",
                "Herpes simplex virus (HSV-1)",
                "Cytomegalovirus (CMV)",
                "Hepatitis B virus (HBV)",
                "Chikungunya virus",
                "Japanese encephalitis virus"
            ],
            "true_referents": [
                "Arenavirus",
                "Betacoronavirus 1",
                "Bovine Respiratory Syncytial Virus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Filoviridae",
                "Flavivirus",
                "Giardiavirus",
                "H1N1 Subtype Influenza A Virus",
                "H1N2 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hantaan virus",
                "Hepatitis A virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Respiratory Syncytial Virus",
                "Human T-lymphotropic virus 1",
                "Human T-lymphotropic virus 2",
                "Human T-lymphotropic virus 3",
                "Influenza A virus",
                "Lassa virus",
                "Lyssavirus",
                "Mamastrovirus",
                "Marburgvirus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Mumps virus",
                "Norovirus",
                "Norwalk virus",
                "Rabies virus",
                "Respiratory Syncytial Viruses",
                "Rift Valley fever virus",
                "Rotavirus",
                "Rubella virus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "Venezuelan Equine Encephalitis Virus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 9,
            "FP": 17,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            "Hepatitis A virus (HAV)",
            "Hepatitis C virus (HCV)",
            "Hepatitis E virus (HEV)",
            "Influenza A virus",
            "Influenza B virus",
            "Influenza C virus",
            "Human immunodeficiency virus 1 (HIV-1)",
            "Human immunodeficiency virus 2 (HIV-2)",
            "SARS-CoV-2",
            "MERS-CoV",
            "SARS-CoV",
            "Ebola virus",
            "Marburg virus",
            "Lassa virus",
            "Dengue virus",
            "Yellow fever virus",
            "West Nile virus",
            "Japanese encephalitis virus",
            "Rabies virus",
            "Measles virus",
            "Mumps virus",
            "Rubella virus",
            "Poliovirus",
            "Rhinovirus",
            "Enterovirus 71"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Influenza A virus",
                "Influenza B virus",
                "HIV-1",
                "HIV-2",
                "SARS-CoV-2",
                "Lassa virus",
                "Dengue virus",
                "Yellow fever virus",
                "West Nile virus",
                "Japanese encephalitis virus",
                "Rabies virus",
                "Measles virus",
                "Mumps virus",
                "Rubella virus",
                "Poliovirus",
                "Rhinovirus"
            ],
            "mismatches": [
                "Hepatitis C virus (HCV)",
                "Influenza C virus",
                "MERS-CoV",
                "SARS-CoV",
                "Ebola virus",
                "Marburg virus",
                "Enterovirus 71"
            ],
            "true_referents": [
                "Arenavirus",
                "Betacoronavirus 1",
                "Cardiovirus",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Enterovirus",
                "Filoviridae",
                "Flavivirus",
                "GB virus B",
                "GB virus C",
                "H1N1 Subtype Influenza A Virus",
                "H1N2 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Hepevirus",
                "Human Enterovirus C",
                "Human Hepatitis A Virus",
                "Infectious bronchitis virus",
                "Influenza A virus",
                "Influenza B virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Lagovirus",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Measles virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Morbillivirus",
                "Mumps virus",
                "Pneumovirus",
                "Poliovirus",
                "Rabies virus",
                "Rhinovirus",
                "Rubella virus",
                "Rubivirus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Yellow fever virus",
                "Zika Virus"
            ],
            "TP": 18,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "RNA virus",
        "domain": "medicine",
        "response": [
            {
                "Referent": "HIV (Human Immunodeficiency Virus)",
                "Canonical Name": "HIV"
            },
            {
                "Referent": "Influenza A Virus (Flu A)",
                "Canonical Name": "Flu A"
            },
            {
                "Referent": "Hepatitis C Virus (HCV)",
                "Canonical Name": "HCV"
            },
            {
                "Referent": "Coronavirus (CoV)",
                "Canonical Name": "CoV"
            },
            {
                "Referent": "Coronavirus Disease 2019 (COVID-19)",
                "Canonical Name": "COVID-19"
            },
            {
                "Referent": "Ebola Virus (EBOV)",
                "Canonical Name": "EBOV"
            },
            {
                "Referent": "Marburg Virus (MARV)",
                "Canonical Name": "MARV"
            },
            {
                "Referent": "Rabies Virus (RABV)",
                "Canonical Name": "RABV"
            },
            {
                "Referent": "Hepatitis E Virus (HEV)",
                "Canonical Name": "HEV"
            },
            {
                "Referent": "West Nile Virus (WNV)",
                "Canonical Name": "WNV"
            },
            {
                "Referent": "Dengue Virus (DENV)",
                "Canonical Name": "DENV"
            },
            {
                "Referent": "Zika Virus (ZIKV)",
                "Canonical Name": "ZIKV"
            },
            {
                "Referent": "Lassa Virus (LASV)",
                "Canonical Name": "LASV"
            },
            {
                "Referent": "Nipah Virus (NiV)",
                "Canonical Name": "NiV"
            },
            {
                "Referent": "Coronavirus HKU1 (CoV HKU1)",
                "Canonical Name": "CoV HKU1"
            },
            {
                "Referent": "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
                "Canonical Name": "MERS-CoV"
            },
            {
                "Referent": "SARS-CoV (Severe Acute Respiratory Syndrome Coronavirus)",
                "Canonical Name": "SARS-CoV"
            },
            {
                "Referent": "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
                "Canonical Name": "MERS-CoV"
            },
            {
                "Referent": "SARS-CoV (Severe Acute Respiratory Syndrome Coronavirus)",
                "Canonical Name": "SARS-CoV"
            },
            {
                "Referent": "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
                "Canonical Name": "MERS-CoV"
            },
            {
                "Referent": "SARS-CoV (Severe Acute Respiratory Syndrome Coronavirus)",
                "Canonical Name": "SARS-CoV"
            },
            {
                "Referent": "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
                "Canonical Name": "MERS-CoV"
            },
            {
                "Referent": "SARS-CoV (Severe Acute Respiratory Syndrome Coronavirus)",
                "Canonical Name": "SARS-CoV"
            },
            {
                "Referent": "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
                "Canonical Name": "MERS-CoV"
            },
            {
                "Referent": "SARS-CoV (Severe Acute Respiratory Syndrome Coronavirus)",
                "Canonical Name": "SARS-CoV"
            },
            {
                "Referent": "MERS-CoV (Middle East Respiratory Syndrome Coronavirus)",
                "Canonical Name": "MERS-CoV"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "HIV",
                "Coronavirus",
                "Rabies virus",
                "Marburgvirus",
                "Hepatitis E virus",
                "West Nile virus",
                "Dengue Virus",
                "Zika Virus",
                "Lassa virus",
                "Nipah Virus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Severe acute respiratory syndrome-related coronavirus"
            ],
            "mismatches": [
                "Influenza A Virus (Flu A)",
                "Hepatitis C Virus (HCV)",
                "Coronavirus Disease 2019 (COVID-19)",
                "Ebola Virus (EBOV)",
                "Coronavirus HKU1 (CoV HKU1)"
            ],
            "true_referents": [
                "Arenaviridae",
                "Arenavirus",
                "Betacoronavirus 1",
                "Coronavirus",
                "Dengue Virus",
                "Ebolavirus",
                "Filoviridae",
                "Flaviviridae",
                "Flavivirus",
                "Gammacoronavirus",
                "H1N1 Subtype Influenza A Virus",
                "H5N1 Subtype Influenza A Virus",
                "HIV",
                "HIV-1",
                "HIV-2",
                "Henipavirus",
                "Hepatitis A virus",
                "Hepatitis E virus",
                "Hepatovirus",
                "Human Hepatitis A Virus",
                "Influenza A virus",
                "Lassa virus",
                "Lyssavirus",
                "Marburgvirus",
                "Middle East Respiratory Syndrome Coronavirus",
                "Nidovirales",
                "Nipah Virus",
                "Rabies virus",
                "SARS-CoV-2",
                "Severe acute respiratory syndrome-related coronavirus",
                "West Nile virus",
                "Zika Virus"
            ],
            "TP": 12,
            "FP": 5,
            "FN": 0
        }
    }
]