[
    {
        "test": "decide-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Coltivirus",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Mammalian orthoreovirus 3",
            "Picobirnavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Mammalian orthoreovirus 3",
                "Picobirnavirus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Human Enterovirus B",
            "Chikungunya virus",
            "Picobirnavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Human Enterovirus B",
                "Chikungunya virus"
            ],
            "mismatches": [
                "Picobirnavirus"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 3,
            "FP": 1,
            "FN": 5
        }
    },
    {
        "test": "decide-referents",
        "responder": "phi-v4",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "phi-v4",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Coltivirus",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Picobirnavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Picobirnavirus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "phi-v4",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Infectious pancreatic necrosis virus",
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Human Enterovirus B",
            "St. Louis Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Chikungunya virus",
            "Ross River virus",
            "O'nyong-nyong Virus",
            "Palyam Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "Infectious pancreatic necrosis virus",
                "La Crosse virus",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Palyam Virus"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 6,
            "FP": 5,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Birnaviridae",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Birnaviridae",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Infectious pancreatic necrosis virus",
            "Japanese Encephalitis Virus",
            "Japanese Encephalitis Virus",
            "Human Enterovirus B",
            "Plum Pox Virus",
            "Chikungunya virus",
            "Ross River virus",
            "O'nyong-nyong Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Plum Pox Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "Infectious pancreatic necrosis virus"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 7,
            "FP": 1,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "gemma-3",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses",
            "Palyam Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [
                "Palyam Virus"
            ],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gemma-3",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Birnaviridae",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus",
            "Equartevirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Birnaviridae",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus"
            ],
            "mismatches": [
                "Equartevirus"
            ],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 6,
            "FP": 1,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "gemma-3",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Infectious pancreatic necrosis virus",
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Plum Pox Virus",
            "St. Louis Encephalitis Virus",
            "Infectious bursal disease virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Chikungunya virus",
            "Bluetongue virus",
            "Ross River virus",
            "O'nyong-nyong Virus",
            "Palyam Virus",
            "Equartevirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Plum Pox Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus",
                "Equartevirus"
            ],
            "mismatches": [
                "Infectious pancreatic necrosis virus",
                "La Crosse virus",
                "St. Louis Encephalitis Virus",
                "Infectious bursal disease virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Bluetongue virus",
                "Palyam Virus"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 7,
            "FP": 8,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Coltivirus",
            "Birnaviridae",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus",
            "Palyam Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Birnaviridae",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus",
                "Palyam Virus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "Eastern Equine Encephalitis Virus",
            "Human Enterovirus B",
            "Plum Pox Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Chikungunya virus",
            "Ross River virus",
            "O'nyong-nyong Virus",
            "Equartevirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Plum Pox Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus",
                "Equartevirus"
            ],
            "mismatches": [
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 8,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o-mini",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o-mini",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "La Crosse virus",
            "Coltivirus",
            "Birnaviridae",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Birnaviridae",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus"
            ],
            "mismatches": [
                "La Crosse virus"
            ],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 7,
            "FP": 1,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4o-mini",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "Chikungunya virus",
            "Bluetongue virus",
            "Ross River virus",
            "O'nyong-nyong Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "Bluetongue virus"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 5,
            "FP": 1,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4-32k",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "West Nile virus",
            "Eastern Equine Encephalitis Virus",
            "Western Equine Encephalitis Virus",
            "California Encephalitis Virus",
            "La Crosse virus",
            "Murray Valley Encephalitis Virus",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "West Nile virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "La Crosse virus",
                "Murray Valley Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [
                "Western Equine Encephalitis Virus"
            ],
            "true_referents": [
                "California Encephalitis Virus",
                "Eastern Equine Encephalitis Virus",
                "La Crosse virus",
                "West Nile virus",
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "TP": 7,
            "FP": 1,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4-32k",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "African Horse Sickness Virus",
            "Mammalian Orthoreovirus",
            "Totivirus",
            "Avibirnavirus",
            "Bluetongue virus",
            "Orbivirus",
            "Rotavirus",
            "Orthoreovirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "African Horse Sickness Virus",
                "Mammalian Orthoreovirus",
                "Totivirus",
                "Avibirnavirus",
                "Bluetongue virus",
                "Orbivirus",
                "Rotavirus",
                "Orthoreovirus"
            ],
            "mismatches": [],
            "true_referents": [
                "Orbivirus",
                "Avibirnavirus",
                "Rotavirus",
                "Totivirus",
                "African Horse Sickness Virus",
                "Mammalian Orthoreovirus",
                "Bluetongue virus",
                "Orthoreovirus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-4-32k",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "West Nile virus",
            "Eastern Equine Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Western Equine Encephalitis Virus",
            "Cucumovirus",
            "Potyvirus",
            "Murray Valley Encephalitis Virus",
            "Closterovirus",
            "Canine Coronavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Western Equine Encephalitis Virus",
                "Cucumovirus",
                "Potyvirus",
                "Closterovirus",
                "Canine Coronavirus"
            ],
            "mismatches": [
                "Japanese Encephalitis Virus",
                "West Nile virus",
                "Eastern Equine Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Murray Valley Encephalitis Virus"
            ],
            "true_referents": [
                "Canine Coronavirus",
                "Cucumovirus",
                "Lagovirus",
                "Western Equine Encephalitis Virus",
                "Flexiviridae",
                "Closterovirus",
                "Potyvirus",
                "Tymoviridae"
            ],
            "TP": 5,
            "FP": 5,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "Eastern Equine Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Western Equine Encephalitis Virus",
            "California Encephalitis Virus",
            "La Crosse virus",
            "Murray Valley Encephalitis Virus",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "Eastern Equine Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "California Encephalitis Virus",
                "La Crosse virus",
                "Murray Valley Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [
                "Western Equine Encephalitis Virus"
            ],
            "true_referents": [
                "California Encephalitis Virus",
                "Eastern Equine Encephalitis Virus",
                "La Crosse virus",
                "West Nile virus",
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "TP": 7,
            "FP": 1,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Mammalian Orthoreovirus",
            "Totivirus",
            "Eastern Equine Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Western Equine Encephalitis Virus",
            "Avibirnavirus",
            "Lagovirus",
            "Bluetongue virus",
            "Orbivirus",
            "Tymoviridae",
            "Rotavirus",
            "California Encephalitis Virus",
            "Flexiviridae",
            "Cucumovirus",
            "Potyvirus",
            "La Crosse virus",
            "Murray Valley Encephalitis Virus",
            "Orthoreovirus",
            "Closterovirus",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Mammalian Orthoreovirus",
                "Totivirus",
                "Avibirnavirus",
                "Bluetongue virus",
                "Orbivirus",
                "Rotavirus",
                "Orthoreovirus"
            ],
            "mismatches": [
                "Eastern Equine Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Western Equine Encephalitis Virus",
                "Lagovirus",
                "Tymoviridae",
                "California Encephalitis Virus",
                "Flexiviridae",
                "Cucumovirus",
                "Potyvirus",
                "La Crosse virus",
                "Murray Valley Encephalitis Virus",
                "Closterovirus",
                "Tick-Borne Encephalitis Viruses"
            ],
            "true_referents": [
                "Orbivirus",
                "Avibirnavirus",
                "Rotavirus",
                "Totivirus",
                "African Horse Sickness Virus",
                "Mammalian Orthoreovirus",
                "Bluetongue virus",
                "Orthoreovirus"
            ],
            "TP": 7,
            "FP": 13,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "African Horse Sickness Virus",
            "Mammalian Orthoreovirus",
            "West Nile virus",
            "Totivirus",
            "Eastern Equine Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Western Equine Encephalitis Virus",
            "Avibirnavirus",
            "Lagovirus",
            "Bluetongue virus",
            "Orbivirus",
            "Tymoviridae",
            "Rotavirus",
            "California Encephalitis Virus",
            "Flexiviridae",
            "Cucumovirus",
            "Potyvirus",
            "La Crosse virus",
            "Murray Valley Encephalitis Virus",
            "Orthoreovirus",
            "Closterovirus",
            "Tick-Borne Encephalitis Viruses",
            "Canine Coronavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Western Equine Encephalitis Virus",
                "Lagovirus",
                "Tymoviridae",
                "Flexiviridae",
                "Cucumovirus",
                "Potyvirus",
                "Closterovirus",
                "Canine Coronavirus"
            ],
            "mismatches": [
                "Japanese Encephalitis Virus",
                "African Horse Sickness Virus",
                "Mammalian Orthoreovirus",
                "West Nile virus",
                "Totivirus",
                "Eastern Equine Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Avibirnavirus",
                "Bluetongue virus",
                "Orbivirus",
                "Rotavirus",
                "California Encephalitis Virus",
                "La Crosse virus",
                "Murray Valley Encephalitis Virus",
                "Orthoreovirus",
                "Tick-Borne Encephalitis Viruses"
            ],
            "true_referents": [
                "Canine Coronavirus",
                "Cucumovirus",
                "Lagovirus",
                "Western Equine Encephalitis Virus",
                "Flexiviridae",
                "Closterovirus",
                "Potyvirus",
                "Tymoviridae"
            ],
            "TP": 8,
            "FP": 16,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "o1-mini",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "o1-mini",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Coltivirus",
            "Birnaviridae",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus",
            "Palyam Virus",
            "Equartevirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Birnaviridae",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus",
                "Palyam Virus"
            ],
            "mismatches": [
                "Equartevirus"
            ],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 8,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "o1-mini",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "Japanese Encephalitis Virus",
            "Eastern Equine Encephalitis Virus",
            "Human Enterovirus B",
            "Plum Pox Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Chikungunya virus",
            "Tick-Borne Encephalitis Viruses",
            "Ross River virus",
            "O'nyong-nyong Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Plum Pox Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 7,
            "FP": 5,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Coltivirus",
            "Birnaviridae",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus",
            "Palyam Virus",
            "Equartevirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Birnaviridae",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus",
                "Palyam Virus"
            ],
            "mismatches": [
                "Equartevirus"
            ],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 8,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "Human Enterovirus B",
            "Plum Pox Virus",
            "St. Louis Encephalitis Virus",
            "Japanese Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Chikungunya virus",
            "Tick-Borne Encephalitis Viruses",
            "Ross River virus",
            "O'nyong-nyong Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Plum Pox Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 7,
            "FP": 4,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Japanese Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 8,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Coltivirus",
            "Birnaviridae",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Birnaviridae",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "Human Enterovirus B",
            "Plum Pox Virus",
            "St. Louis Encephalitis Virus",
            "Japanese Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Chikungunya virus",
            "Ross River virus",
            "O'nyong-nyong Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Plum Pox Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 7,
            "FP": 3,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Birnaviridae",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Birnaviridae",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 5,
            "FP": 0,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Coltivirus",
            "Eastern Equine Encephalitis Virus",
            "Human Enterovirus B",
            "Plum Pox Virus",
            "St. Louis Encephalitis Virus",
            "Japanese Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Chikungunya virus",
            "O'nyong-nyong Virus",
            "Palyam Virus",
            "Equartevirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Plum Pox Virus",
                "Chikungunya virus",
                "O'nyong-nyong Virus",
                "Equartevirus"
            ],
            "mismatches": [
                "La Crosse virus",
                "Coltivirus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Palyam Virus"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 7,
            "FP": 8,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "Birnaviridae",
            "Infectious bursal disease virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Birnaviridae",
                "Infectious bursal disease virus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 3,
            "FP": 0,
            "FN": 5
        }
    },
    {
        "test": "decide-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "Chikungunya virus",
            "Tick-Borne Encephalitis Viruses",
            "Ross River virus",
            "O'nyong-nyong Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "Tick-Borne Encephalitis Viruses"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 5,
            "FP": 1,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Infectious pancreatic necrosis virus",
            "La Crosse virus",
            "Coltivirus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Bluetongue virus",
            "Picobirnavirus",
            "Ross River virus",
            "Palyam Virus",
            "Equartevirus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Infectious pancreatic necrosis virus",
                "Coltivirus",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Bluetongue virus",
                "Picobirnavirus",
                "Palyam Virus"
            ],
            "mismatches": [
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Ross River virus",
                "Equartevirus"
            ],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 7,
            "FP": 5,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Infectious pancreatic necrosis virus",
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Coltivirus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Mammalian orthoreovirus 3",
            "Chikungunya virus",
            "Bluetongue virus",
            "Ross River virus",
            "O'nyong-nyong Virus",
            "Palyam Virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus"
            ],
            "mismatches": [
                "Infectious pancreatic necrosis virus",
                "La Crosse virus",
                "Coltivirus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Mammalian orthoreovirus 3",
                "Bluetongue virus",
                "Palyam Virus"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 5,
            "FP": 8,
            "FN": 3
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "Encephalitis Viruses",
        "domain": "medicine",
        "response": [
            "Japanese Encephalitis Virus",
            "Eastern Equine Encephalitis Virus",
            "St. Louis Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "California Encephalitis Virus",
            "Japanese Encephalitis Viruses",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Japanese Encephalitis Virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "California Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "Tick-Borne Encephalitis Viruses"
            ],
            "mismatches": [],
            "true_referents": [
                "Murray Valley Encephalitis Virus",
                "Japanese Encephalitis Viruses",
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "California Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses",
                "St. Louis Encephalitis Virus",
                "Japanese Encephalitis Virus"
            ],
            "TP": 7,
            "FP": 0,
            "FN": 1
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "Double Stranded RNA Viruses",
        "domain": "medicine",
        "response": [
            "Birnaviridae",
            "Picobirnavirus",
            "Mammalian orthoreovirus 3",
            "Infectious bursal disease virus",
            "Infectious pancreatic necrosis virus",
            "Bluetongue virus"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Birnaviridae",
                "Picobirnavirus",
                "Mammalian orthoreovirus 3",
                "Infectious bursal disease virus",
                "Infectious pancreatic necrosis virus",
                "Bluetongue virus"
            ],
            "mismatches": [],
            "true_referents": [
                "Mammalian orthoreovirus 3",
                "Picobirnavirus",
                "Coltivirus",
                "Infectious pancreatic necrosis virus",
                "Infectious bursal disease virus",
                "Birnaviridae",
                "Palyam Virus",
                "Bluetongue virus"
            ],
            "TP": 6,
            "FP": 0,
            "FN": 2
        }
    },
    {
        "test": "decide-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "Positive-Strand RNA Viruses",
        "domain": "medicine",
        "response": [
            "Coronavirus",
            "Japanese Encephalitis Virus",
            "La Crosse virus",
            "Eastern Equine Encephalitis Virus",
            "Human Enterovirus B",
            "Plum Pox Virus",
            "St. Louis Encephalitis Virus",
            "Chikungunya virus",
            "Ross River virus",
            "O'nyong-nyong Virus",
            "Palyam Virus",
            "Equartevirus",
            "California Encephalitis Virus",
            "Murray Valley Encephalitis Virus",
            "Tick-Borne Encephalitis Viruses"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Coronavirus",
                "Japanese Encephalitis Virus",
                "Human Enterovirus B",
                "Plum Pox Virus",
                "Chikungunya virus",
                "Ross River virus",
                "O'nyong-nyong Virus",
                "Equartevirus"
            ],
            "mismatches": [
                "La Crosse virus",
                "Eastern Equine Encephalitis Virus",
                "St. Louis Encephalitis Virus",
                "Palyam Virus",
                "California Encephalitis Virus",
                "Murray Valley Encephalitis Virus",
                "Tick-Borne Encephalitis Viruses"
            ],
            "true_referents": [
                "Ross River virus",
                "Human Enterovirus B",
                "O'nyong-nyong Virus",
                "Plum Pox Virus",
                "Japanese Encephalitis Virus",
                "Equartevirus",
                "Coronavirus",
                "Chikungunya virus"
            ],
            "TP": 8,
            "FP": 7,
            "FN": 0
        }
    }
]