[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Aspirin (ASA)",
            "Ibuprofen",
            "Naproxen",
            "Diclofenac",
            "Ketoprofen",
            "Indomethacin",
            "Celecoxib",
            "Doxylamine",
            "Meclizine",
            "Hydroxyzine",
            "Diphenhydramine",
            "Pseudoephedrine",
            "Codeine",
            "Oxycodone",
            "Tramadol",
            "Paracetamol (APAP)",
            "Naproxen Sodium",
            "Ketorolac",
            "Meloxicam",
            "Piroxicam",
            "Nabumetone",
            "Sulindac",
            "Etodolac"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Aspirin (ASA)",
                "Ibuprofen",
                "Naproxen",
                "Diclofenac",
                "Ketoprofen",
                "Indomethacin",
                "Celecoxib",
                "Doxylamine",
                "Meclizine",
                "Hydroxyzine",
                "Diphenhydramine",
                "Pseudoephedrine",
                "Codeine",
                "Oxycodone",
                "Tramadol",
                "Paracetamol (APAP)",
                "Naproxen Sodium",
                "Ketorolac",
                "Meloxicam",
                "Piroxicam",
                "Nabumetone",
                "Sulindac",
                "Etodolac"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Aspirin (ASA)",
                "Celecoxib",
                "Codeine",
                "Diclofenac",
                "Diphenhydramine",
                "Doxylamine",
                "Etodolac",
                "Hydroxyzine",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Ketorolac",
                "Meclizine",
                "Meloxicam",
                "Nabumetone",
                "Naproxen",
                "Naproxen Sodium",
                "Oxycodone",
                "Paracetamol (APAP)",
                "Piroxicam",
                "Pseudoephedrine",
                "Sulindac",
                "Tramadol"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (Paracetamol)",
            "Ibuprofen",
            "Aspirin (Acetylsalicylic Acid)",
            "Naproxen",
            "Diclofenac",
            "Ketoprofen",
            "Celecoxib",
            "Meloxicam",
            "Nabumetone",
            "Indomethacin",
            "Diclofenac Sodium",
            "Ketorolac",
            "Flurbiprofen",
            "Piroxicam",
            "Sulindac",
            "Tiaprofenic Acid",
            "Choline Salicylate",
            "Tolfenamic Acid",
            "Oxaprozin",
            "Fenoprofen",
            "Tenoxicam",
            "Zomepirac",
            "Suprofen",
            "Rofecoxib"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (Paracetamol)",
                "Ibuprofen",
                "Aspirin (Acetylsalicylic Acid)",
                "Naproxen",
                "Diclofenac",
                "Ketoprofen",
                "Celecoxib",
                "Meloxicam",
                "Nabumetone",
                "Indomethacin",
                "Diclofenac Sodium",
                "Ketorolac",
                "Flurbiprofen",
                "Piroxicam",
                "Sulindac",
                "Tiaprofenic Acid",
                "Choline Salicylate",
                "Tolfenamic Acid",
                "Oxaprozin",
                "Fenoprofen",
                "Tenoxicam",
                "Zomepirac",
                "Suprofen",
                "Rofecoxib"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (Paracetamol)",
                "Aspirin (Acetylsalicylic Acid)",
                "Celecoxib",
                "Choline Salicylate",
                "Diclofenac",
                "Diclofenac Sodium",
                "Fenoprofen",
                "Flurbiprofen",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Ketorolac",
                "Meloxicam",
                "Nabumetone",
                "Naproxen",
                "Oxaprozin",
                "Piroxicam",
                "Rofecoxib",
                "Sulindac",
                "Suprofen",
                "Tenoxicam",
                "Tiaprofenic Acid",
                "Tolfenamic Acid",
                "Zomepirac"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (Paracetamol)",
            "Aspirin (Acetylsalicylic acid)",
            "Ibuprofen",
            "Naproxen",
            "Ketoprofen",
            "Diclofenac",
            "Mefenamic acid",
            "Phenylbutazone",
            "Indomethacin",
            "Celecoxib",
            "Rofecoxib",
            "Etoricoxib",
            "Valdecoxib",
            "Lumiracoxib",
            "Parecoxib",
            "Meloxicam",
            "Oxaprozin",
            "Piroxicam",
            "Sulindac",
            "Tolmetin",
            "Choline salicylate",
            "Magnesium salicylate",
            "Salsalate",
            "Diflunisal"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (Paracetamol)",
                "Aspirin (Acetylsalicylic acid)",
                "Ibuprofen",
                "Naproxen",
                "Ketoprofen",
                "Diclofenac",
                "Mefenamic acid",
                "Phenylbutazone",
                "Indomethacin",
                "Celecoxib",
                "Rofecoxib",
                "Etoricoxib",
                "Valdecoxib",
                "Lumiracoxib",
                "Parecoxib",
                "Meloxicam",
                "Oxaprozin",
                "Piroxicam",
                "Sulindac",
                "Tolmetin",
                "Choline salicylate",
                "Magnesium salicylate",
                "Salsalate",
                "Diflunisal"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (Paracetamol)",
                "Aspirin (Acetylsalicylic acid)",
                "Celecoxib",
                "Choline salicylate",
                "Diclofenac",
                "Diflunisal",
                "Etoricoxib",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Lumiracoxib",
                "Magnesium salicylate",
                "Mefenamic acid",
                "Meloxicam",
                "Naproxen",
                "Oxaprozin",
                "Parecoxib",
                "Phenylbutazone",
                "Piroxicam",
                "Rofecoxib",
                "Salsalate",
                "Sulindac",
                "Tolmetin",
                "Valdecoxib"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Aspirin (ASA)",
            "Ibuprofen (IBU)",
            "Naproxen (NAP)",
            "Acetaminophen (APAP)",
            "Paracetamol (PAR)",
            "Diclofenac (DIC)",
            "Ketoprofen (KET)",
            "Flurbiprofen (FLB)",
            "Indomethacin (IND)",
            "Piroxicam (PIX)",
            "Meloxicam (MEL)",
            "Celecoxib (CEL)",
            "Rofecoxib (ROF)",
            "Valdecoxib (VAL)",
            "Arcoxia (ETC)",
            "Nabumetone (NAB)",
            "Tolmetin (TLM)",
            "Fenoprofen (FNP)",
            "Salicylamide (SAL)",
            "Diflunisal (DFL)",
            "Etodolac (ETD)",
            "Lonazolac (LNZ)",
            "Zomepirac (ZOM)",
            "Glycol salicylate (GLY)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aspirin (ASA)",
                "Ibuprofen (IBU)",
                "Naproxen (NAP)",
                "Acetaminophen (APAP)",
                "Paracetamol (PAR)",
                "Diclofenac (DIC)",
                "Ketoprofen (KET)",
                "Flurbiprofen (FLB)",
                "Indomethacin (IND)",
                "Piroxicam (PIX)",
                "Meloxicam (MEL)",
                "Celecoxib (CEL)",
                "Rofecoxib (ROF)",
                "Valdecoxib (VAL)",
                "Arcoxia (ETC)",
                "Nabumetone (NAB)",
                "Tolmetin (TLM)",
                "Fenoprofen (FNP)",
                "Salicylamide (SAL)",
                "Diflunisal (DFL)",
                "Etodolac (ETD)",
                "Lonazolac (LNZ)",
                "Zomepirac (ZOM)",
                "Glycol salicylate (GLY)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Arcoxia (ETC)",
                "Aspirin (ASA)",
                "Celecoxib (CEL)",
                "Diclofenac (DIC)",
                "Diflunisal (DFL)",
                "Etodolac (ETD)",
                "Fenoprofen (FNP)",
                "Flurbiprofen (FLB)",
                "Glycol salicylate (GLY)",
                "Ibuprofen (IBU)",
                "Indomethacin (IND)",
                "Ketoprofen (KET)",
                "Lonazolac (LNZ)",
                "Meloxicam (MEL)",
                "Nabumetone (NAB)",
                "Naproxen (NAP)",
                "Paracetamol (PAR)",
                "Piroxicam (PIX)",
                "Rofecoxib (ROF)",
                "Salicylamide (SAL)",
                "Tolmetin (TLM)",
                "Valdecoxib (VAL)",
                "Zomepirac (ZOM)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (Paracetamol)",
            "Ibuprofen",
            "Aspirin (Acetylsalicylic Acid)",
            "Naproxen",
            "Diclofenac",
            "Indomethacin",
            "Ketoprofen",
            "Piroxicam",
            "Meloxicam",
            "Celecoxib",
            "Metamizole (Dipyrone)",
            "Phenacetin",
            "Nimesulide",
            "Meclofenamate",
            "Etodolac",
            "Sulindac",
            "Flurbiprofen",
            "Meclofenamic Acid",
            "Diflunisal",
            "Fenoprofen",
            "Oxaprozin",
            "Etodolac",
            "Tolmetin",
            "Mefenamic Acid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (Paracetamol)",
                "Ibuprofen",
                "Aspirin (Acetylsalicylic Acid)",
                "Naproxen",
                "Diclofenac",
                "Indomethacin",
                "Ketoprofen",
                "Piroxicam",
                "Meloxicam",
                "Celecoxib",
                "Metamizole (Dipyrone)",
                "Phenacetin",
                "Nimesulide",
                "Meclofenamate",
                "Etodolac",
                "Sulindac",
                "Flurbiprofen",
                "Meclofenamic Acid",
                "Diflunisal",
                "Fenoprofen",
                "Oxaprozin",
                "Tolmetin",
                "Mefenamic Acid"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (Paracetamol)",
                "Aspirin (Acetylsalicylic Acid)",
                "Celecoxib",
                "Diclofenac",
                "Diflunisal",
                "Etodolac",
                "Fenoprofen",
                "Flurbiprofen",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Meclofenamate",
                "Meclofenamic Acid",
                "Mefenamic Acid",
                "Meloxicam",
                "Metamizole (Dipyrone)",
                "Naproxen",
                "Nimesulide",
                "Oxaprozin",
                "Phenacetin",
                "Piroxicam",
                "Sulindac",
                "Tolmetin"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Ibuprofen (IBU)",
            "Aspirin (ASA)",
            "Naproxen (NAP)",
            "Ketoprofen (KTP)",
            "Diclofenac (DIC)",
            "Indomethacin (IND)",
            "Meloxicam (MEL)",
            "Piroxicam (PIR)",
            "Fenoprofen (FEN)",
            "Flurbiprofen (FLU)",
            "Salsalate (SAL)",
            "Acetylsalicylic acid (ASA)",
            "Paracetamol (PAR)",
            "Celecoxib (CELE)",
            "Rofecoxib (ROF)",
            "Etodolac (ETO)",
            "Oxaprozin (OXA)",
            "Diflunisal (DIF)",
            "Nabumetone (NAB)",
            "Tolmetin (TOL)",
            "Ketorolac (KET)",
            "Aminophenazone (AMIN)",
            "Phenylbutazone (PHEN)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Ibuprofen (IBU)",
                "Aspirin (ASA)",
                "Naproxen (NAP)",
                "Ketoprofen (KTP)",
                "Diclofenac (DIC)",
                "Indomethacin (IND)",
                "Meloxicam (MEL)",
                "Piroxicam (PIR)",
                "Fenoprofen (FEN)",
                "Flurbiprofen (FLU)",
                "Salsalate (SAL)",
                "Acetylsalicylic acid (ASA)",
                "Paracetamol (PAR)",
                "Celecoxib (CELE)",
                "Rofecoxib (ROF)",
                "Etodolac (ETO)",
                "Oxaprozin (OXA)",
                "Diflunisal (DIF)",
                "Nabumetone (NAB)",
                "Tolmetin (TOL)",
                "Ketorolac (KET)",
                "Aminophenazone (AMIN)",
                "Phenylbutazone (PHEN)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Acetylsalicylic acid (ASA)",
                "Aminophenazone (AMIN)",
                "Aspirin (ASA)",
                "Celecoxib (CELE)",
                "Diclofenac (DIC)",
                "Diflunisal (DIF)",
                "Etodolac (ETO)",
                "Fenoprofen (FEN)",
                "Flurbiprofen (FLU)",
                "Ibuprofen (IBU)",
                "Indomethacin (IND)",
                "Ketoprofen (KTP)",
                "Ketorolac (KET)",
                "Meloxicam (MEL)",
                "Nabumetone (NAB)",
                "Naproxen (NAP)",
                "Oxaprozin (OXA)",
                "Paracetamol (PAR)",
                "Phenylbutazone (PHEN)",
                "Piroxicam (PIR)",
                "Rofecoxib (ROF)",
                "Salsalate (SAL)",
                "Tolmetin (TOL)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetylsalicylic Acid (ASA)",
            "Ibuprofen",
            "Paracetamol (Acetaminophen)",
            "Naproxen",
            "Indomethacin",
            "Diclofenac",
            "Ketoprofen",
            "Acetaminophen",
            "Celecoxib",
            "Mefenamic Acid",
            "Phenylbutazone",
            "Propyphenazone",
            "Metamizole (Dipyrone)",
            "Ketorolac",
            "Etodolac",
            "Meloxicam",
            "Piroxicam",
            "Nabumetone",
            "Sulindac",
            "Choline Magnesium Trisalicylate",
            "Diflunisal",
            "Flurbiprofen",
            "Meclofenamate",
            "Valdecoxib"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetylsalicylic Acid (ASA)",
                "Ibuprofen",
                "Paracetamol (Acetaminophen)",
                "Naproxen",
                "Indomethacin",
                "Diclofenac",
                "Ketoprofen",
                "Acetaminophen",
                "Celecoxib",
                "Mefenamic Acid",
                "Phenylbutazone",
                "Propyphenazone",
                "Metamizole (Dipyrone)",
                "Ketorolac",
                "Etodolac",
                "Meloxicam",
                "Piroxicam",
                "Nabumetone",
                "Sulindac",
                "Choline Magnesium Trisalicylate",
                "Diflunisal",
                "Flurbiprofen",
                "Meclofenamate",
                "Valdecoxib"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen",
                "Acetylsalicylic Acid (ASA)",
                "Celecoxib",
                "Choline Magnesium Trisalicylate",
                "Diclofenac",
                "Diflunisal",
                "Etodolac",
                "Flurbiprofen",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Ketorolac",
                "Meclofenamate",
                "Mefenamic Acid",
                "Meloxicam",
                "Metamizole (Dipyrone)",
                "Nabumetone",
                "Naproxen",
                "Paracetamol (Acetaminophen)",
                "Phenylbutazone",
                "Piroxicam",
                "Propyphenazone",
                "Sulindac",
                "Valdecoxib"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Aspirin (acetylsalicylic acid)",
            "Ibuprofen",
            "Acetaminophen (paracetamol)",
            "Naproxen",
            "Diclofenac",
            "Indomethacin",
            "Meloxicam",
            "Celecoxib",
            "Ketoprofen",
            "Piroxicam",
            "Etodolac",
            "Mefenamic acid",
            "Nabumetone",
            "Sulindac",
            "Tolmetin",
            "Oxaprozin",
            "Ketorolac",
            "Flurbiprofen",
            "Indometacin",
            "Nimesulide",
            "Rofecoxib",
            "Valdecoxib",
            "Lornoxicam",
            "Etoricoxib"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Aspirin (acetylsalicylic acid)",
                "Ibuprofen",
                "Acetaminophen (paracetamol)",
                "Naproxen",
                "Diclofenac",
                "Indomethacin",
                "Meloxicam",
                "Celecoxib",
                "Ketoprofen",
                "Piroxicam",
                "Etodolac",
                "Mefenamic acid",
                "Nabumetone",
                "Sulindac",
                "Tolmetin",
                "Oxaprozin",
                "Ketorolac",
                "Flurbiprofen",
                "Indometacin",
                "Nimesulide",
                "Rofecoxib",
                "Valdecoxib",
                "Lornoxicam",
                "Etoricoxib"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (paracetamol)",
                "Aspirin (acetylsalicylic acid)",
                "Celecoxib",
                "Diclofenac",
                "Etodolac",
                "Etoricoxib",
                "Flurbiprofen",
                "Ibuprofen",
                "Indometacin",
                "Indomethacin",
                "Ketoprofen",
                "Ketorolac",
                "Lornoxicam",
                "Mefenamic acid",
                "Meloxicam",
                "Nabumetone",
                "Naproxen",
                "Nimesulide",
                "Oxaprozin",
                "Piroxicam",
                "Rofecoxib",
                "Sulindac",
                "Tolmetin",
                "Valdecoxib"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (Paracetamol, APAP)",
            "Ibuprofen (IBU)",
            "Aspirin (Acetylsalicylic acid, ASA)",
            "Naproxen (NAP)",
            "Indomethacin (IND)",
            "Diclofenac (DCF)",
            "Ketoprofen (KTP)",
            "Meloxicam (MLX)",
            "Celecoxib (CEL)",
            "Etoricoxib (ETX)",
            "Nabumetone (NBM)",
            "Mefenamic acid (MEF)",
            "Piroxicam (PRX)",
            "Sulindac (SUL)",
            "Flurbiprofen (FLB)",
            "Tolmetin (TOL)",
            "Fenoprofen (FEN)",
            "Oxaprozin (OXP)",
            "Salsalate (SAL)",
            "Indoprofen (IDP)",
            "Flufenamic acid (FFA)",
            "Ketorolac (KTN)",
            "Tenoxicam (TEN)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (Paracetamol, APAP)",
                "Ibuprofen (IBU)",
                "Aspirin (Acetylsalicylic acid, ASA)",
                "Naproxen (NAP)",
                "Indomethacin (IND)",
                "Diclofenac (DCF)",
                "Ketoprofen (KTP)",
                "Meloxicam (MLX)",
                "Celecoxib (CEL)",
                "Etoricoxib (ETX)",
                "Nabumetone (NBM)",
                "Mefenamic acid (MEF)",
                "Piroxicam (PRX)",
                "Sulindac (SUL)",
                "Flurbiprofen (FLB)",
                "Tolmetin (TOL)",
                "Fenoprofen (FEN)",
                "Oxaprozin (OXP)",
                "Salsalate (SAL)",
                "Indoprofen (IDP)",
                "Flufenamic acid (FFA)",
                "Ketorolac (KTN)",
                "Tenoxicam (TEN)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (Paracetamol, APAP)",
                "Aspirin (Acetylsalicylic acid, ASA)",
                "Celecoxib (CEL)",
                "Diclofenac (DCF)",
                "Etoricoxib (ETX)",
                "Fenoprofen (FEN)",
                "Flufenamic acid (FFA)",
                "Flurbiprofen (FLB)",
                "Ibuprofen (IBU)",
                "Indomethacin (IND)",
                "Indoprofen (IDP)",
                "Ketoprofen (KTP)",
                "Ketorolac (KTN)",
                "Mefenamic acid (MEF)",
                "Meloxicam (MLX)",
                "Nabumetone (NBM)",
                "Naproxen (NAP)",
                "Oxaprozin (OXP)",
                "Piroxicam (PRX)",
                "Salsalate (SAL)",
                "Sulindac (SUL)",
                "Tenoxicam (TEN)",
                "Tolmetin (TOL)"
            ],
            "TP": 23,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Ibuprofen (IBU)",
            "Aspirin (ASA)",
            "Naproxen (NAP)",
            "Ketoprofen (KET)",
            "Mefenamic acid (MFA)",
            "Diclofenac (DCF)",
            "Indomethacin (IND)",
            "Nimesulide (NIM)",
            "Metamizole (MET)",
            "Celecoxib (CEL)",
            "Paracetamol (PCM)",
            "Flurbiprofen (FLU)",
            "Meloxicam (MEL)",
            "Piroxicam (PIR)",
            "Ketorolac (KET)",
            "Etoricoxib (ETO)",
            "Lornoxicam (LOR)",
            "Dexketoprofen (DKP)",
            "Aceclofenac (ACF)",
            "Etodolac (ETD)",
            "Phenylbutazone (PBZ)",
            "Propyphenazone (PPZ)",
            "Nefopam (NEF)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Ibuprofen (IBU)",
                "Aspirin (ASA)",
                "Naproxen (NAP)",
                "Ketoprofen (KET)",
                "Mefenamic acid (MFA)",
                "Diclofenac (DCF)",
                "Indomethacin (IND)",
                "Nimesulide (NIM)",
                "Metamizole (MET)",
                "Celecoxib (CEL)",
                "Paracetamol (PCM)",
                "Flurbiprofen (FLU)",
                "Meloxicam (MEL)",
                "Piroxicam (PIR)",
                "Ketorolac (KET)",
                "Etoricoxib (ETO)",
                "Lornoxicam (LOR)",
                "Dexketoprofen (DKP)",
                "Aceclofenac (ACF)",
                "Etodolac (ETD)",
                "Phenylbutazone (PBZ)",
                "Propyphenazone (PPZ)",
                "Nefopam (NEF)"
            ],
            "mismatches": [],
            "true_referents": [
                "Aceclofenac (ACF)",
                "Acetaminophen (APAP)",
                "Aspirin (ASA)",
                "Celecoxib (CEL)",
                "Dexketoprofen (DKP)",
                "Diclofenac (DCF)",
                "Etodolac (ETD)",
                "Etoricoxib (ETO)",
                "Flurbiprofen (FLU)",
                "Ibuprofen (IBU)",
                "Indomethacin (IND)",
                "Ketoprofen (KET)",
                "Ketorolac (KET)",
                "Lornoxicam (LOR)",
                "Mefenamic acid (MFA)",
                "Meloxicam (MEL)",
                "Metamizole (MET)",
                "Naproxen (NAP)",
                "Nefopam (NEF)",
                "Nimesulide (NIM)",
                "Paracetamol (PCM)",
                "Phenylbutazone (PBZ)",
                "Piroxicam (PIR)",
                "Propyphenazone (PPZ)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Ibuprofen",
            "Aspirin (ASA)",
            "Naproxen",
            "Diclofenac",
            "Ketoprofen",
            "Indomethacin",
            "Mefenamic acid",
            "Dipyrone (Metamizole)",
            "Phenacetin",
            "Antipyrine (Phenazone)",
            "Aminopyrine",
            "Niflumic acid",
            "Tolfenamic acid",
            "Celecoxib",
            "Etoricoxib",
            "Rofecoxib",
            "Valdecoxib",
            "Parecoxib",
            "Lumiracoxib",
            "Lornoxicam",
            "Meloxicam",
            "Piroxicam",
            "Tenoxicam"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Ibuprofen",
                "Aspirin (ASA)",
                "Naproxen",
                "Diclofenac",
                "Ketoprofen",
                "Indomethacin",
                "Mefenamic acid",
                "Dipyrone (Metamizole)",
                "Phenacetin",
                "Antipyrine (Phenazone)",
                "Aminopyrine",
                "Niflumic acid",
                "Tolfenamic acid",
                "Celecoxib",
                "Etoricoxib",
                "Rofecoxib",
                "Valdecoxib",
                "Parecoxib",
                "Lumiracoxib",
                "Lornoxicam",
                "Meloxicam",
                "Piroxicam",
                "Tenoxicam"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Aminopyrine",
                "Antipyrine (Phenazone)",
                "Aspirin (ASA)",
                "Celecoxib",
                "Diclofenac",
                "Dipyrone (Metamizole)",
                "Etoricoxib",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Lornoxicam",
                "Lumiracoxib",
                "Mefenamic acid",
                "Meloxicam",
                "Naproxen",
                "Niflumic acid",
                "Parecoxib",
                "Phenacetin",
                "Piroxicam",
                "Rofecoxib",
                "Tenoxicam",
                "Tolfenamic acid",
                "Valdecoxib"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Ibuprofen",
            "Aspirin",
            "Naproxen",
            "Diclofenac",
            "Celecoxib",
            "Indomethacin",
            "Ketoprofen",
            "Ketorolac",
            "Meloxicam",
            "Nabumetone",
            "Piroxicam",
            "Sulindac",
            "Tolmetin",
            "Salicylic acid",
            "Phenacetin",
            "Aminopyrine",
            "Antipyrine",
            "Paracetamol",
            "Acetanilide",
            "Propacetamol",
            "Metamizole",
            "Dipyrone",
            "Phenazone"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Ibuprofen",
                "Aspirin",
                "Naproxen",
                "Diclofenac",
                "Celecoxib",
                "Indomethacin",
                "Ketoprofen",
                "Ketorolac",
                "Meloxicam",
                "Nabumetone",
                "Piroxicam",
                "Sulindac",
                "Tolmetin",
                "Salicylic acid",
                "Phenacetin",
                "Aminopyrine",
                "Antipyrine",
                "Paracetamol",
                "Acetanilide",
                "Propacetamol",
                "Metamizole",
                "Dipyrone",
                "Phenazone"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Acetanilide",
                "Aminopyrine",
                "Antipyrine",
                "Aspirin",
                "Celecoxib",
                "Diclofenac",
                "Dipyrone",
                "Ibuprofen",
                "Indomethacin",
                "Ketoprofen",
                "Ketorolac",
                "Meloxicam",
                "Metamizole",
                "Nabumetone",
                "Naproxen",
                "Paracetamol",
                "Phenacetin",
                "Phenazone",
                "Piroxicam",
                "Propacetamol",
                "Salicylic acid",
                "Sulindac",
                "Tolmetin"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Aspirin (ASA)",
            "Ibuprofen (IBU)",
            "Naproxen (NAP)",
            "Diclofenac (DIC)",
            "Celecoxib (CEL)",
            "Meloxicam (MEL)",
            "Piroxicam (PIR)",
            "Indomethacin (IND)",
            "Ketoprofen (KET)",
            "Flurbiprofen (FLU)",
            "Mefenamic Acid (MEF)",
            "Phenazopyridine (PHA)",
            "Paracetamol (PAR)",
            "Nimesulide (NIM)",
            "Etodolac (ETO)",
            "Sulindac (SUL)",
            "Diflunisal (DIF)",
            "Oxaprozin (OXA)",
            "Tenoxicam (TEN)",
            "Lornoxicam (LOR)",
            "Etoricoxib (ETO)",
            "Valdecoxib (VAL)",
            "Lumiracoxib (LUM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Aspirin (ASA)",
                "Ibuprofen (IBU)",
                "Naproxen (NAP)",
                "Diclofenac (DIC)",
                "Celecoxib (CEL)",
                "Meloxicam (MEL)",
                "Piroxicam (PIR)",
                "Indomethacin (IND)",
                "Ketoprofen (KET)",
                "Flurbiprofen (FLU)",
                "Mefenamic Acid (MEF)",
                "Phenazopyridine (PHA)",
                "Paracetamol (PAR)",
                "Nimesulide (NIM)",
                "Etodolac (ETO)",
                "Sulindac (SUL)",
                "Diflunisal (DIF)",
                "Oxaprozin (OXA)",
                "Tenoxicam (TEN)",
                "Lornoxicam (LOR)",
                "Etoricoxib (ETO)",
                "Valdecoxib (VAL)",
                "Lumiracoxib (LUM)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acetaminophen (APAP)",
                "Aspirin (ASA)",
                "Celecoxib (CEL)",
                "Diclofenac (DIC)",
                "Diflunisal (DIF)",
                "Etodolac (ETO)",
                "Etoricoxib (ETO)",
                "Flurbiprofen (FLU)",
                "Ibuprofen (IBU)",
                "Indomethacin (IND)",
                "Ketoprofen (KET)",
                "Lornoxicam (LOR)",
                "Lumiracoxib (LUM)",
                "Mefenamic Acid (MEF)",
                "Meloxicam (MEL)",
                "Naproxen (NAP)",
                "Nimesulide (NIM)",
                "Oxaprozin (OXA)",
                "Paracetamol (PAR)",
                "Phenazopyridine (PHA)",
                "Piroxicam (PIR)",
                "Sulindac (SUL)",
                "Tenoxicam (TEN)",
                "Valdecoxib (VAL)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (Tylenol) (APAP)",
            "Aspirin",
            "Ibuprofen (Advil, Motrin)",
            "Ketoprofen (Orudis)",
            "Naproxen (Aleve)",
            "Indomethacin",
            "Diclofenac (Voltaren)",
            "Flurbiprofen (Ansaid)",
            "Celecoxib (Celebrex)",
            "Meloxicam (Mobic)",
            "Oxaprozin (Daypro)",
            "Etodolac (Lodine)",
            "Sulindac (Clinoril)",
            "Nimesulide (Nimulid)",
            "Fenoprofen (Nalfon)",
            "Piroxicam (Feldene)",
            "Ketorolac (Toradol)",
            "Meclofenamate (Meclomen)",
            "Mefenamic acid (Ponstel)",
            "Nabumetone (Relafen)",
            "Tolmetin (Tolectin)",
            "Salsalate (Disalcid)",
            "Magnesium salicylate (Doan's)",
            "Chlorzoxazone (Parafon)",
            "Bromfenac (Duratolin)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (Tylenol) (APAP)",
                "Aspirin",
                "Ibuprofen (Advil, Motrin)",
                "Ketoprofen (Orudis)",
                "Naproxen (Aleve)",
                "Indomethacin",
                "Diclofenac (Voltaren)",
                "Flurbiprofen (Ansaid)",
                "Celecoxib (Celebrex)",
                "Meloxicam (Mobic)",
                "Oxaprozin (Daypro)",
                "Etodolac (Lodine)",
                "Sulindac (Clinoril)",
                "Nimesulide (Nimulid)",
                "Fenoprofen (Nalfon)",
                "Piroxicam (Feldene)",
                "Ketorolac (Toradol)",
                "Meclofenamate (Meclomen)",
                "Mefenamic acid (Ponstel)",
                "Nabumetone (Relafen)",
                "Tolmetin (Tolectin)",
                "Salsalate (Disalcid)",
                "Magnesium salicylate (Doan's)",
                "Chlorzoxazone (Parafon)"
            ],
            "mismatches": [
                "Bromfenac (Duratolin)"
            ],
            "true_referents": [
                "Acetaminophen (Tylenol) (APAP)",
                "Aspirin",
                "Celecoxib (Celebrex)",
                "Chlorzoxazone (Parafon)",
                "Diclofenac (Voltaren)",
                "Etodolac (Lodine)",
                "Fenoprofen (Nalfon)",
                "Flurbiprofen (Ansaid)",
                "Ibuprofen (Advil, Motrin)",
                "Indomethacin",
                "Ketoprofen (Orudis)",
                "Ketorolac (Toradol)",
                "Magnesium salicylate (Doan's)",
                "Meclofenamate (Meclomen)",
                "Mefenamic acid (Ponstel)",
                "Meloxicam (Mobic)",
                "Nabumetone (Relafen)",
                "Naproxen (Aleve)",
                "Nimesulide (Nimulid)",
                "Oxaprozin (Daypro)",
                "Piroxicam (Feldene)",
                "Salsalate (Disalcid)",
                "Sulindac (Clinoril)",
                "Tolmetin (Tolectin)"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            "Acetaminophen (APAP)",
            "Paracetamol (PCM)",
            "Acetanilide",
            "Phenacetin",
            "Acetophenetidin",
            "N-Acetyl-p-aminophenol (NAPA)",
            "Phenylacetamide",
            "4-Acetamidophenol",
            "N-(4-Hydroxyphenyl)acetamide",
            "p-Acetaminophenetidine",
            "4'-Ethoxyacetanilide",
            "N-(p-Acetylphenyl)carbamate",
            "p-Nitroacetanilide",
            "p-Acetamidophenol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acetaminophen (APAP)",
                "Paracetamol (PCM)",
                "Acetanilide",
                "Phenacetin",
                "Acetophenetidin",
                "N-Acetyl-p-aminophenol (NAPA)",
                "Phenylacetamide",
                "4-Acetamidophenol",
                "N-(4-Hydroxyphenyl)acetamide",
                "p-Acetaminophenetidine",
                "4'-Ethoxyacetanilide",
                "N-(p-Acetylphenyl)carbamate",
                "p-Nitroacetanilide",
                "p-Acetamidophenol"
            ],
            "mismatches": [],
            "true_referents": [
                "4'-Ethoxyacetanilide",
                "4-Acetamidophenol",
                "Acetaminophen (APAP)",
                "Acetanilide",
                "Acetophenetidin",
                "N-(4-Hydroxyphenyl)acetamide",
                "N-(p-Acetylphenyl)carbamate",
                "N-Acetyl-p-aminophenol (NAPA)",
                "Paracetamol (PCM)",
                "Phenacetin",
                "Phenylacetamide",
                "p-Acetamidophenol",
                "p-Acetaminophenetidine",
                "p-Nitroacetanilide"
            ],
            "TP": 14,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "antipyretic agent",
        "domain": "medicine",
        "response": [
            {
                "name": "acetaminophen",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "aspirin",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "ibuprofen",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "naproxen",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "paracetamol",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "phenacetin",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "phenylbutazone",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "pyrazolone",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "salicylamide",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "salicylic acid",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "sodium salicylate",
                "description": "it is a chemical compound",
                "fever_reduction": "yes"
            },
            {
                "name": "acetaminophen",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "aspirin",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "ibuprofen",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "naproxen",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "paracetamol",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "phenacetin",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "phenylbutazone",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "pyrazolone",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "salicylamide",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "salicylic acid",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            },
            {
                "name": "sodium salicylate",
                "description": "it reduces fever",
                "fever_reduction": "yes"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "acetaminophen",
                "aspirin",
                "ibuprofen",
                "naproxen",
                "paracetamol",
                "phenacetin",
                "phenylbutazone",
                "pyrazolone",
                "salicylamide",
                "salicylic acid",
                "sodium salicylate"
            ],
            "mismatches": [],
            "true_referents": [
                "{\"name\": \"acetaminophen\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"acetaminophen\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"aspirin\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"aspirin\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"ibuprofen\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"ibuprofen\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"naproxen\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"naproxen\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"paracetamol\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"paracetamol\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"phenacetin\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"phenacetin\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"phenylbutazone\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"phenylbutazone\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"pyrazolone\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"pyrazolone\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"salicylamide\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"salicylamide\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"salicylic acid\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"salicylic acid\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"sodium salicylate\", \"description\": \"it is a chemical compound\", \"fever_reduction\": \"yes\"}",
                "{\"name\": \"sodium salicylate\", \"description\": \"it reduces fever\", \"fever_reduction\": \"yes\"}"
            ],
            "TP": 11,
            "FP": 0,
            "FN": 0
        }
    }
]