[
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Leukemia (LEUK)",
            "Lung cancer (LC)",
            "Breast cancer (BC)",
            "Colorectal cancer (CRC)",
            "Prostate cancer (PCa)",
            "Skin cancer (SC)",
            "Ovarian cancer (OC)",
            "Pancreatic cancer (PC)",
            "Brain cancer (BCa)",
            "Stomach cancer (SCa)",
            "Liver cancer (LCa)",
            "Thyroid cancer (TC)",
            "Bladder cancer (BCa)",
            "Esophageal cancer (ECa)",
            "Kidney cancer (KC)",
            "Testicular cancer (TCa)",
            "Cervical cancer (CC)",
            "Lymphoma (LYMPH)",
            "Melanoma (MEL)",
            "Multiple myeloma (MM)",
            "Gastric cancer (GC)",
            "Hodgkin's lymphoma (HL)",
            "Non-Hodgkin's lymphoma (NHL)",
            "Wilms' tumor (WT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Colorectal cancer",
                "Prostate cancer",
                "Ovarian cancer",
                "Pancreatic cancer",
                "Thyroid cancer",
                "Bladder cancer",
                "Esophageal cancer",
                "Kidney cancer",
                "Testicular cancer",
                "Cervical cancer",
                "Lymphoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Hodgkin's lymphoma",
                "Non-Hodgkin lymphoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "mismatches": [
                "Leukemia (LEUK)",
                "Lung cancer (LC)",
                "Skin cancer (SC)",
                "Brain cancer (BCa)",
                "Stomach cancer (SCa)",
                "Liver cancer (LCa)",
                "Gastric cancer (GC)",
                "Wilms' tumor (WT)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Appendix cancer",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cerebral astrocytoma",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Follicular lymphoma",
                "Gallbladder cancer",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Glioblastoma",
                "Glioma",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Mucinous carcinoma of the breast",
                "Multiple myeloma/plasma cell neoplasm",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 17,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "phi-v4",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BC)",
            "Lung Cancer (LC)",
            "Colorectal Cancer (CRC)",
            "Prostate Cancer (PCa)",
            "Leukemia (LEUK)",
            "Melanoma (MM)",
            "Bladder Cancer (BCa)",
            "Pancreatic Cancer (PC)",
            "Ovarian Cancer (OC)",
            "Non-Small Cell Lung Cancer (NSCLC)",
            "Small Cell Lung Cancer (SCLC)",
            "Glioblastoma (GBM)",
            "Renal Cell Carcinoma (RCC)",
            "Liver Cancer (HCC)",
            "Esophageal Cancer (EC)",
            "Cervical Cancer (CC)",
            "Head and Neck Squamous Cell Carcinoma (HNSCC)",
            "Thyroid Cancer (TC)",
            "Multiple Myeloma (MM)",
            "Chronic Myeloid Leukemia (CML)",
            "Acute Lymphoblastic Leukemia (ALL)",
            "Acute Myeloid Leukemia (AML)",
            "Osteosarcoma (OS)",
            "Rhabdomyosarcoma (RMS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast Cancer",
                "Colorectal Cancer",
                "Prostate Cancer",
                "Melanoma",
                "Bladder Cancer",
                "Pancreatic Cancer",
                "Ovarian Cancer",
                "Non-Small Cell Lung Cancer",
                "Small Cell Lung Cancer",
                "Glioblastoma",
                "Renal Cell Carcinoma",
                "Esophageal Cancer",
                "Cervical Cancer",
                "Thyroid Cancer",
                "Multiple Myeloma",
                "Chronic Myeloid Leukemia",
                "Acute Lymphoblastic Leukemia",
                "Acute Myeloid Leukemia",
                "Osteosarcoma",
                "Rhabdomyosarcoma"
            ],
            "mismatches": [
                "Lung Cancer",
                "Leukemia",
                "Liver Cancer",
                "Head and Neck Squamous Cell Carcinoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Malignant fibrous histiocytoma of bone/osteosarcoma",
                "Melanoma",
                "Merkel cell carcinoma",
                "Mucinous carcinoma of the breast",
                "Multiple myeloma/plasma cell neoplasm",
                "Myelodysplastic syndromes",
                "Myxosarcoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Precursor B lymphoblastic leukemia",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Rhabdomyosarcoma",
                "Small cell lung cancer",
                "Squamous-cell carcinoma of the lung",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 20,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "mistral-small-instruct-24B",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Cancer (Malignant Neoplasm)",
            "Leukemia",
            "Lymphoma",
            "Melanoma",
            "Carcinoma",
            "Sarcoma",
            "Glioblastoma Multiforme",
            "Colorectal Cancer",
            "Breast Cancer",
            "Lung Cancer",
            "Prostate Cancer",
            "Ovarian Cancer",
            "Pancreatic Cancer",
            "Gastric Cancer",
            "Bladder Cancer",
            "Thyroid Cancer",
            "Kidney Cancer",
            "Liver Cancer",
            "Brain Tumor",
            "Bone Cancer",
            "Squamous Cell Carcinoma",
            "Basal Cell Carcinoma",
            "Mesothelioma",
            "Myeloma"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Melanoma",
                "Glioblastoma",
                "Colorectal Cancer",
                "Breast cancer",
                "Prostate cancer",
                "Ovarian cancer",
                "Pancreatic Cancer",
                "Bladder cancer",
                "Thyroid cancer",
                "Squamous cell carcinoma",
                "Basal cell carcinoma",
                "Mesothelioma"
            ],
            "mismatches": [
                "Cancer (Malignant Neoplasm)",
                "Leukemia",
                "Lymphoma",
                "Carcinoma",
                "Sarcoma",
                "Glioblastoma Multiforme",
                "Lung Cancer",
                "Gastric Cancer",
                "Kidney Cancer",
                "Liver Cancer",
                "Brain Tumor",
                "Bone Cancer",
                "Myeloma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Duodenal cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Lymphoma",
                "Melanoma",
                "Mesothelioma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Soft Tissue Sarcoma",
                "Squamous cell carcinoma",
                "Squamous cell skin cancer",
                "Squamous-cell carcinoma of the lung",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 12,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gemma-3",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Lung Cancer (LC)",
            "Breast Cancer (BC)",
            "Colorectal Cancer (CRC)",
            "Melanoma (MEL)",
            "Prostate Cancer (PC)",
            "Glioblastoma (GBM)",
            "Pancreatic Cancer (PAC)",
            "Ovarian Cancer (OC)",
            "Leukemia - Acute Myeloid (AML)",
            "Leukemia - Chronic Lymphocytic (CLL)",
            "Lymphoma - Hodgkin's (HL)",
            "Lymphoma - Non-Hodgkin's (NHL)",
            "Thyroid Cancer (TC)",
            "Bladder Cancer (BCa)",
            "Kidney Cancer (RC)",
            "Esophageal Cancer (EC)",
            "Stomach Cancer (GC)",
            "Liver Cancer - Hepatocellular Carcinoma (HCC)",
            "Cervical Cancer (CC)",
            "Vulvar Cancer (VC)",
            "Vaginal Cancer (VaC)",
            "Sarcoma - Osteosarcoma (OS)",
            "Sarcoma - Ewing's (ES)",
            "Neuroblastoma (NB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast Cancer (BC)",
                "Colorectal Cancer (CRC)",
                "Melanoma (MEL)",
                "Prostate Cancer (PC)",
                "Glioblastoma (GBM)",
                "Pancreatic Cancer (PAC)",
                "Ovarian Cancer (OC)",
                "Leukemia - Acute Myeloid (AML)",
                "Leukemia - Chronic Lymphocytic (CLL)",
                "Lymphoma - Hodgkin's (HL)",
                "Lymphoma - Non-Hodgkin's (NHL)",
                "Thyroid Cancer (TC)",
                "Bladder Cancer (BCa)",
                "Kidney Cancer (RC)",
                "Esophageal Cancer (EC)",
                "Cervical Cancer (CC)",
                "Vulvar Cancer (VC)",
                "Vaginal Cancer (VaC)",
                "Neuroblastoma (NB)"
            ],
            "mismatches": [
                "Lung Cancer (LC)",
                "Stomach Cancer (GC)",
                "Liver Cancer - Hepatocellular Carcinoma (HCC)",
                "Sarcoma - Osteosarcoma (OS)",
                "Sarcoma - Ewing's (ES)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "B-cell prolymphocytic leukemia",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Glioblastoma",
                "Glioma",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Lymphoma",
                "Malignant fibrous histiocytoma of bone/osteosarcoma",
                "Melanoma",
                "Mucinous carcinoma of the breast",
                "Myxosarcoma",
                "Neuroblastoma",
                "Neurofibroma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Pineoblastoma",
                "Primitive neuroectodermal tumor",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Vaginal cancer",
                "Vulvar cancer"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BRCA)",
            "Lung Cancer (LC)",
            "Prostate Cancer (PC)",
            "Colorectal Cancer (CRC)",
            "Pancreatic Cancer (PCa)",
            "Leukemia (LEU)",
            "Lymphoma (LYM)",
            "Melanoma (MEL)",
            "Ovarian Cancer (OC)",
            "Cervical Cancer (CC)",
            "Bladder Cancer (BLC)",
            "Kidney Cancer (KC)",
            "Esophageal Cancer (EC)",
            "Liver Cancer (LCa)",
            "Stomach Cancer (SC)",
            "Thyroid Cancer (TC)",
            "Head and Neck Cancer (HNC)",
            "Multiple Myeloma (MM)",
            "Sarcoma (SAR)",
            "Brain Cancer (BC)",
            "Testicular Cancer (TCa)",
            "Endometrial Cancer (ECa)",
            "Gallbladder Cancer (GBC)",
            "Bile Duct Cancer (BDC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast Cancer (BRCA)",
                "Prostate Cancer (PC)",
                "Colorectal Cancer (CRC)",
                "Pancreatic Cancer (PCa)",
                "Lymphoma (LYM)",
                "Melanoma (MEL)",
                "Ovarian Cancer (OC)",
                "Cervical Cancer (CC)",
                "Bladder Cancer (BLC)",
                "Kidney Cancer (KC)",
                "Esophageal Cancer (EC)",
                "Thyroid Cancer (TC)",
                "Head and Neck Cancer (HNC)",
                "Multiple Myeloma (MM)",
                "Testicular Cancer (TCa)",
                "Endometrial Cancer (ECa)",
                "Gallbladder Cancer (GBC)"
            ],
            "mismatches": [
                "Lung Cancer (LC)",
                "Leukemia (LEU)",
                "Liver Cancer (LCa)",
                "Stomach Cancer (SC)",
                "Sarcoma (SAR)",
                "Brain Cancer (BC)",
                "Bile Duct Cancer (BDC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cerebral astrocytoma",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Extrahepatic bile duct cancer",
                "Fibrosarcoma",
                "Gallbladder cancer",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Gastrointestinal stromal tumor (GIST)",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4o-mini",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BC)",
            "Lung Cancer (LC)",
            "Colorectal Cancer (CC)",
            "Prostate Cancer (PC)",
            "Melanoma (M)",
            "Leukemia (L)",
            "Lymphoma (LY)",
            "Pancreatic Cancer (PC)",
            "Ovarian Cancer (OC)",
            "Esophageal Cancer (EC)",
            "Kidney Cancer (KC)",
            "Bladder Cancer (BC)",
            "Thyroid Cancer (TC)",
            "Stomach Cancer (SC)",
            "Cervical Cancer (CC)",
            "Hepatocellular Carcinoma (HCC)",
            "Multiple Myeloma (MM)",
            "Head and Neck Cancer (HNC)",
            "Testicular Cancer (TC)",
            "Neuroblastoma (NB)",
            "Cholangiocarcinoma (CC)",
            "Mesothelioma (M)",
            "Sarcoma (S)",
            "Basal Cell Carcinoma (BCC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast Cancer",
                "Colorectal Cancer",
                "Prostate Cancer",
                "Melanoma",
                "Pancreatic Cancer",
                "Ovarian Cancer",
                "Esophageal Cancer",
                "Kidney Cancer",
                "Bladder Cancer",
                "Thyroid Cancer",
                "Cervical Cancer",
                "Multiple Myeloma",
                "Head and Neck Cancer",
                "Testicular Cancer",
                "Neuroblastoma",
                "Cholangiocarcinoma",
                "Basal Cell Carcinoma"
            ],
            "mismatches": [
                "Lung Cancer",
                "Leukemia",
                "Lymphoma",
                "Stomach Cancer",
                "Hepatocellular Carcinoma",
                "Mesothelioma",
                "Sarcoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Extrahepatic bile duct cancer",
                "Gallbladder cancer",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Gastrointestinal stromal tumor (GIST)",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Merkel cell carcinoma",
                "Mesothelioma",
                "Mucinous carcinoma of the breast",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Neuroblastoma",
                "Neurofibroma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Pineoblastoma",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Skin adnexal tumors (e.g. sebaceous carcinoma)",
                "Soft Tissue Sarcoma",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-4-32k",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Cancer (CA)",
            "Melanoma (MEL)",
            "Lung Cancer (LC)",
            "Breast Cancer (BC)",
            "Prostate Cancer (PC)",
            "Colorectal Cancer (CRC)",
            "Leukemia (LEU)",
            "Non-Hodgkin Lymphoma (NHL)",
            "Pancreatic Cancer (PC)",
            "Ovarian Cancer (OC)",
            "Bladder Cancer (BLAC)",
            "Kidney Cancer (KC)",
            "Thyroid Cancer (TC)",
            "Cervical Cancer (CC)",
            "Esophageal Cancer (EC)",
            "Stomach Cancer (SC)",
            "Liver Cancer (LIC)",
            "Oral Cancer (ORC)",
            "Skin Cancer (SKC)",
            "Testicular Cancer (TEC)",
            "Bone Cancer (BOC)",
            "Brain Cancer (BRC)",
            "Uterine Cancer (UC)",
            "Lymphoma (LYM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Melanoma",
                "Breast Cancer",
                "Prostate Cancer",
                "Colorectal Cancer",
                "Non-Hodgkin Lymphoma",
                "Pancreatic Cancer",
                "Ovarian Cancer",
                "Bladder Cancer",
                "Kidney Cancer",
                "Thyroid Cancer",
                "Cervical Cancer",
                "Esophageal Cancer",
                "Oral Cancer",
                "Testicular Cancer",
                "Lymphoma"
            ],
            "mismatches": [
                "Cancer (CA)",
                "Lung Cancer (LC)",
                "Leukemia (LEU)",
                "Stomach Cancer (SC)",
                "Liver Cancer (LIC)",
                "Skin Cancer (SKC)",
                "Bone Cancer (BOC)",
                "Brain Cancer (BRC)",
                "Uterine Cancer (UC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Endometrial cancer",
                "Esophageal cancer",
                "Gallbladder cancer",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Gastrointestinal stromal tumor (GIST)",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Malignant fibrous histiocytoma of bone/osteosarcoma",
                "Melanoma",
                "Mucinous carcinoma of the breast",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "gpt-35-turbo-16k",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Cancer",
            "Alzheimer's disease",
            "Parkinson's disease",
            "Multiple sclerosis",
            "Diabetes",
            "Huntington's disease",
            "Lung cancer",
            "Breast cancer",
            "Leukemia",
            "Colon cancer",
            "Melanoma",
            "Prostate cancer",
            "Ovarian cancer",
            "Pancreatic cancer",
            "Brain tumor",
            "Lymphoma",
            "Thyroid cancer",
            "Liver cancer",
            "Kidney cancer",
            "Bladder cancer",
            "Glioblastoma",
            "Cervical cancer",
            "Stomach cancer",
            "Esophageal cancer"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Colon cancer",
                "Melanoma",
                "Prostate cancer",
                "Ovarian cancer",
                "Pancreatic cancer",
                "Lymphoma",
                "Thyroid cancer",
                "Kidney cancer",
                "Bladder cancer",
                "Glioblastoma",
                "Cervical cancer",
                "Esophageal cancer"
            ],
            "mismatches": [
                "Cancer",
                "Alzheimer's disease",
                "Parkinson's disease",
                "Multiple sclerosis",
                "Diabetes",
                "Huntington's disease",
                "Lung cancer",
                "Leukemia",
                "Brain tumor",
                "Liver cancer",
                "Stomach cancer"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Brain and nervous system",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endocrine system",
                "Endometrial cancer",
                "Esophageal cancer",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Glucagonoma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Lymphoma",
                "Melanoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oligodendroglioma",
                "Oral cancer",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "o1-mini",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast cancer (BRCA)",
            "Lung cancer (LC)",
            "Prostate cancer (PCa)",
            "Colorectal cancer",
            "Pancreatic cancer",
            "Melanoma",
            "Leukemia",
            "Lymphoma",
            "Glioblastoma multiforme (GBM)",
            "Ovarian cancer",
            "Cervical cancer",
            "Hepatocellular carcinoma (HCC)",
            "Esophageal cancer",
            "Gastric cancer",
            "Bladder cancer",
            "Thyroid cancer",
            "Multiple myeloma",
            "Sarcoma",
            "Oral cancer",
            "Renal cell carcinoma (RCC)",
            "Testicular cancer",
            "Endometrial cancer",
            "Acute myeloid leukemia (AML)",
            "Acute lymphoblastic leukemia (ALL)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Prostate cancer",
                "Colorectal cancer",
                "Pancreatic cancer",
                "Melanoma",
                "Glioblastoma",
                "Ovarian cancer",
                "Cervical cancer",
                "Esophageal cancer",
                "Gastric (stomach) cancer",
                "Bladder cancer",
                "Thyroid cancer",
                "Multiple myeloma/plasma cell neoplasm",
                "Oral cancer",
                "Renal cell carcinoma",
                "Testicular cancer",
                "Endometrial cancer",
                "Acute myeloid leukemia",
                "Acute lymphoblastic leukemia"
            ],
            "mismatches": [
                "Lung cancer (LC)",
                "Leukemia",
                "Lymphoma",
                "Glioblastoma multiforme (GBM)",
                "Hepatocellular carcinoma (HCC)",
                "Sarcoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Precursor B lymphoblastic leukemia",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 19,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Cancer",
            "Leukemia",
            "Lymphoma",
            "Melanoma",
            "Glioblastoma",
            "Sarcoma",
            "Carcinoma",
            "Myeloma",
            "Mesothelioma",
            "Neuroblastoma",
            "Retinoblastoma",
            "Hepatoblastoma",
            "Wilms tumor",
            "Osteosarcoma",
            "Ewing sarcoma",
            "Rhabdomyosarcoma",
            "Chondrosarcoma",
            "Kaposi's sarcoma",
            "Adenocarcinoma",
            "Squamous cell carcinoma",
            "Basal cell carcinoma",
            "Merkel cell carcinoma",
            "Neuroendocrine tumor",
            "Teratoma"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Glioblastoma",
                "Melanoma",
                "Neuroblastoma",
                "Retinoblastoma",
                "Hepatoblastoma",
                "Wilms tumor",
                "Osteosarcoma",
                "Chondrosarcoma",
                "Rhabdomyosarcoma",
                "Kaposi's sarcoma",
                "Basal cell carcinoma",
                "Merkel cell carcinoma",
                "Teratoma"
            ],
            "mismatches": [
                "Cancer",
                "Leukemia",
                "Lymphoma",
                "Sarcoma",
                "Carcinoma",
                "Myeloma",
                "Mesothelioma",
                "Ewing sarcoma",
                "Adenocarcinoma",
                "Squamous cell carcinoma",
                "Neuroendocrine tumor"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Adenocarcinoma of the lung",
                "Angiosarcoma",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Breast cancer",
                "Carcinoid",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Embryonal carcinoma",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Fibrosarcoma",
                "Glioblastoma",
                "Glioma",
                "Hemangioblastoma",
                "Hepatoblasoma",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kaposi sarcoma",
                "Kidney cancer",
                "Lymphoma",
                "Medulloblastoma",
                "Melanoma",
                "Merkel cell carcinoma",
                "Mesothelioma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Neuroblastoma",
                "Neurofibroma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Osteosarcoma",
                "Primitive neuroectodermal tumor",
                "Renal cell carcinoma",
                "Retinoblastoma",
                "Rhabdomyosarcoma",
                "Seminoma",
                "Soft Tissue Sarcoma",
                "Somatostatinoma",
                "Squamous cell carcinoma",
                "Squamous cell skin cancer",
                "Squamous-cell carcinoma of the lung",
                "Teratoma",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Lung cancer (LC)",
            "Breast cancer (BC)",
            "Prostate cancer (PC)",
            "Colorectal cancer (CRC)",
            "Melanoma",
            "Leukemia",
            "Lymphoma",
            "Brain cancer",
            "Ovarian cancer (OC)",
            "Pancreatic cancer",
            "Liver cancer",
            "Kidney cancer",
            "Thyroid cancer",
            "Bladder cancer",
            "Esophageal cancer",
            "Stomach cancer",
            "Uterine cancer",
            "Cervical cancer",
            "Oral cancer",
            "Sarcoma",
            "Testicular cancer",
            "Bone cancer",
            "Skin cancer (non-melanoma)",
            "Mesothelioma"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Prostate cancer",
                "Melanoma",
                "Lymphoma",
                "Ovarian cancer",
                "Pancreatic cancer",
                "Kidney cancer",
                "Thyroid cancer",
                "Bladder cancer",
                "Esophageal cancer",
                "Cervical cancer",
                "Oral cancer",
                "Testicular cancer",
                "Mesothelioma"
            ],
            "mismatches": [
                "Lung cancer (LC)",
                "Colorectal cancer (CRC)",
                "Leukemia",
                "Brain cancer",
                "Liver cancer",
                "Stomach cancer",
                "Uterine cancer",
                "Sarcoma",
                "Bone cancer",
                "Skin cancer (non-melanoma)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Mesothelioma",
                "Mucinous carcinoma of the breast",
                "Myxosarcoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Cancer (Ca)",
            "Leukemia (Leu)",
            "Lymphoma (Lym)",
            "Sarcoma (Sar)",
            "Carcinoma (Car)",
            "Melanoma (Mel)",
            "Glioma (Gli)",
            "Myeloma (Mye)",
            "Neoplasm (Neo)",
            "Tumor (Tum)",
            "Metastasis (Met)",
            "Malignancy (Mal)",
            "Blastoma (Bla)",
            "Adenocarcinoma (Ade)",
            "Mesothelioma (Mes)",
            "Retinoblastoma (Ret)",
            "Neuroblastoma (Neu)",
            "Teratoma (Ter)",
            "Seminoma (Sem)",
            "Choriocarcinoma (Cho)",
            "Fibrosarcoma (Fib)",
            "Liposarcoma (Lip)",
            "Rhabdomyosarcoma (Rha)",
            "Osteosarcoma (Ost)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Blastoma",
                "Glioma",
                "Melanoma",
                "Neuroblastoma",
                "Retinoblastoma",
                "Teratoma",
                "Seminoma",
                "Choriocarcinoma",
                "Fibrosarcoma",
                "Liposarcoma",
                "Rhabdomyosarcoma",
                "Osteosarcoma",
                "Mesothelioma"
            ],
            "mismatches": [
                "Cancer (Ca)",
                "Leukemia (Leu)",
                "Lymphoma (Lym)",
                "Sarcoma (Sar)",
                "Carcinoma (Car)",
                "Myeloma (Mye)",
                "Neoplasm (Neo)",
                "Tumor (Tum)",
                "Metastasis (Met)",
                "Malignancy (Mal)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Adenocarcinoma of the lung",
                "Angiosarcoma",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Blastoma",
                "Breast cancer",
                "Carcinoid tumor, gastrointestinal",
                "Chondrosarcoma",
                "Choriocarcinoma",
                "Chronic myelogenous leukemia",
                "Embryonal carcinoma",
                "Ewing's sarcoma",
                "Fibrosarcoma",
                "Gestational trophoblastic tumor",
                "Glioblastoma",
                "Glioma",
                "Hemangioblastoma",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Large-cell lung carcinoma",
                "Leiomyosarcoma",
                "Liposarcoma",
                "Lymphoma",
                "Melanoma",
                "Mesothelioma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Neuroblastoma",
                "Neurofibroma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Pleuropulmonary blastoma",
                "Prostate cancer",
                "Renal cell carcinoma",
                "Retinoblastoma",
                "Rhabdomyosarcoma",
                "Seminoma",
                "Serous tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell carcinoma",
                "Teratoma",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 13,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Leukemia (LKM)",
            "Melanoma (MLN)",
            "Glioblastoma (GBM)",
            "Breast Cancer (BRC)",
            "Lung Cancer (LNC)",
            "Prostate Cancer (PRC)",
            "Colorectal Cancer (CRC)",
            "Pancreatic Cancer (PNC)",
            "Ovarian Cancer (OVC)",
            "Liver Cancer (LVC)",
            "Lymphoma (LYM)",
            "Sarcoma (SRC)",
            "Multiple Myeloma (MM)",
            "Thyroid Cancer (THC)",
            "Kidney Cancer (KC)",
            "Bladder Cancer (BC)",
            "Esophageal Cancer (EC)",
            "Stomach Cancer (SC)",
            "Head and Neck Cancer (HNC)",
            "Brain Cancer (BRC)",
            "Bone Cancer (BC)",
            "Skin Cancer (SC)",
            "Cervical Cancer (CC)",
            "Testicular Cancer (TC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Melanoma",
                "Glioblastoma",
                "Breast Cancer",
                "Prostate Cancer",
                "Colorectal Cancer",
                "Pancreatic Cancer",
                "Ovarian Cancer",
                "Lymphoma",
                "Multiple Myeloma",
                "Thyroid Cancer",
                "Kidney Cancer",
                "Bladder Cancer",
                "Esophageal Cancer",
                "Head and Neck Cancer",
                "Cervical Cancer",
                "Testicular Cancer"
            ],
            "mismatches": [
                "Leukemia (LKM)",
                "Lung Cancer (LNC)",
                "Liver Cancer (LVC)",
                "Sarcoma (SRC)",
                "Stomach Cancer (SC)",
                "Brain Cancer (BRC)",
                "Bone Cancer (BC)",
                "Skin Cancer (SC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Gastrointestinal stromal tumor (GIST)",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Merkel cell carcinoma",
                "Mucinous carcinoma of the breast",
                "Multiple myeloma/plasma cell neoplasm",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Cancer (Various types)",
            "Lymphoma",
            "Leukemia",
            "Melanoma",
            "Brain Tumor",
            "Breast Cancer",
            "Colorectal Cancer",
            "Pancreatic Cancer",
            "Prostate Cancer",
            "Lung Cancer",
            "Skin Cancer",
            "Ovarian Cancer",
            "Cervical Cancer",
            "Uterine Cancer",
            "Stomach Cancer",
            "Esophageal Cancer",
            "Liver Cancer",
            "Kidney Cancer",
            "Bladder Cancer",
            "Osteosarcoma",
            "Ewing's Sarcoma",
            "Rhabdomyosarcoma",
            "Glioblastoma",
            "Medulloblastoma",
            "Retinoblastoma"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Lymphoma",
                "Melanoma",
                "Breast Cancer",
                "Colorectal Cancer",
                "Pancreatic Cancer",
                "Prostate Cancer",
                "Osteosarcoma",
                "Ewing's Sarcoma",
                "Rhabdomyosarcoma",
                "Glioblastoma",
                "Medulloblastoma",
                "Esophageal Cancer",
                "Kidney Cancer",
                "Bladder Cancer",
                "Cervical Cancer"
            ],
            "mismatches": [
                "Cancer (Various types)",
                "Leukemia",
                "Brain Tumor",
                "Lung Cancer",
                "Skin Cancer",
                "Ovarian Cancer",
                "Uterine Cancer",
                "Stomach Cancer",
                "Liver Cancer",
                "Retinoblastoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endometrial cancer",
                "Ependymoma",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Lymphoma",
                "Medulloblastoma",
                "Melanoma",
                "Myxosarcoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Primitive neuroectodermal tumor",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Rhabdomyosarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 15,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": "[\n\"Adenocarcinoma (ADC)\",\n\"Basal cell carcinoma (BCC)\",\n\"Squamous cell carcinoma (SCC)\",\n\"Melanoma\",\n\"Leukemia\",\n\"Lymphoma\",\n\"Multiple myeloma\",\n\"Breast cancer\",\n\"Colorectal cancer\",\n\"Lung cancer\",\n\"Prostate cancer\",\n\"Ovarian cancer\",\n\"Pancreatic cancer\",\n\"Brain cancer\",\n\"Bladder cancer\",\n\"Kidney cancer\",\n\"Thyroid cancer\",\n\"Hepatocellular carcinoma (HCC)\",\n\"Glioblastoma\",\n\"Rhabdomyosarcoma\",\n\"Osteosarcoma\",\n\"Ewing's sarcoma\",\n\"Chondrosarcoma\",\n\"Liposarcoma\"",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents-from-selection-criteria",
        "responder": "amazon.titan-text-express-v1",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Acute myeloid leukemia",
            "Acute lymphocytic leukemia",
            "Burkitt lymphoma",
            "Chronic myeloid leukemia",
            "Chronic lymphocytic leukemia",
            "Hairy cell leukemia",
            "Hodgkin's disease",
            "Leukemia",
            "Lymphoma",
            "Multiple myeloma",
            "Myelodysplastic syndrome",
            "Myeloproliferative neoplasm",
            "Myeloid leukemia",
            "Neuroblastoma",
            "Prostate cancer",
            "Testicular cancer",
            "Thyroid cancer",
            "Multiple myeloma",
            "Myelodysplastic syndrome",
            "Myeloproliferative neoplasm",
            "Myeloid leukemia",
            "Neuroblastoma",
            "Prostate cancer",
            "Testicular cancer",
            "Thyroid cancer"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acute myeloid leukemia",
                "Chronic lymphocytic leukemia",
                "Hairy cell leukemia",
                "Lymphoma",
                "Neuroblastoma",
                "Prostate cancer",
                "Testicular cancer",
                "Thyroid cancer"
            ],
            "mismatches": [
                "Acute lymphocytic leukemia",
                "Burkitt lymphoma",
                "Chronic myeloid leukemia",
                "Hodgkin's disease",
                "Leukemia",
                "Multiple myeloma",
                "Myelodysplastic syndrome",
                "Myeloproliferative neoplasm",
                "Myeloid leukemia"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "B-cell prolymphocytic leukemia",
                "Burkitt's lymphoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Diffuse large B-cell lymphoma",
                "Follicular lymphoma",
                "Hairy cell leukemia",
                "Head and neck cancer",
                "Hodgkin's lymphoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Medulloblastoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myelodysplastic syndromes",
                "Neuroblastoma",
                "Neurofibroma",
                "Non-Hodgkin lymphoma",
                "Penile cancer",
                "Prostate cancer",
                "Sertoli cell tumour",
                "Testicular cancer",
                "Thyroid cancer"
            ],
            "TP": 8,
            "FP": 9,
            "FN": 0
        }
    }
]