[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast cancer (BC)",
            "Lung cancer (LC)",
            "Prostate cancer (PCa)",
            "Colorectal cancer (CRC)",
            "Leukemia (Leu)",
            "Lymphoma (LY)",
            "Melanoma (Mel)",
            "Ovarian cancer (OC)",
            "Pancreatic cancer (PC)",
            "Thyroid cancer (TC)",
            "Bladder cancer (BCa)",
            "Kidney cancer (KC)",
            "Brain cancer (BCa)",
            "Esophageal cancer (EC)",
            "Stomach cancer (GC)",
            "Liver cancer (HCC)",
            "Glioblastoma (GBM)",
            "Mesothelioma (Mes)",
            "Bone cancer (BC)",
            "Head and neck cancer (HNC)",
            "Testicular cancer (TCa)",
            "Cervical cancer (CC)",
            "Uterine cancer (UC)",
            "Soft tissue sarcoma (STS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Prostate cancer",
                "Colorectal Cancer",
                "Lymphoma",
                "Melanoma",
                "Ovarian cancer",
                "Pancreatic Cancer",
                "Thyroid cancer",
                "Bladder cancer",
                "Kidney cancer",
                "Esophageal cancer",
                "Glioblastoma",
                "Mesothelioma",
                "Head and neck cancer",
                "Testicular cancer",
                "Cervical cancer",
                "Soft Tissue Sarcoma"
            ],
            "mismatches": [
                "Lung cancer (LC)",
                "Leukemia (Leu)",
                "Brain cancer (BCa)",
                "Stomach cancer (GC)",
                "Liver cancer (HCC)",
                "Bone cancer (BC)",
                "Uterine cancer (UC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cerebral astrocytoma",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Mesothelioma",
                "Mucinous carcinoma of the breast",
                "Myxosarcoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BC)",
            "Lung Cancer (LC)",
            "Prostate Cancer (PC)",
            "Colorectal Cancer (CRC)",
            "Melanoma (Skin Cancer)",
            "Leukemia",
            "Lymphoma",
            "Ovarian Cancer",
            "Pancreatic Cancer",
            "Bladder Cancer",
            "Thyroid Cancer",
            "Cervical Cancer",
            "Endometrial Cancer",
            "Glioblastoma (GBM)",
            "Renal Cell Carcinoma (RCC)",
            "Esophageal Cancer",
            "Head and Neck Cancer",
            "Stomach Cancer (Gastric Cancer)",
            "Liver Cancer (Hepatocellular Carcinoma, HCC)",
            "Multiple Myeloma",
            "Non-Hodgkin Lymphoma",
            "Hodgkin Lymphoma",
            "Cholangiocarcinoma",
            "Mesothelioma"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast Cancer",
                "Prostate Cancer",
                "Colorectal Cancer",
                "Melanoma",
                "Lymphoma",
                "Ovarian Cancer",
                "Pancreatic Cancer",
                "Bladder Cancer",
                "Thyroid Cancer",
                "Cervical Cancer",
                "Endometrial Cancer",
                "Glioblastoma",
                "Renal Cell Carcinoma",
                "Esophageal Cancer",
                "Head and Neck Cancer",
                "Cholangiocarcinoma",
                "Mesothelioma"
            ],
            "mismatches": [
                "Lung Cancer",
                "Leukemia",
                "Stomach Cancer",
                "Liver Cancer",
                "Multiple Myeloma",
                "Non-Hodgkin Lymphoma",
                "Hodgkin Lymphoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Extrahepatic bile duct cancer",
                "Follicular lymphoma",
                "Gallbladder cancer",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Lymphoma",
                "Melanoma",
                "Mesothelioma",
                "Mucinous carcinoma of the breast",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Squamous cell skin cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Acute Lymphoblastic Leukemia (ALL)",
            "Acute Myeloid Leukemia (AML)",
            "Adenocarcinoma",
            "Bladder Cancer",
            "Brain Tumor",
            "Breast Cancer",
            "Colorectal Cancer",
            "Endometrial Cancer",
            "Esophageal Cancer",
            "Gastric Cancer",
            "Glioblastoma",
            "Head and Neck Cancer",
            "Hepatocellular Carcinoma (HCC)",
            "Kidney Cancer",
            "Lung Cancer",
            "Lymphoma",
            "Melanoma",
            "Multiple Myeloma",
            "Ovarian Cancer",
            "Pancreatic Cancer",
            "Prostate Cancer",
            "Sarcoma",
            "Thyroid Cancer",
            "Uterine Cancer"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acute Lymphoblastic Leukemia (ALL)",
                "Acute Myeloid Leukemia (AML)",
                "Bladder Cancer",
                "Breast Cancer",
                "Colorectal Cancer",
                "Endometrial Cancer",
                "Esophageal Cancer",
                "Gastric Cancer",
                "Glioblastoma",
                "Head and Neck Cancer",
                "Kidney Cancer",
                "Lymphoma",
                "Melanoma",
                "Multiple Myeloma",
                "Ovarian Cancer",
                "Pancreatic Cancer",
                "Prostate Cancer",
                "Thyroid Cancer"
            ],
            "mismatches": [
                "Adenocarcinoma",
                "Brain Tumor",
                "Hepatocellular Carcinoma (HCC)",
                "Lung Cancer",
                "Sarcoma",
                "Uterine Cancer"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Adenocarcinoma of the lung",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Lymphoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Precursor B lymphoblastic leukemia",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Acute Lymphoblastic Leukemia (ALL)",
            "Breast Cancer (BCa)",
            "Lung Cancer (LCa)",
            "Colorectal Cancer (CRC)",
            "Prostate Cancer (PCa)",
            "Melanoma (Mel)",
            "Glioma (Gli)",
            "Osteosarcoma (OS)",
            "Ewing Sarcoma (ES)",
            "Neuroblastoma (NB)",
            "Retinoblastoma (Rb)",
            "Chronic Myeloid Leukemia (CML)",
            "Chronic Lymphocytic Leukemia (CLL)",
            "Multiple Myeloma (MM)",
            "Pancreatic Cancer (PaCa)",
            "Ovarian Cancer (OC)",
            "Thyroid Cancer (ThyCa)",
            "Bladder Cancer (BC)",
            "Kidney Cancer (RCC)",
            "Esophageal Cancer (ECa)",
            "Gastric Cancer (GC)",
            "Head and Neck Squamous Cell Carcinoma (HNSCC)",
            "Cervical Cancer (CxCa)",
            "Endometrial Cancer (EC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acute lymphoblastic leukemia",
                "Breast cancer",
                "Colorectal Cancer",
                "Prostate cancer",
                "Melanoma",
                "Glioma",
                "Osteosarcoma",
                "Ewing's sarcoma",
                "Neuroblastoma",
                "Retinoblastoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Multiple myeloma/plasma cell neoplasm",
                "Pancreatic Cancer",
                "Ovarian cancer",
                "Thyroid cancer",
                "Bladder cancer",
                "Kidney cancer",
                "Esophageal cancer",
                "Gastric (stomach) cancer",
                "Cervical cancer",
                "Endometrial cancer"
            ],
            "mismatches": [
                "Lung Cancer (LCa)",
                "Head and Neck Squamous Cell Carcinoma (HNSCC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hemangioblastoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Laryngeal cancer",
                "Lymphoma",
                "Malignant fibrous histiocytoma of bone/osteosarcoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myelodysplastic syndromes",
                "Myxosarcoma",
                "Neuroblastoma",
                "Neurofibroma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Pharyngeal cancer",
                "Pineoblastoma",
                "Precursor B lymphoblastic leukemia",
                "Primitive neuroectodermal tumor",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Retinoblastoma",
                "Squamous-cell carcinoma of the lung",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Uveal melanoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 22,
            "FP": 2,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BC)",
            "Lung Cancer (LC)",
            "Prostate Cancer (PC)",
            "Colorectal Cancer (CRC)",
            "Melanoma",
            "Leukemia",
            "Lymphoma",
            "Pancreatic Cancer (PCa)",
            "Ovarian Cancer (OC)",
            "Cervical Cancer (CC)",
            "Bladder Cancer (BCa)",
            "Esophageal Cancer (EC)",
            "Stomach Cancer (Gastric Cancer, GC)",
            "Liver Cancer (Hepatocellular Carcinoma, HCC)",
            "Kidney Cancer (Renal Cell Carcinoma, RCC)",
            "Thyroid Cancer (TC)",
            "Endometrial Cancer (Uterine Cancer, UC)",
            "Bone Cancer",
            "Brain Cancer (Glioma)",
            "Testicular Cancer (TCa)",
            "Head and Neck Cancer (HNC)",
            "Multiple Myeloma (MM)",
            "Skin Cancer (Non-Melanoma)",
            "Sarcoma"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Prostate cancer",
                "Colorectal Cancer",
                "Melanoma",
                "Pancreatic Cancer",
                "Ovarian cancer",
                "Cervical cancer",
                "Bladder cancer",
                "Esophageal cancer",
                "Gastric (stomach) cancer",
                "Hepatocellular cancer",
                "Kidney cancer",
                "Thyroid cancer",
                "Endometrial cancer",
                "Glioma",
                "Testicular cancer",
                "Head and neck cancer",
                "Multiple myeloma/plasma cell neoplasm"
            ],
            "mismatches": [
                "Lung Cancer (LC)",
                "Leukemia",
                "Lymphoma",
                "Bone Cancer",
                "Brain Cancer (Glioma)",
                "Skin Cancer (Non-Melanoma)",
                "Sarcoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Carcinoid tumor, gastrointestinal",
                "Cervical cancer",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Mucinous carcinoma of the breast",
                "Multiple myeloma/plasma cell neoplasm",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 18,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BRCA)",
            "Lung Cancer (NSCLC)",
            "Colorectal Cancer (CRC)",
            "Prostate Cancer (PCa)",
            "Leukemia (ALL)",
            "Lymphoma (NHL)",
            "Melanoma (MM)",
            "Pancreatic Cancer (PC)",
            "Ovarian Cancer (OC)",
            "Cervical Cancer (CC)",
            "Hepatocellular Carcinoma (HCC)",
            "Esophageal Cancer (EC)",
            "Bladder Cancer (BC)",
            "Kidney Cancer (RCC)",
            "Thyroid Cancer (TC)",
            "Head and Neck Cancer (HNC)",
            "Stomach Cancer (GC)",
            "Testicular Cancer (TC)",
            "Multiple Myeloma (MM)",
            "Cholangiocarcinoma (CC)",
            "Neuroblastoma (NB)",
            "Gastric Lymphoma (GL)",
            "Soft Tissue Sarcoma (STS)",
            "Retinoblastoma (RB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast Cancer",
                "Colorectal Cancer",
                "Prostate Cancer",
                "Melanoma",
                "Pancreatic Cancer",
                "Ovarian Cancer",
                "Cervical Cancer",
                "Esophageal Cancer",
                "Bladder Cancer",
                "Kidney Cancer",
                "Thyroid Cancer",
                "Head and Neck Cancer",
                "Testicular Cancer",
                "Cholangiocarcinoma",
                "Neuroblastoma",
                "Soft Tissue Sarcoma",
                "Retinoblastoma"
            ],
            "mismatches": [
                "Lung Cancer (NSCLC)",
                "Leukemia (ALL)",
                "Lymphoma (NHL)",
                "Hepatocellular Carcinoma (HCC)",
                "Stomach Cancer (GC)",
                "Multiple Myeloma (MM)",
                "Gastric Lymphoma (GL)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Extrahepatic bile duct cancer",
                "Fibrosarcoma",
                "Gallbladder cancer",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Head and neck cancer",
                "Hemangioblastoma",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Merkel cell carcinoma",
                "Mucosa-associated lymphoid tissue lymphoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Neuroblastoma",
                "Neurofibroma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Pineoblastoma",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Retinoblastoma",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 17,
            "FP": 7,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BC)",
            "Lung Cancer (LC)",
            "Colorectal Cancer (CRC)",
            "Prostate Cancer (PC)",
            "Skin Cancer (SC)",
            "Bladder Cancer (BlC)",
            "Kidney Cancer (KC)",
            "Pancreatic Cancer (PaC)",
            "Ovarian Cancer (OC)",
            "Leukemia (LK)",
            "Liver Cancer (LiC)",
            "Stomach Cancer (StC)",
            "Esophageal Cancer (EC)",
            "Cervical Cancer (CC)",
            "Testicular Cancer (TC)",
            "Thyroid Cancer (ThC)",
            "Lymphoma (LM)",
            "Head and Neck Cancer (HNC)",
            "Bone Cancer (BoC)",
            "Gallbladder Cancer (GBC)",
            "Uterine Cancer (UC)",
            "Oral Cancer (OrC)",
            "Brain Cancer (BrC)",
            "Adrenal Cancer (AC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Colorectal Cancer",
                "Prostate cancer",
                "Bladder cancer",
                "Kidney cancer",
                "Pancreatic Cancer",
                "Ovarian cancer",
                "Esophageal cancer",
                "Cervical cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Lymphoma",
                "Head and neck cancer",
                "Gallbladder cancer",
                "Oral cancer"
            ],
            "mismatches": [
                "Lung Cancer (LC)",
                "Skin Cancer (SC)",
                "Leukemia (LK)",
                "Liver Cancer (LiC)",
                "Stomach Cancer (StC)",
                "Bone Cancer (BoC)",
                "Uterine Cancer (UC)",
                "Brain Cancer (BrC)",
                "Adrenal Cancer (AC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Adrenocortical adenoma",
                "Adrenocortical carcinoma",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic lymphocytic leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Extrahepatic bile duct cancer",
                "Gallbladder cancer",
                "Gastric (stomach) cancer",
                "Gastrointestinal stromal tumor (GIST)",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Malignant fibrous histiocytoma of bone/osteosarcoma",
                "Melanoma",
                "Mucinous carcinoma of the breast",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Pheochromocytoma",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast cancer",
            "Lung cancer",
            "Prostate cancer",
            "Colon cancer",
            "Pancreatic cancer",
            "Leukemia",
            "Lymphoma",
            "Melanoma",
            "Ovarian cancer",
            "Bladder cancer",
            "Liver cancer",
            "Kidney cancer",
            "Brain cancer",
            "Thyroid cancer",
            "Gastric cancer",
            "Esophageal cancer",
            "Cervical cancer",
            "Testicular cancer",
            "Bone cancer",
            "Skin cancer",
            "Hodgkin's lymphoma",
            "Non-Hodgkin's lymphoma",
            "Multiple myeloma",
            "Sarcoma"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Prostate cancer",
                "Colon cancer",
                "Pancreatic cancer",
                "Melanoma",
                "Ovarian cancer",
                "Bladder cancer",
                "Kidney cancer",
                "Thyroid cancer",
                "Gastric cancer",
                "Esophageal cancer",
                "Cervical cancer",
                "Testicular cancer",
                "Hodgkin's lymphoma",
                "Non-Hodgkin's lymphoma"
            ],
            "mismatches": [
                "Lung cancer",
                "Leukemia",
                "Lymphoma",
                "Liver cancer",
                "Brain cancer",
                "Bone cancer",
                "Skin cancer",
                "Multiple myeloma",
                "Sarcoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chondrosarcoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 15,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Breast Cancer (BRCA)",
            "Lung Cancer (LC)",
            "Prostate Cancer (PCa)",
            "Colorectal Cancer (CRC)",
            "Pancreatic Ductal Adenocarcinoma (PDAC)",
            "Acute Myeloid Leukemia (AML)",
            "Chronic Lymphocytic Leukemia (CLL)",
            "Melanoma (MM)",
            "Ovarian Cancer (OvC)",
            "Gastric Cancer (GC)",
            "Hepatocellular Carcinoma (HCC)",
            "Esophageal Cancer (EC)",
            "Bladder Cancer (BC)",
            "Thyroid Cancer (TC)",
            "Renal Cell Carcinoma (RCC)",
            "Cervical Cancer (CC)",
            "Multiple Myeloma (MM)",
            "Glioblastoma Multiforme (GBM)",
            "Head and Neck Cancer (HNC)",
            "Sarcoma",
            "Testicular Cancer (TC)",
            "Endometrial Cancer",
            "Hodgkin Lymphoma (HL)",
            "Non-Hodgkin Lymphoma (NHL)",
            "Neuroblastoma (NB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast Cancer (BRCA)",
                "Prostate Cancer (PCa)",
                "Colorectal Cancer (CRC)",
                "Acute Myeloid Leukemia (AML)",
                "Chronic Lymphocytic Leukemia (CLL)",
                "Melanoma (MM)",
                "Ovarian Cancer (OvC)",
                "Gastric Cancer (GC)",
                "Esophageal Cancer (EC)",
                "Bladder Cancer (BC)",
                "Thyroid Cancer (TC)",
                "Renal Cell Carcinoma (RCC)",
                "Cervical Cancer (CC)",
                "Multiple Myeloma (MM)",
                "Head and Neck Cancer (HNC)",
                "Testicular Cancer (TC)",
                "Endometrial Cancer",
                "Hodgkin Lymphoma (HL)",
                "Non-Hodgkin Lymphoma (NHL)"
            ],
            "mismatches": [
                "Lung Cancer (LC)",
                "Pancreatic Ductal Adenocarcinoma (PDAC)",
                "Glioblastoma Multiforme (GBM)",
                "Sarcoma",
                "Neuroblastoma (NB)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Follicular lymphoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Merkel cell carcinoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 19,
            "FP": 5,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Lung cancer (LUAD)",
            "Breast cancer (BRCA)",
            "Colorectal cancer (CRC)",
            "Prostate cancer (PRAD)",
            "Leukemia (AML)",
            "Melanoma (MEL)",
            "Pancreatic cancer (PAAD)",
            "Ovarian cancer (OV)",
            "Glioblastoma (GBM)",
            "Liver cancer (HCC)",
            "Thyroid cancer (THCA)",
            "Bladder cancer (BLCA)",
            "Kidney cancer (KIRC)",
            "Esophageal cancer (ESCA)",
            "Lymphoma (DLBCL)",
            "Cervical cancer (CESC)",
            "Stomach cancer (STAD)",
            "Head and neck cancer (HNSC)",
            "Multiple myeloma (MM)",
            "Sarcoma (SARC)",
            "Mesothelioma (MESO)",
            "Testicular cancer (TGCT)",
            "Neuroblastoma (NBL)",
            "Retinoblastoma (RB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Colorectal Cancer",
                "Prostate cancer",
                "Acute myeloid leukemia",
                "Melanoma",
                "Pancreatic Cancer",
                "Ovarian cancer",
                "Glioblastoma",
                "Thyroid cancer",
                "Bladder cancer",
                "Kidney cancer",
                "Esophageal cancer",
                "Diffuse large B-cell lymphoma",
                "Cervical cancer",
                "Gastric (stomach) cancer",
                "Head and neck cancer",
                "Multiple myeloma/plasma cell neoplasm",
                "Mesothelioma",
                "Testicular cancer",
                "Neuroblastoma",
                "Retinoblastoma"
            ],
            "mismatches": [
                "Lung cancer (LUAD)",
                "Liver cancer (HCC)",
                "Sarcoma (SARC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Appendix cancer",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal stromal tumor (GIST)",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hemangioblastoma",
                "Hepatocellular cancer",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Mesothelioma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Neuroblastoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Parathyroid cancer",
                "Penile cancer",
                "Pharyngeal cancer",
                "Pineoblastoma",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Retinoblastoma",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell carcinoma",
                "Squamous-cell carcinoma of the lung",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 21,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Lung cancer (NSCLC/SCLC)",
            "Breast cancer (IDC/ILC)",
            "Prostate cancer",
            "Colorectal cancer (CRC)",
            "Skin cancer (Melanoma/BCC/SCC)",
            "Leukemia (ALL/AML/CLL/CML)",
            "Lymphoma (HL/NHL)",
            "Brain cancer (Glioblastoma)",
            "Ovarian cancer",
            "Pancreatic cancer",
            "Liver cancer (HCC)",
            "Stomach cancer",
            "Esophageal cancer",
            "Kidney cancer (RCC)",
            "Bladder cancer",
            "Thyroid cancer",
            "Endometrial cancer",
            "Cervical cancer",
            "Oral cancer",
            "Bone cancer (Osteosarcoma/Ewing's sarcoma)",
            "Testicular cancer",
            "Mesothelioma",
            "Neuroendocrine tumors (NETs)",
            "Head and neck cancer (HNSCC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Prostate cancer",
                "Ovarian cancer",
                "Pancreatic cancer",
                "Esophageal cancer",
                "Bladder cancer",
                "Thyroid cancer",
                "Endometrial cancer",
                "Cervical cancer",
                "Oral cancer",
                "Testicular cancer",
                "Mesothelioma",
                "Head and neck cancer"
            ],
            "mismatches": [
                "Lung cancer (NSCLC/SCLC)",
                "Breast cancer (IDC/ILC)",
                "Colorectal cancer (CRC)",
                "Skin cancer (Melanoma/BCC/SCC)",
                "Leukemia (ALL/AML/CLL/CML)",
                "Lymphoma (HL/NHL)",
                "Brain cancer (Glioblastoma)",
                "Liver cancer (HCC)",
                "Stomach cancer",
                "Kidney cancer (RCC)",
                "Bone cancer (Osteosarcoma/Ewing's sarcoma)",
                "Neuroendocrine tumors (NETs)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Endometrial cancer",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Malignant fibrous histiocytoma of bone/osteosarcoma",
                "Melanoma",
                "Mesothelioma",
                "Multiple endocrine neoplasia syndrome",
                "Myxosarcoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Sertoli cell tumour",
                "Somatostatinoma",
                "Squamous cell skin cancer",
                "Squamous-cell carcinoma of the lung",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Lung cancer (NSCLC)",
            "Breast cancer (BC)",
            "Colorectal cancer (CRC)",
            "Prostate cancer (PCa)",
            "Melanoma (MM)",
            "Leukemia (AML, ALL, CLL, CML)",
            "Lymphoma (NHL, HL)",
            "Pancreatic cancer (PDAC)",
            "Ovarian cancer (OC)",
            "Liver cancer (HCC)",
            "Brain cancer (GBM)",
            "Kidney cancer (RCC)",
            "Bladder cancer (BC)",
            "Thyroid cancer (TC)",
            "Esophageal cancer (EC)",
            "Stomach cancer (GC)",
            "Cervical cancer (CC)",
            "Testicular cancer (TC)",
            "Head and neck cancer (HNSCC)",
            "Multiple myeloma (MM)",
            "Sarcoma (STS)",
            "Mesothelioma (MPM)",
            "Neuroendocrine tumors (NET)",
            "Skin cancer (NMSC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Breast cancer",
                "Colorectal Cancer",
                "Prostate cancer",
                "Melanoma",
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Chronic myelogenous leukemia",
                "Lymphoma",
                "Pancreatic Cancer",
                "Ovarian cancer",
                "Kidney cancer",
                "Bladder cancer",
                "Thyroid cancer",
                "Esophageal cancer",
                "Gastric (stomach) cancer",
                "Cervical cancer",
                "Testicular cancer",
                "Head and neck cancer",
                "Multiple myeloma/plasma cell neoplasm",
                "Mesothelioma",
                "Non-small cell lung cancer"
            ],
            "mismatches": [
                "Lung cancer (NSCLC)",
                "Liver cancer (HCC)",
                "Brain cancer (GBM)",
                "Sarcoma (STS)",
                "Neuroendocrine tumors (NET)",
                "Skin cancer (NMSC)"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Carcinoid",
                "Carcinoid tumor, gastrointestinal",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Ewing's sarcoma",
                "Gastric (stomach) cancer",
                "Gastrointestinal carcinoid tumor",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Leydig cell tumour",
                "Lymphoma",
                "Melanoma",
                "Merkel cell carcinoma",
                "Mesothelioma",
                "Mucinous carcinoma of the breast",
                "Multiple myeloma/plasma cell neoplasm",
                "Myxosarcoma",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Renal pelvis and ureter, transitional cell cancer",
                "Rhabdomyosarcoma",
                "Sertoli cell tumour",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uveal melanoma",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 21,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Acute Lymphoblastic Leukemia (ALL)",
            "Acute Myeloid Leukemia (AML)",
            "Bladder Cancer",
            "Breast Cancer",
            "Cervical Cancer",
            "Chronic Lymphocytic Leukemia (CLL)",
            "Chronic Myeloid Leukemia (CML)",
            "Colon Cancer",
            "Endometrial Cancer",
            "Esophageal Cancer",
            "Glioblastoma",
            "Head and Neck Cancer",
            "Kidney Cancer",
            "Liver Cancer",
            "Lung Cancer",
            "Lymphoma",
            "Melanoma",
            "Multiple Myeloma",
            "Non-Hodgkin Lymphoma",
            "Ovarian Cancer",
            "Pancreatic Cancer",
            "Prostate Cancer",
            "Sarcoma",
            "Thyroid Cancer"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Chronic lymphocytic leukemia",
                "Colon cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Glioblastoma",
                "Head and neck cancer",
                "Kidney cancer",
                "Lymphoma",
                "Melanoma",
                "Non-Hodgkin lymphoma",
                "Ovarian cancer",
                "Pancreatic Cancer",
                "Prostate cancer",
                "Thyroid cancer"
            ],
            "mismatches": [
                "Liver Cancer",
                "Lung Cancer",
                "Multiple Myeloma",
                "Sarcoma"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Endometrial cancer",
                "Esophageal cancer",
                "Fibrosarcoma",
                "Glioblastoma",
                "Glioma",
                "Head and neck cancer",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Lymphoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myelodysplastic syndromes",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Oral cancer",
                "Oropharyngeal cancer",
                "Osteosarcoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pancreatic cancer, islet cell",
                "Penile cancer",
                "Pharyngeal cancer",
                "Precursor B lymphoblastic leukemia",
                "Prostate cancer",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Soft Tissue Sarcoma",
                "Squamous cell skin cancer",
                "Testicular cancer",
                "Thyroid cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 19,
            "FP": 4,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": "[\n    \"Cancer (disease)\" (CA),\n    \"Cancer cells\",\n    \"Carcinoma\",\n    \"Lymphoma\",\n    \"Leukemia\",\n    \"Melanoma\",\n    \"Breast cancer\",\n    \"Colorectal cancer\",\n    \"Lung cancer\",\n    \"Pancreatic cancer\",\n    \"Prostate cancer\",\n    \"Brain cancer\",\n    \"Ovarian cancer\",\n    \"Cervical cancer\",\n    \"Esophageal cancer\",\n    \"Stomach cancer\",\n    \"Liver cancer\",\n    \"Kidney cancer\",\n    \"Bladder cancer\",\n    \"Non-Hodgkin lymphoma\",\n    \"Hodgkin lymphoma\",\n    \"Multiple myeloma\",\n    \"Adenocarcinoma\",\n    \"Squamous cell carcinoma\",\n    \"Basal cell carcinoma\",\n    \"Sarcoma\",\n    \"Glioblastoma\",\n    \"Medulloblastoma\"\n]",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            "Acute Lymphoblastic Leukemia (ALL)",
            "Adenocarcinoma",
            "Basal Cell Carcinoma (BCC)",
            "Bladder Cancer",
            "Brain Tumor",
            "Breast Cancer",
            "Cervical Cancer",
            "Chronic Lymphocytic Leukemia (CLL)",
            "Chronic Myeloid Leukemia (CML)",
            "Colorectal Cancer",
            "Endometrial Cancer",
            "Esophageal Cancer",
            "Gastric Cancer",
            "Glioblastoma",
            "Hodgkin Lymphoma",
            "Kidney Cancer",
            "Leukemia",
            "Liver Cancer",
            "Lung Cancer",
            "Lymphoma",
            "Melanoma",
            "Multiple Myeloma",
            "Non-Hodgkin Lymphoma",
            "Ovarian Cancer",
            "Pancreatic Cancer",
            "Prostate Cancer"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acute Lymphoblastic Leukemia (ALL)",
                "Basal Cell Carcinoma (BCC)",
                "Bladder Cancer",
                "Breast Cancer",
                "Cervical Cancer",
                "Chronic Lymphocytic Leukemia (CLL)",
                "Chronic Myeloid Leukemia (CML)",
                "Colorectal Cancer",
                "Endometrial Cancer",
                "Esophageal Cancer",
                "Gastric Cancer",
                "Glioblastoma",
                "Hodgkin Lymphoma",
                "Kidney Cancer",
                "Melanoma",
                "Multiple Myeloma",
                "Non-Hodgkin Lymphoma",
                "Ovarian Cancer",
                "Pancreatic Cancer"
            ],
            "mismatches": [
                "Adenocarcinoma",
                "Brain Tumor",
                "Leukemia",
                "Liver Cancer",
                "Lung Cancer",
                "Prostate Cancer"
            ],
            "true_referents": [
                "Acute lymphoblastic leukemia",
                "Acute myeloid leukemia",
                "Adenocarcinoma of the lung",
                "Astrocytoma",
                "Basal cell carcinoma",
                "Bladder cancer",
                "Breast cancer",
                "Cervical cancer",
                "Cholangiocarcinoma",
                "Chronic lymphocytic leukemia",
                "Chronic myelogenous leukemia",
                "Colon cancer",
                "Colorectal Cancer",
                "Diffuse large B-cell lymphoma",
                "Duodenal cancer",
                "Endometrial cancer",
                "Esophageal cancer",
                "Follicular lymphoma",
                "Gastric (stomach) cancer",
                "Glioblastoma",
                "Glioma",
                "Hepatocellular cancer",
                "Hodgkin's lymphoma",
                "Inflammatory breast cancer",
                "Invasive ductal carcinoma",
                "Kidney cancer",
                "Large-cell lung carcinoma",
                "Laryngeal cancer",
                "Leiomyosarcoma",
                "Lymphoma",
                "Melanoma",
                "Multiple myeloma/plasma cell neoplasm",
                "Myelodysplastic syndromes",
                "Non-Hodgkin lymphoma",
                "Non-small cell lung cancer",
                "Non-small cell lung carcinoma",
                "Ovarian cancer",
                "Ovarian epithelial cancer (surface epithelial-stromal tumor)",
                "Ovarian germ cell tumor",
                "Pancreatic Cancer",
                "Pharyngeal cancer",
                "Precursor B lymphoblastic leukemia",
                "Rectal cancer",
                "Renal cell carcinoma",
                "Skin adnexal tumors (e.g. sebaceous carcinoma)",
                "Squamous cell skin cancer",
                "Transitional cell cancer (urothelial carcinoma)",
                "Uterine sarcoma",
                "Vaginal cancer",
                "Wilms tumor (nephroblastoma)"
            ],
            "TP": 19,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "cancer",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Abnormal cell growth",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Cancer cells",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Disease characterized by the growth of abnormal cells",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Disease characterized by the growth of abnormal cells",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Abnormal growth of cells",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Disease characterized by the growth of abnormal cells",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Abnormal cell growth",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Disease characterized by the growth of abnormal cells",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            },
            {
                "Referent": "Uncontrolled cell proliferation",
                "Canonical Name": "Neoplasm"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [],
            "mismatches": [
                "Neoplasm"
            ],
            "true_referents": [
                "Angiosarcoma",
                "Astrocytoma",
                "Gastrointestinal stromal tumor (GIST)",
                "Glioblastoma",
                "Islet cell carcinoma (endocrine pancreas)",
                "Meningioma",
                "Myxosarcoma",
                "Osteosarcoma"
            ],
            "TP": 0,
            "FP": 1,
            "FN": 0
        }
    }
]