[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (N)",
            "Mitochondrion (Mito)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus",
            "Lysosome",
            "Peroxisome",
            "Vacuole",
            "Chloroplast",
            "Ribosome",
            "Centrosome",
            "Cytoskeleton",
            "Cytosol",
            "Plastid",
            "Microtubule Organizing Center (MTOC)",
            "Nucleolus",
            "Chromoplast",
            "Leucoplast",
            "Amyloplast",
            "Elaioplast",
            "Phragmoplast",
            "Dictyosome",
            "Lipid Droplet",
            "Plastoglobule",
            "Cup-Shaped Chloroplast",
            "Thylakoid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (N)",
                "Mitochondrion (Mito)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus",
                "Lysosome",
                "Peroxisome",
                "Vacuole",
                "Chloroplast",
                "Ribosome",
                "Centrosome",
                "Cytoskeleton",
                "Cytosol",
                "Plastid",
                "Microtubule Organizing Center (MTOC)",
                "Nucleolus",
                "Chromoplast",
                "Leucoplast",
                "Amyloplast",
                "Elaioplast",
                "Phragmoplast",
                "Dictyosome",
                "Lipid Droplet",
                "Plastoglobule",
                "Cup-Shaped Chloroplast"
            ],
            "mismatches": [
                "Thylakoid"
            ],
            "true_referents": [
                "Amyloplast",
                "Centrosome",
                "Chloroplast",
                "Chromoplast",
                "Cup-Shaped Chloroplast",
                "Cytoskeleton",
                "Cytosol",
                "Dictyosome",
                "Elaioplast",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus",
                "Leucoplast",
                "Lipid Droplet",
                "Lysosome",
                "Microtubule Organizing Center (MTOC)",
                "Mitochondrion (Mito)",
                "Nucleolus",
                "Nucleus (N)",
                "Peroxisome",
                "Phragmoplast",
                "Plastid",
                "Plastoglobule",
                "Ribosome",
                "Vacuole"
            ],
            "TP": 24,
            "FP": 1,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (N)",
            "Mitochondrion (M)",
            "Chloroplast (C)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (L)",
            "Peroxisome (P)",
            "Ribosome (R)",
            "Vacuole (V)",
            "Plasma Membrane (PM)",
            "Centrosome (C)",
            "Cytoskeleton (CS)",
            "Nuclear Pore Complex (NPC)",
            "Smooth Endoplasmic Reticulum (SER)",
            "Rough Endoplasmic Reticulum (RER)",
            "Cristae (C)",
            "Stroma (S)",
            "Thylakoid (T)",
            "Tonoplast (TP)",
            "Peroxisomal Matrix (PM)",
            "Cis-Golgi Network (CGN)",
            "Trans-Golgi Network (TGN)",
            "Proteasome (P)",
            "Autophagosome (A)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (N)",
                "Mitochondrion (M)",
                "Chloroplast (C)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (L)",
                "Peroxisome (P)",
                "Ribosome (R)",
                "Vacuole (V)",
                "Plasma Membrane (PM)",
                "Centrosome (C)",
                "Cytoskeleton (CS)",
                "Nuclear Pore Complex (NPC)",
                "Smooth Endoplasmic Reticulum (SER)",
                "Rough Endoplasmic Reticulum (RER)",
                "Cristae (C)",
                "Stroma (S)",
                "Thylakoid (T)",
                "Tonoplast (TP)",
                "Peroxisomal Matrix (PM)",
                "Cis-Golgi Network (CGN)",
                "Trans-Golgi Network (TGN)",
                "Proteasome (P)",
                "Autophagosome (A)"
            ],
            "mismatches": [],
            "true_referents": [
                "Autophagosome (A)",
                "Centrosome (C)",
                "Chloroplast (C)",
                "Cis-Golgi Network (CGN)",
                "Cristae (C)",
                "Cytoskeleton (CS)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (L)",
                "Mitochondrion (M)",
                "Nuclear Pore Complex (NPC)",
                "Nucleus (N)",
                "Peroxisomal Matrix (PM)",
                "Peroxisome (P)",
                "Plasma Membrane (PM)",
                "Proteasome (P)",
                "Ribosome (R)",
                "Rough Endoplasmic Reticulum (RER)",
                "Smooth Endoplasmic Reticulum (SER)",
                "Stroma (S)",
                "Thylakoid (T)",
                "Tonoplast (TP)",
                "Trans-Golgi Network (TGN)",
                "Vacuole (V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (Nu)",
            "Mitochondrion (Mt)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Ribosome (R)",
            "Lysosome (Ly)",
            "Peroxisome (P)",
            "Vacuole (V)",
            "Chloroplast (Cp)",
            "Endosome (E)",
            "Centrosome",
            "Cytoskeleton",
            "Microtubule",
            "Microfilament",
            "Intermediate Filament",
            "Nucleolus (Nuol)",
            "Plastid",
            "Cytosol",
            "Cytoplasm",
            "Cell Wall",
            "Plasma Membrane",
            "Microbody",
            "Cytoplasmic Vesicle",
            "Glyoxysome"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (Nu)",
                "Mitochondrion (Mt)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Ribosome (R)",
                "Lysosome (Ly)",
                "Peroxisome (P)",
                "Vacuole (V)",
                "Chloroplast (Cp)",
                "Endosome (E)",
                "Centrosome",
                "Cytoskeleton",
                "Microtubule",
                "Microfilament",
                "Intermediate Filament",
                "Nucleolus (Nuol)",
                "Plastid",
                "Cytosol",
                "Cytoplasm",
                "Cell Wall",
                "Plasma Membrane",
                "Microbody",
                "Cytoplasmic Vesicle",
                "Glyoxysome"
            ],
            "mismatches": [],
            "true_referents": [
                "Cell Wall",
                "Centrosome",
                "Chloroplast (Cp)",
                "Cytoplasm",
                "Cytoplasmic Vesicle",
                "Cytoskeleton",
                "Cytosol",
                "Endoplasmic Reticulum (ER)",
                "Endosome (E)",
                "Glyoxysome",
                "Golgi Apparatus (GA)",
                "Intermediate Filament",
                "Lysosome (Ly)",
                "Microbody",
                "Microfilament",
                "Microtubule",
                "Mitochondrion (Mt)",
                "Nucleolus (Nuol)",
                "Nucleus (Nu)",
                "Peroxisome (P)",
                "Plasma Membrane",
                "Plastid",
                "Ribosome (R)",
                "Vacuole (V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (N)",
            "Mitochondrion (MT)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (LY)",
            "Peroxisome (PX)",
            "Ribosome (R)",
            "Vacuole (VC)",
            "Chloroplast (CP)",
            "Cell Wall (CW)",
            "Centriole (CT)",
            "Cilium (CL)",
            "Flagellum (FL)",
            "Cytoskeleton (CS)",
            "Plasma Membrane (PM)",
            "Glyoxysome (GX)",
            "Hydrogenosome (HS)",
            "Melanosome (MS)",
            "Microbody (MB)",
            "Contractile Vacuole (CVC)",
            "Food Vacuole (FV)",
            "Nematocyst (NC)",
            "Extrusome (EX)",
            "Volutin Granule (VG)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (N)",
                "Mitochondrion (MT)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (LY)",
                "Peroxisome (PX)",
                "Ribosome (R)",
                "Vacuole (VC)",
                "Chloroplast (CP)",
                "Cell Wall (CW)",
                "Centriole (CT)",
                "Cilium (CL)",
                "Flagellum (FL)",
                "Cytoskeleton (CS)",
                "Plasma Membrane (PM)",
                "Glyoxysome (GX)",
                "Hydrogenosome (HS)",
                "Melanosome (MS)",
                "Microbody (MB)",
                "Contractile Vacuole (CVC)",
                "Food Vacuole (FV)",
                "Nematocyst (NC)",
                "Extrusome (EX)",
                "Volutin Granule (VG)"
            ],
            "mismatches": [],
            "true_referents": [
                "Cell Wall (CW)",
                "Centriole (CT)",
                "Chloroplast (CP)",
                "Cilium (CL)",
                "Contractile Vacuole (CVC)",
                "Cytoskeleton (CS)",
                "Endoplasmic Reticulum (ER)",
                "Extrusome (EX)",
                "Flagellum (FL)",
                "Food Vacuole (FV)",
                "Glyoxysome (GX)",
                "Golgi Apparatus (GA)",
                "Hydrogenosome (HS)",
                "Lysosome (LY)",
                "Melanosome (MS)",
                "Microbody (MB)",
                "Mitochondrion (MT)",
                "Nematocyst (NC)",
                "Nucleus (N)",
                "Peroxisome (PX)",
                "Plasma Membrane (PM)",
                "Ribosome (R)",
                "Vacuole (VC)",
                "Volutin Granule (VG)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (N)",
            "Mitochondrion (M)",
            "Chloroplast (C)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (L)",
            "Peroxisome (P)",
            "Ribosome (R)",
            "Vacuole (V)",
            "Cytoskeleton (CS)",
            "Plasma Membrane (PM)",
            "Cell Wall (CW)",
            "Centriole (Ce)",
            "Flagellum (F)",
            "Cilium (Ci)",
            "Nucleolus (Nu)",
            "Chromoplast (Ch)",
            "Amyloplast (A)",
            "Glyoxysome (G)",
            "Microbody (Mb)",
            "Tonoplast (T)",
            "Basal Body (Bb)",
            "Endosome (E)",
            "Multivesicular Body (MVB)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (N)",
                "Mitochondrion (M)",
                "Chloroplast (C)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (L)",
                "Peroxisome (P)",
                "Ribosome (R)",
                "Vacuole (V)",
                "Cytoskeleton (CS)",
                "Plasma Membrane (PM)",
                "Cell Wall (CW)",
                "Centriole (Ce)",
                "Flagellum (F)",
                "Cilium (Ci)",
                "Nucleolus (Nu)",
                "Chromoplast (Ch)",
                "Amyloplast (A)",
                "Glyoxysome (G)",
                "Microbody (Mb)",
                "Tonoplast (T)",
                "Basal Body (Bb)",
                "Endosome (E)",
                "Multivesicular Body (MVB)"
            ],
            "mismatches": [],
            "true_referents": [
                "Amyloplast (A)",
                "Basal Body (Bb)",
                "Cell Wall (CW)",
                "Centriole (Ce)",
                "Chloroplast (C)",
                "Chromoplast (Ch)",
                "Cilium (Ci)",
                "Cytoskeleton (CS)",
                "Endoplasmic Reticulum (ER)",
                "Endosome (E)",
                "Flagellum (F)",
                "Glyoxysome (G)",
                "Golgi Apparatus (GA)",
                "Lysosome (L)",
                "Microbody (Mb)",
                "Mitochondrion (M)",
                "Multivesicular Body (MVB)",
                "Nucleolus (Nu)",
                "Nucleus (N)",
                "Peroxisome (P)",
                "Plasma Membrane (PM)",
                "Ribosome (R)",
                "Tonoplast (T)",
                "Vacuole (V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (NUC)",
            "Mitochondrion (MIT)",
            "Chloroplast (CHL)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (LYS)",
            "Peroxisome (PER)",
            "Ribosome (RIB)",
            "Vesicle (VES)",
            "Cytoskeleton (CYT)",
            "Centrosome (CEN)",
            "Vacuole (VAC)",
            "Plasma Membrane (PM)",
            "Nucleolus (NUL)",
            "Chromoplast (CHR)",
            "Leucoplast (LEU)",
            "Flagellum (FLA)",
            "Cilia (CIL)",
            "Basal Body (BB)",
            "Pili (PIL)",
            "Microtubule (MT)",
            "Microfilament (MF)",
            "Intermediate Filament (IF)",
            "Plasmid (PLA)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (NUC)",
                "Mitochondrion (MIT)",
                "Chloroplast (CHL)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (LYS)",
                "Peroxisome (PER)",
                "Ribosome (RIB)",
                "Vesicle (VES)",
                "Cytoskeleton (CYT)",
                "Centrosome (CEN)",
                "Vacuole (VAC)",
                "Plasma Membrane (PM)",
                "Nucleolus (NUL)",
                "Chromoplast (CHR)",
                "Leucoplast (LEU)",
                "Flagellum (FLA)",
                "Cilia (CIL)",
                "Basal Body (BB)",
                "Pili (PIL)",
                "Microtubule (MT)",
                "Microfilament (MF)",
                "Intermediate Filament (IF)",
                "Plasmid (PLA)"
            ],
            "mismatches": [],
            "true_referents": [
                "Basal Body (BB)",
                "Centrosome (CEN)",
                "Chloroplast (CHL)",
                "Chromoplast (CHR)",
                "Cilia (CIL)",
                "Cytoskeleton (CYT)",
                "Endoplasmic Reticulum (ER)",
                "Flagellum (FLA)",
                "Golgi Apparatus (GA)",
                "Intermediate Filament (IF)",
                "Leucoplast (LEU)",
                "Lysosome (LYS)",
                "Microfilament (MF)",
                "Microtubule (MT)",
                "Mitochondrion (MIT)",
                "Nucleolus (NUL)",
                "Nucleus (NUC)",
                "Peroxisome (PER)",
                "Pili (PIL)",
                "Plasma Membrane (PM)",
                "Plasmid (PLA)",
                "Ribosome (RIB)",
                "Vacuole (VAC)",
                "Vesicle (VES)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (N)",
            "Mitochondria (Mito)",
            "Ribosomes (R)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosomes (L)",
            "Peroxisomes (P)",
            "Vacuoles (V)",
            "Chloroplasts (C)",
            "Cytoskeleton (CSK)",
            "Nucleolus (Nl)",
            "Centrosome (Ce)",
            "Cilia (Ci)",
            "Flagella (F)",
            "Plastids (P)",
            "Chromoplasts (Ch)",
            "Leucoplasts (Le)",
            "Endosomes (E)",
            "Microtubules (Mt)",
            "Microfilaments (Mf)",
            "Intermediate Filaments (IF)",
            "Lipid Droplets (LD)",
            "Proteasomes (Pro)",
            "Nuclear Pore Complex (NPC)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (N)",
                "Mitochondria (Mito)",
                "Ribosomes (R)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosomes (L)",
                "Peroxisomes (P)",
                "Vacuoles (V)",
                "Chloroplasts (C)",
                "Cytoskeleton (CSK)",
                "Nucleolus (Nl)",
                "Centrosome (Ce)",
                "Cilia (Ci)",
                "Flagella (F)",
                "Plastids (P)",
                "Chromoplasts (Ch)",
                "Leucoplasts (Le)",
                "Endosomes (E)",
                "Microtubules (Mt)",
                "Microfilaments (Mf)",
                "Intermediate Filaments (IF)",
                "Lipid Droplets (LD)",
                "Proteasomes (Pro)",
                "Nuclear Pore Complex (NPC)"
            ],
            "mismatches": [],
            "true_referents": [
                "Centrosome (Ce)",
                "Chloroplasts (C)",
                "Chromoplasts (Ch)",
                "Cilia (Ci)",
                "Cytoskeleton (CSK)",
                "Endoplasmic Reticulum (ER)",
                "Endosomes (E)",
                "Flagella (F)",
                "Golgi Apparatus (GA)",
                "Intermediate Filaments (IF)",
                "Leucoplasts (Le)",
                "Lipid Droplets (LD)",
                "Lysosomes (L)",
                "Microfilaments (Mf)",
                "Microtubules (Mt)",
                "Mitochondria (Mito)",
                "Nuclear Pore Complex (NPC)",
                "Nucleolus (Nl)",
                "Nucleus (N)",
                "Peroxisomes (P)",
                "Plastids (P)",
                "Proteasomes (Pro)",
                "Ribosomes (R)",
                "Vacuoles (V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus",
            "Mitochondria",
            "Endoplasmic reticulum",
            "Golgi apparatus",
            "Lysosome",
            "Peroxisome",
            "Ribosome",
            "Chloroplast",
            "Vacuole",
            "Cytoskeleton",
            "Centrosome",
            "Nuclear envelope",
            "Nucleolus",
            "Plasma membrane",
            "Cell wall",
            "Microtubules",
            "Microfilaments",
            "Intermediate filaments",
            "Endosome",
            "Exosome",
            "Glyoxysome",
            "Nucleoid",
            "Plasmid",
            "Dictyosome"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus",
                "Mitochondria",
                "Endoplasmic reticulum",
                "Golgi apparatus",
                "Lysosome",
                "Peroxisome",
                "Ribosome",
                "Chloroplast",
                "Vacuole",
                "Cytoskeleton",
                "Centrosome",
                "Nuclear envelope",
                "Nucleolus",
                "Plasma membrane",
                "Cell wall",
                "Microtubules",
                "Microfilaments",
                "Intermediate filaments",
                "Endosome",
                "Exosome",
                "Glyoxysome",
                "Nucleoid",
                "Plasmid",
                "Dictyosome"
            ],
            "mismatches": [],
            "true_referents": [
                "Cell wall",
                "Centrosome",
                "Chloroplast",
                "Cytoskeleton",
                "Dictyosome",
                "Endoplasmic reticulum",
                "Endosome",
                "Exosome",
                "Glyoxysome",
                "Golgi apparatus",
                "Intermediate filaments",
                "Lysosome",
                "Microfilaments",
                "Microtubules",
                "Mitochondria",
                "Nuclear envelope",
                "Nucleoid",
                "Nucleolus",
                "Nucleus",
                "Peroxisome",
                "Plasma membrane",
                "Plasmid",
                "Ribosome",
                "Vacuole"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus",
            "Mitochondrion (MITO)",
            "Ribosome",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (LYS)",
            "Peroxisome",
            "Chloroplast (Chl)",
            "Vacuole",
            "Centrosome",
            "Nucleolus",
            "Vesicle",
            "Proteasome (PR)",
            "Cilium",
            "Flagellum",
            "Microtubule",
            "Intermediate Filament",
            "Microfilament",
            "Lipid Droplet",
            "Melanosome",
            "Glyoxysome",
            "Autophagosome",
            "Exosome",
            "Plasma Membrane (PM)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus",
                "Mitochondrion (MITO)",
                "Ribosome",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (LYS)",
                "Peroxisome",
                "Chloroplast (Chl)",
                "Vacuole",
                "Centrosome",
                "Nucleolus",
                "Vesicle",
                "Proteasome (PR)",
                "Cilium",
                "Flagellum",
                "Microtubule",
                "Intermediate Filament",
                "Microfilament",
                "Lipid Droplet",
                "Melanosome",
                "Glyoxysome",
                "Autophagosome",
                "Exosome",
                "Plasma Membrane (PM)"
            ],
            "mismatches": [],
            "true_referents": [
                "Autophagosome",
                "Centrosome",
                "Chloroplast (Chl)",
                "Cilium",
                "Endoplasmic Reticulum (ER)",
                "Exosome",
                "Flagellum",
                "Glyoxysome",
                "Golgi Apparatus (GA)",
                "Intermediate Filament",
                "Lipid Droplet",
                "Lysosome (LYS)",
                "Melanosome",
                "Microfilament",
                "Microtubule",
                "Mitochondrion (MITO)",
                "Nucleolus",
                "Nucleus",
                "Peroxisome",
                "Plasma Membrane (PM)",
                "Proteasome (PR)",
                "Ribosome",
                "Vacuole",
                "Vesicle"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (NUC)",
            "Mitochondrion (MITO)",
            "Endoplasmic reticulum (ER)",
            "Golgi apparatus (GA)",
            "Lysosome (LYS)",
            "Peroxisome (PEX)",
            "Chloroplast (CHL)",
            "Vacuole (VAC)",
            "Ribosome (RIB)",
            "Centriole (CEN)",
            "Nucleolus (NCL)",
            "Plasma membrane (PM)",
            "Cytoskeleton (CSK)",
            "Endosome (END)",
            "Melanosome (MEL)",
            "Glyoxysome (GLX)",
            "Hydrogenosome (HYD)",
            "Autophagosome (APG)",
            "Amyloplast (AMP)",
            "Chromoplast (CRP)",
            "Elaioplast (ELP)",
            "Proteasome (PSM)",
            "Acrosome (ACR)",
            "Glycosome (GLS)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (NUC)",
                "Mitochondrion (MITO)",
                "Endoplasmic reticulum (ER)",
                "Golgi apparatus (GA)",
                "Lysosome (LYS)",
                "Peroxisome (PEX)",
                "Chloroplast (CHL)",
                "Vacuole (VAC)",
                "Ribosome (RIB)",
                "Centriole (CEN)",
                "Nucleolus (NCL)",
                "Plasma membrane (PM)",
                "Cytoskeleton (CSK)",
                "Endosome (END)",
                "Melanosome (MEL)",
                "Glyoxysome (GLX)",
                "Hydrogenosome (HYD)",
                "Autophagosome (APG)",
                "Amyloplast (AMP)",
                "Chromoplast (CRP)",
                "Elaioplast (ELP)",
                "Proteasome (PSM)",
                "Acrosome (ACR)",
                "Glycosome (GLS)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acrosome (ACR)",
                "Amyloplast (AMP)",
                "Autophagosome (APG)",
                "Centriole (CEN)",
                "Chloroplast (CHL)",
                "Chromoplast (CRP)",
                "Cytoskeleton (CSK)",
                "Elaioplast (ELP)",
                "Endoplasmic reticulum (ER)",
                "Endosome (END)",
                "Glycosome (GLS)",
                "Glyoxysome (GLX)",
                "Golgi apparatus (GA)",
                "Hydrogenosome (HYD)",
                "Lysosome (LYS)",
                "Melanosome (MEL)",
                "Mitochondrion (MITO)",
                "Nucleolus (NCL)",
                "Nucleus (NUC)",
                "Peroxisome (PEX)",
                "Plasma membrane (PM)",
                "Proteasome (PSM)",
                "Ribosome (RIB)",
                "Vacuole (VAC)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus",
            "Endoplasmic Reticulum (ER)",
            "Rough Endoplasmic Reticulum (RER)",
            "Smooth Endoplasmic Reticulum (SER)",
            "Golgi Apparatus (GA)",
            "Mitochondrion",
            "Ribosome",
            "Lysosome",
            "Peroxisome",
            "Centrosome",
            "Centriole",
            "Vacuole",
            "Chloroplast",
            "Thylakoid",
            "Grana",
            "Stroma",
            "Plastid",
            "Leucoplast",
            "Chromoplast",
            "Amyloplast",
            "Nucleolus",
            "Plasma Membrane",
            "Cytoskeleton",
            "Cytosol"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus",
                "Endoplasmic Reticulum (ER)",
                "Rough Endoplasmic Reticulum (RER)",
                "Smooth Endoplasmic Reticulum (SER)",
                "Golgi Apparatus (GA)",
                "Mitochondrion",
                "Ribosome",
                "Lysosome",
                "Peroxisome",
                "Centrosome",
                "Centriole",
                "Vacuole",
                "Chloroplast",
                "Thylakoid",
                "Grana",
                "Stroma",
                "Plastid",
                "Leucoplast",
                "Chromoplast",
                "Amyloplast",
                "Nucleolus",
                "Plasma Membrane",
                "Cytoskeleton",
                "Cytosol"
            ],
            "mismatches": [],
            "true_referents": [
                "Amyloplast",
                "Centriole",
                "Centrosome",
                "Chloroplast",
                "Chromoplast",
                "Cytoskeleton",
                "Cytosol",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Grana",
                "Leucoplast",
                "Lysosome",
                "Mitochondrion",
                "Nucleolus",
                "Nucleus",
                "Peroxisome",
                "Plasma Membrane",
                "Plastid",
                "Ribosome",
                "Rough Endoplasmic Reticulum (RER)",
                "Smooth Endoplasmic Reticulum (SER)",
                "Stroma",
                "Thylakoid",
                "Vacuole"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Nucleus (N)",
            "Mitochondrion (M)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (L)",
            "Ribosome (R)",
            "Peroxisome (P)",
            "Chloroplast (C)",
            "Vacuole (V)",
            "Cytoskeleton",
            "Centriole",
            "Cilium",
            "Flagellum",
            "Cell Membrane",
            "Nuclear Envelope",
            "Rough ER",
            "Smooth ER",
            "Cis Golgi",
            "Medial Golgi",
            "Trans Golgi",
            "Primary Lysosome",
            "Secondary Lysosome",
            "Microbody",
            "Glyoxysome"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Nucleus (N)",
                "Mitochondrion (M)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (L)",
                "Ribosome (R)",
                "Peroxisome (P)",
                "Chloroplast (C)",
                "Vacuole (V)",
                "Cytoskeleton",
                "Centriole",
                "Cilium",
                "Flagellum",
                "Cell Membrane",
                "Nuclear Envelope",
                "Rough ER",
                "Smooth ER",
                "Cis Golgi",
                "Medial Golgi",
                "Trans Golgi",
                "Primary Lysosome",
                "Secondary Lysosome",
                "Microbody",
                "Glyoxysome"
            ],
            "mismatches": [],
            "true_referents": [
                "Cell Membrane",
                "Centriole",
                "Chloroplast (C)",
                "Cilium",
                "Cis Golgi",
                "Cytoskeleton",
                "Endoplasmic Reticulum (ER)",
                "Flagellum",
                "Glyoxysome",
                "Golgi Apparatus (GA)",
                "Lysosome (L)",
                "Medial Golgi",
                "Microbody",
                "Mitochondrion (M)",
                "Nuclear Envelope",
                "Nucleus (N)",
                "Peroxisome (P)",
                "Primary Lysosome",
                "Ribosome (R)",
                "Rough ER",
                "Secondary Lysosome",
                "Smooth ER",
                "Trans Golgi",
                "Vacuole (V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Mitochondrion (Mt)",
            "Nucleus (N)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (Ly)",
            "Ribosome (Rb)",
            "Peroxisome (Px)",
            "Chloroplast (Cp)",
            "Vacuole (V)",
            "Centrosome (Cn)",
            "Centriole (Ct)",
            "Flagellum (Fl)",
            "Cilium (Ci)",
            "Nucleolus (Nc)",
            "Spliceosome (Sp)",
            "Proteasome (Pt)",
            "Microsome (Ms)",
            "Glyoxysome (Gy)",
            "Acidocalcisome (Ac)",
            "Hydrogenosome (Hg)",
            "Mitosome (Mt)",
            "Carboxysome (Cb)",
            "Magnetosome (Mg)",
            "Chromatophore (Cr)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Mitochondrion (Mt)",
                "Nucleus (N)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (Ly)",
                "Ribosome (Rb)",
                "Peroxisome (Px)",
                "Chloroplast (Cp)",
                "Vacuole (V)",
                "Centrosome (Cn)",
                "Centriole (Ct)",
                "Flagellum (Fl)",
                "Cilium (Ci)",
                "Nucleolus (Nc)",
                "Spliceosome (Sp)",
                "Proteasome (Pt)",
                "Microsome (Ms)",
                "Glyoxysome (Gy)",
                "Acidocalcisome (Ac)",
                "Hydrogenosome (Hg)",
                "Mitosome (Mt)",
                "Carboxysome (Cb)",
                "Magnetosome (Mg)",
                "Chromatophore (Cr)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acidocalcisome (Ac)",
                "Carboxysome (Cb)",
                "Centriole (Ct)",
                "Centrosome (Cn)",
                "Chloroplast (Cp)",
                "Chromatophore (Cr)",
                "Cilium (Ci)",
                "Endoplasmic Reticulum (ER)",
                "Flagellum (Fl)",
                "Glyoxysome (Gy)",
                "Golgi Apparatus (GA)",
                "Hydrogenosome (Hg)",
                "Lysosome (Ly)",
                "Magnetosome (Mg)",
                "Microsome (Ms)",
                "Mitochondrion (Mt)",
                "Mitosome (Mt)",
                "Nucleolus (Nc)",
                "Nucleus (N)",
                "Peroxisome (Px)",
                "Proteasome (Pt)",
                "Ribosome (Rb)",
                "Spliceosome (Sp)",
                "Vacuole (V)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Mitochondria (Mit)",
            "Endoplasmic Reticulum (ER)",
            "Golgi Apparatus (GA)",
            "Lysosome (Lyso)",
            "Ribosome (Rib)",
            "Centriole (Cen)",
            "Cytoskeleton",
            "Peroxisome (Pero)",
            "Plastid",
            "Chloroplast",
            "Vacuole",
            "Chromoplast",
            "Amyloplast",
            "Chloroelast",
            "Leucoplast",
            "Chromatophore",
            "Cilium",
            "Flagellum",
            "Microtubule-organizing Center (MTOC)",
            "Centrosome",
            "Glycocalyx",
            "Glycophorin",
            "Microbody",
            "Autophagosome",
            "Lysosomal Body",
            "Chloroplast Stroma",
            "Chloroplast Thylakoid"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Mitochondria (Mit)",
                "Endoplasmic Reticulum (ER)",
                "Golgi Apparatus (GA)",
                "Lysosome (Lyso)",
                "Ribosome (Rib)",
                "Centriole (Cen)",
                "Cytoskeleton",
                "Peroxisome (Pero)",
                "Plastid",
                "Chloroplast",
                "Vacuole",
                "Chromoplast",
                "Amyloplast",
                "Chloroelast",
                "Leucoplast",
                "Chromatophore",
                "Cilium",
                "Flagellum",
                "Microtubule-organizing Center (MTOC)",
                "Centrosome",
                "Glycocalyx",
                "Glycophorin",
                "Microbody",
                "Autophagosome"
            ],
            "mismatches": [
                "Lysosomal Body",
                "Chloroplast Stroma",
                "Chloroplast Thylakoid"
            ],
            "true_referents": [
                "Amyloplast",
                "Autophagosome",
                "Centriole (Cen)",
                "Centrosome",
                "Chloroelast",
                "Chloroplast",
                "Chromatophore",
                "Chromoplast",
                "Cilium",
                "Cytoskeleton",
                "Endoplasmic Reticulum (ER)",
                "Flagellum",
                "Glycocalyx",
                "Glycophorin",
                "Golgi Apparatus (GA)",
                "Leucoplast",
                "Lysosome (Lyso)",
                "Microbody",
                "Microtubule-organizing Center (MTOC)",
                "Mitochondria (Mit)",
                "Peroxisome (Pero)",
                "Plastid",
                "Ribosome (Rib)",
                "Vacuole"
            ],
            "TP": 24,
            "FP": 3,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "organelle",
        "domain": "biology",
        "response": [
            "Acrosome (ACR)",
            "Centriole (CNT)",
            "Centrosome (CS)",
            "Chloroplast (CHL)",
            "Cilium (CIL)",
            "Cytolytic granule (CG)",
            "Endosome (END)",
            "Golgi apparatus (GA)",
            "Lysosome (LYS)",
            "Melanosome (MLS)",
            "Microbody (MB)",
            "Microtubule-organizing center (MTOC)",
            "Mitochondrion (MIT)",
            "Nucleolus (NU)",
            "Nucleus (NUC)",
            "Peroxisome (PO)",
            "Phagosome (PHG)",
            "Plastid (PLT)",
            "Pseudopodium (PSD)",
            "Ribosome (RIB)",
            "Rough endoplasmic reticulum (RER)",
            "Smooth endoplasmic reticulum (SER)",
            "Vacuole (VAC)",
            "Vault (VLT)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Acrosome (ACR)",
                "Centriole (CNT)",
                "Centrosome (CS)",
                "Chloroplast (CHL)",
                "Cilium (CIL)",
                "Cytolytic granule (CG)",
                "Endosome (END)",
                "Golgi apparatus (GA)",
                "Lysosome (LYS)",
                "Melanosome (MLS)",
                "Microbody (MB)",
                "Microtubule-organizing center (MTOC)",
                "Mitochondrion (MIT)",
                "Nucleolus (NU)",
                "Nucleus (NUC)",
                "Peroxisome (PO)",
                "Phagosome (PHG)",
                "Plastid (PLT)",
                "Pseudopodium (PSD)",
                "Ribosome (RIB)",
                "Rough endoplasmic reticulum (RER)",
                "Smooth endoplasmic reticulum (SER)",
                "Vacuole (VAC)",
                "Vault (VLT)"
            ],
            "mismatches": [],
            "true_referents": [
                "Acrosome (ACR)",
                "Centriole (CNT)",
                "Centrosome (CS)",
                "Chloroplast (CHL)",
                "Cilium (CIL)",
                "Cytolytic granule (CG)",
                "Endosome (END)",
                "Golgi apparatus (GA)",
                "Lysosome (LYS)",
                "Melanosome (MLS)",
                "Microbody (MB)",
                "Microtubule-organizing center (MTOC)",
                "Mitochondrion (MIT)",
                "Nucleolus (NU)",
                "Nucleus (NUC)",
                "Peroxisome (PO)",
                "Phagosome (PHG)",
                "Plastid (PLT)",
                "Pseudopodium (PSD)",
                "Ribosome (RIB)",
                "Rough endoplasmic reticulum (RER)",
                "Smooth endoplasmic reticulum (SER)",
                "Vacuole (VAC)",
                "Vault (VLT)"
            ],
            "TP": 24,
            "FP": 0,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "organelle",
        "domain": "biology",
        "response": "error when calling model 'amazon.titan-text-express-v1': empty response",
        "scorer": "gpt-4o",
        "judgement": {
            "TP": 0,
            "FP": 0
        }
    }
]