[
    {
        "test": "limited-list-referents",
        "responder": "qwen-v2.5-14b-instruct",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Anxiety Disorder (AD)",
            "Bipolar Disorder (BD)",
            "Depression (MDD)",
            "Schizophrenia (SCZ)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Dissociative Identity Disorder (DID)",
            "Eating Disorders (ED)",
            "Generalized Anxiety Disorder (GAD)",
            "Major Depressive Disorder (MDD)",
            "Personality Disorders (PD)",
            "Borderline Personality Disorder (BPD)",
            "Antisocial Personality Disorder (APD)",
            "Histrionic Personality Disorder (HPD)",
            "Narcissistic Personality Disorder (NPD)",
            "Avoidant Personality Disorder (AvPD)",
            "Dependent Personality Disorder (DPD)",
            "Schizotypal Personality Disorder (STPD)",
            "Obsessive-Compulsive Personality Disorder (OCPD)",
            "Dysthymia (Persistent Depressive Disorder)",
            "Substance Use Disorder (SUD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder",
                "Attention-Deficit/Hyperactivity Disorder",
                "Autism Spectrum Disorder",
                "Dissociative Identity Disorder",
                "Major Depressive Disorder",
                "Personality Disorders",
                "Borderline Personality Disorder",
                "Antisocial Personality Disorder",
                "Histrionic Personality Disorder",
                "Narcissistic Personality Disorder",
                "Dependent Personality Disorder",
                "Schizotypal Personality Disorder"
            ],
            "mismatches": [
                "Anxiety Disorder (AD)",
                "Depression (MDD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Eating Disorders (ED)",
                "Generalized Anxiety Disorder (GAD)",
                "Avoidant Personality Disorder (AvPD)",
                "Obsessive-Compulsive Personality Disorder (OCPD)",
                "Dysthymia (Persistent Depressive Disorder)",
                "Substance Use Disorder (SUD)"
            ],
            "true_referents": [
                "Agoraphobia",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cocaine-Related Disorders",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Dysthymic Disorder",
                "Histrionic Personality Disorder",
                "Hoarding Disorder",
                "Narcissistic Personality Disorder",
                "Neurotic Disorders",
                "Obsessive-Compulsive Disorder",
                "Panic Disorder",
                "Passive-Aggressive Personality Disorder",
                "Personality Disorders",
                "Schizoid Personality Disorder",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance Withdrawal Syndrome",
                "Substance-Related Disorders",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 14,
            "FP": 9,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "phi-v4",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Bipolar Disorder (BD)",
            "Schizophrenia (SCZ)",
            "Generalized Anxiety Disorder (GAD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Anxiety Disorder (AD)",
            "Panic Disorder (PD)",
            "Social Anxiety Disorder (SAD)",
            "Eating Disorders (ED)",
            "Bulimia Nervosa (BN)",
            "Anorexia Nervosa (AN)",
            "Borderline Personality Disorder (BPD)",
            "Antisocial Personality Disorder (ASPD)",
            "Histrionic Personality Disorder (HPD)",
            "Narcissistic Personality Disorder (NPD)",
            "Substance Use Disorder (SUD)",
            "Alzheimer's Disease (AD)",
            "Parkinson's Disease Dementia (PDD)",
            "Huntington's Disease (HD)",
            "Alzheimer's Disease Dementia (ADD)",
            "Frontotemporal Dementia (FTD)",
            "Delirium (DEL)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder",
                "Panic Disorder",
                "Bulimia Nervosa",
                "Anorexia Nervosa",
                "Borderline Personality Disorder",
                "Antisocial Personality Disorder",
                "Histrionic Personality Disorder",
                "Narcissistic Personality Disorder",
                "Autism Spectrum Disorder",
                "Huntington Disease",
                "Frontotemporal Dementia"
            ],
            "mismatches": [
                "Major Depressive Disorder (MDD)",
                "Generalized Anxiety Disorder (GAD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Attention-Deficit/Hyperactivity Disorder (ADHD)",
                "Anxiety Disorder (AD)",
                "Social Anxiety Disorder (SAD)",
                "Eating Disorders (ED)",
                "Substance Use Disorder (SUD)",
                "Alzheimer's Disease (AD)",
                "Parkinson's Disease Dementia (PDD)",
                "Alzheimer's Disease Dementia (ADD)",
                "Delirium (DEL)"
            ],
            "true_referents": [
                "Agoraphobia",
                "Alzheimer Disease",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cocaine-Related Disorders",
                "Cognitive Dysfunction",
                "Compulsive Personality Disorder",
                "Dementia",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Frontotemporal Dementia",
                "Frontotemporal Lobar Degeneration",
                "Histrionic Personality Disorder",
                "Hoarding Disorder",
                "Huntington Disease",
                "Lewy Body Disease",
                "Narcissistic Personality Disorder",
                "Neurocognitive Disorders",
                "Neurotic Disorders",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Social Communication Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance Withdrawal Syndrome",
                "Substance-Related Disorders",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 13,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "mistral-small-instruct-24B",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Bipolar Disorder",
            "Schizophrenia",
            "Anxiety Disorder",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Attention Deficit Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Eating Disorders (e.g., Anorexia Nervosa, Bulimia Nervosa)",
            "Borderline Personality Disorder (BPD)",
            "Narcissistic Personality Disorder",
            "Panic Disorder",
            "Generalized Anxiety Disorder (GAD)",
            "Social Anxiety Disorder",
            "Body Dysmorphic Disorder",
            "Dissociative Identity Disorder (DID)",
            "Substance Use Disorder",
            "Alcohol Use Disorder",
            "Gambling Disorder",
            "Trichotillomania (Hair-Pulling Disorder)",
            "Hoarding Disorder",
            "Pica",
            "Kleptomania",
            "Intermittent Explosive Disorder"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder (OCD)",
                "Autism Spectrum Disorder (ASD)",
                "Borderline Personality Disorder (BPD)",
                "Narcissistic Personality Disorder",
                "Panic Disorder",
                "Body Dysmorphic Disorder",
                "Dissociative Identity Disorder (DID)",
                "Trichotillomania (Hair-Pulling Disorder)",
                "Hoarding Disorder",
                "Pica"
            ],
            "mismatches": [
                "Major Depressive Disorder (MDD)",
                "Anxiety Disorder",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Attention Deficit Hyperactivity Disorder (ADHD)",
                "Eating Disorders (e.g., Anorexia Nervosa, Bulimia Nervosa)",
                "Generalized Anxiety Disorder (GAD)",
                "Social Anxiety Disorder",
                "Substance Use Disorder",
                "Alcohol Use Disorder",
                "Gambling Disorder",
                "Kleptomania",
                "Intermittent Explosive Disorder"
            ],
            "true_referents": [
                "Agoraphobia",
                "Alcohol-Related Disorders",
                "Alcoholism",
                "Anorexia Nervosa",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge Drinking",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Body Dysmorphic Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Combat Disorders",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Cyclothymic Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Disruptive, Impulse Control, and Conduct Disorders",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Gambling",
                "Histrionic Personality Disorder",
                "Hoarding Disorder",
                "Narcissistic Personality Disorder",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Phobic Disorders",
                "Pica",
                "Primary Progressive Nonfluent Aphasia",
                "Psychotic Disorders",
                "Schizophrenia",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Social Communication Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance-Related Disorders",
                "Tobacco Use Disorder",
                "Trauma and Stressor Related Disorders",
                "Trichotillomania"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gemma-3",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Bipolar I Disorder (BD-I)",
            "Generalized Anxiety Disorder (GAD)",
            "Panic Disorder (PD)",
            "Social Anxiety Disorder (SAD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Posttraumatic Stress Disorder (PTSD)",
            "Schizophrenia (SZ)",
            "Schizoaffective Disorder (SAD)",
            "Borderline Personality Disorder (BPD)",
            "Antisocial Personality Disorder (ASPD)",
            "Narcissistic Personality Disorder (NPD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Anorexia Nervosa (AN)",
            "Bulimia Nervosa (BN)",
            "Binge-Eating Disorder (BED)",
            "Body Dysmorphic Disorder (BDD)",
            "Hoarding Disorder (HD)",
            "Somatic Symptom Disorder (SSD)",
            "Illusionary Parasitosis (IP)",
            "Capgras Delusion (CD)",
            "Dissociative Identity Disorder (DID)",
            "Factitious Disorder (FD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Major Depressive Disorder (MDD)",
                "Panic Disorder (PD)",
                "Obsessive-Compulsive Disorder (OCD)",
                "Schizophrenia (SZ)",
                "Borderline Personality Disorder (BPD)",
                "Antisocial Personality Disorder (ASPD)",
                "Narcissistic Personality Disorder (NPD)",
                "Autism Spectrum Disorder (ASD)",
                "Anorexia Nervosa (AN)",
                "Bulimia Nervosa (BN)",
                "Binge-Eating Disorder (BED)",
                "Hoarding Disorder (HD)",
                "Dissociative Identity Disorder (DID)",
                "Factitious Disorder (FD)"
            ],
            "mismatches": [
                "Bipolar I Disorder (BD-I)",
                "Generalized Anxiety Disorder (GAD)",
                "Social Anxiety Disorder (SAD)",
                "Posttraumatic Stress Disorder (PTSD)",
                "Schizoaffective Disorder (SAD)",
                "Attention-Deficit/Hyperactivity Disorder (ADHD)",
                "Body Dysmorphic Disorder (BDD)",
                "Somatic Symptom Disorder (SSD)",
                "Illusionary Parasitosis (IP)",
                "Capgras Delusion (CD)"
            ],
            "true_referents": [
                "Agoraphobia",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Body Dysmorphic Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Capgras Syndrome",
                "Compulsive Personality Disorder",
                "Delirium",
                "Delusional Parasitosis",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Factitious Disorders",
                "Hoarding Disorder",
                "Mood Disorders",
                "Munchausen Syndrome",
                "Munchausen Syndrome by Proxy",
                "Narcissistic Personality Disorder",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Psychological Trauma",
                "Restless Legs Syndrome",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Sleep Paralysis",
                "Social Communication Disorder",
                "Somatoform Disorders",
                "Stress Disorders, Post-Traumatic",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Generalized Anxiety Disorder (GAD)",
            "Bipolar Disorder",
            "Schizophrenia",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Panic Disorder",
            "Social Anxiety Disorder",
            "Borderline Personality Disorder (BPD)",
            "Antisocial Personality Disorder",
            "Eating Disorders",
            "Specific Phobias",
            "Dissociative Identity Disorder (DID)",
            "Alzheimer's Disease",
            "Parkinson's Disease",
            "Huntington's Disease",
            "Tourette Syndrome",
            "Sleep Disorders",
            "Somatic Symptom Disorder",
            "Conversion Disorder",
            "Substance Use Disorder",
            "Conduct Disorder"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder",
                "Panic Disorder",
                "Borderline Personality Disorder",
                "Antisocial Personality Disorder",
                "Dissociative Identity Disorder",
                "Huntington's Disease",
                "Tourette Syndrome",
                "Conversion Disorder",
                "Conduct Disorder"
            ],
            "mismatches": [
                "Major Depressive Disorder (MDD)",
                "Generalized Anxiety Disorder (GAD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Attention-Deficit/Hyperactivity Disorder (ADHD)",
                "Autism Spectrum Disorder (ASD)",
                "Social Anxiety Disorder",
                "Eating Disorders",
                "Specific Phobias",
                "Alzheimer's Disease",
                "Parkinson's Disease",
                "Sleep Disorders",
                "Somatic Symptom Disorder",
                "Substance Use Disorder"
            ],
            "true_referents": [
                "Agoraphobia",
                "Alcohol-Related Disorders",
                "Alzheimer Disease",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Conversion Disorder",
                "Cyclothymic Disorder",
                "Dementia",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Disruptive, Impulse Control, and Conduct Disorders",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Factitious Disorders",
                "Hoarding Disorder",
                "Huntington Disease",
                "Hypochondriasis",
                "Lewy Body Disease",
                "Motor Disorders",
                "Munchausen Syndrome",
                "Narcissistic Personality Disorder",
                "Neurocognitive Disorders",
                "Obsessive-Compulsive Disorder",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Phobic Disorders",
                "Psychotic Disorders",
                "Schizophrenia",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Sleep Disorders, Circadian Rhythm",
                "Sleep Disorders, Intrinsic",
                "Sleep Wake Disorders",
                "Social Communication Disorder",
                "Somatoform Disorders",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance-Related Disorders",
                "Tic Disorders",
                "Tobacco Use Disorder",
                "Tourette Syndrome",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4o-mini",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Generalized Anxiety Disorder (GAD)",
            "Bipolar Disorder (BD)",
            "Schizophrenia (SZ)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Social Anxiety Disorder (SAD)",
            "Borderline Personality Disorder (BPD)",
            "Narcissistic Personality Disorder (NPD)",
            "Panic Disorder (PD)",
            "Eating Disorder (ED)",
            "Dissociative Identity Disorder (DID)",
            "Seasonal Affective Disorder (SAD)",
            "Specific Phobia (SP)",
            "Agoraphobia (AG)",
            "Substance Use Disorder (SUD)",
            "Conduct Disorder (CD)",
            "Oppositional Defiant Disorder (ODD)",
            "Psychotic Disorder (PD)",
            "Mental Retardation (Intellectual Disability) (ID)",
            "Schizoaffective Disorder (SAD)",
            "Somatic Symptom Disorder (SSD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder",
                "Attention-Deficit/Hyperactivity Disorder",
                "Autism Spectrum Disorder",
                "Borderline Personality Disorder",
                "Narcissistic Personality Disorder",
                "Panic Disorder",
                "Dissociative Identity Disorder",
                "Seasonal Affective Disorder",
                "Agoraphobia",
                "Conduct Disorder",
                "Oppositional Defiant Disorder",
                "Intellectual Disability"
            ],
            "mismatches": [
                "Major Depressive Disorder (MDD)",
                "Generalized Anxiety Disorder (GAD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Social Anxiety Disorder (SAD)",
                "Eating Disorder (ED)",
                "Specific Phobia (SP)",
                "Substance Use Disorder (SUD)",
                "Psychotic Disorder (PD)",
                "Schizoaffective Disorder (SAD)",
                "Somatic Symptom Disorder (SSD)"
            ],
            "true_referents": [
                "Agoraphobia",
                "Anorexia Nervosa",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cocaine-Related Disorders",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Developmental Disabilities",
                "Disruptive, Impulse Control, and Conduct Disorders",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Hoarding Disorder",
                "Intellectual Disability",
                "Learning Disabilities",
                "Mood Disorders",
                "Narcissistic Personality Disorder",
                "Obsessive-Compulsive Disorder",
                "Oppositional Defiant Disorder",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Phobic Disorders",
                "Psychotic Disorders",
                "Restless Legs Syndrome",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Seasonal Affective Disorder",
                "Social Communication Disorder",
                "Somatoform Disorders",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance Withdrawal Syndrome",
                "Substance-Related Disorders",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-4-32k",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Bipolar Disorder (BD)",
            "Schizophrenia (SCZ)",
            "Generalized Anxiety Disorder (GAD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Panic Disorder (PD)",
            "Social Anxiety Disorder (SAD)",
            "Attention Deficit Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Borderline Personality Disorder (BPD)",
            "Antisocial Personality Disorder (APD)",
            "Narcissistic Personality Disorder (NPD)",
            "Eating Disorders (ED)",
            "Substance Use Disorder (SUD)",
            "Dissociative Identity Disorder (DID)",
            "Tourette's Syndrome (TS)",
            "Oppositional Defiant Disorder (ODD)",
            "Conduct Disorder (CD)",
            "Insomnia Disorder (ID)",
            "Paranoid Personality Disorder (PPD)",
            "Schizotypal Personality Disorder (STPD)",
            "Histrionic Personality Disorder (HPD)",
            "Avoidant Personality Disorder (APD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder",
                "Panic Disorder",
                "Autism Spectrum Disorder",
                "Borderline Personality Disorder",
                "Antisocial Personality Disorder",
                "Narcissistic Personality Disorder",
                "Substance-Related Disorders",
                "Dissociative Identity Disorder",
                "Tourette Syndrome",
                "Oppositional Defiant Disorder",
                "Conduct Disorder",
                "Paranoid Personality Disorder",
                "Schizotypal Personality Disorder",
                "Histrionic Personality Disorder"
            ],
            "mismatches": [
                "Major Depressive Disorder (MDD)",
                "Generalized Anxiety Disorder (GAD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Social Anxiety Disorder (SAD)",
                "Attention Deficit Hyperactivity Disorder (ADHD)",
                "Eating Disorders (ED)",
                "Insomnia Disorder (ID)",
                "Avoidant Personality Disorder (APD)"
            ],
            "true_referents": [
                "Agoraphobia",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cocaine-Related Disorders",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Disruptive, Impulse Control, and Conduct Disorders",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Dyssomnias",
                "Histrionic Personality Disorder",
                "Hoarding Disorder",
                "Idiopathic Hypersomnia",
                "Narcissistic Personality Disorder",
                "Obsessive-Compulsive Disorder",
                "Oppositional Defiant Disorder",
                "Panic Disorder",
                "Paranoid Disorders",
                "Paranoid Personality Disorder",
                "Passive-Aggressive Personality Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Schizoid Personality Disorder",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Shared Paranoid Disorder",
                "Sleep Initiation and Maintenance Disorders",
                "Social Communication Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance Withdrawal Syndrome",
                "Substance-Related Disorders",
                "Tourette Syndrome",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "gpt-35-turbo-16k",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major depressive disorder (MDD)",
            "Generalized anxiety disorder (GAD)",
            "Bipolar disorder",
            "Schizophrenia",
            "Obsessive-compulsive disorder (OCD)",
            "Post-traumatic stress disorder (PTSD)",
            "Attention deficit hyperactivity disorder (ADHD)",
            "Autism spectrum disorder (ASD)",
            "Borderline personality disorder (BPD)",
            "Eating disorders",
            "Panic disorder",
            "Social anxiety disorder",
            "Alzheimer's disease",
            "Parkinson's disease",
            "Huntington's disease",
            "Multiple sclerosis",
            "Amyotrophic lateral sclerosis (ALS)",
            "Tourette syndrome",
            "Insomnia",
            "Narcolepsy",
            "Substance use disorder",
            "Dissociative identity disorder (DID)",
            "Schizoaffective disorder",
            "Anorexia nervosa"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Major depressive disorder (MDD)",
                "Bipolar disorder",
                "Schizophrenia",
                "Obsessive-compulsive disorder (OCD)",
                "Post-traumatic stress disorder (PTSD)",
                "Attention deficit hyperactivity disorder (ADHD)",
                "Autism spectrum disorder (ASD)",
                "Borderline personality disorder (BPD)",
                "Panic disorder",
                "Alzheimer's disease",
                "Huntington's disease",
                "Tourette syndrome",
                "Narcolepsy",
                "Substance use disorder",
                "Dissociative identity disorder (DID)",
                "Anorexia nervosa"
            ],
            "mismatches": [
                "Generalized anxiety disorder (GAD)",
                "Eating disorders",
                "Social anxiety disorder",
                "Parkinson's disease",
                "Multiple sclerosis",
                "Amyotrophic lateral sclerosis (ALS)",
                "Insomnia",
                "Schizoaffective disorder"
            ],
            "true_referents": [
                "Agoraphobia",
                "Alcohol-Related Disorders",
                "Alzheimer Disease",
                "Anorexia Nervosa",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Compulsive Personality Disorder",
                "Cyclothymic Disorder",
                "Dementia",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Dyssomnias",
                "Hoarding Disorder",
                "Huntington Disease",
                "Lewy Body Disease",
                "Motor Disorders",
                "Narcissistic Personality Disorder",
                "Narcolepsy",
                "Neurocognitive Disorders",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Psychotic Disorders",
                "Schizoid Personality Disorder",
                "Schizophrenia",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Sleep Deprivation",
                "Sleep Wake Disorders",
                "Social Communication Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance-Related Disorders",
                "Tic Disorders",
                "Tobacco Use Disorder",
                "Tourette Syndrome",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "o1-mini",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Schizophrenia (SZ)",
            "Bipolar Disorder (BD)",
            "Major Depressive Disorder (MDD)",
            "Generalized Anxiety Disorder (GAD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Borderline Personality Disorder (BPD)",
            "Panic Disorder",
            "Social Anxiety Disorder",
            "Eating Disorder Not Otherwise Specified (EDNOS)",
            "Anorexia Nervosa",
            "Bulimia Nervosa",
            "Cyclothymic Disorder",
            "Dysthymia",
            "Somatic Symptom Disorder",
            "Histrionic Personality Disorder",
            "Narcissistic Personality Disorder",
            "Avoidant Personality Disorder",
            "Dependent Personality Disorder",
            "Paranoid Personality Disorder",
            "Schizotypal Personality Disorder",
            "Adjustment Disorder"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Schizophrenia",
                "Bipolar Disorder",
                "Depressive Disorder, Major",
                "Attention Deficit Disorder with Hyperactivity",
                "Autism Spectrum Disorder",
                "Obsessive-Compulsive Disorder",
                "Stress Disorders, Post-Traumatic",
                "Borderline Personality Disorder",
                "Panic Disorder",
                "Anorexia Nervosa",
                "Bulimia Nervosa",
                "Cyclothymic Disorder",
                "Histrionic Personality Disorder",
                "Narcissistic Personality Disorder",
                "Dependent Personality Disorder",
                "Paranoid Personality Disorder",
                "Schizotypal Personality Disorder",
                "Adjustment Disorders"
            ],
            "mismatches": [
                "Generalized Anxiety Disorder",
                "Social Anxiety Disorder",
                "Eating Disorder Not Otherwise Specified",
                "Dysthymia",
                "Somatic Symptom Disorder",
                "Avoidant Personality Disorder"
            ],
            "true_referents": [
                "Adjustment Disorders",
                "Agoraphobia",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Compulsive Personality Disorder",
                "Cyclothymic Disorder",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dysthymic Disorder",
                "Histrionic Personality Disorder",
                "Hoarding Disorder",
                "Hypochondriasis",
                "Narcissistic Personality Disorder",
                "Neurotic Disorders",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Paranoid Disorders",
                "Paranoid Personality Disorder",
                "Passive-Aggressive Personality Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Sadism",
                "Schizoid Personality Disorder",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Shared Paranoid Disorder",
                "Social Communication Disorder",
                "Somatoform Disorders",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 18,
            "FP": 6,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-5-sonnet-v1:0",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Generalized Anxiety Disorder (GAD)",
            "Bipolar Disorder",
            "Schizophrenia",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Eating Disorders",
            "Borderline Personality Disorder (BPD)",
            "Panic Disorder",
            "Social Anxiety Disorder",
            "Specific Phobias",
            "Dissociative Identity Disorder (DID)",
            "Substance Use Disorders",
            "Antisocial Personality Disorder",
            "Narcissistic Personality Disorder",
            "Insomnia Disorder",
            "Anorexia Nervosa",
            "Bulimia Nervosa",
            "Tourette Syndrome",
            "Body Dysmorphic Disorder (BDD)",
            "Agoraphobia",
            "Kleptomania"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder (OCD)",
                "Attention-Deficit/Hyperactivity Disorder (ADHD)",
                "Autism Spectrum Disorder (ASD)",
                "Borderline Personality Disorder (BPD)",
                "Panic Disorder",
                "Dissociative Identity Disorder (DID)",
                "Antisocial Personality Disorder",
                "Narcissistic Personality Disorder",
                "Anorexia Nervosa",
                "Bulimia Nervosa",
                "Tourette Syndrome",
                "Agoraphobia"
            ],
            "mismatches": [
                "Major Depressive Disorder (MDD)",
                "Generalized Anxiety Disorder (GAD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Eating Disorders",
                "Social Anxiety Disorder",
                "Specific Phobias",
                "Substance Use Disorders",
                "Insomnia Disorder",
                "Body Dysmorphic Disorder (BDD)",
                "Kleptomania"
            ],
            "true_referents": [
                "Agoraphobia",
                "Alcohol-Related Disorders",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Body Dysmorphic Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Cyclothymic Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Dyssomnias",
                "Histrionic Personality Disorder",
                "Hoarding Disorder",
                "Idiopathic Hypersomnia",
                "Narcissistic Personality Disorder",
                "Narcolepsy",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Phobic Disorders",
                "Psychotic Disorders",
                "Schizophrenia",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Social Communication Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance-Related Disorders",
                "Tic Disorders",
                "Tobacco Use Disorder",
                "Tourette Syndrome",
                "Trauma and Stressor Related Disorders",
                "Trichotillomania"
            ],
            "TP": 14,
            "FP": 10,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-opus-v1:0",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Major Depressive Disorder (MDD)",
            "Generalized Anxiety Disorder (GAD)",
            "Bipolar Disorder (BD)",
            "Schizophrenia (SCZ)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Borderline Personality Disorder (BPD)",
            "Eating Disorders (ED)",
            "Substance Use Disorders (SUD)",
            "Panic Disorder (PD)",
            "Social Anxiety Disorder (SAD)",
            "Specific Phobias",
            "Agoraphobia",
            "Persistent Depressive Disorder (PDD)",
            "Premenstrual Dysphoric Disorder (PMDD)",
            "Disruptive Mood Dysregulation Disorder (DMDD)",
            "Conduct Disorder (CD)",
            "Oppositional Defiant Disorder (ODD)",
            "Tourette Syndrome (TS)",
            "Delusional Disorder",
            "Brief Psychotic Disorder",
            "Schizoaffective Disorder"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Bipolar Disorder (BD)",
                "Obsessive-Compulsive Disorder (OCD)",
                "Attention-Deficit/Hyperactivity Disorder (ADHD)",
                "Autism Spectrum Disorder (ASD)",
                "Borderline Personality Disorder (BPD)",
                "Panic Disorder (PD)",
                "Agoraphobia",
                "Premenstrual Dysphoric Disorder (PMDD)",
                "Conduct Disorder (CD)",
                "Oppositional Defiant Disorder (ODD)",
                "Tourette Syndrome (TS)"
            ],
            "mismatches": [
                "Major Depressive Disorder (MDD)",
                "Generalized Anxiety Disorder (GAD)",
                "Schizophrenia (SCZ)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Eating Disorders (ED)",
                "Substance Use Disorders (SUD)",
                "Social Anxiety Disorder (SAD)",
                "Specific Phobias",
                "Persistent Depressive Disorder (PDD)",
                "Disruptive Mood Dysregulation Disorder (DMDD)",
                "Delusional Disorder",
                "Brief Psychotic Disorder",
                "Schizoaffective Disorder"
            ],
            "true_referents": [
                "Agoraphobia",
                "Anorexia Nervosa",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cocaine-Related Disorders",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Disruptive, Impulse Control, and Conduct Disorders",
                "Dysthymic Disorder",
                "Factitious Disorders",
                "Hoarding Disorder",
                "Mood Disorders",
                "Narcissistic Personality Disorder",
                "Obsessive-Compulsive Disorder",
                "Oppositional Defiant Disorder",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Phobic Disorders",
                "Premenstrual Dysphoric Disorder",
                "Psychotic Disorders",
                "Schizoid Personality Disorder",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Social Communication Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance Withdrawal Syndrome",
                "Substance-Related Disorders",
                "Tic Disorders",
                "Tourette Syndrome",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 11,
            "FP": 13,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-sonnet-v1:0",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Depression (MDD)",
            "Anxiety Disorders",
            "Bipolar Disorder",
            "Schizophrenia",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Attention Deficit Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Eating Disorders (e.g., Anorexia Nervosa, Bulimia Nervosa)",
            "Substance Use Disorders",
            "Personality Disorders",
            "Dissociative Disorders",
            "Somatic Symptom Disorders",
            "Sleep-Wake Disorders",
            "Neurocognitive Disorders",
            "Trauma and Stressor-Related Disorders",
            "Disruptive, Impulse-Control, and Conduct Disorders",
            "Feeding and Eating Disorders of Infancy or Early Childhood",
            "Elimination Disorders",
            "Sexual Dysfunctions",
            "Gender Dysphoria",
            "Paraphilic Disorders",
            "Factitious Disorders",
            "Other Mental Disorders"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Anxiety Disorders",
                "Bipolar Disorder",
                "Schizophrenia",
                "Obsessive-Compulsive Disorder (OCD)",
                "Autism Spectrum Disorder (ASD)",
                "Personality Disorders",
                "Dissociative Disorders",
                "Neurocognitive Disorders",
                "Disruptive, Impulse-Control, and Conduct Disorders",
                "Elimination Disorders",
                "Gender Dysphoria",
                "Paraphilic Disorders",
                "Factitious Disorders"
            ],
            "mismatches": [
                "Depression (MDD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Attention Deficit Hyperactivity Disorder (ADHD)",
                "Eating Disorders (e.g., Anorexia Nervosa, Bulimia Nervosa)",
                "Substance Use Disorders",
                "Somatic Symptom Disorders",
                "Sleep-Wake Disorders",
                "Trauma and Stressor-Related Disorders",
                "Feeding and Eating Disorders of Infancy or Early Childhood",
                "Sexual Dysfunctions",
                "Other Mental Disorders"
            ],
            "true_referents": [
                "Alcohol-Related Disorders",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cognition Disorders",
                "Cognitive Dysfunction",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Cyclothymic Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Disorders of Excessive Somnolence",
                "Disruptive, Impulse Control, and Conduct Disorders",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Elimination Disorders",
                "Enuresis",
                "Erectile Dysfunction",
                "Factitious Disorders",
                "Feeding and Eating Disorders",
                "Feeding and Eating Disorders of Childhood",
                "Gender Dysphoria",
                "Hoarding Disorder",
                "Munchausen Syndrome",
                "Munchausen Syndrome by Proxy",
                "Neurocognitive Disorders",
                "Nocturnal Enuresis",
                "Obsessive-Compulsive Disorder",
                "Panic Disorder",
                "Paraphilic Disorders",
                "Pedophilia",
                "Personality Disorders",
                "Phobic Disorders",
                "Psychotic Disorders",
                "Schizophrenia",
                "Schizophrenia, Paranoid",
                "Sexual Dysfunctions, Psychological",
                "Sexual and Gender Disorders",
                "Sleep Disorders, Circadian Rhythm",
                "Sleep Wake Disorders",
                "Sleep-Wake Transition Disorders",
                "Somatoform Disorders",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance-Related Disorders",
                "Tobacco Use Disorder",
                "Transvestism",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 13,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "anthropic.claude-3-haiku-v1:0",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Schizophrenia (SCZ)",
            "Major Depressive Disorder (MDD)",
            "Bipolar Disorder (BD)",
            "Generalized Anxiety Disorder (GAD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Attention-Deficit/Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Borderline Personality Disorder (BPD)",
            "Panic Disorder (PD)",
            "Social Anxiety Disorder (SAD)",
            "Substance Use Disorder (SUD)",
            "Anorexia Nervosa (AN)",
            "Bulimia Nervosa (BN)",
            "Body Dysmorphic Disorder (BDD)",
            "Hoarding Disorder (HD)",
            "Persistent Depressive Disorder (PDD)",
            "Disruptive Mood Dysregulation Disorder (DMDD)",
            "Avoidant Personality Disorder (AvPD)",
            "Dependent Personality Disorder (DPD)",
            "Narcissistic Personality Disorder (NPD)",
            "Antisocial Personality Disorder (ASPD)",
            "Dissociative Identity Disorder (DID)",
            "Somatic Symptom Disorder (SSD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Schizophrenia",
                "Major Depressive Disorder",
                "Bipolar Disorder",
                "Obsessive-Compulsive Disorder",
                "Attention-Deficit/Hyperactivity Disorder",
                "Autism Spectrum Disorder",
                "Borderline Personality Disorder",
                "Panic Disorder",
                "Anorexia Nervosa",
                "Bulimia Nervosa",
                "Body Dysmorphic Disorders",
                "Hoarding Disorder",
                "Dependent Personality Disorder",
                "Narcissistic Personality Disorder",
                "Antisocial Personality Disorder",
                "Dissociative Identity Disorder"
            ],
            "mismatches": [
                "Generalized Anxiety Disorder",
                "Post-Traumatic Stress Disorder",
                "Social Anxiety Disorder",
                "Substance Use Disorder",
                "Persistent Depressive Disorder",
                "Disruptive Mood Dysregulation Disorder",
                "Avoidant Personality Disorder",
                "Somatic Symptom Disorder"
            ],
            "true_referents": [
                "Agoraphobia",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Body Dysmorphic Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cocaine-Related Disorders",
                "Compulsive Personality Disorder",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Hoarding Disorder",
                "Mood Disorders",
                "Narcissistic Personality Disorder",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Passive-Aggressive Personality Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Restless Legs Syndrome",
                "Schizoid Personality Disorder",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Social Communication Disorder",
                "Somatoform Disorders",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Substance Withdrawal Syndrome",
                "Substance-Related Disorders",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 16,
            "FP": 8,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-8b-instruct-v1:0",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Anxiety Disorder (AD)",
            "Attention Deficit Hyperactivity Disorder (ADHD)",
            "Autism Spectrum Disorder (ASD)",
            "Bipolar Disorder (BD)",
            "Borderline Personality Disorder (BPD)",
            "Bulimia Nervosa (BN)",
            "Cocaine-Induced Psychosis (CIP)",
            "Depersonalization Disorder (DPD)",
            "Depression (D)",
            "Dissociative Amnesia (DA)",
            "Dissociative Identity Disorder (DID)",
            "Generalized Anxiety Disorder (GAD)",
            "Hypochondriasis (H)",
            "Major Depressive Disorder (MDD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)",
            "Schizophrenia (SCZ)",
            "Schizotypal Personality Disorder (STPD)",
            "Social Anxiety Disorder (SAD)",
            "Somatization Disorder (SD)",
            "Specific Phobia (SP)",
            "Tourette's Syndrome (TS)",
            "Trichotillomania (TTM)",
            "Unspecified Mental Disorder (UMD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Autism Spectrum Disorder (ASD)",
                "Bipolar Disorder (BD)",
                "Borderline Personality Disorder (BPD)",
                "Bulimia Nervosa (BN)",
                "Dissociative Identity Disorder (DID)",
                "Hypochondriasis (H)",
                "Major Depressive Disorder (MDD)",
                "Obsessive-Compulsive Disorder (OCD)",
                "Post-Traumatic Stress Disorder (PTSD)",
                "Schizotypal Personality Disorder (STPD)",
                "Tourette's Syndrome (TS)",
                "Trichotillomania (TTM)"
            ],
            "mismatches": [
                "Anxiety Disorder (AD)",
                "Attention Deficit Hyperactivity Disorder (ADHD)",
                "Cocaine-Induced Psychosis (CIP)",
                "Depersonalization Disorder (DPD)",
                "Depression (D)",
                "Dissociative Amnesia (DA)",
                "Generalized Anxiety Disorder (GAD)",
                "Social Anxiety Disorder (SAD)",
                "Somatization Disorder (SD)",
                "Specific Phobia (SP)",
                "Unspecified Mental Disorder (UMD)"
            ],
            "true_referents": [
                "Agoraphobia",
                "Amnesia",
                "Amnesia, Anterograde",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cocaine-Related Disorders",
                "Compulsive Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Dysthymic Disorder",
                "Hoarding Disorder",
                "Hypochondriasis",
                "Narcissistic Personality Disorder",
                "Neurotic Disorders",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobia, Social",
                "Phobic Disorders",
                "Psychoses, Substance-Induced",
                "Schizoid Personality Disorder",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Paranoid",
                "Schizotypal Personality Disorder",
                "Social Communication Disorder",
                "Somatoform Disorders",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Tourette Syndrome",
                "Trauma and Stressor Related Disorders",
                "Trichotillomania"
            ],
            "TP": 12,
            "FP": 11,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "meta.llama3-70b-instruct-v1:0",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            "Acute Stress Disorder (ASD)",
            "Adjustment Disorder",
            "Agitated Delirium",
            "Alzheimer's Disease (AD)",
            "Anorexia Nervosa",
            "Antisocial Personality Disorder",
            "Anxiety Disorder",
            "Asperger Syndrome",
            "Attention Deficit Hyperactivity Disorder (ADHD)",
            "Autorotism",
            "Bipolar Disorder",
            "Body Dysmorphic Disorder",
            "Borderline Personality Disorder",
            "Bulimia Nervosa",
            "Catatonic Disorder",
            "Chronic Fatigue Syndrome (CFS)",
            "Dementia",
            "Depersonalization Disorder",
            "Dissociative Amnesia",
            "Dissociative Fugue",
            "Dysthymia",
            "Major Depressive Disorder (MDD)",
            "Obsessive-Compulsive Disorder (OCD)",
            "Post-Traumatic Stress Disorder (PTSD)"
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Adjustment Disorder",
                "Alzheimer's Disease (AD)",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Asperger Syndrome",
                "Bipolar Disorder",
                "Body Dysmorphic Disorder",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Dementia",
                "Dysthymia",
                "Obsessive-Compulsive Disorder (OCD)"
            ],
            "mismatches": [
                "Acute Stress Disorder (ASD)",
                "Agitated Delirium",
                "Anxiety Disorder",
                "Attention Deficit Hyperactivity Disorder (ADHD)",
                "Autorotism",
                "Catatonic Disorder",
                "Chronic Fatigue Syndrome (CFS)",
                "Depersonalization Disorder",
                "Dissociative Amnesia",
                "Dissociative Fugue",
                "Major Depressive Disorder (MDD)",
                "Post-Traumatic Stress Disorder (PTSD)"
            ],
            "true_referents": [
                "Adjustment Disorders",
                "Alzheimer Disease",
                "Amnesia",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Asperger Syndrome",
                "Attention Deficit Disorder with Hyperactivity",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Binge-Eating Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Body Dysmorphic Disorders",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Cognitive Dysfunction",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Cyclothymic Disorder",
                "Delirium",
                "Dementia",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Dysthymic Disorder",
                "Emergence Delirium",
                "Hoarding Disorder",
                "Hysteria",
                "Narcissistic Personality Disorder",
                "Narcolepsy",
                "Neurocognitive Disorders",
                "Neurotic Disorders",
                "Obsessive-Compulsive Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobic Disorders",
                "Restless Legs Syndrome",
                "Sadism",
                "Schizophrenia",
                "Schizophrenia, Catatonic",
                "Seasonal Affective Disorder",
                "Stress Disorders, Post-Traumatic",
                "Stress Disorders, Traumatic",
                "Stress Disorders, Traumatic, Acute",
                "Transvestism",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 12,
            "FP": 12,
            "FN": 0
        }
    },
    {
        "test": "limited-list-referents",
        "responder": "amazon.titan-text-express-v1",
        "concept": "mental disorder",
        "domain": "medicine",
        "response": [
            {
                "Referent": "Alzheimer's disease",
                "Canonical name": "Alzheimer's disease"
            },
            {
                "Referent": "Anxiety disorder",
                "Canonical name": "Anxiety disorder"
            },
            {
                "Referent": "Bipolar disorder",
                "Canonical name": "Bipolar disorder"
            },
            {
                "Referent": "Borderline personality disorder",
                "Canonical name": "Borderline personality disorder"
            },
            {
                "Referent": "Depression",
                "Canonical name": "Major depressive disorder"
            },
            {
                "Referent": "Eating disorder",
                "Canonical name": "Anorexia nervosa"
            },
            {
                "Referent": "Schizophrenia",
                "Canonical name": "Schizophrenia"
            },
            {
                "Referent": "Post-traumatic stress disorder",
                "Canonical name": "Post-traumatic stress disorder"
            },
            {
                "Referent": "Schizoaffective disorder",
                "Canonical name": "Schizoaffective disorder"
            },
            {
                "Referent": "Substance use disorder",
                "Canonical name": "Substance use disorder"
            },
            {
                "Referent": "Obsessive-compulsive disorder",
                "Canonical name": "Obsessive-compulsive disorder"
            },
            {
                "Referent": "Antisocial personality disorder",
                "Canonical name": "Antisocial personality disorder"
            },
            {
                "Referent": "Conduct disorder",
                "Canonical name": "Conduct disorder"
            },
            {
                "Referent": "Dissociative identity disorder",
                "Canonical name": "Dissociative identity disorder"
            },
            {
                "Referent": "Dissociative amnesia",
                "Canonical name": "Dissociative amnesia"
            },
            {
                "Referent": "Disruptive behavior and dissocial disorder",
                "Canonical name": "Disruptive behavior and dissocial disorder"
            },
            {
                "Referent": "Neurodevelopmental disorder",
                "Canonical name": "Neurodevelopmental disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            },
            {
                "Referent": "Disruptive mood dysregulation disorder",
                "Canonical name": "Disruptive mood dysregulation disorder"
            }
        ],
        "scorer": "gpt-4o",
        "judgement": {
            "matches": [
                "Alzheimer Disease",
                "Anxiety Disorders",
                "Bipolar Disorder",
                "Borderline Personality Disorder",
                "Anorexia Nervosa",
                "Schizophrenia",
                "Stress Disorders, Post-Traumatic",
                "Obsessive-Compulsive Disorder",
                "Antisocial Personality Disorder",
                "Conduct Disorder",
                "Dissociative Identity Disorder",
                "Neurodevelopmental Disorders"
            ],
            "mismatches": [
                "Schizoaffective disorder",
                "Substance use disorder",
                "Dissociative amnesia",
                "Disruptive behavior and dissocial disorder",
                "Disruptive mood dysregulation disorder"
            ],
            "true_referents": [
                "Alzheimer Disease",
                "Amnesia",
                "Amnesia, Anterograde",
                "Anorexia Nervosa",
                "Antisocial Personality Disorder",
                "Anxiety Disorders",
                "Attention Deficit and Disruptive Behavior Disorders",
                "Autism Spectrum Disorder",
                "Autistic Disorder",
                "Bipolar Disorder",
                "Bipolar and Related Disorders",
                "Body Integrity Identity Disorder",
                "Borderline Personality Disorder",
                "Bulimia Nervosa",
                "Compulsive Personality Disorder",
                "Conduct Disorder",
                "Cyclothymic Disorder",
                "Dementia",
                "Dependent Personality Disorder",
                "Depressive Disorder",
                "Depressive Disorder, Major",
                "Depressive Disorder, Treatment-Resistant",
                "Disruptive, Impulse Control, and Conduct Disorders",
                "Dissociative Disorders",
                "Dissociative Identity Disorder",
                "Frontotemporal Dementia",
                "Hoarding Disorder",
                "Neurodevelopmental Disorders",
                "Obsessive-Compulsive Disorder",
                "Oppositional Defiant Disorder",
                "Orthorexia Nervosa",
                "Panic Disorder",
                "Personality Disorders",
                "Phobic Disorders",
                "Psychological Trauma",
                "Schizophrenia",
                "Schizophrenia Spectrum and Other Psychotic Disorders",
                "Schizophrenia, Catatonic",
                "Schizotypal Personality Disorder",
                "Stress Disorders, Post-Traumatic",
                "Substance Withdrawal Syndrome",
                "Substance-Related Disorders",
                "Tobacco Use Disorder",
                "Trauma and Stressor Related Disorders"
            ],
            "TP": 12,
            "FP": 5,
            "FN": 0
        }
    }
]