{
    "o4-mini": {
        "02984008762913113973753bce29760d": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "which patients have prescription for spironolactone medication? (Values: Spironolactone)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Spironolactone\"",
            "pred_sql": "SELECT DISTINCT d.SUBJECT_ID, d.HADM_ID, d.NAME FROM PRESCRIPTIONS p JOIN DEMOGRAPHIC d ON p.SUBJECT_ID = d.SUBJECT_ID AND p.HADM_ID = d.HADM_ID WHERE LOWER(p.DRUG) LIKE '%spironolactone%';",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['18480.0', '156404.0', 'Brian Taylor'], ['6983.0', '173511.0', 'Edward Castillo'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman']]",
            "is_correct": false,
            "reasoning": "The predicted SQL returns SUBJECT_ID, HADM_ID, and NAME instead of a single COUNT of distinct SUBJECT_ID as required. It also uses a LIKE filter rather than an exact match and thus completely changes the query\u2019s structure and intended output. Fixing this is not a localized tweak but requires rewriting the SELECT and WHERE clauses and adding the aggregation, so it is a global structural error.",
            "error_type": "Global"
        },
        "060151fe7a66f01cf5811b6efdc73538": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "find the number of patients who were ordered alanine aminotransferase (alt) lab test. (Values: Alanine Aminotransferase (ALT))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"LABEL\" = \"Alanine Aminotransferase (ALT)\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS num_patients FROM LAB WHERE LABEL ILIKE '%alanine aminotransferase%';",
            "gold_answer": "[['51.0']]",
            "pred_answer": "near \"ILIKE\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to the use of the ILIKE operator, which is not supported in SQLite, resulting in a near \"ILIKE\": syntax error. This is a localized syntax issue within the WHERE clause and does not affect the overall query structure or intended relationships between tables.",
            "error_type": "Local: syntax error"
        },
        "94d03397b2b5cc027c508ab22c8e216e": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "how many patients born before the year 2104 were given the drug vancomycin? (Values: Vancomycin)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2104\" AND PRESCRIPTIONS.\"DRUG\" = \"Vancomycin\"",
            "pred_sql": "SELECT COUNT(DISTINCT p.SUBJECT_ID) AS num_patients FROM DEMOGRAPHIC d JOIN PRESCRIPTIONS p ON d.SUBJECT_ID = p.SUBJECT_ID AND d.HADM_ID = p.HADM_ID WHERE d.DOB_YEAR < 2104 AND UPPER(p.DRUG) LIKE '%VANCOMYCIN%';",
            "gold_answer": "[['18.0']]",
            "pred_answer": "[['22.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistake lies in the WHERE clause: it uses UPPER(p.DRUG) LIKE '%VANCOMYCIN%' instead of an exact match p.DRUG = 'Vancomycin'. This causes over\u2010matching (e.g., 'Vancomycin Phosphate') and inflates the count. The overall query structure (joins, aggregation) remains correct. This is a localized condition error that can be fixed by changing the comparison operator, so it\u2019s a Local error.",
            "error_type": "Local: incorrect value condition"
        }
    },
    "dinsql": {
        "9b805afb13a94b6007d685fa0474e38f": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the diagnosis of the diagnosis icd9 code 55221?",
            "gold_sql": "SELECT DIAGNOSES.\"LONG_TITLE\" FROM DIAGNOSES WHERE DIAGNOSES.\"ICD9_CODE\" = \"55221\"",
            "pred_sql": "SELECT SHORT_TITLE, LONG_TITLE FROM DIAGNOSES WHERE ICD9_CODE = '55221'",
            "gold_answer": "[['Incisional ventral hernia with obstruction']]",
            "pred_answer": "[['Obstr incisional hernia', 'Incisional ventral hernia with obstruction']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the SELECT clause: it returns both SHORT_TITLE and LONG_TITLE instead of only LONG_TITLE as required by the question. This is a localized error in the SELECT list and does not affect joins, filters, or overall query structure.",
            "error_type": "Local: select-clause error"
        }
    },
    "codes": {
        "a6ee3501998234967c1043a5887f7a1b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify the name and route of administration of drug code asa325 (Values: ASA325)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"DRUG\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"ASA325\"",
            "pred_sql": "SELECT drug, route FROM prescriptions WHERE drug = 'ASA325'",
            "gold_answer": "[['Aspirin', 'NG'], ['Aspirin', 'NG'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on the wrong column (`drug` instead of `FORMULARY_DRUG_CD`), which is a localized column reference error in the WHERE clause. The overall query structure is correct but the wrong field is used for the filter, leading to no results. This does not require changing the join logic or query flow, only fixing the column reference.",
            "error_type": "Local: column-reference error"
        },
        "045c5269cdae83454d68e393e12c8658": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Give me the list of patients born before 2071 who prefer spanish. (Values: SPAN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"LANGUAGE\" = \"SPAN\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2071\"",
            "pred_sql": "SELECT demographic.name FROM demographic INNER JOIN demographic ON demographic.subject_id = demographic.subject_id WHERE demographic.dob_year < 2071 AND demographic.language = 'SPAN'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "ambiguous column name: demographic.name",
            "is_correct": false,
            "reasoning": "The predicted SQL self-joins the DEMOGRAPHIC table without aliases, causing an ambiguous reference to demographic.name. This is a column reference error localized to the JOIN/SELECT clause and does not require reworking the overall query logic.",
            "error_type": "Local: ambiguous-column error"
        },
        "008452ec0d8e3074023da5b599850454": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are tested with lab item id 50922?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"50922\"",
            "pred_sql": "SELECT count(subject_id) FROM lab WHERE itemid = 50922",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts all lab rows for itemid 50922 rather than counting distinct patients. It misses the DISTINCT modifier (and the JOIN is unnecessary since subject_id is in LAB). This is a localized error in the aggregation clause, not a structural or multi-clause logical error.",
            "error_type": "Local: missing DISTINCT"
        },
        "04f2d8289f6c164c281c05e3fc7f3940": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are admitted in emergency and admitted before the year 2163? (Values: EMERGENCY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2163\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE strftime('%Y', admittime) < '2163'",
            "gold_answer": "[['38.0']]",
            "pred_answer": "[['58.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the required filter on ADMISSION_TYPE = 'EMERGENCY' and only checks admittime year < 2163. This is a mistake confined to the WHERE clause (a missing condition), and can be corrected by adding the emergency filter without restructuring the entire query.",
            "error_type": "Local: missing filter condition"
        },
        "a76a171c5f287a62a48890fd64927138": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find me the death status and primary language that patient with patient id 3623 prefers.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"LANGUAGE\",DEMOGRAPHIC.\"EXPIRE_FLAG\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"3623\"",
            "pred_sql": "SELECT diagnosis, LANGUAGE FROM demographic WHERE subject_id = 3623",
            "gold_answer": "[['ENGL', '1.0']]",
            "pred_answer": "[['PNEUMONIA', 'ENGL']]",
            "is_correct": false,
            "reasoning": "The predicted query selects the wrong column in the SELECT clause: it uses DEMOGRAPHIC.DIAGNOSIS instead of the required EXPIRE_FLAG (death status). The query structure (FROM DEMOGRAPHIC, WHERE subject_id filter) is correct; only the column choice is incorrect. This is a localized error in the SELECT clause and does not affect overall query logic or table relationships.",
            "error_type": "Local: column reference error"
        },
        "8793d7722bc0a33c0337714fb51f5a3b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are admitted before the year 2200 and diagnosed with icd9 code e8791? (Values: E8791)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2200\" AND DIAGNOSES.\"ICD9_CODE\" = \"E8791\"",
            "pred_sql": "SELECT count(subject_id) FROM diagnoses WHERE icd9_code = 'E8791' AND strftime('%Y', admittime) < '2200'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: admittime",
            "is_correct": false,
            "reasoning": "The predicted SQL references the column admittime in the DIAGNOSES table, but admittime exists only in the DEMOGRAPHIC table. There is no JOIN to DEMOGRAPHIC, and the column reference is incorrect, causing the 'no such column' error. This is a localized issue with a column reference, not a fundamental change to the query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "0bc26b6c69c9bf3628430b1e23d18007": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the number of patients born before 1879 ere tranferred within the facility. (Values: TRSF WITHIN THIS FACILITY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRSF WITHIN THIS FACILITY\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"1879\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE dob_year < 1879",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only applies the DOB_YEAR < 1879 filter and omits the required filter on ADMISSION_LOCATION = 'TRSF WITHIN THIS FACILITY'. This is a localized error in the WHERE clause, not altering the overall query structure or joins, but missing a specific condition.",
            "error_type": "Local: missing WHERE condition"
        },
        "a5ed08bdcb4c1e8be442640558d76f49": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the average age of government health insurance patients who were admitted in hospital for a day. (Values: Government, 1)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Government\" AND DEMOGRAPHIC.\"DAYS_STAY\" = \"1\"",
            "pred_sql": "SELECT avg(demographic.age) FROM demographic INNER JOIN patients ON demographic.subject_id = patients.subject_id WHERE patients.insurance = 'Government' AND demographic.days_stay = 1",
            "gold_answer": "[['20.0']]",
            "pred_answer": "no such table: patients",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with a table named \u201cpatients,\u201d which does not exist in the provided schema. This is a reference error confined to a specific part of the query (the FROM/JOIN clause) and does not require restructuring the overall logic\u2014only correcting the table name or removing the invalid join. Therefore, it is a Local error.",
            "error_type": "Local: table reference error"
        }
    },
    "macsql": {
        "9cfe1f468ae3569a8653122b46b2dce5": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "tell me the ethnic background and marital status of patient with patient id 21796.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"ETHNICITY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"21796\"",
            "pred_sql": "SELECT `ETHNICITY`, `MARITAL_STATUS` FROM DEMOGRAPHIC WHERE `SUBJECT_ID` = 21796 AND `ETHNICITY` IS NOT NULL AND `MARITAL_STATUS` IS NOT NULL",
            "gold_answer": "[['None', 'WHITE']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds extra IS NOT NULL conditions in the WHERE clause, filtering out the desired row because MARITAL_STATUS is NULL. The overall query structure and target tables/columns are correct; only the WHERE clause is overly restrictive. This is a localized mistake in the WHERE filters, not a global logical or structural error.",
            "error_type": "Local: where-clause filter error"
        },
        "89473cb3b3eaa62d3a19fba9185e42e7": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the number of emergency room admitted patients who have diagnosis icd9 code e8798? (Values: EMERGENCY, E8798)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DIAGNOSES.\"ICD9_CODE\" = \"E8798\"",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT DISTINCT D1.`SUBJECT_ID`, D1.`HADM_ID` FROM DIAGNOSES AS D1 INNER JOIN DEMOGRAPHIC AS D2 ON D1.`SUBJECT_ID` = D2.`SUBJECT_ID` AND D1.`HADM_ID` = D2.`HADM_ID` WHERE D1.`ICD9_CODE` = 'E8798' AND D2.`ADMISSION_LOCATION` = 'EMERGENCY ROOM ADMIT' ) AS SubQuery",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses DEMOGRAPHIC.ADMISSION_LOCATION = 'EMERGENCY ROOM ADMIT' instead of DEMOGRAPHIC.ADMISSION_TYPE = 'EMERGENCY'. This is a localized mistake in the WHERE clause referencing the wrong column and value. The overall query structure (join, counting distinct patients) is correct; only the column and literal in the filter are wrong.",
            "error_type": "Local: column reference error"
        },
        "8757aa09f5f4b407395c6608aecb69e0": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "find the number of patients with bowel obstruction primary disease who have been admitted to hospital for more than 10 days. (Values: BOWEL OBSTRUCTION)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BOWEL OBSTRUCTION\" AND DEMOGRAPHIC.\"DAYS_STAY\" > \"10\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) AS num_patients FROM DEMOGRAPHIC AS T1 INNER JOIN ( SELECT DISTINCT `HADM_ID` FROM DIAGNOSES WHERE `LONG_TITLE` LIKE '%BOWEL OBSTRUCTION%' OR `SHORT_TITLE` LIKE '%BOWEL OBSTRUCTION%' ) AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`DAYS_STAY` > 10",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by joining to the DIAGNOSES table and using substring matches on LONG_TITLE/SHORT_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column for the primary disease. This alters the intended meaning (primary diagnosis vs. any diagnosis) and requires restructuring the query beyond a simple local fix.",
            "error_type": "Global: join error"
        }
    },
    "deepseek": {
        "088bfde60ac975ad3850f459ad47cfef": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "bring the number of patients younger than 48 years who were admitted before 2138.",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"AGE\" < \"48\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2138\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE AGE < 48 AND YEAR(ADMITTIME) < 2138;",
            "gold_answer": "[['12.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses the non-existent YEAR() function on ADMITTIME in SQLite instead of using the ADMITYEAR column as specified. This is a localized mistake in the WHERE clause\u2019s temporal handling and does not require restructuring the joins or overall logic of the query. It violates the SQL assumptions but is fixable by replacing YEAR(ADMITTIME) with ADMITYEAR.",
            "error_type": "Local: other-temporal error"
        },
        "bb83ddfb2b58462bada823ae66a5bd95": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "How many patients born before the year 2109 were primarily diagnosed for guillain barre syndrome? (Values: GUILLAIN BARRE SYNDROME)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"GUILLAIN BARRE SYNDROME\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2109\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC DM JOIN DIAGNOSES D ON DM.SUBJECT_ID = D.SUBJECT_ID AND DM.HADM_ID = D.HADM_ID WHERE DM.DOB_YEAR < 2109 AND D.LONG_TITLE LIKE '%Guillain Barre Syndrome%';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the core logic by joining the DIAGNOSES table and filtering on its LONG_TITLE rather than using the DEMOGRAPHIC.DIAGNOSIS field for the primary diagnosis. This alters the meaning of \u201cprimarily diagnosed\u201d and requires restructuring the query beyond a simple fix in one clause.",
            "error_type": "Global: incorrect join relationship"
        }
    },
    "llm2sql": {
        "85aa76ce561a87a6edc0d482804be238": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "provide the number of patients who had other body fluid lab test and were hospitalized for more than 2 days. (Values: Other Body Fluid)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"2\" AND LAB.\"FLUID\" = \"Other Body Fluid\"",
            "pred_sql": "SELECT COUNT(DISTINCT L.SUBJECT_ID) FROM LAB AS L JOIN DEMOGRAPHIC AS D ON L.SUBJECT_ID = D.SUBJECT_ID WHERE L.CATEGORY = 'Other Body Fluid' AND D.DAYS_STAY > 2;",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only uses the wrong column (LAB.CATEGORY instead of LAB.FLUID) but, more critically, it joins LAB to DEMOGRAPHIC on SUBJECT_ID rather than on the admission key (HADM_ID). This incorrect join relationship fundamentally alters the intended data association between a specific lab test and its corresponding hospital stay, leading to unintended results. Fixing this requires restructuring the join logic, which is a global error.",
            "error_type": "Global: incorrect join relationship"
        },
        "0bc9f228d063f921a03fbc029e61db03": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "How many of the patients admitted before the year 2173 were diagnosed with ac salpingo-oophoritis? (Values: Ac salpingo-oophoritis)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2173\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Ac salpingo-oophoritis\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.LONG_TITLE = 'Ac salpingo-oophoritis' AND DEMOGRAPHIC.ADMITYEAR < 2173;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses DIAGNOSES.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE and uses COUNT(*) instead of COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID). These are mistakes confined to specific clauses (SELECT and WHERE) and do not alter the overall join structure or logic of the query.",
            "error_type": "Local: column reference and aggregation error"
        },
        "ae3c7627e3ba7750105510476460e8e4": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "What is the age and primary disease of Steven Sepulveda?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"AGE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Steven Sepulveda\"",
            "pred_sql": "SELECT D.AGE, Diag.LONG_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS Diag ON D.SUBJECT_ID = Diag.SUBJECT_ID AND D.HADM_ID = Diag.HADM_ID WHERE D.NAME = 'Steven Sepulveda';",
            "gold_answer": "[['64.0', 'CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA']]",
            "pred_answer": "[['64.0', 'Atrial fibrillation'], ['64.0', 'Cardiac complications, not elsewhere classified'], ['64.0', 'Coronary atherosclerosis of native coronary artery'], ['64.0', 'Other and unspecified hyperlipidemia'], ['64.0', 'Pulmonary collapse'], ['64.0', 'Unspecified essential hypertension'], ['64.0', 'Unspecified sleep apnea']]",
            "is_correct": false,
            "reasoning": "The predicted query introduces a JOIN to the DIAGNOSES table and selects LONG_TITLE, returning multiple diagnosis rows instead of using the single DIAGNOSIS field in DEMOGRAPHIC. This alters the overall logic and structure of the query by changing the source of the primary disease and producing multiple records rather than the one expected. Fixing this requires removing the JOIN and selecting from a different table, which is more than a local correction.",
            "error_type": "Global: incorrect-join"
        }
    }
}