{
    "dinsql": {
        "a601dcef90267006aea6385d22fd36d3": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "give the number of patients whose procedure short title is cardiac rhythm conv nec. (Values: Cardiac rhythm conv NEC)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"SHORT_TITLE\" = \"Cardiac rhythm conv NEC\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PROCEDURES WHERE SHORT_TITLE = 'Cardiac rhythm conv nec'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the WHERE clause literal: it uses 'Cardiac rhythm conv nec' (lowercase 'nec') instead of 'Cardiac rhythm conv NEC'. In SQLite, string comparisons are case-sensitive by default (BINARY collation), so the predicted query fails to match the intended rows. This is a localized issue in the WHERE clause value, not affecting the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "88918368bee11a289ddd9a04f1958b6b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "get me the number of elective hospital admission patients who had coronary artery primary disease. (Values: ELECTIVE, CORONARY ARTERY DISEASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"ELECTIVE\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"CORONARY ARTERY DISEASE\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE ADMISSION_TYPE = 'ELECTIVE' AND DIAGNOSIS LIKE '%CORONARY ARTERY DISEASE%'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['24.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with the DIAGNOSES table and uses COUNT(*) instead of COUNT(DISTINCT SUBJECT_ID), fundamentally altering the query\u2019s logic. The original question only requires filtering on the DEMOGRAPHIC table\u2019s DIAGNOSIS column and counting distinct patients. By joining DIAGNOSES, the predicted query creates extra row duplication and changes the source of the diagnosis filter, leading to an incorrect result. Fixing this requires removing the join and restoring the distinct count, which is more than a localized tweak.",
            "error_type": "Global: incorrect join strategy"
        }
    },
    "deepseek": {
        "8905a4c9ce097875ed1f8db4c3ff14bd": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of patients admitted before 2166 with procedure icd9 code 5011?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2166\" AND PROCEDURES.\"ICD9_CODE\" = \"5011\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN PROCEDURES P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE P.ICD9_CODE = '5011' AND EXTRACT(YEAR FROM D.ADMITTIME) < 2166;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to a syntax error when using EXTRACT(YEAR FROM D.ADMITTIME), which SQLite does not support. The rest of the query structure (SELECT, JOIN, WHERE on ICD9_CODE) aligns with the intended logic. This is a localized misuse of a datetime function rather than a structural or multi-clause logical error, so it can be fixed by replacing EXTRACT with the correct SQLite date function.",
            "error_type": "Local: other-temporal error"
        },
        "85c64a20b4ee9ba7566059a53b6fcdb3": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Bring the list of patients younger than 41 years of age who had a blood lab test done. (Values: Blood)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"41\" AND LAB.\"FLUID\" = \"Blood\"",
            "pred_sql": "SELECT d.NAME FROM DEMOGRAPHIC d INNER JOIN LAB l ON d.SUBJECT_ID = l.SUBJECT_ID AND d.HADM_ID = l.HADM_ID WHERE d.AGE < 41 AND l.FLUID = 'Blood';",
            "gold_answer": "[['27.0']]",
            "pred_answer": "[['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended output and query structure. The original task requires returning the count of distinct patients under 41 who had a blood lab test. The predicted query instead selects patient names (with duplicates) rather than computing a distinct count. This is not a simple column reference or syntax mistake\u2014it alters the core aggregation and output of the query. Therefore, this is a global error affecting the overall logic and structure of the SQL.",
            "error_type": "Global: wrong-aggregation/selection error"
        },
        "807ed8b40c34e53869ce863c6ed2489a": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "which lab test has been ordered for patient with patient id 19187?",
            "gold_sql": "SELECT LAB.\"LABEL\" FROM LAB WHERE LAB.\"SUBJECT_ID\" = \"19187\"",
            "pred_sql": "SELECT LABEL, CHARTTIME, VALUE_UNIT, FLAG FROM LAB WHERE SUBJECT_ID = 19187;",
            "gold_answer": "[['Alveolar-arterial Gradient'], ['Atypical Lymphocytes'], ['Bands'], ['Base Excess'], ['Basophils'], ['Bilirubin, Direct'], ['Bilirubin, Indirect'], ['Bilirubin, Total'], ['Calculated Total CO2'], ['Eosinophils'], ['Hematocrit'], ['Hemoglobin'], ['Lymphocytes'], ['MCH'], ['MCHC'], ['MCV'], ['Metamyelocytes'], ['Monocytes'], ['Myelocytes'], ['Neutrophils'], ['Nucleated Red Cells'], ['Other Cells'], ['Platelet Count'], ['RDW'], ['Red Blood Cells'], ['White Blood Cells'], ['pCO2'], ['pH'], ['pO2']]",
            "pred_answer": "[['Alveolar-arterial Gradient', '2170-10-04 14:34:00', '598mm Hg', 'None'], ['Atypical Lymphocytes', '2170-10-04 14:30:00', '4%', 'abnormal'], ['Bands', '2170-10-04 14:30:00', '5%', 'None'], ['Base Excess', '2170-10-04 14:34:00', '-1mEq/L', 'None'], ['Basophils', '2170-10-04 14:30:00', '0%', 'None'], ['Bilirubin, Direct', '2170-10-08 07:20:00', '0.2mg/dL', 'None'], ['Bilirubin, Indirect', '2170-10-08 07:20:00', '8.3mg/dL', 'None'], ['Bilirubin, Total', '2170-10-08 07:20:00', '8.5mg/dL', 'None'], ['Calculated Total CO2', '2170-10-04 14:34:00', '26mEq/L', 'None'], ['Eosinophils', '2170-10-04 14:30:00', '2%', 'None'], ['Hematocrit', '2170-10-04 14:30:00', '35.9%', 'None'], ['Hemoglobin', '2170-10-04 14:30:00', '12.3g/dL', 'None'], ['Lymphocytes', '2170-10-04 14:30:00', '20%', 'abnormal'], ['MCH', '2170-10-04 14:30:00', '36.0pg', 'abnormal'], ['MCHC', '2170-10-04 14:30:00', '34.1%', 'None'], ['MCV', '2170-10-04 14:30:00', '106fL', 'abnormal'], ['Metamyelocytes', '2170-10-04 14:30:00', '0%', 'None'], ['Monocytes', '2170-10-04 14:30:00', '4%', 'None'], ['Myelocytes', '2170-10-04 14:30:00', '0%', 'None'], ['Neutrophils', '2170-10-04 14:30:00', '61%', 'abnormal'], ['Nucleated Red Cells', '2170-10-04 14:30:00', '5%', 'abnormal'], ['Other Cells', '2170-10-04 14:30:00', '4%', 'abnormal'], ['Platelet Count', '2170-10-04 14:30:00', '294K/uL', 'None'], ['RDW', '2170-10-04 14:30:00', '17.5%', 'abnormal'], ['Red Blood Cells', '2170-10-04 14:30:00', '3.40m/uL', 'abnormal'], ['White Blood Cells', '2170-10-04 14:30:00', '20.10K/uL', 'abnormal'], ['pCO2', '2170-10-04 14:34:00', '46mm Hg', 'abnormal'], ['pH', '2170-10-04 14:34:00', '7.34units', 'abnormal'], ['pO2', '2170-10-04 14:34:00', '94mm Hg', 'None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth in the SELECT clause by returning extra columns (CHARTTIME, VALUE_UNIT, FLAG) that are not asked for. The table references, filtering condition, and overall query structure remain correct. This is a localized mistake in the projection, not a logical or structural flaw spanning multiple clauses.",
            "error_type": "Local: projection error"
        },
        "9261992dc4b198991f00da4ec688e081": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "find out the short title of diagnoses and health insurance by which patient with patient id 9271 is covered.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"INSURANCE\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"9271\"",
            "pred_sql": "SELECT D.INSURANCE AS \"Health Insurance\", Diag.SHORT_TITLE AS \"Diagnosis Short Title\" FROM DEMOGRAPHIC D INNER JOIN DIAGNOSES Diag ON D.SUBJECT_ID = Diag.SUBJECT_ID AND D.HADM_ID = Diag.HADM_ID WHERE D.SUBJECT_ID = 9271;",
            "gold_answer": "[['Medicare', 'Adv eff corticosteroids'], ['Medicare', 'Asthma NOS w (ac) exac'], ['Medicare', 'Av block-2nd degree NEC'], ['Medicare', 'Bipol I cur depres NOS'], ['Medicare', 'CHF NOS'], ['Medicare', 'Chr diastolic hrt fail'], ['Medicare', 'Chr kidney dis stage III'], ['Medicare', 'DMII hprosmlr uncontrold'], ['Medicare', 'Hx-ven thrombosis/embols'], ['Medicare', 'Hy kid NOS w cr kid I-IV'], ['Medicare', 'Parox ventric tachycard'], ['Medicare', 'Pneumonia, organism NOS']]",
            "pred_answer": "[['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only returns the insurance values (one column) and fails to produce the diagnosis short titles. This is a mistake confined to the SELECT clause\u2014simply adding the missing column (Diag.SHORT_TITLE) would correct it. The overall join logic and WHERE filter are correct, so this is a localized error in the SELECT list.",
            "error_type": "Local: select-clause error"
        }
    },
    "codes": {
        "a42b052cc593a90aed642a74ae195507": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the marital status and age of the patient name orville ford? (Values: Orville Ford)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"AGE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Orville Ford\"",
            "pred_sql": "SELECT demographic.marital_status, demographic.age FROM demographic INNER JOIN demographic ON demographic.name = 'Orville Ford'",
            "gold_answer": "[['None', '0.0']]",
            "pred_answer": "ambiguous column name: demographic.marital_status",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary self-join on the same table without aliases and uses the join condition incorrectly (demographic.name = 'Orville Ford') instead of a proper ON clause linking two tables or simply filtering. This results in an ambiguous column reference error. The mistake is localized to the JOIN clause and column references, and does not require restructuring the overall query logic.",
            "error_type": "Local: ambiguous column reference"
        },
        "04fec7017bec275d4bc47df53bb90b78": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are born before 2121 and followed the procedure biopsy of tonsils and adenoids? (Values: Biopsy of tonsils and adenoids)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2121\" AND PROCEDURES.\"LONG_TITLE\" = \"Biopsy of tonsils and adenoids\"",
            "pred_sql": "SELECT count(procedures.hadm_id) FROM procedures INNER JOIN demographic ON procedures.subject_id = demographic.subject_id WHERE procedures.short_title = 'Biopsy of tonsils and adenoids' AND demographic.dob_year < 2121",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the intended logic in three key ways: 1) It joins PROCEDURES to DEMOGRAPHIC using subject_id only rather than matching on hadm_id (and subject_id), which fundamentally alters how rows relate between tables. 2) It filters on procedures.short_title instead of procedures.LONG_TITLE, so it never matches the intended procedure name. 3) It counts procedure.hadm_id rather than COUNT(DISTINCT subject_id), changing the aggregation semantics. The incorrect join condition is not a simple typo but a structural mistake that breaks the table relationship and requires reworking the query\u2019s core logic, making this a global error.",
            "error_type": "Global: join error"
        },
        "0df82d3e22799b2179485006b810705b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the maximum age of patients who have pneumonia primary disease and were hospitalized for 30 days. (Values: PNEUMONIA)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"PNEUMONIA\" AND DEMOGRAPHIC.\"DAYS_STAY\" = \"30\"",
            "pred_sql": "SELECT max(demographic.age) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE diagnoses.short_title = 'Pneumonia' AND demographic.days_stay = 30",
            "gold_answer": "[['64.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC to DIAGNOSES on only SUBJECT_ID and filters DIAGNOSES.SHORT_TITLE = 'Pneumonia'. This deviates from the intended logic, which uses the DEMOGRAPHIC.DIAGNOSIS column to identify primary pneumonia cases. The join is not only missing the HADM_ID key but also changes the table and column used for the disease filter, fundamentally altering the query\u2019s meaning and requiring restructuring of the query logic. This is not a simple syntax or local clause fix but a structural, logical error in how tables are combined.",
            "error_type": "Global: incorrect join relationship"
        },
        "b9e6f14f784427e612f5e8259145fe67": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients who stayed in the hospital for more than 34 days and have been prescribed the drug neo*po*mct oil? (Values: NEO*PO*MCT Oil)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"34\" AND PRESCRIPTIONS.\"DRUG\" = \"NEO*PO*MCT Oil\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM prescriptions INNER JOIN demographic ON prescriptions.hadm_id = demographic.hadm_id WHERE prescriptions.drug = 'NEO*PO*MCT OIL' AND demographic.days_stay > 34",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in minor parts: it omits DISTINCT on SUBJECT_ID and uses 'NEO*PO*MCT OIL' (uppercase OIL) instead of the exact 'NEO*PO*MCT Oil'. These are localized mistakes in the WHERE clause (value literal mismatch and missing DISTINCT) and do not require reworking the overall JOIN or query structure.",
            "error_type": "Local: value parsing error"
        },
        "0e41987fae64a3292dbb83fe18b6d6eb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the number of patients admitted before the year 2154 who had medicare insurance. (Values: Medicare)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Medicare\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2154\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN demographic ON demographic.insurance = 'Medicare' WHERE strftime('%Y', demographic.admittime) < '2154'",
            "gold_answer": "[['22.0']]",
            "pred_answer": "ambiguous column name: demographic.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary self-join on the DEMOGRAPHIC table without using aliases, and then refers to demographic.subject_id ambiguously. This is a localized mistake within the JOIN clause (missing table aliases and incorrect ON usage), rather than a fundamental restructuring of the query logic or intent.",
            "error_type": "Local: join alias error"
        },
        "920c86ed33b3bf35707a12f566236c34": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "provide the procedure short title and drug code of subject id 24425.",
            "gold_sql": "SELECT PROCEDURES.\"SHORT_TITLE\",PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" FROM PROCEDURES INNER JOIN PRESCRIPTIONS on PROCEDURES.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PROCEDURES.\"SUBJECT_ID\" = \"24425\"",
            "pred_sql": "SELECT procedures.short_title, prescriptions.drug FROM procedures INNER JOIN prescriptions ON procedures.subject_id = prescriptions.subject_id WHERE procedures.subject_id = 24425",
            "gold_answer": "[['Vaccination NEC', 'AMPVL'], ['Vaccination NEC', 'GENT10I'], ['Vaccination NEC', 'NAMP500I'], ['Vaccination NEC', 'NEOSYRD5W']]",
            "pred_answer": "[['Vaccination NEC', 'NEO*IV*Ampicillin Sodium'], ['Vaccination NEC', 'NEO*IV*Gentamicin'], ['Vaccination NEC', 'Send 500mg Vial'], ['Vaccination NEC', 'Syringe (Neonatal) *D5W*']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only selects the wrong column (prescriptions.drug instead of prescriptions.FORMULARY_DRUG_CD) but, more critically, it joins the tables on SUBJECT_ID rather than HADM_ID. The incorrect join key changes the relationship between PROCEDURES and PRESCRIPTIONS and thus alters the intended result set. This is not a localized typo or syntax mistake but a fundamental join logic error that affects the overall query meaning.",
            "error_type": "Global: incorrect join key"
        },
        "0bf9d7f3a5120ea9a6a3ca724295956f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "let me know the route of administration and the type of drug influenza virus vaccine is. (Values: Influenza Virus Vaccine)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"DRUG_TYPE\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Influenza Virus Vaccine\"",
            "pred_sql": "SELECT route, drug_type FROM prescriptions WHERE drug = 'Influenza virus vaccine'",
            "gold_answer": "[['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL mirrors the correct structure (same table, columns, and WHERE clause) but fails to match any rows because the literal string 'Influenza virus vaccine' uses incorrect casing compared to the ground truth value \u201cInfluenza Virus Vaccine\u201d. This is a localized issue in the WHERE clause\u2019s value literal, not a structural or logical flaw in the query.",
            "error_type": "Local: value parsing error"
        }
    },
    "macsql": {
        "0c6330427069e970ee4be464f3c99c48": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the days of hospital stay and date of death of subject id 2560?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\",DEMOGRAPHIC.\"DOD\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT `DAYS_STAY`, `DOD` FROM `DEMOGRAPHIC` WHERE `SUBJECT_ID` = 2560 AND `DOD` IS NOT NULL",
            "gold_answer": "[['4.0', 'None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth by adding the extra filter AND `DOD` IS NOT NULL in the WHERE clause. This is a localized mistake in the WHERE condition that can be fixed by simply removing that extra predicate\u2014no overall query structure or joins need changing. Hence it\u2019s a local error in the WHERE clause.",
            "error_type": "Local: condition error"
        },
        "0253fe7f06bcba29a5a6dc7342278218": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the number of patients with acute vascular insufficiency of intestine who are less than 50 years of age? (Values: Acute vascular insufficiency of intestine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"50\" AND DIAGNOSES.\"LONG_TITLE\" = \"Acute vascular insufficiency of intestine\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM DEMOGRAPHIC AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `ICD9_CODE` = '<ICD9_CODE_FROM_SUB_QUESTION_1>' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` WHERE T1.`AGE` < 50",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the filtering logic by joining on a derived table filtered by ICD9_CODE (and even using a placeholder) instead of filtering DIAGNOSES by LONG_TITLE = 'Acute vascular insufficiency of intestine'. This is not a simple typo or localized mistake; it alters the overall logic and intended meaning of the query, requiring restructuring of the join and WHERE clause to correct. Therefore, this is a global error in the query structure and logic.",
            "error_type": "Global: incorrect join/filter logic"
        },
        "0138ed4f1a039573afe3d860e12c2de9": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "for how long the patient bruce harris remained admitted in hospital? (Values: Bruce Harris)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Bruce Harris\"",
            "pred_sql": "SELECT `DISCHTIME`, `ADMITTIME`, (JULIANDAY(`DISCHTIME`) - JULIANDAY(`ADMITTIME`)) AS `DAYS_STAY` FROM DEMOGRAPHIC WHERE `NAME` = 'Bruce Harris'",
            "gold_answer": "[['23.0']]",
            "pred_answer": "[['2173-09-17 15:40:00', '2173-08-25 12:58:00', '23.113']]",
            "is_correct": false,
            "reasoning": "The predicted query errors are confined to the SELECT clause: it pulls DISCHTIME and ADMITTIME and computes a Julian\u2010day difference instead of simply returning the stored DAYS_STAY value. No JOINs or multiple clause logic is broken\u2014the mistake is selecting and computing the wrong columns. This is a localized error in the SELECT list and does not require restructuring the overall query logic.",
            "error_type": "Local: wrong-column-selection error"
        },
        "0e6e157150b6804faa15e113bdd34edd": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "calculate the maximum days of hospitalization for patients with s/p hanging primary disease. (Values: S/P HANGING)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"S/P HANGING\"",
            "pred_sql": "SELECT MAX(DAYS_STAY) AS MaxDaysStay FROM DEMOGRAPHIC WHERE HADM_ID IN ( SELECT DISTINCT HADM_ID FROM DIAGNOSES WHERE SHORT_TITLE = 'S/P HANGING' AND HADM_ID IS NOT NULL ) AND DAYS_STAY IS NOT NULL AND HADM_ID IS NOT NULL",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL moves the filter from the DEMOGRAPHIC.DIAGNOSIS column to a subquery over the DIAGNOSES table, which fundamentally changes the query\u2019s logic. Instead of directly filtering on the primary disease stored in DEMOGRAPHIC.DIAGNOSIS, it performs an unnecessary join via HADM_ID and looks at DIAGNOSES.SHORT_TITLE. This is not a simple typo or localized mistake but a flawed restructuring of the query logic, requiring significant changes to restore the intended behavior.",
            "error_type": "Global: incorrect join logic"
        }
    },
    "llm2sql": {
        "0a1cd1f71bf51049de78aa0501e9fa78": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "What is the number of inpatient hospital admission patients who are less than 59 years old? (Values: TRANSFER FROM HOSP/EXTRAM)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRANSFER FROM HOSP/EXTRAM\" AND DEMOGRAPHIC.\"AGE\" < \"59\"",
            "pred_sql": "SELECT COUNT(DISTINCT HADM_ID) FROM DEMOGRAPHIC WHERE AGE < 59;",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['49.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the condition on ADMISSION_LOCATION (TRANSFER FROM HOSP/EXTRAM) that is required by the question. This is a mistake confined to the WHERE clause\u2014a missing filter\u2014without changing the overall query structure.",
            "error_type": "Local: missing WHERE filter"
        },
        "0be50e80b5be87e0c4395c5608c9d39f": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Look for the number of patients with the primary disease syncope telemetry who died in or before 2158. (Values: SYNCOPE;TELEMETRY, 2158.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"SYNCOPE;TELEMETRY\" AND DEMOGRAPHIC.\"DOD_YEAR\" <= \"2158.0\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN DIAGNOSES nON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID nAND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID nWHERE (DIAGNOSES.SHORT_TITLE LIKE '%SYNCOPE%' OR DIAGNOSES.LONG_TITLE LIKE '%TELEMETRY%') nAND DEMOGRAPHIC.EXPIRE_FLAG = 1 nAND DEMOGRAPHIC.DOD_YEAR <= 2158;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains a simple syntax mistake: the alias prefix 'n' is inadvertently glued to keywords ('nFROM', 'nJOIN', 'nWHERE'), causing a parse error near 'DEMOGRAPHIC'. This is a local issue within the SELECT/JOIN syntax and can be fixed by inserting proper whitespace or removing the stray alias markers, without changing the overall query logic or structure.",
            "error_type": "Local: syntax error"
        },
        "819886e34e7e0c9516caa63edcf8fa0d": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "count the number of patients born before 2065 who had prophylactic administration of vaccine against other diseases. (Values: Prophylactic administration of vaccine against other diseases)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2065\" AND PROCEDURES.\"LONG_TITLE\" = \"Prophylactic administration of vaccine against other diseases\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN PROCEDURES ON DEMOGRAPHIC.SUBJECT_ID = PROCEDURES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID nWHERE DEMOGRAPHIC.DOB_YEAR < 2065 nAND PROCEDURES.LONG_TITLE = 'Prophylactic administration of vaccine against other diseases';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces spurious \u2018n\u2019 prefixes (e.g., nFROM, nJOIN, nWHERE) instead of proper newlines or spaces, causing a SQLite syntax error near \u201cDEMOGRAPHIC\u201d. This mistake is confined to the FROM/JOIN clause formatting and does not alter the overall query logic or relationships. It can be fixed by correcting the syntax without restructuring the query.",
            "error_type": "Local: syntax error"
        }
    }
}