{
    "macsql": {
        "900aceb61a32bfdb6cf3e6ece9597284": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "provide marital status as well as preferred language of the patient with patient id 2560.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"LANGUAGE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT `MARITAL_STATUS`, `LANGUAGE` FROM `DEMOGRAPHIC` WHERE `SUBJECT_ID` = 2560 AND `MARITAL_STATUS` IS NOT NULL AND `LANGUAGE` IS NOT NULL",
            "gold_answer": "[['None', 'None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces additional WHERE conditions (`MARITAL_STATUS IS NOT NULL AND LANGUAGE IS NOT NULL`) that filter out the only row (which has both fields as NULL), whereas the ground truth simply selects the values (even if NULL). This mistake lies solely in the WHERE clause by adding extra filters, and can be fixed by removing those conditions without changing the overall query structure or table relationships.",
            "error_type": "Local: incorrect WHERE condition"
        },
        "0d0a62ed7c9bd62e584c9f5b260d9ea0": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "on which date did the patient with patient id 24425 die?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DOD\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"24425\"",
            "pred_sql": "SELECT DOD FROM DEMOGRAPHIC WHERE SUBJECT_ID = 24425 AND EXPIRE_FLAG = 1 AND DOD IS NOT NULL LIMIT 1",
            "gold_answer": "[['None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query adds extra WHERE conditions (EXPIRE_FLAG = 1, DOD IS NOT NULL) and a LIMIT clause, which changes the intended result by over\u2010filtering. The structure of the query (SELECT from DEMOGRAPHIC, filtering by SUBJECT_ID) remains the same, and the error lies solely in the added conditions. This is a localized mistake in the WHERE clause, not a flaw in the overall query logic or joins.",
            "error_type": "Local: incorrect-filter error"
        },
        "814b08ced5bf2cfc63c311dd93979199": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "give me the number of urgent hospital admission patients who have replaced route of drug administration. (Values: URGENT, REPLACE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"URGENT\" AND PRESCRIPTIONS.\"ROUTE\" = \"REPLACE\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`, T1.`HADM_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID`, `HADM_ID` FROM DEMOGRAPHIC WHERE `ADMISSION_TYPE` = 'URGENT' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID`, `HADM_ID` FROM PRESCRIPTIONS WHERE `ROUTE` = 'REPLACE' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` AND T1.`HADM_ID` = T2.`HADM_ID`",
            "gold_answer": "[['0.0']]",
            "pred_answer": "wrong number of arguments to function COUNT()",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses COUNT(DISTINCT T1.SUBJECT_ID, T1.HADM_ID), which is invalid in SQLite (only a single expression is allowed in COUNT(DISTINCT)). This is a localized syntax error in the SELECT clause\u2019s aggregate function and does not reflect a flawed table relationship or overall query logic.",
            "error_type": "Local: aggregate-function syntax error"
        },
        "9193bc2c5d90be4e199fc6a421133f88": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "Calculate the number of patients admitted to emergency who had an ld body fluid test by lab (Values: EMERGENCY, LD, Body Fluid)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND LAB.\"LABEL\" = \"LD, Body Fluid\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT `SUBJECT_ID`, `HADM_ID` FROM DEMOGRAPHIC WHERE `ADMISSION_TYPE` = 'EMERGENCY' ) AS T1 INNER JOIN ( SELECT `SUBJECT_ID`, `HADM_ID` FROM LAB WHERE `LABEL` LIKE '%LD%' AND `FLUID` = 'Body Fluid' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` AND T1.`HADM_ID` = T2.`HADM_ID`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only mis-filters the lab records: it breaks the single LABEL = 'LD, Body Fluid' test into LABEL LIKE '%LD%' AND FLUID = 'Body Fluid'. This is a WHERE-clause value/condition mistake, not a structural or multi-clause logic error. It can be fixed by restoring the exact equality check on LABEL. Therefore this is a localized filter error.",
            "error_type": "Local: value parsing error"
        },
        "85d8a21277604d575ed70aa8589ef037": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "which language does the patient charles deshay prefer? (Values: Charles Deshay)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"LANGUAGE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Charles Deshay\"",
            "pred_sql": "SELECT LANGUAGE FROM DEMOGRAPHIC WHERE NAME = 'Charles Deshay' AND LANGUAGE IS NOT NULL",
            "gold_answer": "[['None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only differs from the ground truth by adding the extra condition AND LANGUAGE IS NOT NULL in the WHERE clause. This is a mistake localized to the WHERE clause and does not change the overall query structure or table relationships. It filters out the only row (where LANGUAGE is None), resulting in no output. Therefore, this is a local error in the WHERE filter.",
            "error_type": "Local: extra WHERE condition"
        },
        "8eb82af011a6e0a42adfe3d93215d014": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "give me the marital status and date of birth of subject id 21796.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"DOB\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"21796\"",
            "pred_sql": "SELECT `MARITAL_STATUS`, `DOB` FROM `DEMOGRAPHIC` WHERE `SUBJECT_ID` = 21796 AND `MARITAL_STATUS` IS NOT NULL AND `DOB` IS NOT NULL",
            "gold_answer": "[['None', '2197-01-15 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs locally by adding extra WHERE conditions `AND MARITAL_STATUS IS NOT NULL AND DOB IS NOT NULL`. These additional filters are not required by the question and cause no rows to be returned, but they do not alter the overall query structure or table relationships. This is therefore a localized mistake in the WHERE clause.",
            "error_type": "Local: where-clause error"
        },
        "979a734ab882143afcde05bf1b13cbd9": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many of the patients with bradycardia had abnormal lab test results? (Values: BRADYCARDIA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BRADYCARDIA\" AND LAB.\"FLAG\" = \"abnormal\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `SHORT_TITLE` = 'Bradycardia' OR `LONG_TITLE` LIKE '%Bradycardia%' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM LAB WHERE `FLAG` = 'abnormal' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table (with SHORT_TITLE/LONG_TITLE filters) instead of the DEMOGRAPHIC table\u2019s DIAGNOSIS column as specified by the question and assumptions. This is a wrong table/column reference in the FROM clause and can be fixed by referencing the correct table/column; it does not require restructuring the entire query logic or joins beyond that correction.",
            "error_type": "Local: table reference error"
        },
        "86d07eae693396f6aead63e691ef2627": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "which language did patient anna johnson speak? (Values: Anna Johnson)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"LANGUAGE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Anna Johnson\"",
            "pred_sql": "SELECT LANGUAGE FROM DEMOGRAPHIC WHERE NAME = 'Anna Johnson' AND LANGUAGE IS NOT NULL",
            "gold_answer": "[['None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds an extra WHERE condition \u201cAND LANGUAGE IS NOT NULL,\u201d which filters out the row that the ground truth returns (where LANGUAGE is NULL). This mistake is confined to the WHERE clause and does not require changing the overall query structure, joins, or logic. Hence, it is a localized error in the filtering condition.",
            "error_type": "Local: where-condition error"
        },
        "0f85a50055cfb411dd0b95a57d97191f": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "find the minimum number of days of hospital stay for patients born before the year 2200.",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DOB_YEAR\" > \"2200\"",
            "pred_sql": "SELECT MIN(`DAYS_STAY`) AS min_days_stay FROM DEMOGRAPHIC WHERE `DOB_YEAR` < 2200",
            "gold_answer": "[['None']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the ground truth and the predicted SQL is the comparison operator in the WHERE clause (ground truth uses > \"2200\", predicted uses < 2200). This is an error confined to the WHERE condition and does not change the overall structure or the tables involved. Such an incorrect comparator is a localized mistake in the condition, so it is a Local error.",
            "error_type": "Local: incorrect comparison operator"
        },
        "925b64a7b970e5a56de55e7db18fbd8f": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the average age of patients whose language is cape and primary disease is hyperglycemia? (Values: CAPE, HYPERGLYCEMIA)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"LANGUAGE\" = \"CAPE\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"HYPERGLYCEMIA\"",
            "pred_sql": "SELECT AVG(T1.AGE) AS average_age FROM DEMOGRAPHIC AS T1 INNER JOIN DIAGNOSES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID AND T1.HADM_ID = T2.HADM_ID WHERE T1.LANGUAGE = 'CAPE' AND T2.LONG_TITLE LIKE '%HYPERGLYCEMIA%' AND T1.AGE IS NOT NULL AND T2.LONG_TITLE IS NOT NULL AND T1.LANGUAGE IS NOT NULL",
            "gold_answer": "[['49.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the query\u2019s overall logic by joining the DEMOGRAPHIC table to DIAGNOSES and filtering on DIAGNOSES.LONG_TITLE, whereas the ground truth filters DEMOGRAPHIC.DIAGNOSIS directly. This is not a small syntax mistake but a fundamental change to the query structure and intended semantics\u2014using the wrong table and join instead of filtering the primary diagnosis column. This requires restructuring the query to match the intended meaning, so it is a global error.",
            "error_type": "Global: incorrect join/table usage"
        },
        "a54b66567c23a6ca8e774a4507f148c5": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "give me the lab test item id and lab test abnormal status of patient with patient id 22377.",
            "gold_sql": "SELECT LAB.\"ITEMID\",LAB.\"FLAG\" FROM LAB WHERE LAB.\"SUBJECT_ID\" = \"22377\"",
            "pred_sql": "SELECT ITEMID, FLAG FROM LAB WHERE SUBJECT_ID = 22377 AND FLAG IS NOT NULL",
            "gold_answer": "[['50883.0', 'None'], ['50883.0', 'None'], ['50884.0', 'None'], ['50884.0', 'None'], ['50885.0', 'abnormal'], ['50885.0', 'abnormal']]",
            "pred_answer": "[['50885.0', 'abnormal'], ['50885.0', 'abnormal']]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds an extra filter `AND FLAG IS NOT NULL` in the WHERE clause, which removes rows where FLAG is NULL. The ground truth query returns all lab test statuses (including NULL flags) for SUBJECT_ID = 22377. The mistake is confined to the WHERE clause and does not change the overall query structure or table relationships\u2014just an overly restrictive filter.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "9d5d968951e3f72781e48eabfa97ce79": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "tell me the icu stay id for patient stephanie suchan. (Values: Stephanie Suchan)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"ICUSTAY_ID\" FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Stephanie Suchan\"",
            "pred_sql": "SELECT ICUSTAY_ID FROM PRESCRIPTIONS WHERE SUBJECT_ID = ( SELECT SUBJECT_ID FROM DEMOGRAPHIC WHERE NAME = 'Stephanie Suchan' ) AND HADM_ID = ( SELECT HADM_ID FROM DEMOGRAPHIC WHERE NAME = 'Stephanie Suchan' ) AND ICUSTAY_ID IS NOT NULL;",
            "gold_answer": "[['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None']]",
            "pred_answer": "[['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0'], ['242681.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the ground truth only by adding an extra condition in the WHERE clause (AND ICUSTAY_ID IS NOT NULL), which filters out rows with NULL ICUSTAY_ID. The table joins and overall query structure remain the same; the error is confined to the WHERE clause logic and does not affect relationships between tables.",
            "error_type": "Local: where-clause error"
        },
        "061d620cdd89282af1d48f9a0f2d3931": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "get the number of alive patients who underwent open biopsy of soft tissue. (Values: 0, Open biopsy of soft tissue)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND PROCEDURES.\"LONG_TITLE\" = \"Open biopsy of soft tissue\"",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT DISTINCT P.`SUBJECT_ID` FROM PROCEDURES AS P JOIN DEMOGRAPHIC AS D ON P.`SUBJECT_ID` = D.`SUBJECT_ID` AND P.`HADM_ID` = D.`HADM_ID` WHERE P.`ICD9_CODE` = '0' -- Assuming '0' is the ICD-9 code for \"Open biopsy of soft tissue\" AND D.`EXPIRE_FLAG` = 0 ) AS AlivePatients",
            "gold_answer": "[['0.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted query fails by filtering on PROCEDURES.ICD9_CODE = '0' instead of PROCEDURES.LONG_TITLE = 'Open biopsy of soft tissue'. This is a misuse of a column/value in the WHERE clause and does not alter the overall join structure or query logic. It is a localized error in the filter condition.",
            "error_type": "Local: table/column reference error"
        },
        "a04053b65b4e0a526f0eb2e1b027f139": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the maximum age of male patients diagnosed with primary disease overdose? (Values: M, OVERDOSE)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"OVERDOSE\"",
            "pred_sql": "SELECT MAX(d.AGE) AS max_age FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.HADM_ID = diag.HADM_ID WHERE d.GENDER = 'M' AND d.AGE IS NOT NULL AND diag.LONG_TITLE IS NOT NULL AND (diag.LONG_TITLE LIKE '%overdose%' OR diag.SHORT_TITLE LIKE '%overdose%')",
            "gold_answer": "[['24.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN to the DIAGNOSES table and uses LIKE searches on LONG_TITLE/SHORT_TITLE instead of filtering the DEMOGRAPHIC.DIAGNOSIS column for the exact value 'OVERDOSE'. This changes the entire logic of how 'overdose' diagnoses are identified and adds unnecessary IS NOT NULL checks, resulting in no matching rows. Fixing this would require restructuring the query\u2019s core logic (removing the join and using the correct column).",
            "error_type": "Global: join logic error"
        },
        "0ebff2e801d7f07f735ed115ff6846e8": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many patients are below 44 years of age and followed the procedure uni salpingo-oophor? (Values: Oth uni salpingo-oophor)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"44\" AND PROCEDURES.\"SHORT_TITLE\" = \"Oth uni salpingo-oophor\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) AS num_patients FROM DEMOGRAPHIC AS T1 INNER JOIN PROCEDURES AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` AND T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`AGE` < 44 AND T2.`LONG_TITLE` LIKE '%Oth uni salpingo-oophor%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates only in the WHERE clause by using PROCEDURES.LONG_TITLE with a LIKE pattern instead of PROCEDURES.SHORT_TITLE = 'Oth uni salpingo-oophor'. This is a localized mistake in column choice and comparison operator. The rest of the query structure\u2014including the JOIN\u2014remains correct and does not alter the overall logic. Therefore, this is a Local error.",
            "error_type": "Local: column/comparison error"
        },
        "ae0a0d761414ab17b03de7bb99fefdf0": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "For patient id 2110, specify name and lab test category",
            "gold_sql": "SELECT DEMOGRAPHIC.\"NAME\",LAB.\"CATEGORY\" FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2110\"",
            "pred_sql": "SELECT (SELECT `NAME` FROM DEMOGRAPHIC WHERE `SUBJECT_ID` = 2110) AS `NAME`, L.`CATEGORY` FROM LAB AS L WHERE L.`SUBJECT_ID` = 2110 GROUP BY L.`CATEGORY`",
            "gold_answer": "[['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology']]",
            "pred_answer": "[['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Hematology']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query structure by replacing the intended JOIN with a scalar subquery and adding a GROUP BY on L.CATEGORY. This grouping removes duplicate rows and changes the result set cardinality, which is a global logic change rather than a simple fix in one clause. The error spans multiple components (SELECT, FROM, GROUP BY) and changes the intended meaning of the query.",
            "error_type": "Global: flawed aggregation/join strategy"
        },
        "8c1239d9e940eb0ebcff0e32c2c18d7b": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "Tell me which primary disease the patient Walter Locher is suffering from and whether he is still alive or not.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"EXPIRE_FLAG\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Walter Locher\"",
            "pred_sql": "SELECT `EXPIRE_FLAG` FROM DEMOGRAPHIC WHERE `NAME` = 'Walter Locher'",
            "gold_answer": "[['1.0', 'NEOPLASM OF UNCERTAIN BEHAVIOR OF OTHER LYMPHATIC AND HEMATOPOIETIC TISSUES\\\\BONE MARROW TRANSPLANT']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only selects the EXPIRE_FLAG column and omits the DIAGNOSIS column, whereas the question asks for both the patient\u2019s primary disease (DIAGNOSIS) and whether they are alive (EXPIRE_FLAG). This is a localized mistake in the SELECT clause and does not alter the overall structure or table joins of the query.",
            "error_type": "Local: missing column in SELECT"
        },
        "074e5b4530d549289801849ce3cdde02": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the total number of patients aged below 68 who had abdominal abscess as primary disease  (Values: ABDOMINAL ABSCESS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"ABDOMINAL ABSCESS\" AND DEMOGRAPHIC.\"AGE\" < \"68\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) AS total_patients FROM ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `LONG_TITLE` = 'ABDOMINAL ABSCESS' OR `SHORT_TITLE` = 'ABDOMINAL ABSCESS' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM DEMOGRAPHIC WHERE `AGE` < 68 ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls the disease condition from the DIAGNOSES table (using LONG_TITLE/SHORT_TITLE) and then joins to DEMOGRAPHIC, whereas the question asks for the primary disease recorded directly in DEMOGRAPHIC.DIAGNOSIS. This changes the source of the disease filter and the overall join strategy, fundamentally altering the query logic rather than a small fix in a single clause.",
            "error_type": "Global: join logic error"
        },
        "9192083f15618c3944fab5819282ce1f": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "give me the number of patients admitted before 2148 who are diagnosed with unspecified obesity. (Values: Obesity NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2148\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Obesity NOS\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.`SUBJECT_ID`) FROM DEMOGRAPHIC AS D INNER JOIN DIAGNOSES AS DI ON D.`SUBJECT_ID` = DI.`SUBJECT_ID` WHERE DI.`ICD9_CODE` = '27800' -- Assuming '27800' is the ICD-9 code for \"Obesity NOS\" AND D.`ADMITYEAR` < 2148",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID as the join condition instead of matching on HADM_ID. This fundamentally changes the query\u2019s logic by associating any diagnosis of a patient across all admissions rather than only diagnoses from the same hospital stay. Fixing this requires altering the overall join strategy, not just a local clause tweak.",
            "error_type": "Global: incorrect join key error"
        },
        "a77d67ccd0a105807ef5c3fb6cb2c019": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "Provide the number of patients who were admitted before the year 2146 and had procedure short title as cardiopulm resuscita nos. (Values: Cardiopulm resuscita NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2146\" AND PROCEDURES.\"SHORT_TITLE\" = \"Cardiopulm resuscita NOS\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `HADM_ID` FROM DEMOGRAPHIC WHERE `ADMITYEAR` < 2146 ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID`, `HADM_ID` FROM PROCEDURES WHERE `SHORT_TITLE` = 'Cardiopulm resuscita nos' ) AS T2 ON T1.`HADM_ID` = T2.`HADM_ID`",
            "gold_answer": "[['2.0']]",
            "pred_answer": "no such column: T1.SUBJECT_ID",
            "is_correct": false,
            "reasoning": "The predicted SQL references T1.SUBJECT_ID in the outer SELECT, but the subquery aliased as T1 only selects HADM_ID. This is a column reference error in a specific part of the query and can be fixed by including SUBJECT_ID in T1\u2019s SELECT. It does not affect the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "0ffc8a41b8b94e7bc01da17aac9180bf": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "for patient id 74032, specify the religion and date of death",
            "gold_sql": "SELECT DEMOGRAPHIC.\"RELIGION\",DEMOGRAPHIC.\"DOD\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"74032\"",
            "pred_sql": "SELECT `RELIGION`, `DOD` FROM `DEMOGRAPHIC` WHERE `SUBJECT_ID` = 74032 AND `DOD` IS NOT NULL",
            "gold_answer": "[['CATHOLIC', 'None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduced an extra WHERE condition `AND DOD IS NOT NULL` that is not in the user\u2019s request. This is a mistake localized to the WHERE clause (an unnecessary filter) and does not alter the overall query structure or joins. Removing this condition would correct the query, so it\u2019s a local error.",
            "error_type": "Local: where-clause error"
        },
        "8447e58e57394384f3f325e732cdd313": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "get me the number of patients on iv route of drug administration who are diagnosed with closed fracture of surgical neck of humerus. (Values: Fx surg nck humerus-clos, IV)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Fx surg nck humerus-clos\" AND PRESCRIPTIONS.\"ROUTE\" = \"IV\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `ICD9_CODE` = 'ICD9_CODE_FROM_SUBQUESTION_1' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM PRESCRIPTIONS WHERE `ROUTE` = 'IV' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters diagnoses by a placeholder ICD9_CODE (`'ICD9_CODE_FROM_SUBQUESTION_1'`) rather than matching the actual SHORT_TITLE = \"Fx surg nck humerus-clos\". This mistake is confined to the WHERE clause of the diagnoses subquery and does not require reworking the overall query structure or join logic. It is a column/value reference error localized to that condition.",
            "error_type": "Local: column reference/value parsing error"
        },
        "a81dae5ff42498734e857c5b7dc46deb": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many female patients underwent the procedure of abdomen artery incision? (Values: F, Abdomen artery incision)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND PROCEDURES.\"SHORT_TITLE\" = \"Abdomen artery incision\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.SUBJECT_ID) AS Female_Patient_Count FROM DEMOGRAPHIC AS T1 INNER JOIN PROCEDURES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID WHERE T1.GENDER = 'F' AND T2.ICD9_CODE = '3523'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only uses the wrong filter (ICD9_CODE = '3523' instead of SHORT_TITLE = 'Abdomen artery incision') but also changes the join key from HADM_ID to SUBJECT_ID, altering the fundamental relationship between the tables. These issues span multiple clauses (JOIN and WHERE) and change the overall logic and intended meaning of the query, so this is a global error.",
            "error_type": "Global: incorrect join and wrong filter column"
        },
        "975158013a0614796649501c4458a198": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the preferred language and primary disease of the patient with patient id 3343?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"LANGUAGE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"3343\"",
            "pred_sql": "SELECT T1.`LANGUAGE`, T2.`LONG_TITLE` AS `PRIMARY_DISEASE` FROM DEMOGRAPHIC AS T1 JOIN DIAGNOSES AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` AND T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`SUBJECT_ID` = 3343 AND T1.`LANGUAGE` IS NOT NULL AND T2.`LONG_TITLE` IS NOT NULL GROUP BY T1.`LANGUAGE`, T2.`LONG_TITLE` LIMIT 1",
            "gold_answer": "[['None', 'NEWBORN']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The ground truth query retrieves the DIAGNOSIS directly from the DEMOGRAPHIC table, but the predicted query instead joins DEMOGRAPHIC to the DIAGNOSES table and pulls LONG_TITLE. This fundamentally alters the query\u2019s structure and logic: it introduces an unnecessary join on HADM_ID/ SUBJECT_ID, applies extra NOT NULL filters, grouping and a LIMIT, and expects data in DIAGNOSES that doesn\u2019t exist for subject 3343. Correcting this requires removing the entire JOIN and selecting the DIAGNOSIS column from DEMOGRAPHIC directly\u2014this is not a small, isolated fix but a restructuring of the query logic.",
            "error_type": "Global: incorrect join"
        },
        "9cfe1f468ae3569a8653122b46b2dce5": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "tell me the ethnic background and marital status of patient with patient id 21796.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"ETHNICITY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"21796\"",
            "pred_sql": "SELECT `ETHNICITY`, `MARITAL_STATUS` FROM DEMOGRAPHIC WHERE `SUBJECT_ID` = 21796 AND `ETHNICITY` IS NOT NULL AND `MARITAL_STATUS` IS NOT NULL",
            "gold_answer": "[['None', 'WHITE']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds extra IS NOT NULL conditions in the WHERE clause, filtering out the desired row because MARITAL_STATUS is NULL. The overall query structure and target tables/columns are correct; only the WHERE clause is overly restrictive. This is a localized mistake in the WHERE filters, not a global logical or structural error.",
            "error_type": "Local: where-clause filter error"
        },
        "89473cb3b3eaa62d3a19fba9185e42e7": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the number of emergency room admitted patients who have diagnosis icd9 code e8798? (Values: EMERGENCY, E8798)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DIAGNOSES.\"ICD9_CODE\" = \"E8798\"",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT DISTINCT D1.`SUBJECT_ID`, D1.`HADM_ID` FROM DIAGNOSES AS D1 INNER JOIN DEMOGRAPHIC AS D2 ON D1.`SUBJECT_ID` = D2.`SUBJECT_ID` AND D1.`HADM_ID` = D2.`HADM_ID` WHERE D1.`ICD9_CODE` = 'E8798' AND D2.`ADMISSION_LOCATION` = 'EMERGENCY ROOM ADMIT' ) AS SubQuery",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses DEMOGRAPHIC.ADMISSION_LOCATION = 'EMERGENCY ROOM ADMIT' instead of DEMOGRAPHIC.ADMISSION_TYPE = 'EMERGENCY'. This is a localized mistake in the WHERE clause referencing the wrong column and value. The overall query structure (join, counting distinct patients) is correct; only the column and literal in the filter are wrong.",
            "error_type": "Local: column reference error"
        },
        "8757aa09f5f4b407395c6608aecb69e0": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "find the number of patients with bowel obstruction primary disease who have been admitted to hospital for more than 10 days. (Values: BOWEL OBSTRUCTION)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BOWEL OBSTRUCTION\" AND DEMOGRAPHIC.\"DAYS_STAY\" > \"10\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) AS num_patients FROM DEMOGRAPHIC AS T1 INNER JOIN ( SELECT DISTINCT `HADM_ID` FROM DIAGNOSES WHERE `LONG_TITLE` LIKE '%BOWEL OBSTRUCTION%' OR `SHORT_TITLE` LIKE '%BOWEL OBSTRUCTION%' ) AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`DAYS_STAY` > 10",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by joining to the DIAGNOSES table and using substring matches on LONG_TITLE/SHORT_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column for the primary disease. This alters the intended meaning (primary diagnosis vs. any diagnosis) and requires restructuring the query beyond a simple local fix.",
            "error_type": "Global: join error"
        },
        "a4411125b5abe68f2d0eb36a12677a73": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "How many male patients had icd9 code 8842? (Values: M)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND PROCEDURES.\"ICD9_CODE\" = \"8842\"",
            "pred_sql": "SELECT COUNT(DISTINCT T2.`SUBJECT_ID`) AS male_patient_count FROM DIAGNOSES AS T1 INNER JOIN DEMOGRAPHIC AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`ICD9_CODE` = '8842' AND T2.`GENDER` = 'M'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC with the DIAGNOSES table rather than the PROCEDURES table, which changes the entire source of ICD9_CODE data and thus the result. This is not a minor syntax mistake in a clause but a fundamental error in how tables are related, altering the intended logic of the query.",
            "error_type": "Global: join error"
        },
        "0505b2e7a406ecc63693aa9f33c0af26": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the average days of hospital stay of patients whose primary disease is s/p fall? (Values: S/P FALL)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"S/P FALL\"",
            "pred_sql": "SELECT AVG(T2.`DAYS_STAY`) AS Average_Days_Stay FROM DIAGNOSES AS T1 INNER JOIN DEMOGRAPHIC AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE (T1.`LONG_TITLE` LIKE '%S/P FALL%' OR T1.`SHORT_TITLE` LIKE '%S/P FALL%') AND T2.`DAYS_STAY` IS NOT NULL AND T1.`LONG_TITLE` IS NOT NULL AND T1.`SHORT_TITLE` IS NOT NULL AND T1.`HADM_ID` IS NOT NULL AND T2.`HADM_ID` IS NOT NULL;",
            "gold_answer": "[['12.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table and a JOIN to filter for \u201cS/P FALL,\u201d but the question asks for the primary disease stored in DEMOGRAPHIC.DIAGNOSIS. By joining to DIAGNOSES and filtering LONG_TITLE/SHORT_TITLE, the query\u2019s fundamental logic is changed and yields no results. Fixing this requires removing the DIAGNOSES table and instead filtering directly on DEMOGRAPHIC.DIAGNOSIS, which is a structural change affecting the whole query rather than a small localized correction.",
            "error_type": "Global: join error"
        },
        "88309c7f731812aac8c0784535361b4e": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "count the number of patients who died of pituatary bleed. (Values: 1, PITUITARY BLEED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"1\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"PITUITARY BLEED\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM DEMOGRAPHIC AS T1 INNER JOIN ( SELECT DISTINCT `HADM_ID` FROM DIAGNOSES WHERE `LONG_TITLE` LIKE '%pituitary bleed%' OR `SHORT_TITLE` LIKE '%pituitary bleed%' ) AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`EXPIRE_FLAG` = 1",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by joining the DIAGNOSES table and searching in LONG_TITLE/SHORT_TITLE instead of directly filtering the DEMOGRAPHIC.DIAGNOSIS field as specified. This is not a minor syntax or naming mistake but a structural alteration that affects which records are counted (it uses a different table and join strategy). Thus it is a Global error.",
            "error_type": "Global: incorrect join/table logic"
        },
        "89a92f1bd2e74cecb90195b6d5cd860f": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the admission type and lab test name for the patient id 2560?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",LAB.\"LABEL\" FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT DISTINCT `LABEL` FROM LAB WHERE `SUBJECT_ID` = 2560",
            "gold_answer": "[['NEWBORN', 'Atypical Lymphocytes'], ['NEWBORN', 'Bands'], ['NEWBORN', 'Basophils'], ['NEWBORN', 'Bilirubin, Direct'], ['NEWBORN', 'Bilirubin, Indirect'], ['NEWBORN', 'Bilirubin, Total'], ['NEWBORN', 'Eosinophils'], ['NEWBORN', 'Hematocrit'], ['NEWBORN', 'Hemoglobin'], ['NEWBORN', 'Lymphocytes'], ['NEWBORN', 'MCH'], ['NEWBORN', 'MCHC'], ['NEWBORN', 'MCV'], ['NEWBORN', 'Metamyelocytes'], ['NEWBORN', 'Monocytes'], ['NEWBORN', 'Myelocytes'], ['NEWBORN', 'Neutrophils'], ['NEWBORN', 'Nucleated Red Cells'], ['NEWBORN', 'Platelet Count'], ['NEWBORN', 'RDW'], ['NEWBORN', 'Red Blood Cells'], ['NEWBORN', 'White Blood Cells']]",
            "pred_answer": "[['Atypical Lymphocytes'], ['Bands'], ['Basophils'], ['Bilirubin, Direct'], ['Bilirubin, Indirect'], ['Bilirubin, Total'], ['Eosinophils'], ['Hematocrit'], ['Hemoglobin'], ['Lymphocytes'], ['MCH'], ['MCHC'], ['MCV'], ['Metamyelocytes'], ['Monocytes'], ['Myelocytes'], ['Neutrophils'], ['Nucleated Red Cells'], ['Platelet Count'], ['RDW'], ['Red Blood Cells'], ['White Blood Cells']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the DEMOGRAPHIC table entirely, fails to join DEMOGRAPHIC and LAB on HADM_ID, and does not select the admission type. Instead, it only retrieves distinct lab labels. This error affects the overall query logic and structure, as it removes a required table and key join and changes the intended output columns, so it cannot be fixed locally within a clause.",
            "error_type": "Global: missing join and projection"
        },
        "0c6330427069e970ee4be464f3c99c48": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the days of hospital stay and date of death of subject id 2560?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\",DEMOGRAPHIC.\"DOD\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT `DAYS_STAY`, `DOD` FROM `DEMOGRAPHIC` WHERE `SUBJECT_ID` = 2560 AND `DOD` IS NOT NULL",
            "gold_answer": "[['4.0', 'None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth by adding the extra filter AND `DOD` IS NOT NULL in the WHERE clause. This is a localized mistake in the WHERE condition that can be fixed by simply removing that extra predicate\u2014no overall query structure or joins need changing. Hence it\u2019s a local error in the WHERE clause.",
            "error_type": "Local: condition error"
        },
        "0253fe7f06bcba29a5a6dc7342278218": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the number of patients with acute vascular insufficiency of intestine who are less than 50 years of age? (Values: Acute vascular insufficiency of intestine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"50\" AND DIAGNOSES.\"LONG_TITLE\" = \"Acute vascular insufficiency of intestine\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM DEMOGRAPHIC AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `ICD9_CODE` = '<ICD9_CODE_FROM_SUB_QUESTION_1>' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` WHERE T1.`AGE` < 50",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the filtering logic by joining on a derived table filtered by ICD9_CODE (and even using a placeholder) instead of filtering DIAGNOSES by LONG_TITLE = 'Acute vascular insufficiency of intestine'. This is not a simple typo or localized mistake; it alters the overall logic and intended meaning of the query, requiring restructuring of the join and WHERE clause to correct. Therefore, this is a global error in the query structure and logic.",
            "error_type": "Global: incorrect join/filter logic"
        },
        "0138ed4f1a039573afe3d860e12c2de9": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "for how long the patient bruce harris remained admitted in hospital? (Values: Bruce Harris)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Bruce Harris\"",
            "pred_sql": "SELECT `DISCHTIME`, `ADMITTIME`, (JULIANDAY(`DISCHTIME`) - JULIANDAY(`ADMITTIME`)) AS `DAYS_STAY` FROM DEMOGRAPHIC WHERE `NAME` = 'Bruce Harris'",
            "gold_answer": "[['23.0']]",
            "pred_answer": "[['2173-09-17 15:40:00', '2173-08-25 12:58:00', '23.113']]",
            "is_correct": false,
            "reasoning": "The predicted query errors are confined to the SELECT clause: it pulls DISCHTIME and ADMITTIME and computes a Julian\u2010day difference instead of simply returning the stored DAYS_STAY value. No JOINs or multiple clause logic is broken\u2014the mistake is selecting and computing the wrong columns. This is a localized error in the SELECT list and does not require restructuring the overall query logic.",
            "error_type": "Local: wrong-column-selection error"
        },
        "0e6e157150b6804faa15e113bdd34edd": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "calculate the maximum days of hospitalization for patients with s/p hanging primary disease. (Values: S/P HANGING)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"S/P HANGING\"",
            "pred_sql": "SELECT MAX(DAYS_STAY) AS MaxDaysStay FROM DEMOGRAPHIC WHERE HADM_ID IN ( SELECT DISTINCT HADM_ID FROM DIAGNOSES WHERE SHORT_TITLE = 'S/P HANGING' AND HADM_ID IS NOT NULL ) AND DAYS_STAY IS NOT NULL AND HADM_ID IS NOT NULL",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL moves the filter from the DEMOGRAPHIC.DIAGNOSIS column to a subquery over the DIAGNOSES table, which fundamentally changes the query\u2019s logic. Instead of directly filtering on the primary disease stored in DEMOGRAPHIC.DIAGNOSIS, it performs an unnecessary join via HADM_ID and looks at DIAGNOSES.SHORT_TITLE. This is not a simple typo or localized mistake but a flawed restructuring of the query logic, requiring significant changes to restore the intended behavior.",
            "error_type": "Global: incorrect join logic"
        },
        "90ea3f943491a9110aa016d6d613cba8": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "let me know the gender and lab test abnormal status of patient with patient id 2560.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"GENDER\",LAB.\"FLAG\" FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT T1.`GENDER`, T2.`FLAG` FROM `DEMOGRAPHIC` AS T1 LEFT JOIN `LAB` AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` WHERE T1.`SUBJECT_ID` = 2560 AND T2.`FLAG` IS NOT NULL",
            "gold_answer": "[['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'None'], ['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal']]",
            "pred_answer": "[['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal'], ['M', 'abnormal']]",
            "is_correct": false,
            "reasoning": "The predicted query uses a LEFT JOIN on SUBJECT_ID rather than the correct HADM_ID join key, then filters out NULL FLAG values, which fundamentally alters the result set by excluding the expected 'None' statuses and mismatching admissions. This is not a simple typo or minor clause fix but a flawed join strategy and filter logic that changes the query\u2019s intended meaning.",
            "error_type": "Global: incorrect join key"
        },
        "87e3e952f4c6888eca35ce87b6a82e7d": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is average age of patients whose primary disease is bowel obstruction and who are aged equal or more than 71 years? (Values: BOWEL OBSTRUCTION)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BOWEL OBSTRUCTION\" AND DEMOGRAPHIC.\"AGE\" >= \"71\"",
            "pred_sql": "SELECT AVG(T2.`AGE`) AS average_age FROM DEMOGRAPHIC AS T2 INNER JOIN ( SELECT DISTINCT T1.`SUBJECT_ID`, T1.`HADM_ID` FROM DIAGNOSES AS T1 WHERE T1.`LONG_TITLE` LIKE '%BOWEL OBSTRUCTION%' ) AS T3 ON T2.`SUBJECT_ID` = T3.`SUBJECT_ID` AND T2.`HADM_ID` = T3.`HADM_ID` WHERE T2.`AGE` >= 71 AND T2.`AGE` IS NOT NULL",
            "gold_answer": "[['77.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the DEMOGRAPHIC table to the DIAGNOSES table and filters on DIAGNOSES.LONG_TITLE for 'bowel obstruction', but the question asks for patients whose primary disease (stored in DEMOGRAPHIC.DIAGNOSIS) is 'BOWEL OBSTRUCTION'. Using the DIAGNOSES table and a LIKE filter fundamentally changes the logic and data source of the primary disease condition. This is not a simple column reference fix; it requires restructuring the query to remove the join and apply the filter directly on DEMOGRAPHIC.DIAGNOSIS.",
            "error_type": "Global: incorrect-join"
        },
        "9ce3e6d4028ec80eb96214edc77b6496": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "How many patients had a lab test for sodium urine and were treated with a base drug? (Values: MAIN, Sodium, Urine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE PRESCRIPTIONS.\"DRUG_TYPE\" = \"MAIN\" AND LAB.\"LABEL\" = \"Sodium, Urine\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID` FROM LAB WHERE `LABEL` = 'Sodium' AND `FLUID` = 'Urine' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM PRESCRIPTIONS WHERE `DRUG_TYPE` = 'BASE' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['15.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is structurally similar to the ground truth (it performs two subqueries and then joins on SUBJECT_ID), but it gets the filtering criteria wrong in the WHERE clauses.  Specifically, it uses DRUG_TYPE = 'BASE' instead of 'MAIN' and splits the LAB label into LABEL = 'Sodium' and FLUID = 'Urine' rather than using the single label 'Sodium, Urine'.  These are errors localized to the WHERE conditions (wrong literal values) and do not require changing the overall query structure.",
            "error_type": "Local: value parsing error"
        },
        "86e3e3a08a930e116dd8720746f1458a": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "provide me the primary disease and diagnosis short title of the patient with patient id 9575.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"9575\"",
            "pred_sql": "SELECT `LONG_TITLE`, `SHORT_TITLE` FROM DIAGNOSES WHERE `SUBJECT_ID` = 9575",
            "gold_answer": "[['CONGESTIVE HEART FAILURE', 'Abn involun movement NEC'], ['CONGESTIVE HEART FAILURE', 'Ac on chr diast hrt fail'], ['CONGESTIVE HEART FAILURE', 'Acute kidney failure NOS'], ['CONGESTIVE HEART FAILURE', 'Adv eff benzodiaz tranq'], ['CONGESTIVE HEART FAILURE', 'Anemia NOS'], ['CONGESTIVE HEART FAILURE', 'Atriovent block complete'], ['CONGESTIVE HEART FAILURE', 'CHF NOS'], ['CONGESTIVE HEART FAILURE', 'Cardiac arrest'], ['CONGESTIVE HEART FAILURE', 'Cardiogenic shock'], ['CONGESTIVE HEART FAILURE', 'Chr airway obstruct NEC'], ['CONGESTIVE HEART FAILURE', 'Chr pulmon heart dis NEC'], ['CONGESTIVE HEART FAILURE', 'Constipation NOS'], ['CONGESTIVE HEART FAILURE', 'Depend-supplement oxygen'], ['CONGESTIVE HEART FAILURE', 'Depress psychosis-unspec'], ['CONGESTIVE HEART FAILURE', 'Do not resusctate status'], ['CONGESTIVE HEART FAILURE', 'Encountr palliative care'], ['CONGESTIVE HEART FAILURE', 'Esophageal reflux'], ['CONGESTIVE HEART FAILURE', 'Food/vomit pneumonitis'], ['CONGESTIVE HEART FAILURE', 'Full incontinence-feces'], ['CONGESTIVE HEART FAILURE', 'Hx of breast malignancy'], ['CONGESTIVE HEART FAILURE', 'Hx traumatic fracture'], ['CONGESTIVE HEART FAILURE', 'Hyp ht dis NOS w ht fail'], ['CONGESTIVE HEART FAILURE', 'Hyperosmolality'], ['CONGESTIVE HEART FAILURE', 'Leukocytosis NOS'], ['CONGESTIVE HEART FAILURE', 'Mitral valve disorder'], ['CONGESTIVE HEART FAILURE', 'Osteoporosis NOS'], ['CONGESTIVE HEART FAILURE', 'Other alter consciousnes'], ['CONGESTIVE HEART FAILURE', 'Panic dis w/o agorphobia'], ['CONGESTIVE HEART FAILURE', 'Prim cardiomyopathy NEC'], ['CONGESTIVE HEART FAILURE', 'Schizoaffective dis NOS'], ['CONGESTIVE HEART FAILURE', 'Suicidal ideation'], ['CONGESTIVE HEART FAILURE', 'Thrombocytopenia NOS'], ['CONGESTIVE HEART FAILURE', 'Wheelchair dependence']]",
            "pred_answer": "[['Abnormal involuntary movements', 'Abn involun movement NEC'], ['Acute kidney failure, unspecified', 'Acute kidney failure NOS'], ['Acute on chronic diastolic heart failure', 'Ac on chr diast hrt fail'], ['Anemia, unspecified', 'Anemia NOS'], ['Atrioventricular block, complete', 'Atriovent block complete'], ['Benzodiazepine-based tranquilizers causing adverse effects in therapeutic use', 'Adv eff benzodiaz tranq'], ['Cardiac arrest', 'Cardiac arrest'], ['Cardiogenic shock', 'Cardiogenic shock'], ['Chronic airway obstruction, not elsewhere classified', 'Chr airway obstruct NEC'], ['Congestive heart failure, unspecified', 'CHF NOS'], ['Constipation, unspecified', 'Constipation NOS'], ['Do not resuscitate status', 'Do not resusctate status'], ['Encounter for palliative care', 'Encountr palliative care'], ['Esophageal reflux', 'Esophageal reflux'], ['Full incontinence of feces', 'Full incontinence-feces'], ['Hyperosmolality and/or hypernatremia', 'Hyperosmolality'], ['Leukocytosis, unspecified', 'Leukocytosis NOS'], ['Major depressive affective disorder, single episode, unspecified', 'Depress psychosis-unspec'], ['Mitral valve disorders', 'Mitral valve disorder'], ['Osteoporosis, unspecified', 'Osteoporosis NOS'], ['Other alteration of consciousness', 'Other alter consciousnes'], ['Other chronic pulmonary heart diseases', 'Chr pulmon heart dis NEC'], ['Other dependence on machines, supplemental oxygen', 'Depend-supplement oxygen'], ['Other primary cardiomyopathies', 'Prim cardiomyopathy NEC'], ['Panic disorder without agoraphobia', 'Panic dis w/o agorphobia'], ['Personal history of malignant neoplasm of breast', 'Hx of breast malignancy'], ['Personal history of traumatic fracture', 'Hx traumatic fracture'], ['Pneumonitis due to inhalation of food or vomitus', 'Food/vomit pneumonitis'], ['Schizoaffective disorder, unspecified', 'Schizoaffective dis NOS'], ['Suicidal ideation', 'Suicidal ideation'], ['Thrombocytopenia, unspecified', 'Thrombocytopenia NOS'], ['Unspecified hypertensive heart disease with heart failure', 'Hyp ht dis NOS w ht fail'], ['Wheelchair dependence', 'Wheelchair dependence']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the DEMOGRAPHIC table entirely and fails to retrieve the primary disease field (DEMOGRAPHIC.DIAGNOSIS). Instead, it only selects LONG_TITLE and SHORT_TITLE from DIAGNOSES, thus missing the required join on HADM_ID and the DEMOGRAPHIC.DIAGNOSIS column. This is not a simple typo or localized mistake but a fundamental structural error that changes the intended query logic and output.",
            "error_type": "Global: join error"
        },
        "9374426f7a7a5514195f42d67b10ae3a": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "which patients have lab test item id 51200?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"51200\"",
            "pred_sql": "SELECT DISTINCT T1.`SUBJECT_ID`, T2.`NAME`, T2.`AGE`, T2.`GENDER` FROM LAB AS T1 INNER JOIN DEMOGRAPHIC AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` AND T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`ITEMID` = 51200",
            "gold_answer": "[['74.0']]",
            "pred_answer": "[['10317.0', 'Wayne Shelton', '54.0', 'M'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['14680.0', 'Roxanna Weaver', '0.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['15061.0', 'Lue White', '55.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['17519.0', 'Shirley Mckenney', '80.0', 'F'], ['17570.0', 'Paul Dunn', '68.0', 'M'], ['17570.0', 'Paul Dunn', '68.0', 'M'], ['17570.0', 'Paul Dunn', '68.0', 'M'], ['17772.0', 'Jesse Ran', '0.0', 'M'], ['17787.0', 'Charley Pescatore', '0.0', 'M'], ['18112.0', 'Anna Johnson', '0.0', 'F'], ['18112.0', 'Anna Johnson', '0.0', 'F'], ['18112.0', 'Anna Johnson', '0.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18372.0', 'Billy Brewer', '0.0', 'M'], ['18480.0', 'Brian Taylor', '48.0', 'M'], ['19187.0', 'David Trammell', '0.0', 'M'], ['19620.0', 'Dawn Cooper', '43.0', 'F'], ['2110.0', 'Chandra Schulman', '0.0', 'F'], ['21796.0', 'Erik Dickerson', '0.0', 'M'], ['22999.0', 'Orville Ford', '0.0', 'M'], ['24425.0', 'Wilbur Braatz', '0.0', 'M'], ['25167.0', 'Cynthia Robinson', '86.0', 'F'], ['25167.0', 'Cynthia Robinson', '86.0', 'F'], ['25543.0', 'Michael Witt', '0.0', 'M'], ['2560.0', 'Shawn Green', '0.0', 'M'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['29541.0', 'Estrella Carroll', '58.0', 'F'], ['29767.0', 'Warren Baldwin', '50.0', 'M'], ['29961.0', 'Jerry Deberry', '72.0', 'M'], ['29961.0', 'Jerry Deberry', '72.0', 'M'], ['29961.0', 'Jerry Deberry', '72.0', 'M'], ['32418.0', 'Michelle Skidmore', '0.0', 'F'], ['3284.0', 'Kevin Jones', '0.0', 'M'], ['3343.0', 'Bessie Giordano', '0.0', 'F'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['42820.0', 'Evelyn Lawrence', '74.0', 'F'], ['42963.0', 'Marilyn Norvell', '51.0', 'F'], ['43220.0', 'Thomas Nazario', '82.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4342.0', 'Charles Deshay', '0.0', 'M'], ['4342.0', 'Charles Deshay', '0.0', 'M'], ['4342.0', 'Charles Deshay', '0.0', 'M'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['52012.0', 'Jane Dillard', '45.0', 'F'], ['52012.0', 'Jane Dillard', '45.0', 'F'], ['52012.0', 'Jane Dillard', '45.0', 'F'], ['53707.0', 'Stephanie Suchan', '56.0', 'F'], ['5506.0', 'Gonzalo Wasson', '83.0', 'M'], ['55094.0', 'Lawrence Wibbens', '56.0', 'M'], ['55094.0', 'Lawrence Wibbens', '56.0', 'M'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['64208.0', 'Darlene Martin', '81.0', 'F'], ['65652.0', 'Troy Friedman', '20.0', 'M'], ['65652.0', 'Troy Friedman', '20.0', 'M']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the correct tables and join conditions and applies the correct WHERE filter on ITEMID = 51200, so the overall structure and relationships are fine. Its mistake is confined to the SELECT clause: it returns raw subject_id, name, age, gender rows (with duplicates) instead of the count of distinct SUBJECT_IDs as specified by the ground truth. Fixing this is a matter of changing the SELECT clause to COUNT(DISTINCT SUBJECT_ID), without altering the joins or WHERE logic.",
            "error_type": "Local: incorrect projection/aggregation"
        },
        "0dd083c0247abcf28d92f74623eae302": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many patients who stayed in the hospital for more than 3 days had procedure icd9 code 5451?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"3\" AND PROCEDURES.\"ICD9_CODE\" = \"5451\"",
            "pred_sql": "SELECT COUNT(DISTINCT `SUBJECT_ID`) AS num_patients FROM DEMOGRAPHIC AS D INNER JOIN PROCEDURES AS P ON D.`HADM_ID` = P.`HADM_ID` WHERE D.`DAYS_STAY` > 3 AND P.`ICD9_CODE` = 5451",
            "gold_answer": "[['1.0']]",
            "pred_answer": "ambiguous column name: SUBJECT_ID",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with an 'ambiguous column name: SUBJECT_ID' error because the SELECT clause references SUBJECT_ID without qualifying it by table alias. Both DEMOGRAPHIC and PROCEDURES have a SUBJECT_ID column, so the parser cannot resolve which one is intended. This is a localized issue in the column reference and can be fixed simply by prefixing SUBJECT_ID with the correct alias (e.g. D.SUBJECT_ID). The overall query structure, joins, and logic remain correct.",
            "error_type": "Local: column reference error"
        },
        "ade3038af3cee4bf7d6dad698f9a4b7b": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many patients are diagnosed with opioid type dependence (unspecified) with td drug route? (Values: Opioid type dependence, unspecified, TD)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DIAGNOSES.\"LONG_TITLE\" = \"Opioid type dependence, unspecified\" AND PRESCRIPTIONS.\"ROUTE\" = \"TD\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `ICD9_CODE` = '30400' -- Assuming '30400' is the ICD-9 code for \"Opioid type dependence, unspecified\" ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM PRESCRIPTIONS WHERE `ROUTE` = 'TD' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['0.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL violates the assumption that the exact LONG_TITLE value (\"Opioid type dependence, unspecified\") must be used. Instead it filters on ICD9_CODE = '30400'. This is a localized mistake in the WHERE clause (wrong column/value) rather than a fundamental restructuring of the query. Therefore it is a Local error (incorrect column reference in the filter).",
            "error_type": "Local: incorrect-column-in-WHERE"
        }
    },
    "codes": {
        "829b1b9874d4849cb5c8f0fbabd0373e": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "count the number of government health insurance patients who had wbc, joint fluid lab test. (Values: Government, WBC, Joint Fluid)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Government\" AND LAB.\"LABEL\" = \"WBC, Joint Fluid\"",
            "pred_sql": "SELECT count(DISTINCT demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.subject_id = lab.subject_id WHERE demographic.insurance = 'Government' AND lab.itemid IN (50802, 50804)",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses lab.itemid IN (50802, 50804) instead of matching LAB.LABEL = 'WBC, Joint Fluid' as required. This is a localized mistake in the WHERE clause\u2019s predicate (wrong column and value), not a structural issue affecting joins or overall logic.",
            "error_type": "Local: incorrect predicate"
        },
        "0e8fd2d722e37743ae80d2f8b9c7a3f0": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients admitted under emergency admission type are of hispanic/latino - puertorican ethnicity? (Values: EMERGENCY, HISPANIC/LATINO - PUERTO RICAN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DEMOGRAPHIC.\"ETHNICITY\" = \"HISPANIC/LATINO - PUERTO RICAN\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN ethnicity ON ethnicity.subject_id = demographic.subject_id WHERE demographic.admission_type = 'EMERGENCY' AND demographic.ethnicity = 'HISPANIC/LATINO - PUERTO RICAN'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such table: ethnicity",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN with a non-existent table 'ethnicity', causing a table reference error, but the overall query structure for filtering by admission_type and ethnicity remains unchanged. This is a localized mistake in the FROM/JOIN clause rather than a flaw in the query\u2019s logic or multi-clause structure.",
            "error_type": "Local: table reference error"
        },
        "0d83c52fc4822a82a4ebf2428e18c0fa": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients born before the year 2167 were primarily diagnosed for hyperglycemia? (Values: HYPERGLYCEMIA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"HYPERGLYCEMIA\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2167\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE diagnoses.long_title = 'Hyperglycemia' AND demographic.dob_year < 2167",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN to the DIAGNOSES table and filters on diagnoses.long_title rather than using the primary diagnosis stored in DEMOGRAPHIC.DIAGNOSIS. The question asks for patients primarily diagnosed with hyperglycemia, so the correct filter should be applied directly on the DEMOGRAPHIC table\u2019s DIAGNOSIS column. By joining and filtering on a different table/column, the predicted query fundamentally changes the intended logic and returns incorrect results.",
            "error_type": "Global: incorrect join logic"
        },
        "9d1a610fdeca814a481fcfd2f4cfa096": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "tell me the drug code and route of doxazosin administration (Values: Doxazosin)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Doxazosin\"",
            "pred_sql": "SELECT drug, route FROM prescriptions WHERE drug = 'Doxazosin'",
            "gold_answer": "[['DOXA1', 'PO'], ['DOXA1', 'PO']]",
            "pred_answer": "[['Doxazosin', 'PO'], ['Doxazosin', 'PO']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column in the SELECT clause: it returns the DRUG column instead of the FORMULARY_DRUG_CD column. This is a localized mistake in the column reference and does not affect the overall query structure or joins.",
            "error_type": "Local: column reference error"
        },
        "93071743563d2991581ad4f1a614e467": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find out the route of administration for drug with drug code nabc50s. (Values: NABC50S)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"NABC50S\"",
            "pred_sql": "SELECT route FROM prescriptions WHERE drug = 'NABC50S'",
            "gold_answer": "[['IV'], ['IV']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column in the WHERE clause: it filters on drug = 'NABC50S' instead of the FORMULARY_DRUG_CD column. This is a localized column reference error and does not require restructuring the query\u2019s overall logic.",
            "error_type": "Local: column reference error"
        },
        "a7285ffa3213ab37d2c756b87b37babf": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "which primary disease is the patient paul edwards suffering from? (Values: Paul Edwards)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Paul Edwards\"",
            "pred_sql": "SELECT diagnoses.short_title FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.name = 'Paul Edwards' AND demographic.diagnosis = 'CORONARY ARTERY DISEASE'",
            "gold_answer": "[['HYPERGLYCEMIA;HYPONATREMIA']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the DEMOGRAPHIC table with the DIAGNOSES table and adds an extra filter on demographic.diagnosis = 'CORONARY ARTERY DISEASE', which changes the overall logic and structure of the query. The question asks for the primary disease stored in DEMOGRAPHIC.DIAGNOSIS for Paul Edwards, not for diagnoses.short_title or a specific hard-coded disease. This incorrect join and extra condition fundamentally alter the intended query meaning.",
            "error_type": "Global: join logic error"
        },
        "90c61fa47e6f3ee70f1d5a5262acea5b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients who underwent radical cystectomy? (Values: Radical cystectomy)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"SHORT_TITLE\" = \"Radical cystectomy\"",
            "pred_sql": "SELECT count(procedures.subject_id) FROM procedures INNER JOIN diagnoses ON procedures.subject_id = diagnoses.subject_id WHERE procedures.short_title = 'Radical cystectomy'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by joining PROCEDURES to DIAGNOSES on SUBJECT_ID rather than joining PROCEDURES to DEMOGRAPHIC (and using HADM_ID) as in the ground truth. This wrong join both changes the tables involved and leads to duplicate counts (no DISTINCT), producing an incorrect result. Fixing this requires restructuring the join strategy, not just a minor tweak in a single clause.",
            "error_type": "Global: incorrect join"
        },
        "8c6bcade1f9fdf7786a7a64151da5b73": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients uses the drug code hepa10syr? (Values: HEPA10SYR)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"HEPA10SYR\"",
            "pred_sql": "SELECT count(hadm_id) FROM prescriptions WHERE formulary_drug_cd = 'HEPA10SYR'",
            "gold_answer": "[['15.0']]",
            "pred_answer": "[['24.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply counts rows of PRESCRIPTIONS (using HADM_ID) rather than counting distinct patients (SUBJECT_ID) as required. Fixing this is not a minor syntax or reference tweak; it requires changing the aggregation and adding (or using) the correct patient identifier, and potentially joining DEMOGRAPHIC. This impacts the core logic and intended meaning of the query.",
            "error_type": "Global: incorrect aggregation and missing DISTINCT on SUBJECT_ID"
        },
        "04f70250fa8b8c71fcf64afe5f04935c": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find .the number of patients who are female and were diagnosed with transient ischemic attack? (Values: F, TRANSIENT ISCHEMIC ATTACK)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"TRANSIENT ISCHEMIC ATTACK\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.gender = 'F' AND diagnoses.short_title = 'TRANSIENT ISCHEMIC ATTACK'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN to the DIAGNOSES table and filters on diagnoses.short_title, whereas the correct diagnosis value resides in DEMOGRAPHIC.DIAGNOSIS. This join changes the query\u2019s logic and results, as it looks for the diagnosis in the wrong table rather than simply filtering DEMOGRAPHIC.DIAGNOSIS. Fixing this would require removing the join and correcting the column reference, which is a structural change affecting overall query logic rather than a minor clause tweak.",
            "error_type": "Global: incorrect join"
        },
        "acc06fbcc04ac4f4e727cebaf4062a9f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Tell me the type of admission and status of death of the patient with patient id 10317.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DEMOGRAPHIC.\"EXPIRE_FLAG\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"10317\"",
            "pred_sql": "SELECT admission_type, discharge_location FROM demographic WHERE subject_id = 10317",
            "gold_answer": "[['EMERGENCY', '0.0']]",
            "pred_answer": "[['EMERGENCY', 'HOME']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects the wrong second column (discharge_location) instead of the intended expire_flag (status of death). This is a column reference error localized within the SELECT clause and does not require restructuring the query or altering joins.",
            "error_type": "Local: column reference error"
        },
        "82fe69e357204798f6fa15a9b66761ff": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients younger than 70 years have got the cerebrospinal fluid (csf) lab test done? (Values: Cerebrospinal Fluid (CSF))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"70\" AND LAB.\"FLUID\" = \"Cerebrospinal Fluid (CSF)\"",
            "pred_sql": "SELECT count(DISTINCT lab.subject_id) FROM lab INNER JOIN demographic ON lab.subject_id = demographic.subject_id WHERE demographic.age < 70 AND lab.label = 'Cerebrospinal Fluid (CSF)'",
            "gold_answer": "[['6.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on lab.label instead of lab.fluid, so it never matches \u2018Cerebrospinal Fluid (CSF)\u2019 and returns zero. This is a localized mistake in the WHERE clause (wrong column reference), and does not require changing the overall query structure or logic. Hence it is a local error.",
            "error_type": "Local: column reference error"
        },
        "8620eaa43239b46cc9bde99b6286773a": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "For how many days Stephanie Sechan stayed in the hospital? (Values: Stephanie Suchan)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Stephanie Suchan\"",
            "pred_sql": "SELECT days_stay FROM demographic WHERE name = 'Stephanie Sechan'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL has the correct structure and references the correct table and column, but the literal in the WHERE clause is misspelled (\u2018Stephanie Sechan\u2019 instead of the required \u2018Stephanie Suchan\u2019). This is a localized error in the value rather than a change in the overall query logic or structure.",
            "error_type": "Local: value parsing error"
        },
        "80d0e73a185289625d60921e65b0f2d8": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Among the patients admitted before the year 2172, how many had contact dermatitis and other eczema with an unspecified cause? (Values: Contact dermatitis and other eczema, unspecified cause)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2172\" AND DIAGNOSES.\"LONG_TITLE\" = \"Contact dermatitis and other eczema, unspecified cause\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.admittime < '2172-01-01' AND diagnoses.short_title LIKE 'Contact dermatitis and other eczema, unspecified cause'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s overall structure remains the same\u2014it performs a JOIN, applies filters, and counts patients. However, it uses the wrong join key (demographic.subject_id instead of hadm_id), the wrong date column (admittime instead of admityear), and the wrong diagnosis column (short_title with LIKE instead of long_title with equality). These are localized reference errors in the JOIN and WHERE clauses that can be fixed without restructuring the entire query logic.",
            "error_type": "Local: join/column reference error"
        },
        "8a8314389b88631016bc7ce9cefe7351": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the average age of male patients with medicaid insurance? (Values: M, Medicaid)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND DEMOGRAPHIC.\"INSURANCE\" = \"Medicaid\"",
            "pred_sql": "SELECT avg(demographic.age) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE demographic.insurance = 'Medicaid' AND demographic.gender = 'M'",
            "gold_answer": "[['20.833']]",
            "pred_answer": "[['47.884']]",
            "is_correct": false,
            "reasoning": "The predicted query introduces an INNER JOIN with the PRESCRIPTIONS table on SUBJECT_ID, even though the question only asks for demographics. This join filters out male Medicaid patients who have no prescriptions and duplicates rows for those with multiple prescriptions, skewing the average age. Fixing this requires removing the join and changing the overall query structure, which affects the query\u2019s logic across clauses. Hence, this is a global error.",
            "error_type": "Global: join error"
        },
        "aa3c4803c0aca1c991037d46ab8891a2": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "for patient sheila riley, specify the duration of hospital stay (Values: Sheila Riley)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Sheila Riley\"",
            "pred_sql": "SELECT sum(demographic.days_stay) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.name = 'Sheila Riley'",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['56.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN to the DIAGNOSES table (and sums DAYS_STAY), which duplicates the DAYS_STAY row for each diagnosis record and then sums them, rather than just selecting the single DAYS_STAY value. This join error changes the overall logic and requires restructuring beyond a single clause. Therefore it is a global error.",
            "error_type": "Global: join error"
        },
        "b9b1e272af0cd9a909d5b78a51ebf792": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many patients with drug code beth5 died in or before the year 2122? (Values: 2122.0, BETH5)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DOD_YEAR\" <= \"2122.0\" AND PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"BETH5\"",
            "pred_sql": "SELECT count(DISTINCT demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE prescriptions.drug = 'BETH5' AND strftime('%Y', demographic.dod) < '2122'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL makes several mistakes confined to specific clauses rather than altering the overall query structure. 1) It uses prescriptions.drug instead of PRESCRIPTIONS.FORMULARY_DRUG_CD to filter on the drug code BETH5 (column reference error). 2) It applies strftime('%Y', demographic.dod) < '2122' instead of checking DOD_YEAR <= 2122.0 and also omits the equality, excluding deaths in 2122 (where\u2010clause error). 3) It joins on demographic.subject_id = prescriptions.subject_id instead of hadm_id, but this is still a valid join and doesn\u2019t change the overall structure. All errors are localized to SELECT/WHERE clauses and can be fixed without restructuring the query.",
            "error_type": "Local: column reference error"
        },
        "a71d8a20f3c1c0d0fc6a9372df547def": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the patients who have to be tested for ketones with diagnosis of dysphagia nos. (Values: Dysphagia NOS, Ketone)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Dysphagia NOS\" AND LAB.\"LABEL\" = \"Ketone\"",
            "pred_sql": "SELECT demographic.name FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE diagnoses.short_title = 'Dysphagia NOS' AND demographic.name IS NOT NULL AND demographic.name != '' AND (demographic.diagnosis LIKE '%ketone%' OR demographic.diagnosis LIKE '%Ketone%' OR demographic.diagnosis LIKE '%Ketones%' OR demographic.diagnosis LIKE '%Ketones%')",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the LAB table entirely and instead searches for 'ketone' in the DEMOGRAPHIC.DIAGNOSIS column. It also returns patient names rather than counting distinct SUBJECT_IDs as required. This is not a minor syntax mistake or column name error but a fundamental mis-structuring of the query: the necessary INNER JOIN to LAB and the appropriate filter on LAB.LABEL are missing, and the demographic.diagnosis text search is not what the question asks for. Correcting this would require significant changes to the query\u2019s structure and logic.",
            "error_type": "Global: join error"
        },
        "0d4c35f952b8d4ff9c7b0b736d1556f1": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the total number of patients who had abdomen artery incision. (Values: Abdomen artery incision)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"SHORT_TITLE\" = \"Abdomen artery incision\"",
            "pred_sql": "SELECT count(DISTINCT procedures.subject_id) FROM procedures INNER JOIN diagnoses ON procedures.subject_id = diagnoses.subject_id WHERE diagnoses.short_title = 'Abdomen artery incision'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by using the DIAGNOSES table instead of the PROCEDURES table for both the join and the filter. Rather than counting patients with the specified procedure, it joins PROCEDURES to DIAGNOSES and filters on DIAGNOSES.SHORT_TITLE. This is not a minor syntax or reference issue but a core join/table-selection mistake that alters the query\u2019s intended meaning. Therefore, it is a global error.",
            "error_type": "Global: join error"
        },
        "8cb9d9cd9777cb7de0f280d81e661f92": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the average days of hospital stay of patients who died before year 2154? (Values: 2154.0)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DOD_YEAR\" < \"2154.0\"",
            "pred_sql": "SELECT avg(demographic.days_stay) FROM demographic INNER JOIN procedures ON procedures.subject_id = demographic.subject_id WHERE strftime('%Y', demographic.dischtime) < '2154' AND demographic.expire_flag = 1",
            "gold_answer": "[['10.727']]",
            "pred_answer": "[['16.269']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic and structure of the intended query. Instead of simply filtering by the DOD_YEAR column in the DEMOGRAPHIC table, it introduces an unnecessary JOIN to the PROCEDURES table, filters on the DISCHTIME date rather than the date of death, and adds an expire_flag condition that wasn\u2019t in the original requirement. These changes span multiple clauses and alter the intended meaning of the query, so the error is not localized to a single clause but affects the overall query logic.",
            "error_type": "Global: incorrect join and filter logic"
        },
        "a6efda66101f5ee356834b8d407f433a": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find out the number of patients with lab test item id 51245.",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"51245\"",
            "pred_sql": "SELECT count(hadm_id) FROM lab WHERE itemid = 51245",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only counts the number of lab records (count(hadm_id)) rather than the number of distinct patients (distinct SUBJECT_ID). This mistake is confined to the SELECT clause and aggregation choice, without affecting the overall query structure or table relationships.",
            "error_type": "Local: wrong-aggregation"
        },
        "83e3314b9aa82aaf732ff1c45a51d195": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Is the subject id 74032 married, find his religion ?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"RELIGION\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"74032\"",
            "pred_sql": "SELECT religion FROM demographic WHERE subject_id = 74032",
            "gold_answer": "[['MARRIED', 'CATHOLIC']]",
            "pred_answer": "[['CATHOLIC']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only selects the RELIGION column and omits MARITAL_STATUS, thus failing to retrieve whether the subject is married. This is a localized error in the SELECT clause (missing column), not a structural or multi-clause logic error, so it\u2019s a Local error.",
            "error_type": "Local: missing-select-column"
        },
        "86960807966ab11fb4a31cae186af57a": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "count the number of emergency hospital admission patients who had procedure icd9 code 331. (Values: EMERGENCY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND PROCEDURES.\"ICD9_CODE\" = \"331\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM procedures INNER JOIN demographic ON procedures.hadm_id = demographic.hadm_id WHERE demographic.admission_type = 'EMERGENCY' AND procedures.icd9_code = 331",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['5.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the SELECT clause\u2019s aggregation: it uses COUNT(demographic.subject_id) instead of COUNT(DISTINCT demographic.subject_id). This is a localized mistake in the aggregate function (missing DISTINCT) and does not alter the overall query structure or joins.",
            "error_type": "Local: aggregation error"
        },
        "0782eff13b2a6fe1c1f0fb94270bb9c4": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "count the number of patients who were admitted before 2158 and had thyroxine (t4) lab test. (Values: Thyroxine (T4))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2158\" AND LAB.\"LABEL\" = \"Thyroxine (T4)\"",
            "pred_sql": "SELECT count(lab.subject_id) FROM lab INNER JOIN demographic ON lab.hadm_id = demographic.hadm_id WHERE demographic.admittime < '2158' AND lab.itemid = 50982",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses wrong columns in the WHERE clause: it compares demographic.admittime (a datetime) to '2158' instead of admityear, and filters on lab.itemid rather than lab.label = 'Thyroxine (T4)'. These are localized mistakes in column references/conditions and can be corrected without restructuring the query logic.",
            "error_type": "Local: column reference error"
        },
        "9e2bb2a0d86e0e587fb393ad10311fbd": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of emergency hospital admission patients who had manual reticulocyte count lab test? (Values: EMERGENCY, Reticulocyte Count, Manual)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND LAB.\"LABEL\" = \"Reticulocyte Count, Manual\"",
            "pred_sql": "SELECT count(DISTINCT lab.hadm_id) FROM lab INNER JOIN procedures ON lab.hadm_id = procedures.hadm_id WHERE procedures.icd9_code = '99591' AND lab.label = 'Reticulocyte count (IQR)'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the tables and filters used: it joins LAB to PROCEDURES instead of DEMOGRAPHIC to LAB, omits the admission_type = 'EMERGENCY' filter, uses an unrelated ICD9 code filter on PROCEDURES, and mismatches the lab label. These errors span multiple clauses and alter the query\u2019s overall logic and intended relationships, not just a single localized mistake.",
            "error_type": "Global: incorrect join and filter"
        },
        "85458bc20e32da11c87969332a42276f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many male patients had the drug code lipid250i? (Values: M, LIPID250I)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"LIPID250I\"",
            "pred_sql": "SELECT count(DISTINCT demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE demographic.gender = 'M' AND prescriptions.drug_type = 'LIPID250I'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistake is confined to the WHERE clause (it filters on prescriptions.drug_type instead of the correct prescriptions.formulary_drug_cd) and also uses the wrong join key (subject_id vs. hadm_id). Both are isolated to specific parts of the query (column reference and join condition) and don\u2019t require restructuring the overall logic\u2014this is a localized error in column usage. Therefore it\u2019s a Local error.",
            "error_type": "Local: column reference error"
        },
        "9e98c86301154d7baf44b1654b51c1f0": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients with replaced route of drug administration? (Values: REPLACE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"ROUTE\" = \"REPLACE\"",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE route = 'REPLACE'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted query counts all prescription rows with route = 'REPLACE' rather than counting distinct patients. It uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id), leading to an inflated result. This is a localized mistake in the aggregation clause and does not require restructuring joins or the overall query logic.",
            "error_type": "Local: aggregation error"
        },
        "adae5e26acd2cff0c947689a1e07525e": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Specify the average age of unmarried patients admitted in or after the year 2156 (Values: SINGLE)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND DEMOGRAPHIC.\"ADMITYEAR\" >= \"2156\"",
            "pred_sql": "SELECT avg(age) FROM demographic WHERE marital_status = 'SINGLE' AND admittime LIKE '2156%'",
            "gold_answer": "[['54.538']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column and filter in the WHERE clause: it filters on admittime with a LIKE pattern instead of comparing ADMITYEAR >= 2156. This mistake is localized to the condition and does not affect the overall structure or logic beyond that filter. Hence, it is a local error in the WHERE clause.",
            "error_type": "Local: WHERE clause filter error"
        },
        "0f06b77b423037f77d9cf551771a846d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the maximum age of medicaid patients who belong to black/cape verdean ethnic background. (Values: Medicaid, BLACK/CAPE VERDEAN)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Medicaid\" AND DEMOGRAPHIC.\"ETHNICITY\" = \"BLACK/CAPE VERDEAN\"",
            "pred_sql": "SELECT max(demographic.age) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE procedures.icd9_code = 99591 WHERE demographic.insurance = 'Medicaid' AND demographic.ethnicity = 'BLACK/CAPE VERDEAN'",
            "gold_answer": "[['None']]",
            "pred_answer": "near \"WHERE\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN to the PROCEDURES table and even repeats the WHERE keyword, causing a syntax error. Joining an unrelated table fundamentally changes the structure and intended logic of the query (we only need the DEMOGRAPHIC table) and cannot be fixed by a simple local edit. This is a global schema/logic error.",
            "error_type": "Global: incorrect join logic"
        },
        "91e4268fef7e4c6940983b195523dd3d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Calculate the average age of unmarried patients born after the year 2097 (Values: SINGLE)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND DEMOGRAPHIC.\"DOB_YEAR\" > \"2097\"",
            "pred_sql": "SELECT avg(age) FROM demographic WHERE dob > '2097-01-01' AND marital_status = 'SINGLE'",
            "gold_answer": "[['47.6']]",
            "pred_answer": "[['46.0']]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly filters on the full DOB column with a date literal ('2097-01-01') instead of using the DOB_YEAR column with a year comparison (> 2097). This mistake is confined to a specific condition and does not require restructuring joins or the overall query logic, so it is a local error.",
            "error_type": "Local: column reference/value parsing error"
        },
        "9ce7d4ba8e33f5a9bb87ec0199c5fcd2": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients had procedure creation of conduit between left ventricle and aorta? (Values: Creation of conduit between left ventricle and aorta)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"LONG_TITLE\" = \"Creation of conduit between left ventricle and aorta\"",
            "pred_sql": "SELECT count(DISTINCT procedures.subject_id) FROM procedures INNER JOIN diagnoses ON procedures.subject_id = diagnoses.subject_id WHERE diagnoses.short_title = 'Creation of conduit between left ventricle and aorta'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by joining PROCEDURES to DIAGNOSES and filtering on DIAGNOSES.SHORT_TITLE rather than filtering PROCEDURES.LONG_TITLE. This is not a minor typo or localized mistake; it changes the tables and relationships used and thus the overall meaning of the query. Therefore it is a global error.",
            "error_type": "Global: incorrect table join/condition"
        },
        "0d8668ce9bb2d5b309822a5b89240277": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find out the birth date and time of admission for patient john gartman. (Values: John Gartman)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DOB\",DEMOGRAPHIC.\"ADMITTIME\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"John Gartman\"",
            "pred_sql": "SELECT demographic.dob, demographic.admittime FROM demographic INNER JOIN demographic ON demographic.subject_id = demographic.subject_id WHERE demographic.name = 'John Gartman'",
            "gold_answer": "[['2083-08-25 00:00:00', '2145-05-05 18:57:00']]",
            "pred_answer": "ambiguous column name: demographic.dob",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary self\u2010JOIN on the same table without aliases, causing ambiguous column references (demographic.dob, demographic.admittime). This is a join/local reference issue that does not change the overall query logic\u2014it\u2019s confined to the JOIN clause and column references. Hence it\u2019s a localized error, not a structural rework of the query.",
            "error_type": "Local: ambiguous column reference"
        },
        "93f1c2203790abf5e19f70304d9fa6cb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "among patients treated with amitriptyline, calculate the number of female patients. (Values: F, Amitriptyline)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND PRESCRIPTIONS.\"DRUG\" = \"Amitriptyline\"",
            "pred_sql": "SELECT count(demographic.gender) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE prescriptions.drug = 'Amitriptyline' AND demographic.gender = 'F'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses COUNT(demographic.gender) without DISTINCT and joins PRESCRIPTIONS to DEMOGRAPHIC only on SUBJECT_ID rather than both SUBJECT_ID and HADM_ID. This incorrect join key and lack of de-duplication fundamentally changes the logic (it counts prescription rows, not unique patients), leading to an inflated count. Fixing it requires more than a minor syntax tweak\u2014it requires correcting the join relationship and adding DISTINCT, altering the core query structure.",
            "error_type": "Global: incorrect join strategy"
        },
        "00904139a2ed10eebae557a135321017": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients were transferred within this facilty? (Values: TRSF WITHIN THIS FACILITY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRSF WITHIN THIS FACILITY\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE admission_location = 'TRANSFER FROM HOSP/EXTRAM'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['10.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong literal in the WHERE clause (\u2018TRANSFER FROM HOSP/EXTRAM\u2019 instead of the required \u2018TRSF WITHIN THIS FACILITY\u2019). This is a localized mistake in the filter value and does not affect the query\u2019s overall structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "8385c7828a91dfb287c01c8c00c801d6": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients were admitted before the year 2103 and underwent procedure under icd9 code 93?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2103\" AND PROCEDURES.\"ICD9_CODE\" = \"93\"",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM procedures WHERE icd9_code = 93 AND strftime('%Y', admittedtime) < '2103'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: admittedtime",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it references a non-existent column 'admittedtime'. This is a simple column reference error (misspelling or wrong column name) and can be fixed by using the correct column name (e.g., ADMITTIME or ADMITYEAR) without altering the overall query structure or joins.",
            "error_type": "Local: table/column reference error"
        },
        "8e4f2450d232947ef620cb4b261579f2": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "give the lab test results of patient id 2560.",
            "gold_sql": "SELECT LAB.\"FLAG\" FROM LAB WHERE LAB.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT label, value_unit FROM lab WHERE subject_id = 2560",
            "gold_answer": "[['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['abnormal'], ['abnormal'], ['abnormal'], ['abnormal'], ['abnormal'], ['abnormal']]",
            "pred_answer": "[['Atypical Lymphocytes', '0%'], ['Bands', '3%'], ['Basophils', '0%'], ['Bilirubin, Direct', '0.3mg/dL'], ['Bilirubin, Indirect', '2.3mg/dL'], ['Bilirubin, Total', '2.6mg/dL'], ['Eosinophils', '0%'], ['Hematocrit', '48.7%'], ['Hemoglobin', '15.6g/dL'], ['Lymphocytes', '19%'], ['MCH', '36.7pg'], ['MCHC', '32.0%'], ['MCV', '115fL'], ['Metamyelocytes', '0%'], ['Monocytes', '9%'], ['Myelocytes', '0%'], ['Neutrophils', '69%'], ['Nucleated Red Cells', '3%'], ['Platelet Count', '334K/uL'], ['RDW', '17.0%'], ['Red Blood Cells', '4.24m/uL'], ['White Blood Cells', '13.0K/uL']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects the LABEL and VALUE_UNIT columns instead of the FLAG column, which the question and ground truth specify. This is a column reference error in the SELECT clause that does not change the overall query structure or table relationships.",
            "error_type": "Local: table/column reference error"
        },
        "95467eb799726ac08b222ad6b2fa9fc8": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Specify the total number of male patients with item id 51148 (Values: M)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND LAB.\"ITEMID\" = \"51148\"",
            "pred_sql": "SELECT count(subject_id) FROM lab WHERE itemid = 51148 AND value LIKE 'M%'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: value",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'value' in the LAB table (the actual column is VALUE_UNIT). This is a simple column reference error and does not affect overall query logic or structure.",
            "error_type": "Local: table/column reference error"
        },
        "96a65966374941a10a74e3436a43aadd": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Count the number of newborn patients whose lab test fluid is blood (Values: NEWBORN, Blood)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"NEWBORN\" AND LAB.\"FLUID\" = \"Blood\"",
            "pred_sql": "SELECT count(lab.hadm_id) FROM lab INNER JOIN demographic ON lab.hadm_id = demographic.hadm_id WHERE demographic.name = 'NEWBORN' AND lab.fluid = 'Blood'",
            "gold_answer": "[['22.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses demographic.name = 'NEWBORN' instead of the correct demographic.DIAGNOSIS = 'NEWBORN', and it counts lab.hadm_id rather than counting distinct subject IDs. These are local mistakes in column references and aggregation that do not alter the overall query structure or required joins.",
            "error_type": "Local: table/column reference error"
        },
        "bb33d9ea342c4d53171831ac967d5204": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find out the type of admission patient jerry deberry had. (Values: Jerry Deberry)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Jerry Deberry\"",
            "pred_sql": "SELECT admission_type FROM demographic WHERE name = 'J<NAME>'",
            "gold_answer": "[['EMERGENCY']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply uses an incorrect literal in the WHERE clause (\u2018J<NAME>\u2019 instead of \u2018Jerry Deberry\u2019), causing no rows to match. This is a localized mistake in the value parsing of the condition and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "9ff64e587978e15c158f7b6b80563161": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are diagnosed with primary disease upper gi bleed and are born before 2112? (Values: UPPER GI BLEED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"UPPER GI BLEED\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2112\"",
            "pred_sql": "SELECT count(DISTINCT demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE diagnoses.short_title = 'UPPER GI BLEED' AND demographic.dob_year < 2112",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The ground truth query filters patients based on the primary disease stored in the DEMOGRAPHIC.DIAGNOSIS column. The predicted query instead joins to the DIAGNOSES table on diagnoses.short_title, thus using secondary diagnoses rather than the primary diagnosis field. This is not a simple column typo or missing alias but a fundamentally different join strategy that changes the logic of which patients are counted.",
            "error_type": "Global: incorrect join/table usage"
        },
        "a8ed9d698cc466a86e52507edcf4e5df": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "What is the number of patients born before 2120 who had urine lab tests? (Values: Urine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2120\" AND LAB.\"FLUID\" = \"Urine\"",
            "pred_sql": "SELECT count(DISTINCT lab.subject_id) FROM lab INNER JOIN demographic ON lab.subject_id = demographic.subject_id WHERE demographic.dob_year < 2120",
            "gold_answer": "[['48.0']]",
            "pred_answer": "[['67.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the condition LAB.FLUID = 'Urine' from the WHERE clause, which is a missing filter on the lab test fluid type. This is a localized issue within the WHERE clause and does not require restructuring joins or subqueries; it can be fixed by adding the missing predicate.",
            "error_type": "Local: missing WHERE predicate"
        },
        "0dc83447bac46f820dda4a5f23a89502": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients born before 2078 are discharged to short term hospital? (Values: SHORT TERM HOSPITAL)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"SHORT TERM HOSPITAL\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2078\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE dob_year < 2078",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['34.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only applies the dob_year filter and omits the discharge_location condition entirely. This is an error confined to the WHERE clause (missing filter) and does not require restructuring joins or subqueries.",
            "error_type": "Local: missing where condition"
        },
        "a042f037d40f6ca5ddaa6b3b1cd2d4fb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients were tested for triiodothyronine (t3)? (Values: Triiodothyronine (T3))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"LABEL\" = \"Triiodothyronine (T3)\"",
            "pred_sql": "SELECT count(subject_id) FROM lab WHERE label = 'Triiodothyronine (T3)'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted query counts all lab records for Triiodothyronine (T3) (count(subject_id)) rather than the number of unique patients. It omits the DISTINCT qualifier on subject_id, leading to duplicate counts, but the overall query structure (one table, one WHERE clause) remains correct. This is a localized mistake in the aggregation rather than a flaw in the query\u2019s logic or joins.",
            "error_type": "Local: missing-distinct error"
        },
        "06221a83812b03cacce79b7404bb3785": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients have the drug code pant40i? (Values: PANT40I)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"PANT40I\"",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE formulary_drug_cd = 'PANT40I'",
            "gold_answer": "[['27.0']]",
            "pred_answer": "[['37.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts all rows in the PRESCRIPTIONS table matching the drug code, without using DISTINCT, so duplicated SUBJECT_IDs across multiple prescriptions are counted multiple times. The ground truth uses COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) to count unique patients. This is a localized mistake in the SELECT/aggregate clause (missing DISTINCT) and does not require restructuring joins or overall logic.",
            "error_type": "Local: missing DISTINCT in COUNT"
        },
        "ad0c2ee55406703b73c20f92102a868d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "let me know the number of emergency hospital admission patients who have docusate sodium prescription. (Values: EMERGENCY, Docusate Sodium)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND PRESCRIPTIONS.\"DRUG\" = \"Docusate Sodium\"",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE demographic.admission_type = 'EMERGENCY' AND prescriptions.drug = 'Docusate Sodium'",
            "gold_answer": "[['35.0']]",
            "pred_answer": "[['49.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses demographic.subject_id = prescriptions.subject_id for the JOIN instead of the correct hadm_id relationship, and it also omits DISTINCT, counting prescription rows rather than unique patients. This incorrect join key and aggregation changes the query\u2019s overall logic and produces the wrong result.",
            "error_type": "Global: incorrect join key"
        },
        "041461cfd563e6b139f3910205ec8e31": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the drug code and route for the drug propafenone hcl? (Values: Propafenone HCl)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Propafenone HCl\"",
            "pred_sql": "SELECT drug, route FROM prescriptions WHERE drug = 'Propafenone HCl'",
            "gold_answer": "[['PROP225', 'PO'], ['PROP225', 'PO']]",
            "pred_answer": "[['Propafenone HCl', 'PO'], ['Propafenone HCl', 'PO']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects the wrong column for the drug code: it uses PRESCRIPTIONS.drug instead of PRESCRIPTIONS.FORMULARY_DRUG_CD. This is a localized mistake in the SELECT clause that does not require changing the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "a67acde1ffdd89fb4002c394789b52e8": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many of the patients admitted to emergency had a lab test for magnesium level? (Values: EMERGENCY, Magnesium)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND LAB.\"LABEL\" = \"Magnesium\"",
            "pred_sql": "SELECT count(lab.subject_id) FROM lab INNER JOIN demographic ON lab.hadm_id = demographic.hadm_id WHERE demographic.admission_location = 'EMERGENCY' AND lab.label = 'Magnesium'",
            "gold_answer": "[['60.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses demographic.admission_location = 'EMERGENCY' instead of demographic.admission_type = 'EMERGENCY', so it\u2019s filtering on the wrong column. This is a localized mistake in the WHERE clause column reference and doesn\u2019t alter the overall structure or join logic of the query.",
            "error_type": "Local: table/column reference error"
        },
        "103c53116811183189bbac6240b1c38e": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many married patients died in or before 2165? (Values: MARRIED, 2165.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"MARRIED\" AND DEMOGRAPHIC.\"DOD_YEAR\" <= \"2165.0\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE marital_status = 'MARRIED' AND (dod IS NULL OR strftime('%Y', dod) < '2165') AND expire_flag = 1",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['5.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the death\u2010filtering logic: it uses dod IS NULL OR strftime('%Y', dod) < '2165' plus expire_flag = 1 instead of DOD_YEAR <= 2165. This is not a minor syntax or naming mistake but a fundamental change in how deaths are identified and filtered, altering the intended result.",
            "error_type": "Global: incorrect death filter logic"
        },
        "0c9f6eae00f98d18ac9f50768c7c5a86": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many asian patients had a base drug type? (Values: ASIAN, BASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"ASIAN\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"BASE\"",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM prescriptions INNER JOIN demographic ON prescriptions.subject_id = demographic.subject_id WHERE demographic.ethnicity = 'ASIAN' AND prescriptions.drug_type = 'BASE'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts all prescription rows rather than unique patients. It omits the DISTINCT on SUBJECT_ID in the COUNT and thus aggregates the wrong values. The join on SUBJECT_ID instead of HADM_ID also broadens the match but the core mistake is in the aggregation clause. This is a localized error in the SELECT aggregation (missing DISTINCT) and does not require restructuring joins or overall logic.",
            "error_type": "Local: missing-distinct"
        },
        "060b7c924b230b79b3935a795cfaf37d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many of the asian patients had unspecified neutropenia? (Values: ASIAN, Neutropenia, unspecified)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"ASIAN\" AND DIAGNOSES.\"LONG_TITLE\" = \"Neutropenia, unspecified\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.hadm_id = lab.hadm_id WHERE demographic.ethnicity = 'ASIAN' AND lab.value = 'unspecified' AND lab.label = 'Neutropenia'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: lab.value",
            "is_correct": false,
            "reasoning": "The predicted query mistakenly joins the LAB table and references non-existent columns (lab.value and lab.label) instead of using the DIAGNOSES table and its LONG_TITLE column. This is a localized mistake in table/column references and does not require restructuring the overall logic of the query.",
            "error_type": "Local: table/column reference error"
        },
        "a8548cc3da3f7647e4b65fbcb65de4f3": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify the gender and icd9 code for patient  id 7578",
            "gold_sql": "SELECT DEMOGRAPHIC.\"GENDER\",PROCEDURES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"7578\"",
            "pred_sql": "SELECT demographic.gender, diagnoses.icd9_code FROM demographic INNER JOIN diagnoses ON demographic.hadm_id = diagnoses.hadm_id WHERE demographic.subject_id = 7578",
            "gold_answer": "[['F', 'Vaccination NEC']]",
            "pred_answer": "[['F', 'V053'], ['F', 'V290'], ['F', 'V3000']]",
            "is_correct": false,
            "reasoning": "The predicted query joins the DEMOGRAPHIC table with DIAGNOSES and selects ICD9_CODE, whereas the ground truth joins DEMOGRAPHIC with PROCEDURES and selects SHORT_TITLE. This is not a minor syntax or column reference issue but a fundamental change to the tables and the intended join relationship, altering the overall logic of which records are retrieved. Fixing it requires replacing the entire join and target table, not just a column or condition tweak.",
            "error_type": "Global: incorrect table join error"
        },
        "871b48c28ceee54f43ba16f2a5d5dd15": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify the duration of hospital stay and admission location of patient troy friedman (Values: Troy Friedman)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\",DEMOGRAPHIC.\"ADMISSION_LOCATION\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Troy Friedman\"",
            "pred_sql": "SELECT demographic.days_stay, demographic.admission_location FROM demographic INNER JOIN procedures ON demographic.hadm_id = procedures.hadm_id WHERE demographic.name = 'Troy Friedman'",
            "gold_answer": "[['1.0', 'CLINIC REFERRAL/PREMATURE']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN with the PROCEDURES table on hadm_id. This join changes the overall logic by filtering out the patient (who has no matching PROCEDURES row), resulting in no output. Fixing it requires removing the join, which is a structural change affecting the entire query logic rather than a simple local tweak. Therefore, this is a global error involving an incorrect join that alters the intended meaning.",
            "error_type": "Global: join error"
        },
        "9126eb002106d4c790c7ca9f1745106d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Out of total number of patients admitted before 2198, how many of them were married. (Values: MARRIED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"MARRIED\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2198\"",
            "pred_sql": "SELECT count(hadm_id) FROM demographic WHERE strftime('%Y', admittedtime) < 2198 AND marital_status = 'MARRIED'",
            "gold_answer": "[['34.0']]",
            "pred_answer": "no such column: admittedtime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'admittedtime' instead of the correct 'ADMITTIME' from the DEMOGRAPHIC table. This is a simple column reference error that can be fixed locally without altering the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "05bac85c2939f9bbec8d57102c7b6156": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the minimum days of hospital stay for newborns. (Values: NEWBORN)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"NEWBORN\"",
            "pred_sql": "SELECT min(days_stay) FROM demographic WHERE name = 'NEWBORN'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column in the WHERE clause (filtering on NAME instead of ADMISSION_TYPE). The overall query structure (aggregating MIN on DAYS_STAY) is correct, and only a column reference in the WHERE clause is incorrect.",
            "error_type": "Local: column reference error"
        },
        "a862cb556c8a303b9fd05ffbc3866738": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "What is the minimum days of hospital stay for patients who are of black/haitian ethnicity? (Values: BLACK/HAITIAN)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"BLACK/HAITIAN\"",
            "pred_sql": "SELECT min(demographic.days_stay) FROM demographic INNER JOIN demographic ON demographic.subject_id = demographic.subject_id WHERE demographic.ethnicity = 'BLACK/HAITIAN'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "ambiguous column name: demographic.days_stay",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary self-join on the DEMOGRAPHIC table (INNER JOIN DEMOGRAPHIC ON demographic.subject_id = demographic.subject_id), which is fundamentally incorrect and changes the query structure. This causes an ambiguous column reference for demographic.days_stay because two copies of the same table are unnamed. The error stems from an incorrect join that alters the intended query logic and requires restructuring (i.e., removing the join), so it is a global error.",
            "error_type": "Global: join error"
        },
        "8318a358e23fc3e43d9c6c3cb70e75af": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "provide the duration of hospital stay and procedure title for subject id 71798.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\",PROCEDURES.\"LONG_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"71798\"",
            "pred_sql": "SELECT demographic.days_stay, procedures.short_title FROM procedures INNER JOIN demographic ON procedures.subject_id = demographic.subject_id WHERE procedures.subject_id = 71798",
            "gold_answer": "[['2.0', 'Enteral infusion of concentrated nutritional substances']]",
            "pred_answer": "[['2.0', 'Entral infus nutrit sub']]",
            "is_correct": false,
            "reasoning": "The predicted query not only selects the wrong column (short_title instead of LONG_TITLE), but more critically joins on SUBJECT_ID rather than HADM_ID. Joining on SUBJECT_ID breaks the intended one\u2010to\u2010one admission\u2010procedure relationship and can produce incorrect data associations, fundamentally altering the query\u2019s logic. This is more than a minor syntax or column\u2010name issue\u2014it is a flawed join strategy that requires restructuring the query to correct the intended meaning.",
            "error_type": "Global: incorrect join key"
        },
        "a59c2fe7f918bb9e3f87dfd11186b2e7": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "To how many patients on urgent admission were d5w (excel bag) administered? (Values: URGENT, D5W (EXCEL BAG))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"URGENT\" AND PRESCRIPTIONS.\"DRUG\" = \"D5W (EXCEL BAG)\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE demographic.admission_type = 'URGENT' AND prescriptions.drug_name = 'D5W (EXCEL BAG)'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: prescriptions.drug_name",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column prescriptions.drug_name (should be PRESCRIPTIONS.DRUG). This is a localized column reference error and does not require restructuring the query\u2019s overall logic or joins.",
            "error_type": "Local: column reference error"
        },
        "09836ee89fd684d22d1fd39b5efb7692": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "give the number of patients who have stayed in the hospital for more than one day whose lab test fluid is urine. (Values: 1, Urine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"1\" AND LAB.\"FLUID\" = \"Urine\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.subject_id = lab.subject_id WHERE demographic.days_stay > 1 AND lab.fluid = 'Urine'",
            "gold_answer": "[['55.0']]",
            "pred_answer": "[['1274.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC and LAB on SUBJECT_ID instead of on HADM_ID as required. This incorrect join key links rows across different admissions improperly, drastically changing the result (1274 vs 55). Since it affects the core relationship and logic of the query, it is a global error in the join structure.",
            "error_type": "Global: incorrect join key"
        },
        "93b9e31884f67490542d6aaf4d6b8108": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many male patients have lab test item id 51146? (Values: M)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND LAB.\"ITEMID\" = \"51146\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.subject_id = lab.subject_id WHERE demographic.gender = 'M' AND lab.itemid = 51146",
            "gold_answer": "[['38.0']]",
            "pred_answer": "[['96.0']]",
            "is_correct": false,
            "reasoning": "The ground truth joins DEMOGRAPHIC to LAB on HADM_ID to correctly link each hospital admission\u2019s lab tests to the patient. The predicted SQL instead joins on SUBJECT_ID, which is a fundamentally incorrect relationship and inflates the count. This error affects the core logic of how tables are related and thus requires restructuring the join, making it a global error.",
            "error_type": "Global: join error"
        },
        "fffdd25c4b1fd2c10864a81fa85fd02e": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the minimum age of white female patients . (Values: F, WHITE)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND DEMOGRAPHIC.\"ETHNICITY\" = \"WHITE\"",
            "pred_sql": "SELECT min(demographic.age) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE demographic.gender = 'F' AND demographic.race = 'WHITE'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: demographic.race",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it references a non-existent column demographic.race instead of DEMOGRAPHIC.ETHNICITY, causing a 'no such column' error. This is a localized column reference mistake that can be fixed without altering the overall query structure or logic.",
            "error_type": "Local: column-reference error"
        },
        "97d303ec02cd44af836860b5bbec1d68": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "which patients have diagnoses icd9 code 2920?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"ICD9_CODE\" = \"2920\"",
            "pred_sql": "SELECT demographic.name FROM diagnoses INNER JOIN demographic ON diagnoses.subject_id = demographic.subject_id WHERE diagnoses.icd9_code = 2920",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['Claude Chipman']]",
            "is_correct": false,
            "reasoning": "The ground truth query returns the COUNT of DISTINCT patient IDs who have ICD9_CODE = '2920', joining on HADM_ID to correctly associate diagnoses per admission. The predicted query instead selects patient names (demographic.name) and joins on SUBJECT_ID, changing both the JOIN key and the SELECT target. This is not a simple naming or syntax mistake but a fundamental change in the query\u2019s purpose and logic (from counting unique patients to listing names) and an incorrect join strategy. Such a change affects the overall query structure and intended result, so it is a Global error.",
            "error_type": "Global: incorrect selection and join logic"
        },
        "aa87e8e508f6984831876860be1761df": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "What is the number of dead patients who had a prescription for pantoprazole sodium medication? (Values: 1, Pantoprazole Sodium)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"1\" AND PRESCRIPTIONS.\"DRUG\" = \"Pantoprazole Sodium\"",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM prescriptions WHERE drug = 'Pantoprazole Sodium' AND drug_dose = '1 mg'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the DEMOGRAPHIC table and the expire_flag filter, instead wrongly filtering on drug_dose. Fixing this requires adding a JOIN to DEMOGRAPHIC and applying the expire_flag condition. This is not a simple typo or minor clause fix; it is a fundamental flaw in the query structure and logic (missing table join and key filter). Thus it is a global error.",
            "error_type": "Global: join error"
        },
        "aa0feba409fddd516dabb88f3e4e1131": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what number of patients under the age of 85 had lab test named heparin,imw? (Values: Heparin, LMW)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"85\" AND LAB.\"LABEL\" = \"Heparin, LMW\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.hadm_id = lab.hadm_id WHERE demographic.age < 85 AND lab.label = 'heparin, imw'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates only in the WHERE clause (string literal casing/format) and in the SELECT aggregation (missing DISTINCT). Both are isolated issues within individual clauses and don\u2019t alter the overall join or query structure. This is a localized error in the WHERE condition (incorrect literal) and SELECT aggregate (missing DISTINCT), so it\u2019s a Local error.",
            "error_type": "Local: value parsing error"
        },
        "0709550ec8aeff15ee0fd6fe36ed4c0f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Theresa Vedovelli is primarily suffering from which disease?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Theresa Vedovelli\"",
            "pred_sql": "SELECT DISTINCT diagnoses.long_title FROM diagnoses INNER JOIN demographic ON diagnoses.subject_id = demographic.subject_id WHERE demographic.name = 'Theresa Vedovelli'",
            "gold_answer": "[['COMPLETE HEART BLOCK']]",
            "pred_answer": "[['Acute kidney failure, unspecified'], ['Anxiety state, unspecified'], ['Atrial fibrillation'], ['Atrioventricular block, complete'], ['Chronic airway obstruction, not elsewhere classified'], ['Coronary atherosclerosis of native coronary artery'], ['Depressive disorder, not elsewhere classified'], ['Diabetes mellitus without mention of complication, type II or unspecified type, not stated as uncontrolled'], ['Epilepsy, unspecified, without mention of intractable epilepsy'], ['Hypopotassemia'], ['Long-term (current) use of anticoagulants'], ['Other persistent mental disorders due to conditions classified elsewhere'], ['Other pulmonary embolism and infarction'], ['Pneumonia, organism unspecified'], ['Retention of urine, unspecified'], ['Unspecified acquired hypothyroidism'], ['Unspecified essential hypertension'], ['Urinary complications, not elsewhere classified']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the DEMOGRAPHIC and DIAGNOSES tables and returns all diagnosis long_titles for the patient, whereas the question asks for the primary disease recorded in the DEMOGRAPHIC table\u2019s DIAGNOSIS column. This error affects the overall logic: it selects from the wrong table and returns multiple rows instead of the single primary diagnosis, requiring restructuring of the query to use DEMOGRAPHIC.DIAGNOSIS. Therefore the mistake is global in nature.",
            "error_type": "Global: join logic error"
        },
        "080127e8abd81701f88e49d4a966b56d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "tell me the number of patients on prescription of alteplase (catheter clearance) who had to stay for more than 43 days in hospital. (Values: Alteplase (Catheter Clearance))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"43\" AND PRESCRIPTIONS.\"DRUG\" = \"Alteplase (Catheter Clearance)\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.hadm_id = prescriptions.hadm_id WHERE prescriptions.drug = 'Alteplase (Catheter Clearance)' AND demographic.days_stay > 43",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses COUNT(demographic.subject_id) instead of COUNT(DISTINCT demographic.subject_id). This causes it to count every prescription row rather than distinct patients. The only issue is in the aggregation/counting clause (missing DISTINCT); the overall join and filter logic remains correct. This is a localized mistake in the SELECT aggregation.",
            "error_type": "Local: missing DISTINCT"
        },
        "00263ad8de751999af6aa3a7cf5276ad": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients whose hospital stay is above 14 days and with lab test methemoglobin? (Values: Methemoglobin)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"14\" AND LAB.\"LABEL\" = \"Methemoglobin\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.hadm_id = lab.hadm_id WHERE lab.label = 'Methmoglobin' AND demographic.days_stay > 14",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL misspells the lab label as 'Methmoglobin' instead of the correct 'Methemoglobin', causing no rows to match. This is a localized mistake in the WHERE clause\u2019s literal value, not a structural or logical join issue. Therefore it\u2019s a Local error (value/literal parsing).",
            "error_type": "Local: value parsing error"
        },
        "06fbe5c6c99bc3f7cd0a53249b2de3dd": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many patients aged below 43 years old are diagnosed with prob-head/neck/trunk nos? (Values: Prob-head/neck/trunk NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"43\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Prob-head/neck/trunk NOS\"",
            "pred_sql": "SELECT count(subject_id) FROM diagnoses WHERE icd9_code IN (3547, 3548, 3549) AND age < 43",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: age",
            "is_correct": false,
            "reasoning": "The predicted SQL references the column age within the diagnoses table, but age exists only in the demographic table. This is a simple column reference error (no join on demographic and wrong table for age), which does not change the overall query structure or logic, only a localized misuse of a column.",
            "error_type": "Local: table/column reference error"
        },
        "aa9d08c1a7ed4cdd6bf2b289a54a6cc1": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are below 49 years of age and admitted under the year 2198?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"AGE\" < \"49\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2198\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE strftime('%Y', admittime) < 2198 AND age < 49",
            "gold_answer": "[['34.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses strftime('%Y', admittime) to filter by admission year instead of using the ADMITYEAR column as specified, and it also omits DISTINCT. This is a mistake localized to the WHERE clause and SELECT aggregation rather than a structural or multi-clause logical error. It can be fixed by referencing the correct column and adding DISTINCT without changing the overall query structure.",
            "error_type": "Local: column reference error"
        },
        "92b1c7ad9fe0af5c4ec35a3b48791815": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the drug code and drug dose of the drug acetaminophen iv? (Values: Acetaminophen IV)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"DRUG_DOSE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Acetaminophen IV\"",
            "pred_sql": "SELECT formulary_drug_cd, drug_dose FROM prescriptions WHERE drug = 'acetaminophen' AND route = 'IV'",
            "gold_answer": "[['ACET1000I', '1000mg'], ['ACET1000I', '1000mg'], ['ACET1000I', '1000mg'], ['ACET1000I', '1000mg'], ['ACET1000I', '1000mg']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only matches rows where drug = 'acetaminophen' and route = 'IV', whereas the actual drug name in the data is 'Acetaminophen IV'. This is a mistake in the WHERE clause values rather than a structural rewrite of the query or its joins. It doesn\u2019t require changing tables or join logic\u2014only the filtering condition is wrong.",
            "error_type": "Local: value parsing error"
        },
        "a6ee3501998234967c1043a5887f7a1b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify the name and route of administration of drug code asa325 (Values: ASA325)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"DRUG\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"ASA325\"",
            "pred_sql": "SELECT drug, route FROM prescriptions WHERE drug = 'ASA325'",
            "gold_answer": "[['Aspirin', 'NG'], ['Aspirin', 'NG'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG'], ['Aspirin', 'PO/NG']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on the wrong column (`drug` instead of `FORMULARY_DRUG_CD`), which is a localized column reference error in the WHERE clause. The overall query structure is correct but the wrong field is used for the filter, leading to no results. This does not require changing the join logic or query flow, only fixing the column reference.",
            "error_type": "Local: column-reference error"
        },
        "045c5269cdae83454d68e393e12c8658": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Give me the list of patients born before 2071 who prefer spanish. (Values: SPAN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"LANGUAGE\" = \"SPAN\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2071\"",
            "pred_sql": "SELECT demographic.name FROM demographic INNER JOIN demographic ON demographic.subject_id = demographic.subject_id WHERE demographic.dob_year < 2071 AND demographic.language = 'SPAN'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "ambiguous column name: demographic.name",
            "is_correct": false,
            "reasoning": "The predicted SQL self-joins the DEMOGRAPHIC table without aliases, causing an ambiguous reference to demographic.name. This is a column reference error localized to the JOIN/SELECT clause and does not require reworking the overall query logic.",
            "error_type": "Local: ambiguous-column error"
        },
        "008452ec0d8e3074023da5b599850454": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are tested with lab item id 50922?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"50922\"",
            "pred_sql": "SELECT count(subject_id) FROM lab WHERE itemid = 50922",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts all lab rows for itemid 50922 rather than counting distinct patients. It misses the DISTINCT modifier (and the JOIN is unnecessary since subject_id is in LAB). This is a localized error in the aggregation clause, not a structural or multi-clause logical error.",
            "error_type": "Local: missing DISTINCT"
        },
        "04f2d8289f6c164c281c05e3fc7f3940": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are admitted in emergency and admitted before the year 2163? (Values: EMERGENCY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2163\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE strftime('%Y', admittime) < '2163'",
            "gold_answer": "[['38.0']]",
            "pred_answer": "[['58.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the required filter on ADMISSION_TYPE = 'EMERGENCY' and only checks admittime year < 2163. This is a mistake confined to the WHERE clause (a missing condition), and can be corrected by adding the emergency filter without restructuring the entire query.",
            "error_type": "Local: missing filter condition"
        },
        "a76a171c5f287a62a48890fd64927138": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find me the death status and primary language that patient with patient id 3623 prefers.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"LANGUAGE\",DEMOGRAPHIC.\"EXPIRE_FLAG\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"3623\"",
            "pred_sql": "SELECT diagnosis, LANGUAGE FROM demographic WHERE subject_id = 3623",
            "gold_answer": "[['ENGL', '1.0']]",
            "pred_answer": "[['PNEUMONIA', 'ENGL']]",
            "is_correct": false,
            "reasoning": "The predicted query selects the wrong column in the SELECT clause: it uses DEMOGRAPHIC.DIAGNOSIS instead of the required EXPIRE_FLAG (death status). The query structure (FROM DEMOGRAPHIC, WHERE subject_id filter) is correct; only the column choice is incorrect. This is a localized error in the SELECT clause and does not affect overall query logic or table relationships.",
            "error_type": "Local: column reference error"
        },
        "8793d7722bc0a33c0337714fb51f5a3b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are admitted before the year 2200 and diagnosed with icd9 code e8791? (Values: E8791)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2200\" AND DIAGNOSES.\"ICD9_CODE\" = \"E8791\"",
            "pred_sql": "SELECT count(subject_id) FROM diagnoses WHERE icd9_code = 'E8791' AND strftime('%Y', admittime) < '2200'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: admittime",
            "is_correct": false,
            "reasoning": "The predicted SQL references the column admittime in the DIAGNOSES table, but admittime exists only in the DEMOGRAPHIC table. There is no JOIN to DEMOGRAPHIC, and the column reference is incorrect, causing the 'no such column' error. This is a localized issue with a column reference, not a fundamental change to the query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "0bc26b6c69c9bf3628430b1e23d18007": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the number of patients born before 1879 ere tranferred within the facility. (Values: TRSF WITHIN THIS FACILITY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRSF WITHIN THIS FACILITY\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"1879\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE dob_year < 1879",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only applies the DOB_YEAR < 1879 filter and omits the required filter on ADMISSION_LOCATION = 'TRSF WITHIN THIS FACILITY'. This is a localized error in the WHERE clause, not altering the overall query structure or joins, but missing a specific condition.",
            "error_type": "Local: missing WHERE condition"
        },
        "a5ed08bdcb4c1e8be442640558d76f49": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the average age of government health insurance patients who were admitted in hospital for a day. (Values: Government, 1)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Government\" AND DEMOGRAPHIC.\"DAYS_STAY\" = \"1\"",
            "pred_sql": "SELECT avg(demographic.age) FROM demographic INNER JOIN patients ON demographic.subject_id = patients.subject_id WHERE patients.insurance = 'Government' AND demographic.days_stay = 1",
            "gold_answer": "[['20.0']]",
            "pred_answer": "no such table: patients",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with a table named \u201cpatients,\u201d which does not exist in the provided schema. This is a reference error confined to a specific part of the query (the FROM/JOIN clause) and does not require restructuring the overall logic\u2014only correcting the table name or removing the invalid join. Therefore, it is a Local error.",
            "error_type": "Local: table reference error"
        },
        "a918a0036b56f9087c0676932fa13024": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many patients with an elective admission type were discharged at home health care? (Values: ELECTIVE, HOME HEALTH CARE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"ELECTIVE\" AND DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"HOME HEALTH CARE\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN procedures ON procedures.subject_id = demographic.subject_id WHERE demographic.admission_type = 'ELECTIVE' AND demographic.discharge_location = 'HOME HEALTH CARE'",
            "gold_answer": "[['11.0']]",
            "pred_answer": "[['52.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN to the PROCEDURES table, which is not required by the question. This join changes the result set by excluding patients without procedures and by counting the same patient multiple times if they have multiple procedures. The original query only filters on DEMOGRAPHIC and uses COUNT(DISTINCT) to get the correct number of unique patients. The unnecessary join fundamentally alters the logic and requires restructuring (removing the join and adding DISTINCT), so this is a global error.",
            "error_type": "Global: incorrect join"
        },
        "050ed27881bce414cfd05ba83a22a4d4": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "provide the number of emergency room admitted patients who were hospitalized for more than 4 days. (Values: EMERGENCY ROOM ADMIT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"EMERGENCY ROOM ADMIT\" AND DEMOGRAPHIC.\"DAYS_STAY\" > \"4\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE days_stay > 4",
            "gold_answer": "[['18.0']]",
            "pred_answer": "[['60.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only applies days_stay > 4 but omits the filter on ADMISSION_LOCATION = \"EMERGENCY ROOM ADMIT\" (and also doesn\u2019t use DISTINCT). These are localized omissions in the WHERE and SELECT clauses and don\u2019t require restructuring joins or the overall logic. Thus it\u2019s a local error in the WHERE clause.",
            "error_type": "Local: missing filter in WHERE clause"
        },
        "ab3f3d61bd231c11a4f22dce7af58d02": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "tell me the number of patients who were diagnosed with unspecified cerebral artery occlusion with cerebral infarction. (Values: Crbl art ocl NOS w infrc)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Crbl art ocl NOS w infrc\"",
            "pred_sql": "SELECT count(DISTINCT diagnoses.subject_id) FROM diagnoses INNER JOIN procedures ON diagnoses.subject_id = procedures.subject_id WHERE diagnoses.short_title = 'Crbl art ocl NOS w infrc'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly joins the DIAGNOSES table with the PROCEDURES table rather than with DEMOGRAPHIC (or simply using DEMOGRAPHIC to count distinct patients). This fundamentally changes the logic by restricting the count to patients who also have procedure records, instead of all patients with the specified diagnosis. Fixing this requires restructuring the join, so it is a global error.",
            "error_type": "Global: incorrect join"
        },
        "0c27eb4d99b5a50b55dcea73a2a9f54f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify icd9 code for patient id 14755",
            "gold_sql": "SELECT PROCEDURES.\"LONG_TITLE\" FROM PROCEDURES WHERE PROCEDURES.\"SUBJECT_ID\" = \"14755\"",
            "pred_sql": "SELECT icd9_code FROM procedures WHERE subject_id = 14755",
            "gold_answer": "[['Arterial catheterization'], ['Closed [endoscopic] biopsy of bronchus'], ['Continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['Enteral infusion of concentrated nutritional substances'], ['Spinal tap'], ['Spinal tap'], ['Venous catheterization, not elsewhere classified']]",
            "pred_answer": "[['331.0'], ['331.0'], ['3324.0'], ['3891.0'], ['3893.0'], ['966.0'], ['9672.0']]",
            "is_correct": false,
            "reasoning": "Both queries filter on the same table (PROCEDURES) and the same SUBJECT_ID, so the join and WHERE logic is correct. The only difference is that the predicted SQL selects the icd9_code column instead of the LONG_TITLE column, which is a mistake confined to the SELECT clause and does not change the overall query structure or logic.",
            "error_type": "Local: wrong-column reference"
        },
        "046dedd9201c79ad138cee000f7e9354": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the average age of female patients who were discharged to long term care hospital. (Values: F, LONG TERM CARE HOSPITAL)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"LONG TERM CARE HOSPITAL\"",
            "pred_sql": "SELECT avg(demographic.age) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE procedures.icd9_code IN (36428, 36429, 36430) AND demographic.gender = 'F'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the query\u2019s logic by introducing an unnecessary JOIN to the PROCEDURES table, filtering on ICD9 codes, and omitting the required discharge location filter. This is not a minor syntax or column error but a fundamental restructuring that no longer matches the question\u2019s intent. The join and filter strategy would need to be redone to correctly target DEMOGRAPHIC.DISCHARGE_LOCATION, so this is a global error.",
            "error_type": "Global: join error"
        },
        "aa94e974b8ee66950b649251ba9090dc": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are less than 31 year old with a chemistry lab test category? (Values: Chemistry)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"31\" AND LAB.\"CATEGORY\" = \"Chemistry\"",
            "pred_sql": "SELECT count(lab.subject_id) FROM lab INNER JOIN diagnoses ON lab.subject_id = diagnoses.subject_id INNER JOIN demographic ON demographic.subject_id = lab.subject_id WHERE lab.category = 'Chemistry' AND demographic.age < 31",
            "gold_answer": "[['23.0']]",
            "pred_answer": "[['3869.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary join to the DIAGNOSES table and joins tables only on SUBJECT_ID rather than matching HADM_ID, which changes the set of patients counted and multiplies rows. It also omits DISTINCT, counting each lab record rather than unique patients. These mistakes alter the overall query logic and data associations, so fixing them requires restructuring the joins and aggregation, not just a local tweak.",
            "error_type": "Global: incorrect join logic"
        },
        "0a1dcb8cb5d013e3feb7229a7b8ac869": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Get me the number of patients who had a urine magnesium lab test. (Values: Magnesium, Urine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"LABEL\" = \"Magnesium, Urine\"",
            "pred_sql": "SELECT count(DISTINCT lab.subject_id) FROM lab INNER JOIN demographic ON lab.subject_id = demographic.subject_id WHERE lab.itemid = 50968",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in the WHERE clause filter: it uses lab.itemid = 50968 instead of lab.label = 'Magnesium, Urine'. The JOIN and overall structure remain correct. This is a localized mistake in the filter condition (wrong column/value), so it can be fixed without altering the query\u2019s structure or logic flow.",
            "error_type": "Local: incorrect filter column"
        },
        "0a1afd3f6a8a58a11490424176d9f026": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the item id of kelly gallardo. (Values: Kelly Gallardo)",
            "gold_sql": "SELECT LAB.\"ITEMID\" FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Kelly Gallardo\"",
            "pred_sql": "SELECT itemid FROM lab WHERE label = 'KELLY GALLARDO'",
            "gold_answer": "[['50868.0'], ['50882.0'], ['50883.0'], ['50883.0'], ['50884.0'], ['50884.0'], ['50885.0'], ['50885.0'], ['50902.0'], ['50971.0'], ['50983.0'], ['51143.0'], ['51144.0'], ['51146.0'], ['51200.0'], ['51221.0'], ['51222.0'], ['51244.0'], ['51248.0'], ['51249.0'], ['51250.0'], ['51251.0'], ['51254.0'], ['51255.0'], ['51256.0'], ['51257.0'], ['51265.0'], ['51277.0'], ['51279.0'], ['51301.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query uses the LAB table and its LABEL column to filter by 'KELLY GALLARDO', but the patient\u2019s name resides in the DEMOGRAPHIC.NAME column. It omits the DEMOGRAPHIC\u2192LAB join entirely and uses the wrong column for filtering, causing no results. This is a simple table/column reference mistake rather than a structural or multi-clause logic error.",
            "error_type": "Local: table/column reference error"
        },
        "06bbd920c4191e622346b9c33d8b0f43": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of living patients diagnosed with suicide and self-inflicted poisoning by analgesics, antipyretics, and antirheumatics? (Values: 0, Suicide and self-inflicted poisoning by analgesics, antipyretics, and antirheumatics)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DIAGNOSES.\"LONG_TITLE\" = \"Suicide and self-inflicted poisoning by analgesics, antipyretics, and antirheumatics\"",
            "pred_sql": "SELECT count(DISTINCT demographic.subject_id) FROM diagnoses INNER JOIN demographic ON diagnoses.hadm_id = demographic.hadm_id WHERE diagnoses.icd9_code IN (387, 388, 389) AND demographic.expire_flag = 0",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL error lies solely in the WHERE clause: it filters on diagnoses.icd9_code IN (387, 388, 389) instead of matching the specified LONG_TITLE and the exact expire_flag value. The join, counting, and overall structure remain correct. This is a localized mistake in the filtering condition and does not require restructuring the query or altering joins.",
            "error_type": "Local: incorrect WHERE clause predicate"
        },
        "0cdaeea27c2f1e7de2ef4c3f9622d5cb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "let me know the type of admission patient ida cook had. (Values: Ida Cook)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Ida Cook\"",
            "pred_sql": "SELECT DISTINCT admission_type FROM demographic WHERE subject_id = 25543",
            "gold_answer": "[['EMERGENCY']]",
            "pred_answer": "[['NEWBORN']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses `subject_id = 25543` instead of filtering on `NAME = 'Ida Cook'` as required by the question. The overall query structure (SELECT admission_type FROM demographic) is correct; only the WHERE clause\u2019s column and literal value are wrong. This is a localized mistake that can be fixed by replacing the filter with the proper column and value.",
            "error_type": "Local: value parsing error"
        },
        "06f15c6be771186668997528e52270cb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "tell me the gender of patient stephanie suchan. (Values: Stephanie Suchan)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"GENDER\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Stephanie Suchan\"",
            "pred_sql": "SELECT demographic.gender FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE prescriptions.drug = 'Stephanie Suchan'",
            "gold_answer": "[['F']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly joins the DEMOGRAPHIC table with PRESCRIPTIONS and filters on prescriptions.drug instead of simply selecting the GENDER from DEMOGRAPHIC where NAME matches 'Stephanie Suchan'. This is not a small typo or localized mistake but a fundamental change to the query\u2019s structure and logic\u2014using the wrong table and column for filtering and introducing an unnecessary join that alters the intended meaning of the query.",
            "error_type": "Global: incorrect join logic"
        },
        "ad0222178143c1561a0a8a027757997f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the maximum age of patients with government insurance who expired before the year 2154. (Values: Government, 2154.0)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Government\" AND DEMOGRAPHIC.\"DOD_YEAR\" < \"2154.0\"",
            "pred_sql": "SELECT max(demographic.age) FROM demographic INNER JOIN insurance ON demographic.subject_id = insurance.subject_id WHERE demographic.expire_flag = 1 AND strftime('%Y', insurance.insurance) = '2154'",
            "gold_answer": "[['None']]",
            "pred_answer": "no such table: insurance",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent table 'insurance', which is not part of the provided schema. This is a table reference error, a localized issue that can be fixed by correcting or removing the invalid table reference without altering the overall query structure.",
            "error_type": "Local: table reference error"
        },
        "81246894ba88abc13f0ae732be0ae33f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many black/haitian patients have phenylephrine medication prescription? (Values: BLACK/HAITIAN, Phenylephrine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"BLACK/HAITIAN\" AND PRESCRIPTIONS.\"DRUG\" = \"Phenylephrine\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE demographic.ethnicity = 'BLACK/HAITIAN' AND prescriptions.drug = 'Phenylephrine'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses an incorrect join key (demographic.subject_id = prescriptions.subject_id) instead of the correct hospital admission ID (hadm_id). This fundamentally alters the intended logic by linking prescriptions across different admissions for the same patient, resulting in an inflated count and incorrect associations. Fixing this requires changing the join structure, so it\u2019s a global error.",
            "error_type": "Global: incorrect join key"
        },
        "9c05a565ee5d9c64aaf12d921d02d4c9": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "among the patients who were treated with pravastatin, how many of them were aged below 72? (Values: Pravastatin)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"72\" AND PRESCRIPTIONS.\"DRUG\" = \"Pravastatin\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE prescriptions.drug = 'Pravastatin' AND demographic.age < 72",
            "gold_answer": "[['6.0']]",
            "pred_answer": "[['8.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the ground truth in two specific, fixable parts: (1) it joins DEMOGRAPHIC and PRESCRIPTIONS on SUBJECT_ID instead of HADM_ID, and (2) it omits the DISTINCT in COUNT to ensure unique patients. Both issues are localized to the JOIN clause and the SELECT aggregation and do not require restructuring the overall query logic.",
            "error_type": "Local: join-condition error"
        },
        "81b4faef3515b239d74fcabdef8c1a42": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many patients born before the year 2170 had a lab test named other cells? (Values: Other Cells)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2170\" AND LAB.\"LABEL\" = \"Other Cells\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.subject_id = lab.subject_id WHERE lab.label = 'Other Cells' AND demographic.dob_year < 2170",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted query joins DEMOGRAPHIC and LAB on demographic.subject_id = lab.subject_id instead of using the hadm_id relationship as in the ground truth, and it also omits the DISTINCT in the COUNT, causing over\u2010counting of patients with multiple lab entries. The incorrect join key changes the fundamental data relationship and thus the meaning of the query, requiring a structural fix rather than a simple tweak in one clause.",
            "error_type": "Global: incorrect-join error"
        },
        "8758789c5295576a106780e5c140c9bd": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the maximum days for which patients who died before 2164 stayed in hospital. (Values: 2164.0)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DOD_YEAR\" < \"2164.0\"",
            "pred_sql": "SELECT max(demographic.dischtime - demographic.admittime) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE procedures.icd9_code = 4240 AND demographic.expire_flag = 1 AND demographic.dischtime LIKE '2164%'",
            "gold_answer": "[['34.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely diverges from the intended logic: it joins the PROCEDURES table using an unrelated ICD9 code filter, applies an expire_flag and dischtime LIKE condition, and computes a timestamp difference instead of using the DAYS_STAY column and filtering by DOD_YEAR < 2164.0. This is not a localized syntax or reference mistake but a fundamental structural and logical error that alters the query\u2019s purpose, requiring a significant rewrite of multiple clauses.",
            "error_type": "Global: incorrect join strategy"
        },
        "879e430064f5011919b19ba70a193bc9": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "give me the number of patients who were diagnosed under icd9 code 76528 and also their lab test category is chemistry. (Values: Chemistry)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DIAGNOSES.\"ICD9_CODE\" = \"76528\" AND LAB.\"CATEGORY\" = \"Chemistry\"",
            "pred_sql": "SELECT count(diagnoses.subject_id) FROM lab INNER JOIN diagnoses ON lab.subject_id = diagnoses.subject_id WHERE diagnoses.icd9_code = 76528 AND lab.category = 'Chemistry'",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['36.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins LAB and DIAGNOSES only on subject_id rather than hadm_id and does not use DISTINCT, thereby changing the intended logic of counting unique patients per admission with both the specific diagnosis and lab category. This is not a minor syntax or column reference mistake but an incorrect join strategy that fundamentally alters the query\u2019s meaning and results.",
            "error_type": "Global: incorrect join key"
        },
        "a42b052cc593a90aed642a74ae195507": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the marital status and age of the patient name orville ford? (Values: Orville Ford)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"AGE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Orville Ford\"",
            "pred_sql": "SELECT demographic.marital_status, demographic.age FROM demographic INNER JOIN demographic ON demographic.name = 'Orville Ford'",
            "gold_answer": "[['None', '0.0']]",
            "pred_answer": "ambiguous column name: demographic.marital_status",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary self-join on the same table without aliases and uses the join condition incorrectly (demographic.name = 'Orville Ford') instead of a proper ON clause linking two tables or simply filtering. This results in an ambiguous column reference error. The mistake is localized to the JOIN clause and column references, and does not require restructuring the overall query logic.",
            "error_type": "Local: ambiguous column reference"
        },
        "04fec7017bec275d4bc47df53bb90b78": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients are born before 2121 and followed the procedure biopsy of tonsils and adenoids? (Values: Biopsy of tonsils and adenoids)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2121\" AND PROCEDURES.\"LONG_TITLE\" = \"Biopsy of tonsils and adenoids\"",
            "pred_sql": "SELECT count(procedures.hadm_id) FROM procedures INNER JOIN demographic ON procedures.subject_id = demographic.subject_id WHERE procedures.short_title = 'Biopsy of tonsils and adenoids' AND demographic.dob_year < 2121",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the intended logic in three key ways: 1) It joins PROCEDURES to DEMOGRAPHIC using subject_id only rather than matching on hadm_id (and subject_id), which fundamentally alters how rows relate between tables. 2) It filters on procedures.short_title instead of procedures.LONG_TITLE, so it never matches the intended procedure name. 3) It counts procedure.hadm_id rather than COUNT(DISTINCT subject_id), changing the aggregation semantics. The incorrect join condition is not a simple typo but a structural mistake that breaks the table relationship and requires reworking the query\u2019s core logic, making this a global error.",
            "error_type": "Global: join error"
        },
        "0df82d3e22799b2179485006b810705b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the maximum age of patients who have pneumonia primary disease and were hospitalized for 30 days. (Values: PNEUMONIA)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"PNEUMONIA\" AND DEMOGRAPHIC.\"DAYS_STAY\" = \"30\"",
            "pred_sql": "SELECT max(demographic.age) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE diagnoses.short_title = 'Pneumonia' AND demographic.days_stay = 30",
            "gold_answer": "[['64.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC to DIAGNOSES on only SUBJECT_ID and filters DIAGNOSES.SHORT_TITLE = 'Pneumonia'. This deviates from the intended logic, which uses the DEMOGRAPHIC.DIAGNOSIS column to identify primary pneumonia cases. The join is not only missing the HADM_ID key but also changes the table and column used for the disease filter, fundamentally altering the query\u2019s meaning and requiring restructuring of the query logic. This is not a simple syntax or local clause fix but a structural, logical error in how tables are combined.",
            "error_type": "Global: incorrect join relationship"
        },
        "b9e6f14f784427e612f5e8259145fe67": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients who stayed in the hospital for more than 34 days and have been prescribed the drug neo*po*mct oil? (Values: NEO*PO*MCT Oil)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"34\" AND PRESCRIPTIONS.\"DRUG\" = \"NEO*PO*MCT Oil\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM prescriptions INNER JOIN demographic ON prescriptions.hadm_id = demographic.hadm_id WHERE prescriptions.drug = 'NEO*PO*MCT OIL' AND demographic.days_stay > 34",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in minor parts: it omits DISTINCT on SUBJECT_ID and uses 'NEO*PO*MCT OIL' (uppercase OIL) instead of the exact 'NEO*PO*MCT Oil'. These are localized mistakes in the WHERE clause (value literal mismatch and missing DISTINCT) and do not require reworking the overall JOIN or query structure.",
            "error_type": "Local: value parsing error"
        },
        "0e41987fae64a3292dbb83fe18b6d6eb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the number of patients admitted before the year 2154 who had medicare insurance. (Values: Medicare)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Medicare\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2154\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN demographic ON demographic.insurance = 'Medicare' WHERE strftime('%Y', demographic.admittime) < '2154'",
            "gold_answer": "[['22.0']]",
            "pred_answer": "ambiguous column name: demographic.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary self-join on the DEMOGRAPHIC table without using aliases, and then refers to demographic.subject_id ambiguously. This is a localized mistake within the JOIN clause (missing table aliases and incorrect ON usage), rather than a fundamental restructuring of the query logic or intent.",
            "error_type": "Local: join alias error"
        },
        "920c86ed33b3bf35707a12f566236c34": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "provide the procedure short title and drug code of subject id 24425.",
            "gold_sql": "SELECT PROCEDURES.\"SHORT_TITLE\",PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" FROM PROCEDURES INNER JOIN PRESCRIPTIONS on PROCEDURES.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PROCEDURES.\"SUBJECT_ID\" = \"24425\"",
            "pred_sql": "SELECT procedures.short_title, prescriptions.drug FROM procedures INNER JOIN prescriptions ON procedures.subject_id = prescriptions.subject_id WHERE procedures.subject_id = 24425",
            "gold_answer": "[['Vaccination NEC', 'AMPVL'], ['Vaccination NEC', 'GENT10I'], ['Vaccination NEC', 'NAMP500I'], ['Vaccination NEC', 'NEOSYRD5W']]",
            "pred_answer": "[['Vaccination NEC', 'NEO*IV*Ampicillin Sodium'], ['Vaccination NEC', 'NEO*IV*Gentamicin'], ['Vaccination NEC', 'Send 500mg Vial'], ['Vaccination NEC', 'Syringe (Neonatal) *D5W*']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only selects the wrong column (prescriptions.drug instead of prescriptions.FORMULARY_DRUG_CD) but, more critically, it joins the tables on SUBJECT_ID rather than HADM_ID. The incorrect join key changes the relationship between PROCEDURES and PRESCRIPTIONS and thus alters the intended result set. This is not a localized typo or syntax mistake but a fundamental join logic error that affects the overall query meaning.",
            "error_type": "Global: incorrect join key"
        },
        "0bf9d7f3a5120ea9a6a3ca724295956f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "let me know the route of administration and the type of drug influenza virus vaccine is. (Values: Influenza Virus Vaccine)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"DRUG_TYPE\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Influenza Virus Vaccine\"",
            "pred_sql": "SELECT route, drug_type FROM prescriptions WHERE drug = 'Influenza virus vaccine'",
            "gold_answer": "[['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM'], ['MAIN', 'IM']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL mirrors the correct structure (same table, columns, and WHERE clause) but fails to match any rows because the literal string 'Influenza virus vaccine' uses incorrect casing compared to the ground truth value \u201cInfluenza Virus Vaccine\u201d. This is a localized issue in the WHERE clause\u2019s value literal, not a structural or logical flaw in the query.",
            "error_type": "Local: value parsing error"
        },
        "95f8a3f0cc5f6486ee39cf3575176b39": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the marital status and the ethnic group of subject id 30011?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"ETHNICITY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"30011\"",
            "pred_sql": "SELECT marital_status FROM demographic WHERE subject_id = 30011",
            "gold_answer": "[['SINGLE', 'HISPANIC OR LATINO']]",
            "pred_answer": "[['SINGLE']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only selects marital_status and omits the ethnicity column. This is a local issue in the SELECT clause (a missing column) and does not affect the overall query structure or logic.",
            "error_type": "Local: missing column"
        },
        "021ebbc73156e10c78d02f72ff647325": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Count the number of patients that survived and had a procedure icd9 code of 331. (Values: 0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND PROCEDURES.\"ICD9_CODE\" = \"331\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM procedures INNER JOIN demographic ON procedures.subject_id = demographic.subject_id WHERE procedures.icd9_code = 331 AND demographic.expire_flag = 0",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses an INNER JOIN on procedures.subject_id = demographic.subject_id instead of joining on the admission identifier (HADM_ID) as in the ground truth. This incorrect join key fundamentally alters which rows are paired and thus changes the query\u2019s results. Additionally, the predicted version omits the DISTINCT on SUBJECT_ID, further changing the intended patient count. Both issues go beyond a localized syntax fix and affect the overall logic and table relationships.",
            "error_type": "Global: incorrect join key"
        },
        "ab0da7d05a345b85c091b469d2dc2d98": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Specify the discharge location of patient Dona Cole along with the ICU stay ID",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DISCHARGE_LOCATION\",PRESCRIPTIONS.\"ICUSTAY_ID\" FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Dona Cole\"",
            "pred_sql": "SELECT discharge_location, icustay_id FROM demographic WHERE name = 'Dona Cole'",
            "gold_answer": "[['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None']]",
            "pred_answer": "no such column: icustay_id",
            "is_correct": false,
            "reasoning": "The predicted SQL references the column icustay_id from the DEMOGRAPHIC table, but that column does not exist there. The correct query joins DEMOGRAPHIC with PRESCRIPTIONS to retrieve ICUSTAY_ID. This is a localized column reference error that can be fixed by adding the proper JOIN rather than restructuring the overall logic.",
            "error_type": "Local: column reference error"
        },
        "870f671294fda99c32cc17f92aad78a8": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients had the diagnosis long title hemochromatosis due to repeated red blood cell transfusions? (Values: Hemochromatosis due to repeated red blood cell transfusions)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"LONG_TITLE\" = \"Hemochromatosis due to repeated red blood cell transfusions\"",
            "pred_sql": "SELECT count(DISTINCT diagnoses.subject_id) FROM diagnoses INNER JOIN procedures ON diagnoses.subject_id = procedures.subject_id WHERE procedures.long_title LIKE 'Hemochromatosis due to repeated red blood cell transfusions'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the intended logic by joining the DIAGNOSES table to the PROCEDURES table on subject_id and filtering on procedures.long_title. The correct query should filter DIAGNOSES.long_title and join DIAGNOSES to DEMOGRAPHIC on hadm_id. This is not a minor syntax or column reference mistake but a structural error in choosing the wrong table and join relationship, altering the query\u2019s overall meaning.",
            "error_type": "Global: incorrect join table"
        },
        "952343ad7bc01827ae39783f6285e3d6": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "let me know the drug code of midazolam hcl medication. (Values: Midazolam HCl)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Midazolam HCl\"",
            "pred_sql": "SELECT formulary_drug_cd FROM prescriptions WHERE drug LIKE 'Midazolam%'",
            "gold_answer": "[['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I']]",
            "pred_answer": "[['MIDA100'], ['MIDA100'], ['MIDA100'], ['MIDA100'], ['MIDA100'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA2I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I'], ['MIDA50I']]",
            "is_correct": false,
            "reasoning": "The only difference is in the WHERE clause: the predicted SQL uses an overly broad LIKE 'Midazolam%' filter instead of the exact match DRUG = 'Midazolam HCl'. This is a localized error in the condition, not affecting the overall query structure or JOINs. It can be fixed by adjusting the predicate to use the correct equality operator and full string. Therefore, it is a Local error.",
            "error_type": "Local: value parsing error"
        },
        "08e84aa74e5e3adb7969241939f3af64": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many patients used the drug code ergo400t? (Values: ERGO400T)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"ERGO400T\"",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE formulary_drug_cd = 'ERGO400T'",
            "gold_answer": "[['8.0']]",
            "pred_answer": "[['9.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts all prescription rows matching the drug code rather than counting distinct patient IDs. It omits the DISTINCT keyword (and the join to ensure proper patient identification), leading to an overcount. This is a localized issue in the SELECT clause and does not require restructuring the overall query logic.",
            "error_type": "Local: missing-distinct error"
        },
        "964051ea9739d5463dc623efb7016c1e": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the lab category forJerry Deberry?",
            "gold_sql": "SELECT LAB.\"CATEGORY\" FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Jerry Deberry\"",
            "pred_sql": "SELECT category FROM lab WHERE subject_id = 1121",
            "gold_answer": "[['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas']]",
            "pred_answer": "[['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Blood Gas'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry'], ['Chemistry']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters on LAB.subject_id instead of filtering DEMOGRAPHIC.NAME = 'Jerry Deberry' and joining DEMOGRAPHIC to LAB by hadm_id. This is a mistake confined to the WHERE clause (wrong column reference and missing join), and can be fixed by using the correct column and join. It does not require restructuring the overall query logic or altering multiple clauses.",
            "error_type": "Local: column reference error"
        },
        "8182d5043bd51f337b846751434757f1": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "give me the number of patients less than 41 years who have delta abnormal lab test status.",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"41\" AND LAB.\"FLAG\" = \"delta\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.subject_id = lab.subject_id WHERE demographic.age < 41 AND lab.flag = 'delta'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['9.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the join condition and aggregation in a way that alters the fundamental logic of the query. The ground truth uses INNER JOIN on HADM_ID (to match each lab record to the correct admission) and COUNT(DISTINCT SUBJECT_ID) to count unique patients. The prediction instead joins only on SUBJECT_ID and uses COUNT without DISTINCT, causing multiple admissions and lab records per patient to be overcounted. This is not a simple typo or syntax issue isolated to one clause but a core logical mistake in how data rows are combined and aggregated. It requires restructuring the join and aggregation strategy to fix, so it is a Global error.",
            "error_type": "Global: incorrect join and aggregation logic"
        },
        "0e597171d8f49a56ee59fb5598fd6322": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "give the birthdate and procedure title of jerry deberry. (Values: Jerry Deberry)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DOB\",PROCEDURES.\"LONG_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Jerry Deberry\"",
            "pred_sql": "SELECT demographic.dob, procedures.short_title FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE demographic.name = 'Jerry Deberry'",
            "gold_answer": "[['2043-02-28 00:00:00', 'Arterial catheterization'], ['2043-02-28 00:00:00', 'Central venous catheter placement with guidance'], ['2043-02-28 00:00:00', 'Continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['2043-02-28 00:00:00', 'Enteral infusion of concentrated nutritional substances'], ['2043-02-28 00:00:00', 'Insertion of endotracheal tube'], ['2043-02-28 00:00:00', 'Percutaneous [endoscopic] gastrostomy [PEG]'], ['2043-02-28 00:00:00', 'Venous catheterization, not elsewhere classified']]",
            "pred_answer": "[['2043-02-28 00:00:00', 'Arterial catheterization'], ['2043-02-28 00:00:00', 'CV cath plcmt w guidance'], ['2043-02-28 00:00:00', 'Cont inv mec ven <96 hrs'], ['2043-02-28 00:00:00', 'Entral infus nutrit sub'], ['2043-02-28 00:00:00', 'Insert endotracheal tube'], ['2043-02-28 00:00:00', 'Percu endosc gastrostomy'], ['2043-02-28 00:00:00', 'Venous cath NEC']]",
            "is_correct": false,
            "reasoning": [
                "The ground truth query selects the column PROCEDURES.LONG_TITLE, while the predicted query selects PROCEDURES.SHORT_TITLE. This is a wrong column reference in the SELECT clause.",
                "Additionally, the ground truth joins on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID, but the predicted query joins on DEMOGRAPHIC.SUBJECT_ID = PROCEDURES.SUBJECT_ID. Although this could lead to incorrect row associations in some cases, the immediate semantic mismatch in the result text (long vs. short title) stems from the column choice.",
                "Selecting the wrong column is a localized mistake in the SELECT clause and does not require restructuring the overall query logic or changing table relationships at a higher level."
            ],
            "error_type": "Local: column reference error"
        },
        "927bcc5acfc5a7c2f5b2ebf99de1cd47": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "provide me with the number of patients less than 59 years who were transferred within the facility. (Values: TRSF WITHIN THIS FACILITY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRSF WITHIN THIS FACILITY\" AND DEMOGRAPHIC.\"AGE\" < \"59\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE admission_location = 'TRANSFER FROM HOSP/EXTRAM' AND age < 59",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs by using the wrong literal in the WHERE clause\u2014\u2018TRANSFER FROM HOSP/EXTRAM\u2019 instead of the exact required value \u2018TRSF WITHIN THIS FACILITY\u2019. This is a localized mistake in the filter condition (value parsing), not affecting overall structure or joins.",
            "error_type": "Local: value parsing error"
        },
        "a667eb8609a76490ddbf55b5c12e3695": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "count the number of emergency hospital admission patients who have gangrene primary disease. (Values: EMERGENCY, GANGRENE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"GANGRENE\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE diagnoses.short_title = 'Gangrene' AND demographic.admission_type = 'EMERGENCY'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly joins DEMOGRAPHIC to the DIAGNOSES table and filters on DIAGNOSES.SHORT_TITLE rather than using DEMOGRAPHIC.DIAGNOSIS for the primary disease. This changes the overall logic and data source for the condition, leading to the wrong result. Fixing it would require restructuring the query to remove the join and reference the correct column, which is beyond a localized syntax fix.",
            "error_type": "Global: join error"
        },
        "9057aad89e8f66541aadbb45ad8f4747": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "which patients are on bethanechol prescription? (Values: Bethanechol)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Bethanechol\"",
            "pred_sql": "SELECT prescriptions.subject_id FROM prescriptions WHERE prescriptions.drug = 'Bethanechol'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['4589.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only selects subject_id rows from the PRESCRIPTIONS table without applying COUNT or DISTINCT, whereas the ground truth requires counting distinct subject IDs. This is a mistake in the SELECT clause (wrong aggregation and missing DISTINCT), and does not involve reworking joins or overall query structure.",
            "error_type": "Local: aggregation error"
        },
        "a557a27486342d06e45decaab91aeed8": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "tell me the birth date and primary disease of patient caren obrien. (Values: Caren Obrien)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DOB\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Caren Obrien\"",
            "pred_sql": "SELECT dob, diagnosis FROM demographic WHERE name = 'CAREN OBRIEN'",
            "gold_answer": "[['1882-10-10 00:00:00', 'SYNCOPE;TELEMETRY']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The only mistake in the predicted SQL is the casing of the literal 'CAREN OBRIEN' versus the actual name 'Caren Obrien' in the DEMOGRAPHIC table. SQLite\u2019s default comparison is case\u2010sensitive here, so the wrong case prevents any match. Everything else\u2014table, columns, clauses\u2014matches the ground truth. This is a localized value parsing/formatting error and doesn\u2019t alter the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "05b45f4c6bb925d5091172692b926676": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the number of patients born before the year 1837 who are no longer alive.",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"1\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"1837\"",
            "pred_sql": "SELECT count(expire_flag) FROM demographic WHERE dob_year < 1837 AND expire_flag = 0",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s mistakes are confined to the WHERE clause and the aggregation: it uses expire_flag = 0 instead of = 1 and uses count(expire_flag) rather than counting distinct SUBJECT_ID. The overall structure (single-table query, no joins or subqueries) is correct, and the error can be fixed locally without restructuring the query\u2019s logic or joins.",
            "error_type": "Local: value/filter aggregation error"
        },
        "007b736a7e85c0d907a1aa0574bcd582": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients were admitted in emergency primarily for brain mass; intracranial hemorrhage ? (Values: EMERGENCY, BRAIN MASS;INTRACRANIAL HEMORRHAGE)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"BRAIN MASS;INTRACRANIAL HEMORRHAGE\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.admission_location = 'EMERGENCY' AND diagnoses.short_title = 'BRAIN MASS;INTRACRANIAL HEMORRHAGE'",
            "gold_answer": "[['71.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic by joining to the DIAGNOSES table and filtering on diagnoses.short_title rather than using the DEMOGRAPHIC.DIAGNOSIS field and the ADMISSION_TYPE column. It also uses the wrong admission_location column instead of ADMISSION_TYPE and introduces a join that was not in the original intent. These are not isolated typos but a flawed join strategy and incorrect table/column usage that alter the query\u2019s overall meaning, so this is a Global error.",
            "error_type": "Global: incorrect join"
        },
        "817fe7ae70c2b5623a475f9d9081d340": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the maximum age of married patients aged 47 years or more? (Values: MARRIED)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"MARRIED\" AND DEMOGRAPHIC.\"AGE\" >= \"47\"",
            "pred_sql": "SELECT max(demographic.age) FROM demographic INNER JOIN prescriptions ON demographic.hadm_id = prescriptions.hadm_id WHERE prescriptions.drug_type = 'BASE' AND demographic.marital_status = 'MARRIED' AND demographic.age >= 47",
            "gold_answer": "[['89.0']]",
            "pred_answer": "[['82.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN with the PRESCRIPTIONS table and filters on prescriptions.drug_type = 'BASE', which is not part of the original question. This join and extra filter changes the overall logic and scope of the query by restricting the population to patients with a specific prescription type. Fixing this would require removing the join and filter entirely, which is more than a localized syntax or reference fix\u2014it alters the fundamental structure and intended meaning of the query.",
            "error_type": "Global: join error"
        },
        "804ca29d119c7ae443c69c0e0f4bee74": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what number of patients under the age of 51 were given the drug metoprolol tartrate? (Values: Metoprolol Tartrate)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"51\" AND PRESCRIPTIONS.\"DRUG\" = \"Metoprolol Tartrate\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE prescriptions.drug = 'Metoprolol Tartrate' AND demographic.age < 51",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['16.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC and PRESCRIPTIONS on SUBJECT_ID instead of HADM_ID, and it omits COUNT(DISTINCT ...) so it counts duplicate patient entries across multiple admissions. This wrong join key and missing DISTINCT fundamentally alters the patient-level aggregation and the logical relationship between tables, leading to an inflated count. Fixing this requires changing the join condition and aggregation approach, not just a local tweak.",
            "error_type": "Global: incorrect join condition"
        },
        "a6a732ecc77a93f6fac9453d69cce42b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the insurance and diagnosis icd9 code of subject id 21796?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"INSURANCE\",DIAGNOSES.\"ICD9_CODE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"21796\"",
            "pred_sql": "SELECT DISTINCT demographic.insurance, demographic.diagnosis FROM demographic INNER JOIN diagnoses ON demographic.hadm_id = diagnoses.hadm_id WHERE demographic.subject_id = 21796",
            "gold_answer": "[['Medicaid', '75261.0'], ['Medicaid', '76517.0'], ['Medicaid', 'V3101']]",
            "pred_answer": "[['Medicaid', 'NEWBORN'], ['Medicaid', 'NEWBORN'], ['Medicaid', 'NEWBORN']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects demographic.diagnosis instead of diagnoses.icd9_code. The JOIN and filter are correct, but the wrong column is chosen for the ICD9 code, leading to incorrect values. This is a localized column reference error and does not affect the overall query structure or logic.",
            "error_type": "Local: incorrect-column-reference"
        },
        "827f8123c25ca3f714d96faaa00d1b84": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many pateints aged below 77 had undergone the procedure with icd9 code 4632?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"77\" AND PROCEDURES.\"ICD9_CODE\" = \"4632\"",
            "pred_sql": "SELECT count(procedures.subject_id) FROM procedures INNER JOIN demographic ON procedures.subject_id = demographic.subject_id WHERE procedures.icd9_code = 4632 AND demographic.age < 77",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses an incorrect join condition and aggregation, joining PROCEDURES to DEMOGRAPHIC on SUBJECT_ID rather than HADM_ID, and then counting all procedure records rather than distinct patients. This fundamentally alters the query logic and leads to overcounting. Fixing it requires changing the join key and aggregation approach, which affects multiple clauses and the overall structure.",
            "error_type": "Global: incorrect join key and aggregation"
        },
        "81b5be8c3f3f6d80a55a59f385e43659": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients whose drug route is tp and admission year is before 2167? (Values: TP)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2167\" AND PRESCRIPTIONS.\"ROUTE\" = \"TP\"",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE route = 'TP' AND strftime('%Y', admittime) < '2167'",
            "gold_answer": "[['17.0']]",
            "pred_answer": "no such column: admittime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'admittime' in the PRESCRIPTIONS table and fails to join with DEMOGRAPHIC to filter by admission year. The core mistake is a column reference error (using a column that doesn\u2019t exist in the referenced table), which is localized to the WHERE clause and does not require restructuring the entire query logic.",
            "error_type": "Local: table/column reference error"
        },
        "94b7c34d08a1a683e0a44fc75d03d1ed": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the discharge location and icu stay id of subject id 42820?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DISCHARGE_LOCATION\",PRESCRIPTIONS.\"ICUSTAY_ID\" FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"42820\"",
            "pred_sql": "SELECT discharge_location, icustay_id FROM demographic WHERE subject_id = 42820",
            "gold_answer": "[['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None']]",
            "pred_answer": "no such column: icustay_id",
            "is_correct": false,
            "reasoning": "The predicted SQL tries to select icustay_id directly from the DEMOGRAPHIC table without joining PRESCRIPTIONS, but icustay_id is not a column in DEMOGRAPHIC. This is a column reference error in the SELECT clause that can be fixed by adding the correct JOIN; it does not require restructuring the entire query logic.",
            "error_type": "Local: table/column reference error"
        },
        "0e6c9aa4ec70962176e94cd1b787f404": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "get me the number of private insurance patients who had phys referral/normal deli admission. (Values: Private, PHYS REFERRAL/NORMAL DELI)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Private\" AND DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"PHYS REFERRAL/NORMAL DELI\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.insurance = 'Private' AND diagnoses.short_title = 'PHYSIO/ENT REFERRAL' AND diagnoses.icd9_code = 3140 AND demographic.admission_type = 'PHYS REFERRAL/NORMAL DELI'",
            "gold_answer": "[['27.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN to the DIAGNOSES table and applies filters on diagnoses.short_title and diagnoses.icd9_code, none of which are relevant to counting patients by insurance and admission location. It also filters demographic.admission_type instead of demographic.admission_location. These mistakes span the FROM and WHERE clauses, fundamentally altering the query\u2019s logic and requiring restructuring (removing the join, changing columns, and dropping irrelevant conditions).",
            "error_type": "Global: incorrect join and filter logic"
        },
        "93e5a26d56fc80b963506bc71b380d70": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "give me the number of government health insurance patients who had elective hospital admission. (Values: ELECTIVE, Government)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"ELECTIVE\" AND DEMOGRAPHIC.\"INSURANCE\" = \"Government\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE demographic.admission_type = 'ELECTIVE'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['73.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN on the PROCEDURES table and omits the required INSURANCE = 'Government' filter. This join alters the intended data relationships (restricting to patients with procedures) and fundamentally changes the query logic. Fixing this would require removing the join and adding the missing WHERE condition, not just tweaking a single clause. Therefore, it\u2019s a global logical/structural error.",
            "error_type": "Global: incorrect join logic"
        },
        "8314ff25189b1cf1389172288628eccb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "count the number of emergency hospital admission patients who are taking main drug type prescription. (Values: EMERGENCY, MAIN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"MAIN\"",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM prescriptions INNER JOIN demographic ON prescriptions.hadm_id = demographic.hadm_id WHERE demographic.admission_type = 'EMERGENCY' AND prescriptions.drug_type = 'MAIN'",
            "gold_answer": "[['53.0']]",
            "pred_answer": "[['4092.0']]",
            "is_correct": false,
            "reasoning": "Both queries join DEMOGRAPHIC and PRESCRIPTIONS correctly and apply the same WHERE filters on ADMISSION_TYPE and DRUG_TYPE. The only difference is in the SELECT clause: the ground truth uses COUNT(DISTINCT demographic.subject_id) to count unique patients, whereas the predicted query uses COUNT(prescriptions.subject_id), counting every prescription record (including duplicates for the same patient). This is a localized mistake in the aggregation function and does not alter the overall structure or table relationships.",
            "error_type": "Local: missing DISTINCT"
        },
        "0db846be36da67fce1f489804cad086f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "get the number of jehovah's witness religious background patients who are less than 71 years of age. (Values: JEHOVAH'S WITNESS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"RELIGION\" = \"JEHOVAH'S WITNESS\" AND DEMOGRAPHIC.\"AGE\" < \"71\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE religion = 'JEHOVAH' AND age < 71",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses religion = 'JEHOVAH' instead of the exact value 'JEHOVAH'S WITNESS'. This is a mistake in the literal value used in the WHERE clause and does not involve changing joins or overall query structure. Hence it is a localized value parsing error.",
            "error_type": "Local: value parsing error"
        }
    },
    "deepseek": {
        "91c5218be91466059b6ab55b5c9a214b": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "How many of the patients on urgent admission had Neb drug route? (Values: URGENT, NEB)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"URGENT\" AND PRESCRIPTIONS.\"ROUTE\" = \"NEB\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID, D.HADM_ID) AS COUNT FROM DEMOGRAPHIC D JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.ADMISSION_TYPE = 'URGENT' AND P.ROUTE = 'NEB';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "wrong number of arguments to function COUNT()",
            "is_correct": false,
            "reasoning": "The predicted SQL attempts to use COUNT(DISTINCT D.SUBJECT_ID, D.HADM_ID), but in SQLite COUNT(DISTINCT ) only accepts a single argument. This is a localized mistake within the aggregate function and does not affect the overall query structure or join logic. It is a syntax/argument error in the COUNT expression that can be fixed by using a single column or a different approach to count distinct pairs.",
            "error_type": "Local: aggregate argument error"
        },
        "082284313a19eb529220fdbbf1676c91": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many of the patients on self pay had coronary athrscl natve vessel? (Values: Self Pay, Crnry athrscl natve vssl)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Self Pay\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Crnry athrscl natve vssl\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC DM JOIN DIAGNOSES D ON DM.SUBJECT_ID = D.SUBJECT_ID AND DM.HADM_ID = D.HADM_ID WHERE DM.INSURANCE = 'Self Pay' AND D.LONG_TITLE = 'Coronary atherosclerosis native vessel';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the LONG_TITLE column and a spelled\u2010out disease name (\u2018Coronary atherosclerosis native vessel\u2019) instead of matching the SHORT_TITLE column and exact literal (\u2018Crnry athrscl natve vssl\u2019) specified by the question. The join structure and overall query logic remain correct, but the wrong column and mismatched value in the WHERE clause cause the discrepancy. This is a localized error that can be fixed without altering the query\u2019s structure.",
            "error_type": "Local: column reference/value error"
        },
        "867cf2582e1a2e8d3334cc3214b69472": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Calculate the minimum age of patients born after 2095 who have transient ischemic attack as their primary disease. (Values: TRANSIENT ISCHEMIC ATTACK)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"TRANSIENT ISCHEMIC ATTACK\" AND DEMOGRAPHIC.\"DOB_YEAR\" > \"2095\"",
            "pred_sql": "SELECT MIN(AGE) AS MIN_AGE FROM DEMOGRAPHIC WHERE YEAR(DOB) > 2095 AND DIAGNOSIS LIKE '%TRANSIENT ISCHEMIC ATTACK%';",
            "gold_answer": "[['None']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because SQLite does not support the YEAR() function. This is a localized error in the WHERE clause (incorrect function usage), not a fundamental mis\u2010join or structural mistake. Everything else (MIN(AGE), table reference, LIKE condition) is syntactically local to that clause and doesn\u2019t require rewriting the overall query logic.",
            "error_type": "Local: function usage error"
        },
        "bba6972bfb39756cdb923e4518ec3895": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients whose discharge location is short term hospital and who were admitted to the hospital before year 2182. (Values: SHORT TERM HOSPITAL)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"SHORT TERM HOSPITAL\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2182\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE DISCHARGE_LOCATION = 'SHORT TERM HOSPITAL' AND EXTRACT(YEAR FROM ADMITTIME) < 2182;",
            "gold_answer": "[['2.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to the use of the EXTRACT(YEAR FROM ADMITTIME) function, which is not supported by SQLite and causes a syntax error in the WHERE clause. This is a localized issue within the WHERE clause that does not affect the overall query structure or logic\u2014only the function call needs correction (e.g., using ADMITYEAR or a supported date function). Therefore, it is a local error.",
            "error_type": "Local: syntax error"
        },
        "8df86af35155206db971854cf41e6139": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "specify the icd9 code for patient id 18480",
            "gold_sql": "SELECT PROCEDURES.\"SHORT_TITLE\" FROM PROCEDURES WHERE PROCEDURES.\"SUBJECT_ID\" = \"18480\"",
            "pred_sql": "SELECT ICD9_CODE FROM DIAGNOSES WHERE SUBJECT_ID = 18480;",
            "gold_answer": "[['AICD check']]",
            "pred_answer": "[['135.0'], ['2384.0'], ['4254.0'], ['4271.0'], ['4280.0'], ['49390.0'], ['V4502'], ['V4983'], ['V5861']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls ICD9_CODEs from the DIAGNOSES table, whereas the ground truth and the question context require retrieving data from the PROCEDURES table. This is not a simple typo or minor syntax fix; it fundamentally alters the source of the data and thus the overall logic of the query. Correcting it requires changing the table (and column) in the FROM clause, which is a structural change affecting the query\u2019s meaning.",
            "error_type": "Global: incorrect table selection"
        },
        "bb19be85af3e917061a124154ac4818b": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "What is the total number of patients who died in or before the year 2111? (Values: 2111.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DOD_YEAR\" <= \"2111.0\"",
            "pred_sql": "error in generating sql: No",
            "gold_answer": "[['1.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is not even a valid query but an error message instead of SQL code. This is a localized syntax/format error in the prediction rather than a structural or logical flaw in the query design. It does not require rethinking the overall query logic, just producing valid SQL.",
            "error_type": "Local: syntax error"
        },
        "96c7391dfa5656e7563b51eacaee15a7": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "find out the type of admission and lab test ordered for patient with patient id 18480.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",LAB.\"LABEL\" FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"18480\"",
            "pred_sql": "SELECT D.ADMISSION_TYPE, L.LABEL AS LAB_TEST, L.CHARTTIME FROM DEMOGRAPHIC D JOIN LAB L ON D.SUBJECT_ID = L.SUBJECT_ID AND D.HADM_ID = L.HADM_ID WHERE D.SUBJECT_ID = 18480;",
            "gold_answer": "[['EMERGENCY', 'Alanine Aminotransferase (ALT)'], ['EMERGENCY', 'Alkaline Phosphatase'], ['EMERGENCY', 'Anion Gap'], ['EMERGENCY', 'Asparate Aminotransferase (AST)'], ['EMERGENCY', 'Basophils'], ['EMERGENCY', 'Bicarbonate'], ['EMERGENCY', 'Bilirubin, Total'], ['EMERGENCY', 'Calcium, Total'], ['EMERGENCY', 'Calcium, Total'], ['EMERGENCY', 'Chloride'], ['EMERGENCY', 'Creatine Kinase (CK)'], ['EMERGENCY', 'Creatine Kinase (CK)'], ['EMERGENCY', 'Creatine Kinase, MB Isoenzyme'], ['EMERGENCY', 'Creatine Kinase, MB Isoenzyme'], ['EMERGENCY', 'Creatinine'], ['EMERGENCY', 'Creatinine'], ['EMERGENCY', 'Eosinophils'], ['EMERGENCY', 'Glucose'], ['EMERGENCY', 'Glucose'], ['EMERGENCY', 'Hematocrit'], ['EMERGENCY', 'Hemoglobin'], ['EMERGENCY', 'Lymphocytes'], ['EMERGENCY', 'MCH'], ['EMERGENCY', 'MCHC'], ['EMERGENCY', 'MCV'], ['EMERGENCY', 'Magnesium'], ['EMERGENCY', 'Magnesium'], ['EMERGENCY', 'Monocytes'], ['EMERGENCY', 'Neutrophils'], ['EMERGENCY', 'PT'], ['EMERGENCY', 'PTT'], ['EMERGENCY', 'Phosphate'], ['EMERGENCY', 'Platelet Count'], ['EMERGENCY', 'Potassium'], ['EMERGENCY', 'Potassium'], ['EMERGENCY', 'Potassium, Whole Blood'], ['EMERGENCY', 'RDW'], ['EMERGENCY', 'Red Blood Cells'], ['EMERGENCY', 'Sodium'], ['EMERGENCY', 'Sodium'], ['EMERGENCY', 'Troponin T'], ['EMERGENCY', 'Troponin T'], ['EMERGENCY', 'Urea Nitrogen'], ['EMERGENCY', 'Urea Nitrogen'], ['EMERGENCY', 'White Blood Cells']]",
            "pred_answer": "[['EMERGENCY', 'Alanine Aminotransferase (ALT)', '2138-07-18 07:30:00'], ['EMERGENCY', 'Alkaline Phosphatase', '2138-07-18 07:30:00'], ['EMERGENCY', 'Anion Gap', '2138-07-18 00:30:00'], ['EMERGENCY', 'Asparate Aminotransferase (AST)', '2138-07-18 07:30:00'], ['EMERGENCY', 'Basophils', '2138-07-18 00:30:00'], ['EMERGENCY', 'Bicarbonate', '2138-07-18 00:30:00'], ['EMERGENCY', 'Bilirubin, Total', '2138-07-18 07:30:00'], ['EMERGENCY', 'Calcium, Total', '2138-07-18 00:30:00'], ['EMERGENCY', 'Calcium, Total', '2138-07-18 07:30:00'], ['EMERGENCY', 'Chloride', '2138-07-18 00:30:00'], ['EMERGENCY', 'Creatine Kinase (CK)', '2138-07-18 00:30:00'], ['EMERGENCY', 'Creatine Kinase (CK)', '2138-07-18 07:30:00'], ['EMERGENCY', 'Creatine Kinase, MB Isoenzyme', '2138-07-18 00:30:00'], ['EMERGENCY', 'Creatine Kinase, MB Isoenzyme', '2138-07-18 07:30:00'], ['EMERGENCY', 'Creatinine', '2138-07-18 00:30:00'], ['EMERGENCY', 'Creatinine', '2138-07-18 07:30:00'], ['EMERGENCY', 'Eosinophils', '2138-07-18 00:30:00'], ['EMERGENCY', 'Glucose', '2138-07-18 00:30:00'], ['EMERGENCY', 'Glucose', '2138-07-18 07:30:00'], ['EMERGENCY', 'Hematocrit', '2138-07-18 00:30:00'], ['EMERGENCY', 'Hemoglobin', '2138-07-18 00:30:00'], ['EMERGENCY', 'Lymphocytes', '2138-07-18 00:30:00'], ['EMERGENCY', 'MCH', '2138-07-18 00:30:00'], ['EMERGENCY', 'MCHC', '2138-07-18 00:30:00'], ['EMERGENCY', 'MCV', '2138-07-18 00:30:00'], ['EMERGENCY', 'Magnesium', '2138-07-18 00:30:00'], ['EMERGENCY', 'Magnesium', '2138-07-18 07:30:00'], ['EMERGENCY', 'Monocytes', '2138-07-18 00:30:00'], ['EMERGENCY', 'Neutrophils', '2138-07-18 00:30:00'], ['EMERGENCY', 'PT', '2138-07-18 00:30:00'], ['EMERGENCY', 'PTT', '2138-07-18 00:30:00'], ['EMERGENCY', 'Phosphate', '2138-07-18 00:30:00'], ['EMERGENCY', 'Platelet Count', '2138-07-18 00:30:00'], ['EMERGENCY', 'Potassium', '2138-07-18 00:30:00'], ['EMERGENCY', 'Potassium', '2138-07-18 07:30:00'], ['EMERGENCY', 'Potassium, Whole Blood', '2138-07-18 02:04:00'], ['EMERGENCY', 'RDW', '2138-07-18 00:30:00'], ['EMERGENCY', 'Red Blood Cells', '2138-07-18 00:30:00'], ['EMERGENCY', 'Sodium', '2138-07-18 00:30:00'], ['EMERGENCY', 'Sodium', '2138-07-18 07:30:00'], ['EMERGENCY', 'Troponin T', '2138-07-18 00:30:00'], ['EMERGENCY', 'Troponin T', '2138-07-18 07:30:00'], ['EMERGENCY', 'Urea Nitrogen', '2138-07-18 00:30:00'], ['EMERGENCY', 'Urea Nitrogen', '2138-07-18 07:30:00'], ['EMERGENCY', 'White Blood Cells', '2138-07-18 00:30:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the correct tables and join conditions (it even redundantly joins on SUBJECT_ID and HADM_ID, which doesn\u2019t alter the result), and the WHERE clause correctly filters SUBJECT_ID = 18480. The only mistake is that the SELECT clause includes an extra column (L.CHARTTIME) which was not asked for. This is a localized error in the SELECT projection and does not affect the overall query structure or logic.",
            "error_type": "Local: projection error"
        },
        "a9d92fdd4552de6119472381096c41d5": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many patients with drug type base were diagnosed with diaphragmatic hernia? (Values: Diaphragmatic hernia, BASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Diaphragmatic hernia\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"BASE\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D INNER JOIN DIAGNOSES Diag ON D.SUBJECT_ID = Diag.SUBJECT_ID AND D.HADM_ID = Diag.HADM_ID INNER JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE Diag.LONG_TITLE = 'Diaphragmatic hernia' AND P.DRUG_TYPE = 'BASE';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column for the diagnosis filter: it checks Diag.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE as required by the question. All joins and overall structure remain correct, and only the column reference in the WHERE clause is incorrect. This is a localized error that can be fixed by changing the column name without altering the query\u2019s logic or structure.",
            "error_type": "Local: column reference error"
        },
        "9bd46aa88780c8ffc2597327bfa6d9bd": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients whose admission type is urgent. (Values: URGENT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"URGENT\"",
            "pred_sql": "error in generating sql: No",
            "gold_answer": "[['1.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is just the text \u201cerror in generating sql: No\u201d rather than any valid query. This is a localized syntax/formatting failure and does not involve mis\u2010joining tables or breaking overall logic. It\u2019s a missing or invalid SQL snippet, which is a local error.",
            "error_type": "Local: syntax error"
        },
        "0f58d7c3462afbaf1092a9aaca163031": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "specify type of admission of patient edward castillo (Values: Edward Castillo)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Edward Castillo\"",
            "pred_sql": "SELECT ADMISSION_TYPE FROM DEMOGRAPHIC WHERE NAME = 'EDWARD CASTILLO';",
            "gold_answer": "[['EMERGENCY']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL is nearly identical to the ground truth except for the literal value in the WHERE clause: it uses 'EDWARD CASTILLO' instead of the exact 'Edward Castillo'. This mismatch in the string literal causes no rows to match. The issue is isolated to the value in the WHERE clause and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "09e03098584e026254e4f3b57ab3f524": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "tell me the short title and icd9 code of procedure for patient alice nixon. (Values: Alice Nixon)",
            "gold_sql": "SELECT PROCEDURES.\"ICD9_CODE\",PROCEDURES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Alice Nixon\"",
            "pred_sql": "SELECT P.SHORT_TITLE, P.ICD9_CODE FROM PROCEDURES P INNER JOIN DEMOGRAPHIC D ON P.SUBJECT_ID = D.SUBJECT_ID AND P.HADM_ID = D.HADM_ID WHERE D.NAME = 'Alice Nixon';",
            "gold_answer": "[['3893.0', 'Venous cath NEC'], ['5101.0', 'Percutan aspiration gb'], ['5185.0', 'Endosc sphincterotomy']]",
            "pred_answer": "[['Endosc sphincterotomy', '5185.0'], ['Percutan aspiration gb', '5101.0'], ['Venous cath NEC', '3893.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the two queries is the order of the selected columns: the predicted SQL returns SHORT_TITLE then ICD9_CODE, whereas the expected output (and ground truth SQL\u2019s column order) is ICD9_CODE then SHORT_TITLE. The join structure, WHERE condition, and overall logic are correct. This is a localized mistake in the SELECT clause and doesn\u2019t affect query structure or table relationships.",
            "error_type": "Local: select-column-order error"
        },
        "0b8b878c3c9bf9cfc59e3c2528341dfa": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "for patient id 1875, specify the icd9 code and discharge time",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DISCHTIME\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"1875\"",
            "pred_sql": "SELECT D.ICD9_CODE, DE.DISCHTIME FROM DEMOGRAPHIC DE INNER JOIN DIAGNOSES D ON DE.SUBJECT_ID = D.SUBJECT_ID AND DE.HADM_ID = D.HADM_ID WHERE DE.SUBJECT_ID = 1875;",
            "gold_answer": "[['2120-04-22 12:27:00', 'Abn react-surg proc NEC'], ['2120-04-22 12:27:00', 'Hypertension NOS'], ['2120-04-22 12:27:00', 'Injury femoral nerve'], ['2120-04-22 12:27:00', 'Malig neo bladder NEC'], ['2120-04-22 12:27:00', 'Mitral valve disorder'], ['2120-04-22 12:27:00', 'Other postop infection']]",
            "pred_answer": "[['1888.0', '2120-04-22 12:27:00'], ['4019.0', '2120-04-22 12:27:00'], ['4240.0', '2120-04-22 12:27:00'], ['9561.0', '2120-04-22 12:27:00'], ['99859.0', '2120-04-22 12:27:00'], ['E8788', '2120-04-22 12:27:00']]",
            "is_correct": false,
            "reasoning": "The predicted query differs from the ground truth only in the SELECT clause: it returns ICD9_CODE and DISCHTIME, whereas the ground truth returns DISCHTIME and SHORT_TITLE. The discrepancy is purely a column reference/selection error in the SELECT list and does not affect joins, filters, or overall query structure.",
            "error_type": "Local: column-reference error"
        },
        "9b581dcd0f727ee14875356de70bafdd": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "How many female patients use a drug named theophylline sr? (Values: F, Theophylline SR)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND PRESCRIPTIONS.\"DRUG\" = \"Theophylline SR\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) AS FEMALE_PATIENTS USING_THEOPHYLLINE_SR FROM DEMOGRAPHIC D INNER JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.GENDER = 'F' AND P.DRUG = 'Theophylline SR';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"USING_THEOPHYLLINE_SR\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to a syntax error in the SELECT clause: it attempts to assign two aliases without proper comma or AS usage (\"AS FEMALE_PATIENTS USING_THEOPHYLLINE_SR\"), which is a localized mistake in aliasing and does not affect the overall query logic or joins.",
            "error_type": "Local: syntax error"
        },
        "8404915c4b01e35617f74cf4e165a482": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Let me know the short title of diagnoses and procedures for the patient with patient id 74463.",
            "gold_sql": "SELECT DIAGNOSES.\"SHORT_TITLE\",PROCEDURES.\"SHORT_TITLE\" FROM DIAGNOSES INNER JOIN PROCEDURES on DIAGNOSES.HADM_ID = PROCEDURES.HADM_ID WHERE DIAGNOSES.\"SUBJECT_ID\" = \"74463\"",
            "pred_sql": "SELECT SHORT_TITLE FROM DIAGNOSES WHERE SUBJECT_ID = 74463 UNION SELECT SHORT_TITLE FROM PROCEDURES WHERE SUBJECT_ID = 74463;",
            "gold_answer": "[['Abn react-artif implant', 'Dx ultrasound-heart'], ['Abn react-artif implant', 'Extracorporeal circulat'], ['Abn react-artif implant', 'Opn/oth rep aort vlv-tis'], ['Aortic valve disorder', 'Dx ultrasound-heart'], ['Aortic valve disorder', 'Extracorporeal circulat'], ['Aortic valve disorder', 'Opn/oth rep aort vlv-tis'], ['Aortocoronary bypass', 'Dx ultrasound-heart'], ['Aortocoronary bypass', 'Extracorporeal circulat'], ['Aortocoronary bypass', 'Opn/oth rep aort vlv-tis'], ['Atrial fibrillation', 'Dx ultrasound-heart'], ['Atrial fibrillation', 'Extracorporeal circulat'], ['Atrial fibrillation', 'Opn/oth rep aort vlv-tis'], ['Crnry athrscl natve vssl', 'Dx ultrasound-heart'], ['Crnry athrscl natve vssl', 'Extracorporeal circulat'], ['Crnry athrscl natve vssl', 'Opn/oth rep aort vlv-tis'], ['DMII wo cmp nt st uncntr', 'Dx ultrasound-heart'], ['DMII wo cmp nt st uncntr', 'Extracorporeal circulat'], ['DMII wo cmp nt st uncntr', 'Opn/oth rep aort vlv-tis'], ['Hyperlipidemia NEC/NOS', 'Dx ultrasound-heart'], ['Hyperlipidemia NEC/NOS', 'Extracorporeal circulat'], ['Hyperlipidemia NEC/NOS', 'Opn/oth rep aort vlv-tis'], ['Hypertension NOS', 'Dx ultrasound-heart'], ['Hypertension NOS', 'Extracorporeal circulat'], ['Hypertension NOS', 'Opn/oth rep aort vlv-tis'], ['Hypoxemia', 'Dx ultrasound-heart'], ['Hypoxemia', 'Extracorporeal circulat'], ['Hypoxemia', 'Opn/oth rep aort vlv-tis'], ['Long-term use anticoagul', 'Dx ultrasound-heart'], ['Long-term use anticoagul', 'Extracorporeal circulat'], ['Long-term use anticoagul', 'Opn/oth rep aort vlv-tis'], ['Old myocardial infarct', 'Dx ultrasound-heart'], ['Old myocardial infarct', 'Extracorporeal circulat'], ['Old myocardial infarct', 'Opn/oth rep aort vlv-tis'], ['Secundum atrial sept def', 'Dx ultrasound-heart'], ['Secundum atrial sept def', 'Extracorporeal circulat'], ['Secundum atrial sept def', 'Opn/oth rep aort vlv-tis'], ['Seroma complicting proc', 'Dx ultrasound-heart'], ['Seroma complicting proc', 'Extracorporeal circulat'], ['Seroma complicting proc', 'Opn/oth rep aort vlv-tis'], ['Status-post ptca', 'Dx ultrasound-heart'], ['Status-post ptca', 'Extracorporeal circulat'], ['Status-post ptca', 'Opn/oth rep aort vlv-tis']]",
            "pred_answer": "[['Abn react-artif implant'], ['Aortic valve disorder'], ['Aortocoronary bypass'], ['Atrial fibrillation'], ['Crnry athrscl natve vssl'], ['DMII wo cmp nt st uncntr'], ['Dx ultrasound-heart'], ['Extracorporeal circulat'], ['Hyperlipidemia NEC/NOS'], ['Hypertension NOS'], ['Hypoxemia'], ['Long-term use anticoagul'], ['Old myocardial infarct'], ['Opn/oth rep aort vlv-tis'], ['Secundum atrial sept def'], ['Seroma complicting proc'], ['Status-post ptca']]",
            "is_correct": false,
            "reasoning": "The predicted SQL replaces the intended INNER JOIN between DIAGNOSES and PROCEDURES with a UNION, fundamentally changing the query\u2019s structure and semantics. Instead of returning paired short titles for diagnoses and procedures per patient stay, it returns a single deduplicated list of titles across both tables. Fixing this requires restructuring the entire query (replacing the UNION with a proper JOIN), so it is not a localized mistake.",
            "error_type": "Global: join error"
        },
        "827507e01ab3afc178dc211fd3c77e6e": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "How many patients with an american indian/alaska native ethnic background were born before 2129? (Values: AMERICAN INDIAN/ALASKA NATIVE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"AMERICAN INDIAN/ALASKA NATIVE\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2129\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DEMOGRAPHIC WHERE ETHNICITY = 'AMERICAN INDIAN/ALASKA NATIVE' AND YEAR(DOB) < 2129;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the YEAR(DOB) function, which does not exist in SQLite, resulting in a 'no such function: YEAR' error. This is a localized issue within the WHERE clause\u2019s date handling and does not affect the overall query structure or table relationships.",
            "error_type": "Local: other-temporal error"
        },
        "9f47c8570c1f03b0c2ced60a72ab7677": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "find the diagnostic decription of diagnoses icd9 code v202. (Values: V202)",
            "gold_sql": "SELECT DIAGNOSES.\"LONG_TITLE\" FROM DIAGNOSES WHERE DIAGNOSES.\"ICD9_CODE\" = \"V202\"",
            "pred_sql": "SELECT SHORT_TITLE, LONG_TITLE FROM DIAGNOSES WHERE ICD9_CODE = 'V202';",
            "gold_answer": "[['Routine infant or child health check'], ['Routine infant or child health check']]",
            "pred_answer": "[['Routin child health exam', 'Routine infant or child health check'], ['Routin child health exam', 'Routine infant or child health check']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth in the SELECT clause: it returns both SHORT_TITLE and LONG_TITLE instead of only LONG_TITLE. The WHERE clause and table are correct, and no joins or logic are altered. This is a localized mistake in the selected columns and does not affect the overall query structure or relationships.",
            "error_type": "Local: wrong-selection error"
        },
        "089d4aad2ec39bb2d761af30e107414a": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Get me the number of white russian patients born before year 2103. (Values: WHITE - RUSSIAN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"WHITE - RUSSIAN\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2103\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE ETHNICITY = 'WHITE' AND DOB_YEAR < 2103;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['41.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters for ETHNICITY = 'WHITE' instead of the exact value 'WHITE - RUSSIAN' specified in the question. This is a mistake in the WHERE clause literal value and does not alter the overall query structure or join logic. It can be fixed by changing the string literal to the correct ethnicity value.",
            "error_type": "Local: value parsing error"
        },
        "ba3064e8b2f13a2448df23529f31de36": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of patients with admission location as transfer from hosp/extram and were admitted before the year 2131? (Values: TRANSFER FROM HOSP/EXTRAM)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRANSFER FROM HOSP/EXTRAM\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2131\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE ADMISSION_LOCATION = 'TRANSFER FROM HOSP/EXTRAM' AND YEAR(ADMITTIME) < 2131;",
            "gold_answer": "[['3.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted query fails because it uses the SQLite-unsupported YEAR() function to extract the year from ADMITTIME. This is a local mistake confined to the WHERE clause (an invalid function call), not a deeper logical or structural issue with joins or overall query intent. It can be fixed by replacing YEAR() with the correct SQLite date extraction (e.g., strftime).",
            "error_type": "Local: function reference error"
        },
        "054cab8f7975c46d400a46a17beb5a13": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "which patients had emergency hospital admission? (Values: EMERGENCY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\"",
            "pred_sql": "SELECT SUBJECT_ID, NAME FROM DEMOGRAPHIC WHERE ADMISSION_TYPE = 'EMERGENCY';",
            "gold_answer": "[['62.0']]",
            "pred_answer": "[['10317.0', 'Wayne Shelton'], ['14755.0', 'Bessie Wood'], ['15061.0', 'Lue White'], ['15898.0', 'Hector Blair'], ['16438.0', 'Sheila Riley'], ['17519.0', 'Shirley Mckenney'], ['17570.0', 'Paul Dunn'], ['18351.0', 'Andrea Capone'], ['18480.0', 'Brian Taylor'], ['19620.0', 'Dawn Cooper'], ['23602.0', 'Ida Cook'], ['23733.0', 'Dominga Garvin'], ['25167.0', 'Cynthia Robinson'], ['26285.0', 'Cynthia Gomez'], ['26746.0', 'Antonio Mccarthy'], ['28588.0', 'Theresa Vedovelli'], ['29541.0', 'Estrella Carroll'], ['29767.0', 'Warren Baldwin'], ['29961.0', 'Jerry Deberry'], ['3369.0', 'Charles Nilsson'], ['3623.0', 'Joseph Dillman'], ['42067.0', 'Kurt Buczek'], ['42820.0', 'Evelyn Lawrence'], ['42963.0', 'Marilyn Norvell'], ['43220.0', 'Thomas Nazario'], ['4333.0', 'Kenneth Koeller'], ['4589.0', 'Tina Henderson'], ['45962.0', 'Jonathan Wiggins'], ['52012.0', 'Jane Dillard'], ['52118.0', 'John Gartman'], ['53707.0', 'Stephanie Suchan'], ['5506.0', 'Gonzalo Wasson'], ['55094.0', 'Lawrence Wibbens'], ['62296.0', 'Morgan Mcgee'], ['64208.0', 'Darlene Martin'], ['65652.0', 'Troy Friedman'], ['65759.0', 'Francis Baseler'], ['66411.0', 'Jennifer Fulgham'], ['67965.0', 'Richard Gundlach'], ['6983.0', 'Edward Castillo'], ['71798.0', 'Edward Schafer'], ['73843.0', 'Claude Chipman'], ['74345.0', 'Miguel Hodges'], ['76508.0', 'Alice Nixon'], ['81254.0', 'Paul Edwards'], ['813.0', 'Travis Hofman'], ['81923.0', 'Rafael Stewart'], ['83678.0', 'Albert Bauer'], ['8440.0', 'Gwendolyn Hager'], ['85673.0', 'Gus Marques'], ['87275.0', 'Josette Orr'], ['8990.0', 'Michelle Medina'], ['91588.0', 'Elizabeth Bateman'], ['9258.0', 'Bryant Johnson'], ['9271.0', 'Lynette Upham'], ['92796.0', 'Tiffany Williams'], ['93033.0', 'Michael Tyndall'], ['94756.0', 'Betty Campbell'], ['9575.0', 'Melinda Miers'], ['98220.0', 'Bruce Harris'], ['990.0', 'Dawn Brill'], ['99936.0', 'Caren Obrien']]",
            "is_correct": false,
            "reasoning": "The predicted SQL matches the correct table and WHERE clause but fails to apply the COUNT DISTINCT aggregation requested by the question. Instead, it selects SUBJECT_ID and NAME directly. This mistake is isolated to the SELECT clause and does not affect joins or the overall query structure, so it\u2019s a local error in the aggregation/SELECT list.",
            "error_type": "Local: incorrect-aggregation"
        },
        "8745cad27000303a093d0dd8dafeeb7b": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many patients are with medicare insurance and admitted before 2187? (Values: Medicare)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Medicare\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2187\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS number_of_patients FROM DEMOGRAPHIC WHERE INSURANCE = 'Medicare' AND EXTRACT(YEAR FROM ADMITTIME) < 2187;",
            "gold_answer": "[['39.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the EXTRACT(YEAR FROM ADMITTIME) function, which is not supported in SQLite and leads to a syntax error. This mistake is localized to the WHERE clause (a function call error), and the overall query structure remains correct. Therefore, it is a local error in the use of a temporal extraction function.",
            "error_type": "Local: other-temporal error"
        },
        "855cf4c4ba2b077fb516522ce28ed18b": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "let me know the drug code and route of administration of lidocaine jelly 2% (urojet). (Values: Lidocaine Jelly 2% (Urojet))",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Lidocaine Jelly 2% (Urojet)\"",
            "pred_sql": "SELECT FORMULARY_DRUG_CD AS \"Drug Code\", ROUTE AS \"Route of Administration\" FROM PRESCRIPTIONS WHERE DRUG = 'Lidocaine Jelly 2% (Urojet)';",
            "gold_answer": "[['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP']]",
            "pred_answer": "[['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to return the ROUTE column even though it\u2019s requested in the SELECT clause. This is a localized mistake in the SELECT clause (omitting or mis\u2010referencing a column), and does not affect joins, filters, or the overall query logic. Hence it is classified as a local error.",
            "error_type": "Local: missing-column-reference"
        },
        "83d2f2439a19c3bac564156d4d7f0f20": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients admitted in urgent care who were born before 2074. (Values: URGENT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"URGENT\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2074\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE ADMISSION_TYPE = 'URGENT' AND YEAR(DOB) < 2074;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the function YEAR(DOB) which is not supported in SQLite, causing a runtime error. The issue is limited to this localized misuse of a date function and does not affect the overall query logic or table relationships. It can be fixed by replacing YEAR(DOB) with the correct column DOB_YEAR or by parsing the date differently, without restructuring the query.",
            "error_type": "Local: other-temporal error"
        },
        "84d545e92b1a3cabb755523ca1f95bd4": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Show the number of patients who are less than 60 years of age and have the drug code morp2i. (Values: 62, MORP2I)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"62\" AND PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"MORP2I\"",
            "pred_sql": "SELECT COUNT(D.SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC D INNER JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.AGE < 60 AND P.FORMULARY_DRUG_CD = 'MORP2I';",
            "gold_answer": "[['6.0']]",
            "pred_answer": "[['5.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses AGE < 60 instead of AGE < 62 (wrong literal value) and omits DISTINCT in the COUNT, both of which are local mistakes in the WHERE and SELECT clauses. The overall join structure and logic remain intact.",
            "error_type": "Local: value-parsing error"
        },
        "0ef4e32954ca70a4dbc73fedfa9a6ba8": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "count the number of patients on norepinephrine prescription who were born before the year 2109. (Values: NORepinephrine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2109\" AND PRESCRIPTIONS.\"DRUG\" = \"NORepinephrine\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE P.DRUG = 'norepinephrine' AND CAST(substr(D.DOB, 1, 4) AS INT) < 2109;",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fails because it uses the wrong literal for the drug name ('norepinephrine' instead of the exact 'NORepinephrine') and it parses DOB instead of using the provided DOB_YEAR column. Both issues are confined to the WHERE clause and involve incorrect literal and column usage, without altering the overall join structure or logic of the query.",
            "error_type": "Local: value parsing error"
        },
        "885d89607152d4e994c2d3ea430b4cde": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of divorced patients who were admitted before 2120? (Values: DIVORCED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"DIVORCED\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2120\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) AS NUMBER_OF_DIVORCED_PATIENTS FROM DEMOGRAPHIC WHERE MARITAL_STATUS = 'DIVORCED' AND EXTRACT(YEAR FROM ADMITTIME) < 2120;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the EXTRACT(YEAR FROM ADMITTIME) syntax, which is not supported in SQLite, leading to a syntax error. This mistake is confined to the WHERE clause\u2019s date extraction and does not alter the overall query structure or table relationships.",
            "error_type": "Local: other-temporal error"
        },
        "aa7c9f0f083c93b10c5c1410673a02c2": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients who were admitted before the year 2174 and given the drug cefpodoxime proxetil. (Values: Cefpodoxime Proxetil)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2174\" AND PRESCRIPTIONS.\"DRUG\" = \"Cefpodoxime Proxetil\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.ADMITYEAR < 2174 AND P.DRUG = 'cefpodoxime proxetil';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query differs from the ground truth only in the literal values used in the WHERE clause: it uses an unquoted numeric literal for the year (which is fine) and a lowercase string 'cefpodoxime proxetil' instead of the correctly cased 'Cefpodoxime Proxetil'. This mismatch in string casing prevents any rows from matching the filter, but the overall query structure, joins, and logic remain correct. This is a localized error in the value parsing/comparison in the WHERE clause, not a structural or logical join issue.",
            "error_type": "Local: value parsing error"
        },
        "a8e264d96300a69aa8dea07d6d507559": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Out of total number of patients admitted before the year 2150, how many of them had hemodialysis? (Values: Hemodialysis)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2150\" AND PROCEDURES.\"LONG_TITLE\" = \"Hemodialysis\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) AS NUMBER_OF_PATIENTS_WITH_HEMODIALYSIS FROM DEMOGRAPHIC DG JOIN PROCEDURES P ON DG.SUBJECT_ID = P.SUBJECT_ID AND DG.HADM_ID = P.HADM_ID WHERE DG.ADMITYEAR < 2150 AND P.ICD9_CODE = '3995';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "no such column: D.SUBJECT_ID",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the alias DG for the DEMOGRAPHIC table but then references D.SUBJECT_ID in the SELECT clause, which is undefined. This is a column reference error localized to a single part of the query and does not affect the overall logic.",
            "error_type": "Local: table/column reference error"
        },
        "0b70acfbd0281aad4f480933ffe64361": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "tell me the short title of diagnoses and days for which patient with patient id 74463 was hospitalized.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"74463\"",
            "pred_sql": "SELECT D.SHORT_TITLE, DG.DAYS_STAY FROM DIAGNOSES D JOIN DEMOGRAPHIC DG ON D.HADM_ID = DG.HADM_ID WHERE D.SUBJECT_ID = 74463 GROUP BY D.HADM_ID;",
            "gold_answer": "[['14.0', 'Abn react-artif implant'], ['14.0', 'Aortic valve disorder'], ['14.0', 'Aortocoronary bypass'], ['14.0', 'Atrial fibrillation'], ['14.0', 'Crnry athrscl natve vssl'], ['14.0', 'DMII wo cmp nt st uncntr'], ['14.0', 'Hyperlipidemia NEC/NOS'], ['14.0', 'Hypertension NOS'], ['14.0', 'Hypoxemia'], ['14.0', 'Long-term use anticoagul'], ['14.0', 'Old myocardial infarct'], ['14.0', 'Secundum atrial sept def'], ['14.0', 'Seroma complicting proc'], ['14.0', 'Status-post ptca']]",
            "pred_answer": "[['Old myocardial infarct', '14.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a GROUP BY on D.HADM_ID, which is not required and causes the query to return only one arbitrary diagnosis per admission instead of all diagnoses. The error is confined to the GROUP BY clause and does not affect the overall join logic or table relationships. This is a localized mistake in grouping rather than a structural or multi-clause logical flaw.",
            "error_type": "Local: incorrect GROUP BY error"
        },
        "b9e81a7036c0f844cb0892ae06fa0c72": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what number of patients have diagnosis short title as rec depr psych-psychotic and drug route as po/ng? (Values: Rec depr psych-psychotic, PO/NG)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Rec depr psych-psychotic\" AND PRESCRIPTIONS.\"ROUTE\" = \"PO/NG\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DIAGNOSES D JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.SHORT_TITLE = 'rec depr psych-psychotic' AND P.ROUTE = 'PO/NG';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to return any rows because it uses a lowercase literal 'rec depr psych-psychotic' which does not match the stored value 'Rec depr psych-psychotic' (SQLite string comparisons are case-sensitive by default). This is a localized error in the WHERE clause value matching and does not require reshaping the query\u2019s overall join structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "aa253b41030e5c298e4ceb1c7aef9028": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many patients below the age of 56 were given voriconazole? (Values: Voriconazole)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"56\" AND PRESCRIPTIONS.\"DRUG\" = \"Voriconazole\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS Demo JOIN PRESCRIPTIONS AS Pre ON Demo.SUBJECT_ID = Pre.SUBJECT_ID AND Demo.HADM_ID = Pre.HADM_ID WHERE Demo.AGE < 56 AND Pre.DRUG = 'Voriconazole';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: D.SUBJECT_ID",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the alias Demo for DEMOGRAPHIC but in the SELECT clause refers to D.SUBJECT_ID, an undefined alias. This is a simple table/column reference error that can be fixed by using the correct alias (Demo) without changing the overall query structure.",
            "error_type": "Local: table/column reference error"
        },
        "088bfde60ac975ad3850f459ad47cfef": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "bring the number of patients younger than 48 years who were admitted before 2138.",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"AGE\" < \"48\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2138\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE AGE < 48 AND YEAR(ADMITTIME) < 2138;",
            "gold_answer": "[['12.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses the non-existent YEAR() function on ADMITTIME in SQLite instead of using the ADMITYEAR column as specified. This is a localized mistake in the WHERE clause\u2019s temporal handling and does not require restructuring the joins or overall logic of the query. It violates the SQL assumptions but is fixable by replacing YEAR(ADMITTIME) with ADMITYEAR.",
            "error_type": "Local: other-temporal error"
        },
        "bb83ddfb2b58462bada823ae66a5bd95": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "How many patients born before the year 2109 were primarily diagnosed for guillain barre syndrome? (Values: GUILLAIN BARRE SYNDROME)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"GUILLAIN BARRE SYNDROME\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2109\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC DM JOIN DIAGNOSES D ON DM.SUBJECT_ID = D.SUBJECT_ID AND DM.HADM_ID = D.HADM_ID WHERE DM.DOB_YEAR < 2109 AND D.LONG_TITLE LIKE '%Guillain Barre Syndrome%';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the core logic by joining the DIAGNOSES table and filtering on its LONG_TITLE rather than using the DEMOGRAPHIC.DIAGNOSIS field for the primary diagnosis. This alters the meaning of \u201cprimarily diagnosed\u201d and requires restructuring the query beyond a simple fix in one clause.",
            "error_type": "Global: incorrect join relationship"
        },
        "07f753a76f9e4833bde8ba792b3b14c9": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the primary disease and diagnosis of the patient id 8440?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"8440\"",
            "pred_sql": "SELECT DIAGNOSIS AS primary_disease FROM DEMOGRAPHIC WHERE SUBJECT_ID = 8440;",
            "gold_answer": "[['GASTROINTESTINAL BLEED', 'Acute kidney failure NOS'], ['GASTROINTESTINAL BLEED', 'Acute respiratry failure'], ['GASTROINTESTINAL BLEED', 'Atrial fibrillation'], ['GASTROINTESTINAL BLEED', 'CHF NOS'], ['GASTROINTESTINAL BLEED', 'Chr airway obstruct NEC'], ['GASTROINTESTINAL BLEED', 'Chr blood loss anemia'], ['GASTROINTESTINAL BLEED', 'Gastrointest hemorr NOS'], ['GASTROINTESTINAL BLEED', 'Hyperosmolality'], ['GASTROINTESTINAL BLEED', 'Hypertension NOS'], ['GASTROINTESTINAL BLEED', 'Shock w/o trauma NEC'], ['GASTROINTESTINAL BLEED', 'Urin tract infection NOS']]",
            "pred_answer": "[['GASTROINTESTINAL BLEED']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the DIAGNOSES table entirely and only selects the primary disease from DEMOGRAPHIC, failing to retrieve the diagnosis information (SHORT_TITLE). This is not a minor syntax or column reference mistake but a structural issue: the necessary JOIN to DIAGNOSES is missing and the query\u2019s logic is fundamentally altered. Fixing it requires adding the join and selecting the second column, which affects the overall query structure.",
            "error_type": "Global: missing join error"
        },
        "a7c671e965437f33b28e21cf6bfe9e89": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "find the number of greek orthodox religious background patients who were born before 2085. (Values: GREEK ORTHODOX)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"RELIGION\" = \"GREEK ORTHODOX\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2085\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE RELIGION = 'GREEK ORTHODOX' AND YEAR(DOB) < 2085;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses the YEAR(DOB) function, which does not exist in SQLite. This is a localized error in the WHERE clause\u2019s temporal function usage and does not require restructuring the query logic or joins. It can be fixed by directly comparing the DOB_YEAR column or rewriting the function usage.",
            "error_type": "Local: other-temporal error"
        },
        "a61ac4f658e87123f6f32e8d02272ab4": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Specify the code forTopiramate (Topamax)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Topiramate (Topamax)\"",
            "pred_sql": "SELECT DRUG FROM PRESCRIPTIONS WHERE DRUG = 'Topiramate';",
            "gold_answer": "[['TOPI100'], ['TOPI100'], ['TOPI25']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only selects the DRUG column instead of the FORMULARY_DRUG_CD column and filters on 'Topiramate' rather than the exact string 'Topiramate (Topamax)'. These are localized mistakes in the SELECT and WHERE clauses (wrong column reference and wrong literal), but the overall query structure remains correct, so this is a local error.",
            "error_type": "Local: value parsing error"
        },
        "86e5e1517891514bdb1d0018f85f9a34": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "How many patients admitted before 2112 are diagnosed with retained plastic fragments? (Values: Retained plastic fragments)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2112\" AND DIAGNOSES.\"LONG_TITLE\" = \"Retained plastic fragments\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC DM JOIN DIAGNOSES D ON DM.SUBJECT_ID = D.SUBJECT_ID AND DM.HADM_ID = D.HADM_ID WHERE EXTRACT(YEAR FROM DM.ADMITTIME) < 2112 AND (D.SHORT_TITLE LIKE '%retained plastic fragments%' OR D.LONG_TITLE LIKE '%retained plastic fragments%');",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to the use of the EXTRACT(YEAR FROM ...) function, which is not supported in SQLite, causing a syntax error at FROM. This error is confined to the WHERE clause\u2019s temporal function and does not require altering the overall query structure or logic (the join and filtering logic remain intact). Thus, it is a local, function/temporal error.",
            "error_type": "Local: other-temporal error"
        },
        "ade0c7463c725d9219e5712adb45de85": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "count the number of married patients who have acute myocardial infarction of inferolateral wall, initial episode of care diagnoses. (Values: MARRIED, AMI inferolateral, init)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"MARRIED\" AND DIAGNOSES.\"SHORT_TITLE\" = \"AMI inferolateral, init\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS Diag ON D.SUBJECT_ID = Diag.SUBJECT_ID AND D.HADM_ID = Diag.HADM_ID WHERE D.MARITAL_STATUS = 'MARRIED' AND Diag.ICD9_CODE = '41062';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s error is limited to the WHERE clause filter: it uses Diag.ICD9_CODE = '41062' instead of matching the SHORT_TITLE = 'AMI inferolateral, init'. All other joins and query structure mirror the ground truth. This is a localized mistake in the filter condition and doesn\u2019t require changing the overall query logic or joins.",
            "error_type": "Local: incorrect filter column/value"
        },
        "0b323ac6dda39e3588c51d5acb16b4fc": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many of the patients had hemorrhage complicating a procedure? (Values: Hemorrhage complicating a procedure)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"LONG_TITLE\" = \"Hemorrhage complicating a procedure\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PROCEDURES WHERE ICD9_CODE = '9981';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL entirely uses the PROCEDURES table and the ICD9_CODE column, whereas the question refers to the LONG_TITLE in the DIAGNOSES table. This is not a minor syntax or column name mistake but a fundamental change in the data source and logic of the query. It alters the intended join and target table, leading to incorrect results. Such an error affects the overall query structure and logic, so it is a Global error.",
            "error_type": "Global: wrong-table error"
        },
        "9cfe1b1262ebc011c0cd636a6bd1b9f7": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Calculate the total number of patients on elective admission before the year 2108 (Values: ELECTIVE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"ELECTIVE\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2108\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS TOTAL_ELECTIVE_PATIENTS FROM DEMOGRAPHIC WHERE ADMISSION_TYPE = 'ELECTIVE' AND YEAR(ADMITTIME) < 2108;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the YEAR() function on the ADMITTIME column, which is not supported in SQLite, resulting in the \u201cno such function: YEAR\u201d error. This mistake is confined to a specific part of the WHERE clause and does not affect the overall join structure or query logic. It can be fixed by extracting the year using a supported SQLite function or by comparing the ADMITYEAR field as in the ground truth, without restructuring the query.",
            "error_type": "Local: function usage error"
        },
        "089c48c2efaae378b3bf7ee0d9fa4894": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "count the number of patients admitted before 2200 who had endoscopic removal of stone(s) from biliary tract. (Values: Endoscopic removal of stone(s) from biliary tract)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2200\" AND PROCEDURES.\"LONG_TITLE\" = \"Endoscopic removal of stone(s) from biliary tract\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN PROCEDURES P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE P.ICD9_CODE = '3910' AND YEAR(D.ADMITTIME) < 2200;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a non-existent SQLite function YEAR(D.ADMITTIME) and also mismatches on P.ICD9_CODE instead of filtering by the LONG_TITLE. Both issues are confined to specific expressions (a wrong function call and wrong filter column) and do not require restructuring the overall join or query logic. Hence this is a local error.",
            "error_type": "Local: other-temporal error"
        },
        "8905a4c9ce097875ed1f8db4c3ff14bd": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of patients admitted before 2166 with procedure icd9 code 5011?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2166\" AND PROCEDURES.\"ICD9_CODE\" = \"5011\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN PROCEDURES P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE P.ICD9_CODE = '5011' AND EXTRACT(YEAR FROM D.ADMITTIME) < 2166;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to a syntax error when using EXTRACT(YEAR FROM D.ADMITTIME), which SQLite does not support. The rest of the query structure (SELECT, JOIN, WHERE on ICD9_CODE) aligns with the intended logic. This is a localized misuse of a datetime function rather than a structural or multi-clause logical error, so it can be fixed by replacing EXTRACT with the correct SQLite date function.",
            "error_type": "Local: other-temporal error"
        },
        "85c64a20b4ee9ba7566059a53b6fcdb3": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Bring the list of patients younger than 41 years of age who had a blood lab test done. (Values: Blood)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"41\" AND LAB.\"FLUID\" = \"Blood\"",
            "pred_sql": "SELECT d.NAME FROM DEMOGRAPHIC d INNER JOIN LAB l ON d.SUBJECT_ID = l.SUBJECT_ID AND d.HADM_ID = l.HADM_ID WHERE d.AGE < 41 AND l.FLUID = 'Blood';",
            "gold_answer": "[['27.0']]",
            "pred_answer": "[['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone'], ['Andrea Capone']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended output and query structure. The original task requires returning the count of distinct patients under 41 who had a blood lab test. The predicted query instead selects patient names (with duplicates) rather than computing a distinct count. This is not a simple column reference or syntax mistake\u2014it alters the core aggregation and output of the query. Therefore, this is a global error affecting the overall logic and structure of the SQL.",
            "error_type": "Global: wrong-aggregation/selection error"
        },
        "807ed8b40c34e53869ce863c6ed2489a": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "which lab test has been ordered for patient with patient id 19187?",
            "gold_sql": "SELECT LAB.\"LABEL\" FROM LAB WHERE LAB.\"SUBJECT_ID\" = \"19187\"",
            "pred_sql": "SELECT LABEL, CHARTTIME, VALUE_UNIT, FLAG FROM LAB WHERE SUBJECT_ID = 19187;",
            "gold_answer": "[['Alveolar-arterial Gradient'], ['Atypical Lymphocytes'], ['Bands'], ['Base Excess'], ['Basophils'], ['Bilirubin, Direct'], ['Bilirubin, Indirect'], ['Bilirubin, Total'], ['Calculated Total CO2'], ['Eosinophils'], ['Hematocrit'], ['Hemoglobin'], ['Lymphocytes'], ['MCH'], ['MCHC'], ['MCV'], ['Metamyelocytes'], ['Monocytes'], ['Myelocytes'], ['Neutrophils'], ['Nucleated Red Cells'], ['Other Cells'], ['Platelet Count'], ['RDW'], ['Red Blood Cells'], ['White Blood Cells'], ['pCO2'], ['pH'], ['pO2']]",
            "pred_answer": "[['Alveolar-arterial Gradient', '2170-10-04 14:34:00', '598mm Hg', 'None'], ['Atypical Lymphocytes', '2170-10-04 14:30:00', '4%', 'abnormal'], ['Bands', '2170-10-04 14:30:00', '5%', 'None'], ['Base Excess', '2170-10-04 14:34:00', '-1mEq/L', 'None'], ['Basophils', '2170-10-04 14:30:00', '0%', 'None'], ['Bilirubin, Direct', '2170-10-08 07:20:00', '0.2mg/dL', 'None'], ['Bilirubin, Indirect', '2170-10-08 07:20:00', '8.3mg/dL', 'None'], ['Bilirubin, Total', '2170-10-08 07:20:00', '8.5mg/dL', 'None'], ['Calculated Total CO2', '2170-10-04 14:34:00', '26mEq/L', 'None'], ['Eosinophils', '2170-10-04 14:30:00', '2%', 'None'], ['Hematocrit', '2170-10-04 14:30:00', '35.9%', 'None'], ['Hemoglobin', '2170-10-04 14:30:00', '12.3g/dL', 'None'], ['Lymphocytes', '2170-10-04 14:30:00', '20%', 'abnormal'], ['MCH', '2170-10-04 14:30:00', '36.0pg', 'abnormal'], ['MCHC', '2170-10-04 14:30:00', '34.1%', 'None'], ['MCV', '2170-10-04 14:30:00', '106fL', 'abnormal'], ['Metamyelocytes', '2170-10-04 14:30:00', '0%', 'None'], ['Monocytes', '2170-10-04 14:30:00', '4%', 'None'], ['Myelocytes', '2170-10-04 14:30:00', '0%', 'None'], ['Neutrophils', '2170-10-04 14:30:00', '61%', 'abnormal'], ['Nucleated Red Cells', '2170-10-04 14:30:00', '5%', 'abnormal'], ['Other Cells', '2170-10-04 14:30:00', '4%', 'abnormal'], ['Platelet Count', '2170-10-04 14:30:00', '294K/uL', 'None'], ['RDW', '2170-10-04 14:30:00', '17.5%', 'abnormal'], ['Red Blood Cells', '2170-10-04 14:30:00', '3.40m/uL', 'abnormal'], ['White Blood Cells', '2170-10-04 14:30:00', '20.10K/uL', 'abnormal'], ['pCO2', '2170-10-04 14:34:00', '46mm Hg', 'abnormal'], ['pH', '2170-10-04 14:34:00', '7.34units', 'abnormal'], ['pO2', '2170-10-04 14:34:00', '94mm Hg', 'None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth in the SELECT clause by returning extra columns (CHARTTIME, VALUE_UNIT, FLAG) that are not asked for. The table references, filtering condition, and overall query structure remain correct. This is a localized mistake in the projection, not a logical or structural flaw spanning multiple clauses.",
            "error_type": "Local: projection error"
        },
        "9261992dc4b198991f00da4ec688e081": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "find out the short title of diagnoses and health insurance by which patient with patient id 9271 is covered.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"INSURANCE\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"9271\"",
            "pred_sql": "SELECT D.INSURANCE AS \"Health Insurance\", Diag.SHORT_TITLE AS \"Diagnosis Short Title\" FROM DEMOGRAPHIC D INNER JOIN DIAGNOSES Diag ON D.SUBJECT_ID = Diag.SUBJECT_ID AND D.HADM_ID = Diag.HADM_ID WHERE D.SUBJECT_ID = 9271;",
            "gold_answer": "[['Medicare', 'Adv eff corticosteroids'], ['Medicare', 'Asthma NOS w (ac) exac'], ['Medicare', 'Av block-2nd degree NEC'], ['Medicare', 'Bipol I cur depres NOS'], ['Medicare', 'CHF NOS'], ['Medicare', 'Chr diastolic hrt fail'], ['Medicare', 'Chr kidney dis stage III'], ['Medicare', 'DMII hprosmlr uncontrold'], ['Medicare', 'Hx-ven thrombosis/embols'], ['Medicare', 'Hy kid NOS w cr kid I-IV'], ['Medicare', 'Parox ventric tachycard'], ['Medicare', 'Pneumonia, organism NOS']]",
            "pred_answer": "[['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare'], ['Medicare']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only returns the insurance values (one column) and fails to produce the diagnosis short titles. This is a mistake confined to the SELECT clause\u2014simply adding the missing column (Diag.SHORT_TITLE) would correct it. The overall join logic and WHERE filter are correct, so this is a localized error in the SELECT list.",
            "error_type": "Local: select-clause error"
        },
        "ace7d9f8651d4d512565d6be3b6a9a6c": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Out of total number of patients who had a lab test for pleural fluid, how many of them were born before 2089? (Values: Pleural)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2089\" AND LAB.\"FLUID\" = \"Pleural\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) AS count_patients FROM LAB L INNER JOIN DEMOGRAPHIC D ON L.SUBJECT_ID = D.SUBJECT_ID AND L.HADM_ID = D.HADM_ID WHERE L.FLUID = 'Pleural' AND YEAR(D.DOB) < 2089;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to the use of the non-existent SQLite function YEAR(D.DOB). This error occurs in the WHERE clause when extracting the year from DOB, and it does not alter the overall structure or logic of the join or aggregation. It is a localized syntax/temporal extraction error that can be fixed without restructuring the query (e.g., by using the DOB_YEAR column).",
            "error_type": "Local: other-temporal error"
        },
        "0f6b59272b612f8e29af1fcd07b26294": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Calculate the average age of living patients admitted to the hospital on or after 2121. (Values: 0)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DEMOGRAPHIC.\"ADMITYEAR\" >= \"2121\"",
            "pred_sql": "SELECT AVG(AGE) AS average_age FROM DEMOGRAPHIC WHERE EXPIRE_FLAG = 0 AND YEAR(ADMITTIME) >= 2121;",
            "gold_answer": "[['41.742']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted query matches the intended logic (AVG age, EXPIRE_FLAG filter, admission year filter) but uses the non\u2010existent SQLite function YEAR(), causing a \u201cno such function\u201d error. This is a localized syntax/function call error within the WHERE clause and does not alter the overall query structure or logic.",
            "error_type": "Local: syntax error"
        },
        "8f5493ee584c51f03ef9579a071809eb": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the lab test name and lab test category for subject id 17787?",
            "gold_sql": "SELECT LAB.\"LABEL\",LAB.\"CATEGORY\" FROM LAB WHERE LAB.\"SUBJECT_ID\" = \"17787\"",
            "pred_sql": "SELECT LABEL AS \"Lab Test Name\", CATEGORY AS \"Lab Test Category\" FROM LAB WHERE SUBJECT_ID = 17787;",
            "gold_answer": "[['Atypical Lymphocytes', 'Hematology'], ['Bands', 'Hematology'], ['Basophils', 'Hematology'], ['Bilirubin, Direct', 'Chemistry'], ['Bilirubin, Indirect', 'Chemistry'], ['Bilirubin, Total', 'Chemistry'], ['Eosinophils', 'Hematology'], ['Hematocrit', 'Hematology'], ['Hemoglobin', 'Hematology'], ['Lymphocytes', 'Hematology'], ['MCH', 'Hematology'], ['MCHC', 'Hematology'], ['MCV', 'Hematology'], ['Metamyelocytes', 'Hematology'], ['Monocytes', 'Hematology'], ['Myelocytes', 'Hematology'], ['Neutrophils', 'Hematology'], ['Nucleated Red Cells', 'Hematology'], ['Platelet Count', 'Hematology'], ['RDW', 'Hematology'], ['Red Blood Cells', 'Hematology'], ['White Blood Cells', 'Hematology']]",
            "pred_answer": "[['Atypical Lymphocytes'], ['Bands'], ['Basophils'], ['Bilirubin, Direct'], ['Bilirubin, Indirect'], ['Bilirubin, Total'], ['Eosinophils'], ['Hematocrit'], ['Hemoglobin'], ['Lymphocytes'], ['MCH'], ['MCHC'], ['MCV'], ['Metamyelocytes'], ['Monocytes'], ['Myelocytes'], ['Neutrophils'], ['Nucleated Red Cells'], ['Platelet Count'], ['RDW'], ['Red Blood Cells'], ['White Blood Cells']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only returns the lab test name column, omitting the lab test category even though the alias is present. This is a mistake in the SELECT clause (a missing column), but the overall query structure, table references, and WHERE condition are correct. It doesn\u2019t require reworking JOINs, subqueries, or the logic\u2014just fixing the SELECT to actually include CATEGORY. Therefore it\u2019s a localized column reference error.",
            "error_type": "Local: column reference error"
        },
        "04531e28c38575a4a2bdca04faa8c77b": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many patients are diagnosed with hx traumatic fracture? (Values: Hx traumatic fracture)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Hx traumatic fracture\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) FROM DEMOGRAPHIC WHERE DIAGNOSIS = 'hx traumatic fracture';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the DIAGNOSES table and uses the DEMOGRAPHIC.DIAGNOSIS column instead of joining to DIAGNOSES and filtering on SHORT_TITLE. This is not a simple typo or local fix; it changes the query\u2019s entire logic and table relationships, resulting in incorrect counts. Thus it\u2019s a global error in the query\u2019s structure and join logic.",
            "error_type": "Global: join error"
        },
        "a030ada88d94f5f9ef48362a97aa413d": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the total number of patiemts who had crnry athrscl natve vssl? (Values: Crnry athrscl natve vssl)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Crnry athrscl natve vssl\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) AS TOTAL_PATIENTS FROM DIAGNOSES D WHERE D.SHORT_TITLE LIKE '%CORONARY%' OR D.LONG_TITLE LIKE '%ATHEROSCLEROTIC NATIVE VESSEL%';",
            "gold_answer": "[['19.0']]",
            "pred_answer": "[['11.0']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s structure (SELECT COUNT DISTINCT from DIAGNOSES) could work since SUBJECT_ID is present there, so no major restructuring is required. The mistake lies solely in the WHERE clause filter: it uses LIKE '%CORONARY%' OR LIKE '%ATHEROSCLEROTIC NATIVE VESSEL%' instead of matching the exact SHORT_TITLE \"Crnry athrscl natve vssl\". This is a localized error in the conditionals (value parsing/matching) and does not affect joins, overall logic, or multi-clause consistency.",
            "error_type": "Local: incorrect filter condition"
        },
        "a5d9467737164f47007345c4893d763f": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "which patients have drug code bactss? (Values: BACTSS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"BACTSS\"",
            "pred_sql": "SELECT D.NAME FROM DEMOGRAPHIC D INNER JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID WHERE P.FORMULARY_DRUG_CD = 'BACTSS';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['Antonio Mccarthy'], ['Antonio Mccarthy'], ['Antonio Mccarthy'], ['Gwendolyn Hager'], ['Jason Walker'], ['Warren Baldwin']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the overall query logic by selecting patient names instead of counting distinct patient IDs, and also uses a different join key (SUBJECT_ID rather than HADM_ID). This is not a simple typo or localized clause mistake but a structural change that alters the intended output and aggregation, so it is a global error.",
            "error_type": "Global: structural-flaw"
        },
        "95832f8e506136d97ce571b7c5bd4e10": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the diagnosis code and diagnosis long title for the patient francis baseler? (Values: Francis Baseler)",
            "gold_sql": "SELECT DIAGNOSES.\"ICD9_CODE\",DIAGNOSES.\"LONG_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Francis Baseler\"",
            "pred_sql": "SELECT D.ICD9_CODE AS \"Diagnosis Code\", D.LONG_TITLE AS \"Diagnosis Long Title\" FROM DEMOGRAPHIC DG INNER JOIN DIAGNOSES D ON DG.SUBJECT_ID = D.SUBJECT_ID AND DG.HADM_ID = D.HADM_ID WHERE DG.NAME = 'Francis Baseler';",
            "gold_answer": "[['2639.0', 'Unspecified protein-calorie malnutrition'], ['2809.0', 'Iron deficiency anemia, unspecified'], ['2869.0', 'Other and unspecified coagulation defects'], ['29634.0', 'Major depressive affective disorder, recurrent episode, severe, specified as with psychotic behavior'], ['30400.0', 'Opioid type dependence, unspecified'], ['33829.0', 'Other chronic pain'], ['34690.0', 'Migraine, unspecified, without mention of intractable migraine without mention of status migrainosus'], ['56962.0', 'Mechanical complication of colostomy and enterostomy'], ['5771.0', 'Chronic pancreatitis'], ['E9530', 'Suicide and self-inflicted injury by hanging'], ['V489', 'Unspecified problem with head, neck, or trunk'], ['V6284', 'Suicidal ideation']]",
            "pred_answer": "[['2639.0'], ['2809.0'], ['2869.0'], ['29634.0'], ['30400.0'], ['33829.0'], ['34690.0'], ['56962.0'], ['5771.0'], ['E9530'], ['V489'], ['V6284']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to return the diagnosis long title even though the ground truth asks for both ICD9_CODE and LONG_TITLE. This is a localized mistake in the SELECT clause (omitting the second column), not a fundamental issue with joins or overall query structure.",
            "error_type": "Local: column reference error"
        },
        "82d3695ec5d890238e93c72e3a314ced": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how any patients have been given nitroglycerin ointment 2%? (Values: Nitroglycerin Ointment  2%)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Nitroglycerin Ointment  2%\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PRESCRIPTIONS WHERE DRUG = 'Nitroglycerin Ointment 2%';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses PRESCRIPTIONS.DRUG = 'Nitroglycerin Ointment 2%' (one space) whereas the actual value in the data is 'Nitroglycerin Ointment  2%' (two spaces). This is a mismatch in the literal string used in the WHERE clause. The rest of the query structure (COUNT DISTINCT on SUBJECT_ID in PRESCRIPTIONS) is fine. The error is confined to an incorrect literal value in the WHERE clause and does not affect the overall query logic or table relationships.",
            "error_type": "Local: value parsing error"
        },
        "a098cd2cff8c0c939b8ee19c13e38c43": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Provide the number of patients that died and had a primary disease of ST elevated myocardial infarction/cardiac cath. (Values: 1, ST ELEVATED MYOCARDIAL INFARCTION\\CARDIAC CATH)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"1\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"ST ELEVATED MYOCARDIAL INFARCTION\\CARDIAC CATH\"",
            "pred_sql": "error in generating sql: No",
            "gold_answer": "[['0.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL failed to produce any valid query (\u2018error in generating sql: No\u2019), indicating a localized generation/syntax issue rather than a flaw in the overall query logic or structure. No tables, joins, or conditions were even attempted, so it\u2019s a fixable, localized syntax/generation error.",
            "error_type": "Local: syntax error"
        },
        "95c9f21405c27dccc93ef6c419bf3626": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Which patients have an abnormal test with lab test item id 51255?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"51255\" AND LAB.\"FLAG\" = \"abnormal\"",
            "pred_sql": "SELECT DISTINCT D.NAME FROM DEMOGRAPHIC D INNER JOIN LAB L ON D.SUBJECT_ID = L.SUBJECT_ID AND D.HADM_ID = L.HADM_ID WHERE L.ITEMID = 51255 AND L.FLAG IS NOT NULL;",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['Albert Bauer'], ['Charley Pescatore'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Glen Tanner'], ['Glen Tanner'], ['Glen Tanner'], ['Jerry Deberry'], ['Kenneth Koeller'], ['Wilbur Braatz']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the intended logic in multiple ways. First, it filters on L.FLAG IS NOT NULL instead of explicitly requiring L.FLAG = 'abnormal'. Second, it selects patient names rather than counting distinct subject IDs as the question requests. These errors alter both the filtering criterion and the overall shape/purpose of the query, so the mistake spans multiple clauses and changes the query\u2019s fundamental logic.",
            "error_type": "Global: incorrect filter and projection"
        },
        "94fac89f564ff4ef5cafb4d446f7035d": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of patients whose admission type is emergency and born before 2066? (Values: EMERGENCY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2066\"",
            "pred_sql": "SELECT COUNT(*) AS patient_count FROM DEMOGRAPHIC WHERE ADMISSION_TYPE = 'EMERGENCY' AND YEAR(DOB) < 2066;",
            "gold_answer": "[['18.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the YEAR(DOB) function to extract the year from the DOB column, but SQLite does not support a built-in YEAR() function. This is a localized error within the WHERE clause rather than a flaw in the overall query structure or logic.",
            "error_type": "Local: other-temporal error"
        }
    },
    "qwen3": {
        "0e8272658be25dcd6baac667cfc23de8": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "How many of the patients with sigmoid diverticulitis colovestical fistula/SDA as their primary disease had a hospital stay for more than 26 days? (Values: SIGMOID DIVERTICULITIS, COLOVESTICAL FISTULA/SDA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"SIGMOID DIVERTICULITIS, COLOVESTICAL FISTULA/SDA\" AND DEMOGRAPHIC.\"DAYS_STAY\" > \"26\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DEMOGRAPHIC WHERE DIAGNOSIS IN ('SIGMOID DIVERTICULITIS', 'COLOVESTICAL FISTULA/SDA') AND DAYS_STAY > 26;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses DIAGNOSIS IN ('SIGMOID DIVERTICULITIS', 'COLOVESTICAL FISTULA/SDA') instead of matching the single combined diagnosis string 'SIGMOID DIVERTICULITIS, COLOVESTICAL FISTULA/SDA'. This is a local error in the WHERE clause\u2019s value condition, not affecting the overall query structure.",
            "error_type": "Local: value parsing error"
        },
        "93afecdf36b6ce7b81f6d82625a068fb": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "which patients have lab test item id 50953?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"50953\"",
            "pred_sql": "SELECT DISTINCT d.* FROM DEMOGRAPHIC d INNER JOIN LAB l ON d.SUBJECT_ID = l.SUBJECT_ID AND d.HADM_ID = l.HADM_ID WHERE l.ITEMID = 50953;",
            "gold_answer": "[['17.0']]",
            "pred_answer": "[['14755.0', '151669.0', 'Bessie Wood', 'SINGLE', '47.0', '2078-07-15 00:00:00', 'F', 'ENGL', 'NOT SPECIFIED', 'EMERGENCY', '13.0', 'Medicare', 'WHITE', '0.0', 'TRANSFER FROM HOSP/EXTRAM', 'HOME HEALTH CARE', 'PNEUMONIA;HUMAN IMMUNODEFIENCY VIRUS;RULE OUT TUBERCULOSIS', 'None', '2078.0', 'None', '2125-02-09 18:34:00', '2125-02-23 16:30:00', '2125.0'], ['16438.0', '152794.0', 'Sheila Riley', 'None', '70.0', '2049-06-06 00:00:00', 'F', 'None', 'None', 'EMERGENCY', '7.0', 'Medicare', 'OTHER', '1.0', 'TRANSFER FROM HOSP/EXTRAM', 'HOME HEALTH CARE', 'PNEUMONIA', '2126-05-31 00:00:00', '2049.0', '2126.0', '2119-05-26 17:27:00', '2119-06-03 14:39:00', '2119.0'], ['17570.0', '183034.0', 'Paul Dunn', 'DIVORCED', '68.0', '2114-12-30 00:00:00', 'M', 'None', 'PROTESTANT QUAKER', 'EMERGENCY', '27.0', 'Medicare', 'WHITE', '1.0', 'TRANSFER FROM HOSP/EXTRAM', 'REHAB/DISTINCT PART HOSP', 'INFECTED RIGHT THIGH GRAFT', '2183-06-12 00:00:00', '2114.0', '2183.0', '2182-12-27 16:14:00', '2183-01-24 13:00:00', '2182.0'], ['19620.0', '130531.0', 'Dawn Cooper', 'SINGLE', '43.0', '2120-09-25 00:00:00', 'F', 'None', 'NOT SPECIFIED', 'EMERGENCY', '5.0', 'Medicare', 'WHITE', '1.0', 'EMERGENCY ROOM ADMIT', 'HOME HEALTH CARE', 'ACIDOSIS', '2168-05-19 00:00:00', '2120.0', '2168.0', '2163-03-03 15:16:00', '2163-03-08 15:51:00', '2163.0'], ['23602.0', '152293.0', 'Ida Cook', 'WIDOWED', '77.0', '2073-08-06 00:00:00', 'F', 'None', 'CATHOLIC', 'EMERGENCY', '7.0', 'Private', 'WHITE', '0.0', 'TRANSFER FROM HOSP/EXTRAM', 'SNF', 'CORONARY ARTERY DISEASE', 'None', '2073.0', 'None', '2150-03-02 14:30:00', '2150-03-10 12:40:00', '2150.0'], ['28588.0', '141664.0', 'Theresa Vedovelli', 'SINGLE', '89.0', '1850-08-17 00:00:00', 'F', 'ENGL', 'PROTESTANT QUAKER', 'EMERGENCY', '10.0', 'Medicare', 'WHITE', '0.0', 'EMERGENCY ROOM ADMIT', 'SNF', 'COMPLETE HEART BLOCK', 'None', '1850.0', 'None', '2150-08-17 22:51:00', '2150-08-28 13:35:00', '2150.0'], ['29541.0', '103957.0', 'Estrella Carroll', 'SINGLE', '58.0', '2112-06-24 00:00:00', 'F', 'ENGL', 'CATHOLIC', 'EMERGENCY', '5.0', 'Medicare', 'WHITE', '0.0', 'EMERGENCY ROOM ADMIT', 'HOME', 'SEPSIS', 'None', '2112.0', 'None', '2170-03-25 23:12:00', '2170-03-31 14:00:00', '2170.0'], ['29961.0', '196409.0', 'Jerry Deberry', 'SINGLE', '72.0', '2043-02-28 00:00:00', 'M', 'ENGL', 'CATHOLIC', 'EMERGENCY', '20.0', 'Medicare', 'WHITE', '0.0', 'CLINIC REFERRAL/PREMATURE', 'SNF', 'S/P FALL', 'None', '2043.0', 'None', '2115-06-20 17:48:00', '2115-07-10 18:45:00', '2115.0'], ['3623.0', '178444.0', 'Joseph Dillman', 'MARRIED', '63.0', '2101-07-04 00:00:00', 'M', 'ENGL', 'PROTESTANT QUAKER', 'EMERGENCY', '8.0', 'Medicare', 'WHITE', '1.0', 'EMERGENCY ROOM ADMIT', 'HOME HEALTH CARE', 'PNEUMONIA', '2164-02-23 00:00:00', '2101.0', '2164.0', '2164-02-14 16:25:00', '2164-02-22 17:30:00', '2164.0'], ['4333.0', '155027.0', 'Kenneth Koeller', 'MARRIED', '77.0', '2047-11-23 00:00:00', 'M', 'PTUN', 'CATHOLIC', 'EMERGENCY', '29.0', 'Medicare', 'WHITE', '0.0', 'TRANSFER FROM HOSP/EXTRAM', 'REHAB/DISTINCT PART HOSP', 'GUILLAIN BARRE SYNDROME', 'None', '2047.0', 'None', '2124-06-29 15:04:00', '2124-07-28 16:33:00', '2124.0'], ['4333.0', '155027.0', 'Kenneth Koeller', 'MARRIED', '77.0', '2047-11-23 00:00:00', 'M', 'PTUN', 'CATHOLIC', 'EMERGENCY', '29.0', 'Medicare', 'WHITE', '0.0', 'TRANSFER FROM HOSP/EXTRAM', 'REHAB/DISTINCT PART HOSP', 'GUILLAIN BARRE SYNDROME', 'None', '2047.0', 'None', '2124-06-29 15:04:00', '2124-07-28 16:33:00', '2124.0'], ['4333.0', '155027.0', 'Kenneth Koeller', 'MARRIED', '77.0', '2047-11-23 00:00:00', 'M', 'PTUN', 'CATHOLIC', 'EMERGENCY', '29.0', 'Medicare', 'WHITE', '0.0', 'TRANSFER FROM HOSP/EXTRAM', 'REHAB/DISTINCT PART HOSP', 'GUILLAIN BARRE SYNDROME', 'None', '2047.0', 'None', '2124-06-29 15:04:00', '2124-07-28 16:33:00', '2124.0'], ['52012.0', '146937.0', 'Jane Dillard', 'SINGLE', '45.0', '2117-08-30 00:00:00', 'F', 'ENGL', 'NOT SPECIFIED', 'EMERGENCY', '2.0', 'Medicare', 'WHITE', '0.0', 'CLINIC REFERRAL/PREMATURE', 'HOME', 'RASH', 'None', '2117.0', 'None', '2162-10-15 22:53:00', '2162-10-18 11:35:00', '2162.0'], ['55094.0', '135633.0', 'Lawrence Wibbens', 'MARRIED', '56.0', '2126-10-22 00:00:00', 'M', 'ENGL', 'CATHOLIC', 'EMERGENCY', '23.0', 'Medicaid', 'WHITE', '0.0', 'CLINIC REFERRAL/PREMATURE', 'HOME HEALTH CARE', 'ABDOMINAL ABSCESS', 'None', '2126.0', 'None', '2182-02-08 22:31:00', '2182-03-04 14:28:00', '2182.0'], ['55094.0', '135633.0', 'Lawrence Wibbens', 'MARRIED', '56.0', '2126-10-22 00:00:00', 'M', 'ENGL', 'CATHOLIC', 'EMERGENCY', '23.0', 'Medicaid', 'WHITE', '0.0', 'CLINIC REFERRAL/PREMATURE', 'HOME HEALTH CARE', 'ABDOMINAL ABSCESS', 'None', '2126.0', 'None', '2182-02-08 22:31:00', '2182-03-04 14:28:00', '2182.0'], ['62296.0', '193576.0', 'Morgan Mcgee', 'WIDOWED', '83.0', '2085-10-29 00:00:00', 'F', 'ENGL', 'JEWISH', 'EMERGENCY', '16.0', 'Medicare', 'WHITE', '1.0', 'EMERGENCY ROOM ADMIT', 'DEAD/EXPIRED', 'ABDOMINAL PAIN', '2168-10-29 00:00:00', '2085.0', '2168.0', '2168-10-12 11:36:00', '2168-10-29 05:17:00', '2168.0'], ['67965.0', '145657.0', 'Richard Gundlach', 'MARRIED', '74.0', '2041-07-10 00:00:00', 'M', 'ENGL', 'CATHOLIC', 'EMERGENCY', '7.0', 'Medicare', 'WHITE', '1.0', 'CLINIC REFERRAL/PREMATURE', 'DEAD/EXPIRED', 'GANGRENE', '2115-09-24 00:00:00', '2041.0', '2115.0', '2115-09-17 00:51:00', '2115-09-24 12:00:00', '2115.0'], ['85673.0', '136522.0', 'Gus Marques', 'SINGLE', '27.0', '2138-05-21 00:00:00', 'M', 'ENGL', 'JEWISH', 'EMERGENCY', '9.0', 'Private', 'WHITE - RUSSIAN', '0.0', 'CLINIC REFERRAL/PREMATURE', 'HOME', 'HEMOPYTSIS', 'None', '2138.0', 'None', '2165-04-29 23:35:00', '2165-05-09 14:40:00', '2165.0'], ['94762.0', '160707.0', 'Walter Locher', 'MARRIED', '67.0', '2098-04-23 00:00:00', 'M', 'ENGL', 'PROTESTANT QUAKER', 'ELECTIVE', '20.0', 'Medicare', 'WHITE', '1.0', 'PHYS REFERRAL/NORMAL DELI', 'DEAD/EXPIRED', 'NEOPLASM OF UNCERTAIN BEHAVIOR OF OTHER LYMPHATIC AND HEMATOPOIETIC TISSUES\\\\BONE MARROW TRANSPLANT', '2165-06-24 00:00:00', '2098.0', '2165.0', '2165-06-03 11:25:00', '2165-06-24 03:23:00', '2165.0'], ['98220.0', '189416.0', 'Bruce Harris', 'MARRIED', '70.0', '2103-11-14 00:00:00', 'M', 'ENGL', 'CATHOLIC', 'EMERGENCY', '23.0', 'Medicare', 'WHITE', '1.0', 'PHYS REFERRAL/NORMAL DELI', 'HOME HEALTH CARE', 'COLANGITIS', '2173-12-24 00:00:00', '2103.0', '2173.0', '2173-08-25 12:58:00', '2173-09-17 15:40:00', '2173.0']]",
            "is_correct": false,
            "reasoning": "The predicted query returns full demographic rows (SELECT DISTINCT d.*) instead of computing the count of distinct patients, and it omits the COUNT(DISTINCT) aggregation entirely. Fixing it requires restructuring the SELECT clause and adding an aggregate function\u2014this is not a localized typo or syntax mistake but a fundamental change to the query\u2019s intended purpose and output structure.",
            "error_type": "Global: missing aggregation / wrong output structure"
        },
        "8c655677cdaca2684d688418d197aed1": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "find the male patients who have diagnoses icd9 code 5119. (Values: M)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND DIAGNOSES.\"ICD9_CODE\" = \"5119\"",
            "pred_sql": "SELECT d.* FROM DEMOGRAPHIC d INNER JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.GENDER = 'M' AND diag.ICD9_CODE = '5119';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['98220.0', '189416.0', 'Bruce Harris', 'MARRIED', '70.0', '2103-11-14 00:00:00', 'M', 'ENGL', 'CATHOLIC', 'EMERGENCY', '23.0', 'Medicare', 'WHITE', '1.0', 'PHYS REFERRAL/NORMAL DELI', 'HOME HEALTH CARE', 'COLANGITIS', '2173-12-24 00:00:00', '2103.0', '2173.0', '2173-08-25 12:58:00', '2173-09-17 15:40:00', '2173.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is joining the tables correctly and applying the right WHERE conditions, but it selects all columns (d.*) instead of returning the COUNT(DISTINCT SUBJECT_ID). This is a mistake confined to the SELECT clause and does not alter the overall query structure or logic of the joins and filters. Therefore, it is a localized error in the projection.",
            "error_type": "Local: wrong-select-clause"
        },
        "93769bf5b268fb4f529260ca8fc17c57": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "how many patients are with death status 0 and with diagnosis chronic diastolic heart failure? (Values: Chronic diastolic heart failure)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DIAGNOSES.\"LONG_TITLE\" = \"Chronic diastolic heart failure\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DEMOGRAPHIC WHERE EXPIRE_FLAG = 0 AND DIAGNOSIS = 'Chronic diastolic heart failure';",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to join the DIAGNOSES table and instead filters on the DEMOGRAPHIC.DIAGNOSIS column. The correct logic requires an INNER JOIN with DIAGNOSES on HADM_ID and filtering DIAGNOSES.LONG_TITLE. This is not a minor syntax or column name issue but a missing table relationship that fundamentally changes the query\u2019s meaning and results. Such a mistake affects the overall query structure and data associations, so it is a global error.",
            "error_type": "Global: join error"
        },
        "0d8e3bc0c9b32c93056a71cbc2e7882e": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "give the number of patients who were diagnosed with personal history of colonic polyps and lab test result is abnormal. (Values: Personal history of colonic polyps)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DIAGNOSES.\"LONG_TITLE\" = \"Personal history of colonic polyps\" AND LAB.\"FLAG\" = \"abnormal\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DIAGNOSES d JOIN LAB l ON d.SUBJECT_ID = l.SUBJECT_ID WHERE d.SHORT_TITLE = 'Personal history of colonic polyps' AND l.FLAG = 'abnormal';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses SHORT_TITLE instead of the required LONG_TITLE (a wrong column reference) and omits the HADM_ID join condition, joining only on SUBJECT_ID. Both are localized mistakes in column reference and JOIN clause, fixable without restructuring the overall query logic.",
            "error_type": "Local: table/column reference and join-condition error"
        },
        "adce510d770aef3a1b9e9014e766798c": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "give me the number of patients who had single internal mammary-coronary artery bypass. (Values: 1 int mam-cor art bypass)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"SHORT_TITLE\" = \"1 int mam-cor art bypass\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PROCEDURES WHERE ICD9_CODE = 352;",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column and value in the WHERE clause: it filters on ICD9_CODE = 352 instead of matching SHORT_TITLE = '1 int mam-cor art bypass'. This is a localized mistake within the WHERE clause (a wrong column/value reference) and does not require restructuring the joins or overall query flow.",
            "error_type": "Local: column reference error"
        },
        "8412f1e49973d10caff572e2e132ff72": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "How many of the patients admitted before 2112 were diagnosed with chest pain nec? (Values: Chest pain NEC)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2112\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Chest pain NEC\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE EXTRACT(YEAR FROM d.ADMITTIME) < 2112 AND diag.SHORT_TITLE = 'Chest pain NEC';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the EXTRACT(YEAR FROM d.ADMITTIME) function, which is not supported in SQLite. This is a localized syntax error in the WHERE clause (temporal extraction), not a flaw in the overall query logic or table relationships. It can be fixed by replacing EXTRACT with a supported SQLite date function or using the ADMITYEAR column as in the ground truth. Therefore this is a Local error.",
            "error_type": "Local: other-temporal error"
        },
        "9391ab36c2bddfe0cf474a3d26392232": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "find the number of russian speaking patients who are diagnosed with proteus infection nos. (Values: RUSS, Proteus infection NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"LANGUAGE\" = \"RUSS\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Proteus infection NOS\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE LANGUAGE = 'RUSS' AND DIAGNOSIS = 'Proteus infection NOS';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly filters on DEMOGRAPHIC.DIAGNOSIS instead of joining to the DIAGNOSES table and filtering on DIAGNOSES.SHORT_TITLE. This is a localized mistake in the WHERE clause/table reference and does not require restructuring the entire query logic\u2014simply correcting the join and column reference would fix it.",
            "error_type": "Local: table/column reference error"
        },
        "0ef6faed83eede01bf4c9bb3f186fe66": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "let me know both the short title and icd9 code of procedure of patient thomas nazario. (Values: Thomas Nazario)",
            "gold_sql": "SELECT PROCEDURES.\"ICD9_CODE\",PROCEDURES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Thomas Nazario\"",
            "pred_sql": "SELECT p.SHORT_TITLE, p.ICD9_CODE FROM PROCEDURES p JOIN DEMOGRAPHIC d ON p.SUBJECT_ID = d.SUBJECT_ID AND p.HADM_ID = d.HADM_ID WHERE d.NAME = 'Thomas Nazario';",
            "gold_answer": "[['3512.0', 'Opn mitral valvuloplasty'], ['3532.0', 'Chordae tendineae ops'], ['3611.0', 'Aortocor bypas-1 cor art'], ['3615.0', '1 int mam-cor art bypass'], ['3723.0', 'Rt/left heart card cath'], ['3761.0', 'Pulsation balloon implan'], ['3931.0', 'Suture of artery'], ['3961.0', 'Extracorporeal circulat'], ['8856.0', 'Coronar arteriogr-2 cath']]",
            "pred_answer": "[['1 int mam-cor art bypass', '3615.0'], ['Aortocor bypas-1 cor art', '3611.0'], ['Chordae tendineae ops', '3532.0'], ['Coronar arteriogr-2 cath', '8856.0'], ['Extracorporeal circulat', '3961.0'], ['Opn mitral valvuloplasty', '3512.0'], ['Pulsation balloon implan', '3761.0'], ['Rt/left heart card cath', '3723.0'], ['Suture of artery', '3931.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the ground truth and the predicted SQL is the order of columns in the SELECT clause (the predicted SQL returns SHORT_TITLE first and ICD9_CODE second, whereas the ground truth requests ICD9_CODE then SHORT_TITLE). The table joins, filters, and relationships are all correct, so this is a localized issue in the SELECT clause that does not affect the overall query structure or logic.",
            "error_type": "Local: select-clause column order error"
        },
        "a923f3ce780d550018dd0df1f1e55570": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "what number of patients were given the drug oxycodone sr (oxycontin)? (Values: Oxycodone SR (OxyconTIN))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Oxycodone SR (OxyconTIN)\"",
            "pred_sql": "SELECT COUNT(DISTINCT p.SUBJECT_ID) FROM PRESCRIPTIONS p WHERE p.DRUG = 'Oxycodone SR (Oxycontin)';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly counts distinct SUBJECT_IDs but uses the wrong drug string literal ('Oxycodone SR (Oxycontin)' instead of the exact 'Oxycodone SR (OxyconTIN)'). This mismatch in the WHERE clause is a localized value parsing error\u2014it doesn\u2019t change the overall query structure or join logic, only the literal value, so it\u2019s a Local error.",
            "error_type": "Local: value parsing error"
        }
    },
    "llm2sql": {
        "074e704855d20a2abc5a310eb1ebec93": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "posaconazole suspension is administered via which route? (Values: Posaconazole Suspension)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Posaconazole Suspension\"",
            "pred_sql": "SELECT ROUTE FROM PRESCRIPTIONS WHERE DRUG = 'posaconazole suspension';",
            "gold_answer": "[['PO'], ['PO/NG']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query structure matches the intended query (SELECT ROUTE FROM PRESCRIPTIONS WHERE DRUG = ...), but it fails because of a literal mismatch: it uses single quotes and lowercase 'posaconazole suspension' while the stored DRUG value is 'Posaconazole Suspension' (case-sensitive). This is a localized error in the WHERE clause value parsing, not a structural or logical join issue.",
            "error_type": "Local: value parsing error"
        },
        "9bcb8fd398ac5be28718b9fb311c354e": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Among patients with cerebral aneurysm/sda as their primary disease, calculate the total number of those born before the year 2173. (Values: CEREBRAL ANEURYSM/SDA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"CEREBRAL ANEURYSM/SDA\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2173\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) FROM DEMOGRAPHIC JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.LONG_TITLE = 'CEREBRAL ANEURYSM/SDA' AND DEMOGRAPHIC.DOB_YEAR < 2173;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN to the DIAGNOSES table and filters on DIAGNOSES.LONG_TITLE instead of using the DEMOGRAPHIC.DIAGNOSIS column as specified. This changes the intended source of the \u2018primary disease\u2019 information and the overall query logic (also using DOB_YEAR instead of ADMITYEAR). Fixing this would require removing the unnecessary JOIN and restoring the correct column, which is more than a local syntax or minor reference fix. It fundamentally alters the query\u2019s structure and intended meaning.",
            "error_type": "Global: incorrect join and filter logic"
        },
        "8bec95e32428f06d6b8734cae16910d0": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "tell me the insurance and primary disease of james sloan. (Values: James Sloan)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"INSURANCE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"James Sloan\"",
            "pred_sql": "SELECT D.INSURANCE, DIAG.SHORT_TITLE FROM DEMOGRAPHIC D JOIN DIAGNOSES DIAG ON D.HADM_ID = DIAG.HADM_ID WHERE D.NAME = 'James Sloan';",
            "gold_answer": "[['Private', 'NEWBORN']]",
            "pred_answer": "[['Private', 'Laryngotrach anomaly NEC'], ['Private', 'Need prphyl vc vrl hepat'], ['Private', 'Resp prob after brth NEC'], ['Private', 'Routin child health exam'], ['Private', 'Routine circumcision'], ['Private', 'Single lb in-hosp w/o cs']]",
            "is_correct": false,
            "reasoning": "The ground truth query retrieves the 'DIAGNOSIS' directly from the DEMOGRAPHIC table, which holds the primary disease. The predicted query instead joins to the DIAGNOSES table and selects SHORT_TITLE, returning multiple diagnosis rows per admission. This incorrect join fundamentally alters the intended result (it changes both the source of the disease information and the result cardinality), so it is not a localized fix in a single clause but a flaw in the overall query structure and logic.",
            "error_type": "Global: incorrect join"
        },
        "804d61ea251779e8ec9d8f758e134a6d": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients aged below 45 years had celo-vessicle fistula? (Values: CELO-VESSICLE FISTULA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"CELO-VESSICLE FISTULA\" AND DEMOGRAPHIC.\"AGE\" < \"45\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID nWHERE DEMOGRAPHIC.AGE < 45 AND DIAGNOSES.LONG_TITLE = 'Celo-vessicle fistula';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is failing due to a syntax error in the SELECT/FROM clause: the alias \u2018n\u2019 is run together with the FROM keyword as \u2018nFROM\u2019, causing the parser to choke at \u2018DEMOGRAPHIC\u2019. This is a localized issue in the query\u2019s syntax and can be fixed by inserting the missing space or correcting the alias placement. The overall query structure and intended logic remain intact, so this is a local error.",
            "error_type": "Local: syntax error"
        },
        "06e87f96c060a82f8f490899e8cd2b2e": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients are diagnosed with primary disease acidosis and are below 79 years of age? (Values: ACIDOSIS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"ACIDOSIS\" AND DEMOGRAPHIC.\"AGE\" < \"79\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DIAGNOSES d JOIN DEMOGRAPHIC dem ON d.SUBJECT_ID = dem.SUBJECT_ID AND d.HADM_ID = dem.HADM_ID WHERE (d.SHORT_TITLE LIKE '%ACIDOSIS%' OR d.LONG_TITLE LIKE '%ACIDOSIS%') AND dem.AGE < 79;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['8.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by joining the DIAGNOSES table and filtering on diagnosis titles rather than using the DEMOGRAPHIC.DIAGNOSIS field as the primary disease. This alters the data source and retrieval logic, returning all patients with any diagnosis containing 'ACIDOSIS' instead of those whose primary disease is recorded as 'ACIDOSIS'. Such a change affects the overall structure and purpose of the query, not just a localized clause fix.",
            "error_type": "Global: join error"
        },
        "8c182c6e8b8ec7c9edf817d41ad093ae": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Calculate the average age of spanish speaking patients who have overdose as the primary disease. (Values: SPAN, OVERDOSE)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"LANGUAGE\" = \"SPAN\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"OVERDOSE\"",
            "pred_sql": "SELECT AVG(D.DEMOGRAPHIC.AGE) FROM DEMOGRAPHIC D JOIN DIAGNOSES DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.LANGUAGE = 'SPAN' AND (DI.SHORT_TITLE LIKE '%OVERDOSE%' OR DI.LONG_TITLE LIKE '%OVERDOSE%');",
            "gold_answer": "[['None']]",
            "pred_answer": "no such column: D.DEMOGRAPHIC.AGE",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it references a non\u2010existent column alias D.DEMOGRAPHIC.AGE instead of D.AGE. This is a simple column reference mistake in the SELECT clause and does not involve any broader structural or logical flaw in the query.",
            "error_type": "Local: column reference error"
        },
        "0ef1bd0f34de8289d8b8043c9600dbb9": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Get me the number of unmarried patients who had tonsil and adenoid biopsy. (Values: SINGLE, Tonsil&adenoid biopsy)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND PROCEDURES.\"SHORT_TITLE\" = \"Tonsil&adenoid biopsy\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN PROCEDURES ON DEMOGRAPHIC.SUBJECT_ID = PROCEDURES.SUBJECT_ID nAND DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID nWHERE DEMOGRAPHIC.MARITAL_STATUS = 'SINGLE' nAND PROCEDURES.LONG_TITLE LIKE '%Tonsil&adenoid biopsy%'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains literal 'n' characters before keywords (e.g., 'nFROM', 'nJOIN', 'nWHERE'), causing a syntax error. This is a local, easily fixable formatting/syntax issue that does not alter the overall query logic.",
            "error_type": "Local: syntax error"
        },
        "05c5dbad2999a021a98bc80262a523d1": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the total number of patients who had icd9 code 4601",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"ICD9_CODE\" = \"4601\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE ICD9_CODE = '4601';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the DIAGNOSES table for ICD9_CODE = '4601' instead of the PROCEDURES table as required by the question. This is not a minor syntax or column reference issue, but a fundamental misuse of the wrong table that changes the entire logic of the query. Fixing it would require replacing the table and potentially adding a join, which is beyond a localized correction.",
            "error_type": "Global: wrong table used"
        },
        "a653e009b44f6b042361642164f758e9": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Look for the patients admitted before 2174 that were diagnosed with sepsis. (Values: Sepsis)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2174\" AND DIAGNOSES.\"LONG_TITLE\" = \"Sepsis\"",
            "pred_sql": "SELECT DEMOGRAPHIC.SUBJECT_ID, DEMOGRAPHIC.HADM_ID FROM DEMOGRAPHIC INNER JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.ADMITYEAR < 2174 AND DIAGNOSES.LONG_TITLE LIKE '%Sepsis%'",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['18351.0', '136878.0'], ['29541.0', '103957.0'], ['62296.0', '193576.0'], ['67965.0', '145657.0'], ['71798.0', '122790.0'], ['76508.0', '127570.0'], ['83678.0', '172089.0'], ['98220.0', '189416.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the overall structure and logic: it returns raw SUBJECT_ID and HADM_ID rows instead of counting distinct patients, uses LIKE instead of an exact match, and omits the aggregation entirely. These errors span multiple clauses (SELECT projection, use of aggregation, and WHERE condition) and require rewriting the query logic rather than a localized fix.",
            "error_type": "Global: wrong-aggregation-and-projection"
        },
        "06fdfc5ab31f978981e3b81b323b4d0a": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "find the minimum age of patients who stayed in the hospital for 20 days and died before the year 2186. (Values: 2186.0)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DAYS_STAY\" = \"20\" AND DEMOGRAPHIC.\"DOD_YEAR\" < \"2186.0\"",
            "pred_sql": "SELECT MIN(AGE) FROM DEMOGRAPHIC WHERE DAYS_STAY >= 20 AND DOD_YEAR < 2186;",
            "gold_answer": "[['67.0']]",
            "pred_answer": "[['59.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only changes the operator in the WHERE clause from DAYS_STAY = 20 to DAYS_STAY >= 20, thereby altering which patients are considered. This is a localized mistake in the condition and does not affect the overall query structure or joins.",
            "error_type": "Local: where-clause condition error"
        },
        "88bd504917d82cccd38ba49d6ab39d9d": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "calculate the number of patients diagnosed with icd9 code 29570 whose death status is unspecified",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DIAGNOSES.\"ICD9_CODE\" = \"29570\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE ICD9_CODE = '29570';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only queries the DIAGNOSES table and counts distinct SUBJECT_IDs with ICD9_CODE = '29570', completely omitting the join to DEMOGRAPHIC and the EXPIRE_FLAG = '0' filter that ensures death status is unspecified. This missing join and missing WHERE condition significantly change the query\u2019s logic and results, affecting multiple clauses and the overall structure.",
            "error_type": "Global: missing death status filter"
        },
        "926a345db323823260da99f203e0010e": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "find the number of patients who are below the age of 41 and lab test category is blood gas. (Values: Blood Gas)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"41\" AND LAB.\"CATEGORY\" = \"Blood Gas\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) nFROM DEMOGRAPHIC d nJOIN LAB l ON d.SUBJECT_ID = l.SUBJECT_ID nWHERE d.AGE < 41 AND l.CATEGORY = 'Blood Gas';",
            "gold_answer": "[['8.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error: it incorrectly concatenates the alias or newline character \u2018n\u2019 with keywords (nFROM, nJOIN, nWHERE) causing a parsing failure near \u201cDEMOGRAPHIC\u201d. This is a localized formatting/syntax mistake that does not alter the overall query logic or table relationships. It can be fixed by adding the missing spaces/newlines. Therefore it is a Local error.",
            "error_type": "Local: syntax error"
        },
        "00ef3ee46331250704baf2033dad4b10": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients diagnosed under icd9 code 25541 were divorced? (Values: DIVORCED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"DIVORCED\" AND DIAGNOSES.\"ICD9_CODE\" = \"25541\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) nFROM DEMOGRAPHIC AS D nJOIN DIAGNOSES AS DI nON D.SUBJECT_ID = DI.SUBJECT_ID nWHERE DI.ICD9_CODE = '25541' nAND D.MARITAL_STATUS = 'DIVORCED';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to parse due to literal 'n' characters inserted before FROM and JOIN, causing a syntax error near \"DEMOGRAPHIC\". This is a localized formatting/syntax mistake in the SELECT/JOIN clause and does not alter the overall query logic or require restructuring of the query.",
            "error_type": "Local: syntax error"
        },
        "9aed3ef959f024e0ffaf5f92500bda5a": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "mention the primary disease and diagnosis short title of the subject name betty campbell. (Values: Betty Campbell)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Betty Campbell\"",
            "pred_sql": "SELECT DIAGNOSES.LONG_TITLE, DIAGNOSES.SHORT_TITLE FROM DEMOGRAPHIC INNER JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.NAME = 'Betty Campbell';",
            "gold_answer": "[['ANGIOEDEMA', 'Acute respiratry failure'], ['ANGIOEDEMA', 'Acute uri NOS'], ['ANGIOEDEMA', 'Adv eff cardiovasc NEC'], ['ANGIOEDEMA', 'Angioneurotic edema'], ['ANGIOEDEMA', 'Backache NOS'], ['ANGIOEDEMA', 'DMII ophth nt st uncntrl'], ['ANGIOEDEMA', 'Hyperlipidemia NEC/NOS'], ['ANGIOEDEMA', 'Hypertension NOS'], ['ANGIOEDEMA', 'Myopia'], ['ANGIOEDEMA', 'Prolif diab retinopathy']]",
            "pred_answer": "[['Acute respiratory failure', 'Acute respiratry failure'], ['Acute upper respiratory infections of unspecified site', 'Acute uri NOS'], ['Angioneurotic edema, not elsewhere classified', 'Angioneurotic edema'], ['Backache, unspecified', 'Backache NOS'], ['Diabetes with ophthalmic manifestations, type II or unspecified type, not stated as uncontrolled', 'DMII ophth nt st uncntrl'], ['Myopia', 'Myopia'], ['Other and unspecified agents primarily affecting the cardiovascular system causing adverse effects in therapeutic use', 'Adv eff cardiovasc NEC'], ['Other and unspecified hyperlipidemia', 'Hyperlipidemia NEC/NOS'], ['Proliferative diabetic retinopathy', 'Prolif diab retinopathy'], ['Unspecified essential hypertension', 'Hypertension NOS']]",
            "is_correct": false,
            "reasoning": "The predicted query is joining tables correctly (even though it redundantly uses both SUBJECT_ID and HADM_ID), and the WHERE clause is correct. The only mistake is in the SELECT clause: it returns DIAGNOSES.LONG_TITLE instead of the DEMOGRAPHIC.DIAGNOSIS column (the primary disease). This is a localized error in the column selection and doesn\u2019t require changing the overall query structure or join logic.",
            "error_type": "Local: column reference error"
        },
        "04e257060d814171a01a4a75a258d385": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients are primarily suffering from t5 fracture? (Values: T5 FRACTURE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"T5 FRACTURE\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE SHORT_TITLE LIKE '%T5 FRACTURE%' OR LONG_TITLE LIKE '%T5 FRACTURE%';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table with LIKE filters on SHORT_TITLE and LONG_TITLE, whereas the question asks for the primary diagnosis recorded in the DEMOGRAPHIC table\u2019s DIAGNOSIS column. This is not a minor typo or clause mistake but a fundamental change in table choice and logic, altering the intended query structure and meaning.",
            "error_type": "Global: wrong table used"
        },
        "0889b8c37decb7cd8458fd44dc8b8bb5": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients with polycythemia were treated with main drug? (Values: Polycythemia vera, MAIN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Polycythemia vera\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"MAIN\"",
            "pred_sql": "SELECT COUNT(DISTINCT diagnoses.SUBJECT_ID) nFROM DIAGNOSES nJOIN PRESCRIPTIONS ON DIAGNOSES.SUBJECT_ID = PRESCRIPTIONS.SUBJECT_ID nAND DIAGNOSES.HADM_ID = PRESCRIPTIONS.HADM_ID nWHERE (diagnoses.SHORT_TITLE LIKE '%Polycythemia vera%' OR diagnoses.LONG_TITLE LIKE '%Polycythemia vera%') nAND PRESCRIPTIONS.DRUG_TYPE = 'MAIN';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "near \"DIAGNOSES\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains a syntax error near \"DIAGNOSES\" due to the stray \"nFROM\" and \"nJOIN\" tokens and missing whitespace. This is a localized syntax mistake in the SELECT/FROM clause and does not affect the overall query logic or table relationships. It can be fixed by correcting the formatting and keywords without restructuring the query.",
            "error_type": "Local: syntax error"
        },
        "0f8c99adf2a5dc590120e3fc5f613dbc": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the insurance and primary disease of the patient id 2560?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"INSURANCE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT DEMOGRAPHIC.INSURANCE, DIAGNOSES.LONG_TITLE FROM DEMOGRAPHIC INNER JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID WHERE DEMOGRAPHIC.SUBJECT_ID = 2560;",
            "gold_answer": "[['Private', 'NEWBORN']]",
            "pred_answer": "[['Private', 'Congenital pigmentary anomalies of skin'], ['Private', 'Need for prophylactic vaccination and inoculation against viral hepatitis'], ['Private', 'Observation for suspected infectious condition'], ['Private', 'Routine or ritual circumcision'], ['Private', 'Single liveborn, born in hospital, delivered by cesarean section']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly joins the DIAGNOSES table and selects DIAGNOSES.LONG_TITLE instead of using the DEMOGRAPHIC.DIAGNOSIS column. The extra JOIN and wrong column selection are localized to the SELECT and JOIN parts and do not require restructuring the overall query logic. This is a simple column reference error.",
            "error_type": "Local: table/column reference error"
        },
        "89a0044a0f95cc0d7cc6757f5a6c4d6f": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "among patients treated with base drug, how many of them had ventricular tachycardia as primary disease? (Values: VENTRICULAR TACHYCARDIA, BASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"VENTRICULAR TACHYCARDIA\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"BASE\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM LAB l JOIN DIAGNOSES d ON l.SUBJECT_ID = d.SUBJECT_ID AND l.HADM_ID = d.HADM_ID WHERE l.LABEL = 'Base' AND d.LONG_TITLE = 'Ventricular tachycardia'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the LAB table and DIAGNOSES table instead of PRESCRIPTIONS and DEMOGRAPHIC, respectively. It filters on l.LABEL = 'Base' rather than PRESCRIPTIONS.DRUG_TYPE = 'BASE', and uses d.LONG_TITLE for ventricular tachycardia rather than DEMOGRAPHIC.DIAGNOSIS. This is not a simple column name or syntax mistake but a fundamental misuse of tables and joins, altering the entire query structure and intended logic.",
            "error_type": "Global: incorrect join strategy"
        },
        "0d2d4b9ce812c7d9805538e58b0de30e": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the number of patients with primary disease abdominal pain who were aged less than 68 years? (Values: ABDOMINAL PAIN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"ABDOMINAL PAIN\" AND DEMOGRAPHIC.\"AGE\" < \"68\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) FROM DEMOGRAPHIC JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID WHERE (DIAGNOSES.SHORT_TITLE LIKE '%ABDOMINAL PAIN%' OR DIAGNOSES.LONG_TITLE LIKE '%ABDOMINAL PAIN%') AND DEMOGRAPHIC.AGE < 68;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the fundamental logic by joining with the DIAGNOSES table and filtering on SHORT_TITLE/LONG_TITLE rather than using the DEMOGRAPHIC.DIAGNOSIS column for primary disease. This join and filter shift alters the intended meaning (primary disease field vs. any diagnosis) and leads to different results. Fixing this requires restructuring the query, not just a local tweak.",
            "error_type": "Global: incorrect join error"
        },
        "0d78e61ca912d643a9233b60b95f4b40": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "What is the primary disease of Kelly Gallardo?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Kelly Gallardo\"",
            "pred_sql": "SELECT D.LONG_TITLE FROM DIAGNOSES D INNER JOIN DEMOGRAPHIC DH ON D.SUBJECT_ID = DH.SUBJECT_ID WHERE DH.NAME = 'Kelly Gallardo';",
            "gold_answer": "[['NEWBORN']]",
            "pred_answer": "[['35-36 completed weeks of gestation'], ['Need for prophylactic vaccination and inoculation against viral hepatitis'], ['Observation for suspected infectious condition'], ['Other preterm infants, 2,000-2,499 grams'], ['Primary apnea of newborn'], ['Single liveborn, born in hospital, delivered by cesarean section']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally misinterprets the source of the \u201cprimary disease\u201d field. Instead of selecting the DIAGNOSIS column from the DEMOGRAPHIC table (which holds the primary disease), it joins to the DIAGNOSES table and retrieves all matching LONG_TITLE entries. This is not a minor syntax or alias mistake but a structural error: the wrong table is used and the join logic is flawed, leading to unintended many-row results rather than the single primary disease value.",
            "error_type": "Global: incorrect-table-join"
        },
        "8762eb0a5667497e60e00c3d154d04a5": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients with infected right thigh graft were of american indian/alaska native ethnicty? (Values: AMERICAN INDIAN/ALASKA NATIVE, INFECTED RIGHT THIGH GRAFT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"AMERICAN INDIAN/ALASKA NATIVE\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"INFECTED RIGHT THIGH GRAFT\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nINNER JOIN DIAGNOSES ON DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID nWHERE DEMOGRAPHIC.ETHNICITY = 'AMERICAN INDIAN/ALASKA NATIVE' nAND (DIAGNOSES.LONG_TITLE = 'INFECTED RIGHT THIGH GRAFT' nOR DIAGNOSES.SHORT_TITLE = 'INFECTED RIGHT THIGH GRAFT')",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error due to the stray 'n' before FROM and INNER JOIN (i.e., 'nFROM', 'nINNER JOIN'), causing SQLite to throw a syntax error. This is a localized mistake in the SELECT/JOIN clause formatting and does not reflect a deeper logical or structural misunderstanding.",
            "error_type": "Local: syntax error"
        },
        "8e5b3b23c0faa725dddd16452c1d83cd": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Find out the number of patients with a newborn primary disease. (Values: NEWBORN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"NEWBORN\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE SHORT_TITLE LIKE '%NEWBORN%' OR LONG_TITLE LIKE '%NEWBORN%';",
            "gold_answer": "[['22.0']]",
            "pred_answer": "[['7.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls from the DIAGNOSES table and uses LIKE on SHORT_TITLE and LONG_TITLE, whereas the question and ground truth use the DEMOGRAPHIC table\u2019s DIAGNOSIS column for an exact match. This changes the overall logic and data source, not just a minor clause, resulting in different patient counts. The error spans the core query structure and intended meaning.",
            "error_type": "Global: incorrect table and logic"
        },
        "02c34ef8864566dbd0a9c9ad006c1b21": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "find the primary disease and admission time of subject id 813.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DEMOGRAPHIC.\"ADMITTIME\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"813\"",
            "pred_sql": "SELECT DIAGNOSES.LONG_TITLE AS primary_disease, DEMOGRAPHIC.ADMITTIME AS admission_time FROM DIAGNOSES INNER JOIN DEMOGRAPHIC ON DIAGNOSES.SUBJECT_ID = DEMOGRAPHIC.SUBJECT_ID WHERE DIAGNOSES.SUBJECT_ID = 813;",
            "gold_answer": "[['GASTROINTESTINAL BLEED', '2179-12-15 18:01:00']]",
            "pred_answer": "[['Acute myocardial infarction of other inferior wall, initial episode of care', '2179-12-15 18:01:00'], ['Atrial fibrillation', '2179-12-15 18:01:00'], ['Cardiac complications, not elsewhere classified', '2179-12-15 18:01:00'], ['Congestive heart failure, unspecified', '2179-12-15 18:01:00'], ['Coronary atherosclerosis of native coronary artery', '2179-12-15 18:01:00'], ['Diverticulosis of colon (without mention of hemorrhage)', '2179-12-15 18:01:00'], ['Hemorrhage of gastrointestinal tract, unspecified', '2179-12-15 18:01:00'], ['Malignant neoplasm of ascending colon', '2179-12-15 18:01:00'], ['Pneumonia, organism unspecified', '2179-12-15 18:01:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by pulling from the DIAGNOSES table instead of using the DEMOGRAPHIC.DIAGNOSIS column for the primary disease, and by joining only on SUBJECT_ID (omitting HADM_ID), it returns multiple diagnosis rows rather than a single primary disease. This is not a minor syntax or reference mistake but a flawed join strategy and table choice that alters the intended meaning of the query.",
            "error_type": "Global: incorrect join"
        },
        "9c0e1d982d6a14ff908ec2ba2546d3f1": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients of black/cape verdean ethnicity are primary diagnosed with femoral artery thrombosis? (Values: BLACK/CAPE VERDEAN, FEMORAL ARTERY THROMBOSIS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"BLACK/CAPE VERDEAN\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"FEMORAL ARTERY THROMBOSIS\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) nFROM DEMOGRAPHIC d nJOIN DIAGNOSES diag nON d.SUBJECT_ID = diag.SUBJECT_ID nAND d.HADM_ID = diag.HADM_ID nWHERE d.ETHNICITY = 'BLACK/CAPE VERDEAN' nAND diag.LONG_TITLE = 'FEMORAL ARTERY THROMBOSIS';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error near 'DEMOGRAPHIC' due to a missing space between the alias and the FROM keyword ('nFROM' instead of '\\nFROM'). This is a local, easily fixable syntax mistake that does not affect the overall query structure or logic.",
            "error_type": "Local: syntax error"
        },
        "0b265651dd99d7806feaa26008b010e8": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "find out the time of admission and primary disease of patient with patient id 4589.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DEMOGRAPHIC.\"ADMITTIME\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"4589\"",
            "pred_sql": "SELECT T1.ADMITTIME, T2.SHORT_TITLE FROM DEMOGRAPHIC AS T1 JOIN DIAGNOSES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID WHERE T1.SUBJECT_ID = 4589;",
            "gold_answer": "[['CONGESTIVE HEART FAILURE', '2111-12-17 19:59:00']]",
            "pred_answer": "[['2111-12-17 19:59:00', 'Aftrcre traum fx low leg'], ['2111-12-17 19:59:00', 'CHF NOS'], ['2111-12-17 19:59:00', 'Candidias urogenital NEC'], ['2111-12-17 19:59:00', 'Cardiac arrest'], ['2111-12-17 19:59:00', 'Cardiac dysrhythmias NEC'], ['2111-12-17 19:59:00', 'Critical illness myopthy'], ['2111-12-17 19:59:00', 'DMI neuro nt st uncntrld'], ['2111-12-17 19:59:00', 'DMI renl nt st uncntrld'], ['2111-12-17 19:59:00', 'End stage renal disease'], ['2111-12-17 19:59:00', 'Food/vomit pneumonitis'], ['2111-12-17 19:59:00', 'Hemododialysis hypotensn'], ['2111-12-17 19:59:00', 'Hyp kid NOS w cr kid V'], ['2111-12-17 19:59:00', 'Hyperpotassemia'], ['2111-12-17 19:59:00', 'Idio periph neurpthy NOS'], ['2111-12-17 19:59:00', 'Int inf clstrdium dfcile'], ['2111-12-17 19:59:00', 'Long-term use of insulin'], ['2111-12-17 19:59:00', 'Morbid obesity'], ['2111-12-17 19:59:00', 'Other pulmonary insuff'], ['2111-12-17 19:59:00', 'Pressure ulcer, low back'], ['2111-12-17 19:59:00', 'Rhabdomyolysis'], ['2111-12-17 19:59:00', 'Subendo infarct, initial'], ['2111-12-17 19:59:00', 'Ulcer of heel & midfoot'], ['2111-12-17 19:59:00', 'Urin tract infection NOS']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the DEMOGRAPHIC table with DIAGNOSES, returning every diagnosis record for the patient rather than the single primary disease stored in DEMOGRAPHIC.DIAGNOSIS. This changes the fundamental logic and structure of the query, producing multiple rows instead of one. Fixing this requires removing the join and selecting the correct column from DEMOGRAPHIC, not just a small tweak in a clause.",
            "error_type": "Global: flawed join"
        },
        "aafae9c30494946bbd7ac56453a627b2": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients with primary disease overdose were less than 50 years of age? (Values: OVERDOSE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"OVERDOSE\" AND DEMOGRAPHIC.\"AGE\" < \"50\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DIAGNOSES AS D JOIN DEMOGRAPHIC AS DM ON D.SUBJECT_ID = DM.SUBJECT_ID AND D.HADM_ID = DM.HADM_ID WHERE (D.LONG_TITLE LIKE '%OVERDOSE%' OR D.SHORT_TITLE LIKE '%OVERDOSE%') AND DM.AGE < 50",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the overall logic by joining the DIAGNOSES table to DEMOGRAPHIC and filtering on LONG_TITLE/SHORT_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column directly. This alters the intended meaning and structure of the query, requiring a significant rewrite of the query flow (wrong table usage, join logic, and filtering method).",
            "error_type": "Global: incorrect join logic"
        },
        "00648dbdd1cf6863da5597366c79a6f0": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many of the unmarried patients belonged to white ethnic origin? (Values: SINGLE, WHITE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND DEMOGRAPHIC.\"ETHNICITY\" = \"WHITE\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE MARITAL_STATUS = 'SINGLE' AND ETHNICITY LIKE '%WHITE%'",
            "gold_answer": "[['15.0']]",
            "pred_answer": "[['17.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only does COUNT(*) instead of COUNT(DISTINCT SUBJECT_ID), and it uses ETHNICITY LIKE '%WHITE%' instead of an exact equality check. These are mistakes confined to the SELECT aggregation and WHERE condition clauses and do not require reworking the overall query structure or table relationships. Therefore, this is a local error.",
            "error_type": "Local: missing-distinct-and-wrong-comparison"
        },
        "a85f7ee77ab7acd707cc19c80b4acf63": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Count the number of patients born before 2071 who had an emergency room hospital admission. (Values: EMERGENCY ROOM ADMIT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"EMERGENCY ROOM ADMIT\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2071\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE DOB_YEAR < 2071 AND ADMISSION_TYPE = 'EMERGENCY ROOM ADMIT';",
            "gold_answer": "[['11.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column for filtering emergency room admissions: it checks ADMISSION_TYPE instead of ADMISSION_LOCATION. It also uses COUNT(*) rather than COUNT(DISTINCT SUBJECT_ID), but the overall query structure remains correct. These are localized mistakes (incorrect column reference and missing DISTINCT) that do not require changing the join logic or query structure.",
            "error_type": "Local: column reference error"
        },
        "0ad552d2835d64ccd539c3214e2b6d7a": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "specify the icd9 code and marital status of patient id 42067",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"42067\"",
            "pred_sql": "SELECT D.ICD9_CODE, DG.MARITAL_STATUS FROM DEMOGRAPHIC DG JOIN DIAGNOSES D ON DG.SUBJECT_ID = D.SUBJECT_ID WHERE DG.SUBJECT_ID = 42067",
            "gold_answer": "[['SINGLE', 'Anemia in neoplastic dis'], ['SINGLE', 'Atrial fibrillation'], ['SINGLE', 'CHF NOS'], ['SINGLE', 'Chr diastolic hrt fail'], ['SINGLE', 'Crbl art ocl NOS w infrc'], ['SINGLE', 'Crnry athrscl natve vssl'], ['SINGLE', 'DMII wo cmp nt st uncntr'], ['SINGLE', 'Dysthymic disorder'], ['SINGLE', 'Encephalopathy NOS'], ['SINGLE', 'Hyperosmolality'], ['SINGLE', 'Intracerebral hemorrhage'], ['SINGLE', 'Malig neo brain NEC'], ['SINGLE', 'Old myocardial infarct'], ['SINGLE', 'Proteus infection NOS'], ['SINGLE', 'Urin tract infection NOS']]",
            "pred_answer": "[['1918.0', 'SINGLE'], ['25000.0', 'SINGLE'], ['2760.0', 'SINGLE'], ['28522.0', 'SINGLE'], ['3004.0', 'SINGLE'], ['34830.0', 'SINGLE'], ['412.0', 'SINGLE'], ['41401.0', 'SINGLE'], ['416.0', 'SINGLE'], ['42731.0', 'SINGLE'], ['4280.0', 'SINGLE'], ['42832.0', 'SINGLE'], ['431.0', 'SINGLE'], ['43491.0', 'SINGLE'], ['5990.0', 'SINGLE']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC to DIAGNOSES on SUBJECT_ID instead of HADM_ID. This isn\u2019t a simple typo or missing column but a fundamentally incorrect relationship between tables that can change the result set (Cartesian\u2010like duplication or filtering differences) and alters the intended logic of matching diagnoses to the correct hospital admission. Fixing it requires changing the join condition at a structural level, so it\u2019s a global error.",
            "error_type": "Global: incorrect join key"
        },
        "8992cac76fa0ec727f4a010671a5ce78": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "tell me the short title of procedure and primary disease of patient steven sepulveda. (Values: Steven Sepulveda)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",PROCEDURES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Steven Sepulveda\"",
            "pred_sql": "SELECT P.SHORT_TITLE AS Procedure_Short_Title, D.SHORT_TITLE AS Primary_Disease_Short_Title nFROM DEMOGRAPHIC DEM nJOIN PROCEDURES P ON DEM.SUBJECT_ID = P.SUBJECT_ID AND DEM.HADM_ID = P.HADM_ID nJOIN DIAGNOSES D ON DEM.SUBJECT_ID = D.SUBJECT_ID AND DEM.HADM_ID = D.HADM_ID nWHERE DEM.NAME = 'Steven Sepulveda';",
            "gold_answer": "[['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', '1 int mam-cor art bypass'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Aortocor bypas-3 cor art'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Dx ultrasound-heart'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Exc/dest hrt lesion open'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Extracorporeal circulat'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Incision of aorta']]",
            "pred_answer": "near \"nFROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a localized syntax mistake: it includes literal \u2018nFROM\u2019, \u2018nJOIN\u2019, and \u2018nWHERE\u2019 tokens instead of proper newline or whitespace separators. This is a parsing error confined to clause formatting and does not change the query\u2019s overall structure or logic. It can be fixed by replacing \u2018nFROM\u2019 with \u2018FROM\u2019 (and similarly for JOIN/WHERE), without altering table relationships or intent.",
            "error_type": "Local: syntax error"
        },
        "a03591944cbd372c579f85a8601246d7": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Provide the number of unmarried patients who have a hematology lab test category. (Values: SINGLE, Hematology)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND LAB.\"CATEGORY\" = \"Hematology\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) nFROM DEMOGRAPHIC D nJOIN LAB L ON D.SUBJECT_ID = L.SUBJECT_ID nWHERE D.MARITAL_STATUS = 'SINGLE' nAND L.CATEGORY = 'Hematology';",
            "gold_answer": "[['26.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to parse due to malformed keywords (\u2018nFROM\u2019, \u2018nJOIN\u2019) indicating a simple syntax/newline insertion mistake. This is a localized error in the query text and does not reflect a misunderstanding of the overall logic or table relationships. Hence it is a local syntax error.",
            "error_type": "Local: syntax error"
        },
        "089a137d1d773bd6f945b3accb8eb318": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many of the patients with item id 50976 remained admitted in hospital for more than 16 days?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"16\" AND LAB.\"ITEMID\" = \"50976\"",
            "pred_sql": "SELECT COUNT(DISTINCT LAB.SUBJECT_ID, LAB.HADM_ID) nFROM LAB nJOIN DEMOGRAPHIC ON LAB.SUBJECT_ID = DEMOGRAPHIC.SUBJECT_ID AND LAB.HADM_ID = DEMOGRAPHIC.HADM_ID nWHERE LAB.ITEMID = 50976 AND DEMOGRAPHIC.DAYS_STAY > 16;",
            "gold_answer": "[['4.0']]",
            "pred_answer": "near \"LAB\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains a syntax error: it writes \u201cnFROM LAB nJOIN\u2026\u201d and \u201cnWHERE\u2026\u201d without proper spacing or alias declaration, causing \u201cnear 'LAB': syntax error.\u201d This is a localized mistake in the FROM/JOIN clause formatting and does not alter the overall query logic. It can be fixed by correcting the spacing and alias syntax without restructuring the query.",
            "error_type": "Local: syntax error"
        },
        "84cdf16ce9ced3807228663939f35f2c": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "How many patients with s/p fall as their primary disease had ascitic fluid lab test? (Values: S/P FALL, Ascites)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"S/P FALL\" AND LAB.\"FLUID\" = \"Ascites\"",
            "pred_sql": "SELECT COUNT(DISTINCT DIAG.SUBJECT_ID) nFROM DIAGNOSES AS DIAG nJOIN LAB AS LAB ON DIAG.SUBJECT_ID = LAB.SUBJECT_ID AND DIAG.HADM_ID = LAB.HADM_ID nWHERE (DIAG.LONG_TITLE LIKE '%S/P FALL%' OR DIAG.SHORT_TITLE LIKE '%S/P FALL%') nAND LAB.FLUID = 'Ascites';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DIAGNOSES\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with a syntax error near \"DIAGNOSES\" because the 'nFROM' token is invalid (newline markers were inserted without spaces). This is a localized syntax mistake in the SELECT/FROM clause and does not involve a deeper logical or structural misunderstanding of the schema or intent. It can be fixed by correcting the syntax (e.g., adding a space or removing the 'n'), without altering the overall query logic.",
            "error_type": "Local: syntax error"
        },
        "0ce7ffc2b0832d683b0873a75983ee3b": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the number of patients who were hospitalized for more than 20 days and were later discharged home? (Values: HOME)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"HOME\" AND DEMOGRAPHIC.\"DAYS_STAY\" > \"20\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE DAYS_STAY > 20 AND DISCHARGE_LOCATION LIKE '%HOME%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only modifies the WHERE and SELECT clauses without changing the overall structure or table relationships. It uses DISCHARGE_LOCATION LIKE '%HOME%' instead of the required exact match (= 'HOME') and COUNT(*) instead of COUNT(DISTINCT SUBJECT_ID). Both are localized errors in the WHERE filter and aggregation, not a global logic or join structure issue.",
            "error_type": "Local: where-clause and aggregation error"
        },
        "85aa76ce561a87a6edc0d482804be238": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "provide the number of patients who had other body fluid lab test and were hospitalized for more than 2 days. (Values: Other Body Fluid)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"2\" AND LAB.\"FLUID\" = \"Other Body Fluid\"",
            "pred_sql": "SELECT COUNT(DISTINCT L.SUBJECT_ID) FROM LAB AS L JOIN DEMOGRAPHIC AS D ON L.SUBJECT_ID = D.SUBJECT_ID WHERE L.CATEGORY = 'Other Body Fluid' AND D.DAYS_STAY > 2;",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only uses the wrong column (LAB.CATEGORY instead of LAB.FLUID) but, more critically, it joins LAB to DEMOGRAPHIC on SUBJECT_ID rather than on the admission key (HADM_ID). This incorrect join relationship fundamentally alters the intended data association between a specific lab test and its corresponding hospital stay, leading to unintended results. Fixing this requires restructuring the join logic, which is a global error.",
            "error_type": "Global: incorrect join relationship"
        },
        "0bc9f228d063f921a03fbc029e61db03": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "How many of the patients admitted before the year 2173 were diagnosed with ac salpingo-oophoritis? (Values: Ac salpingo-oophoritis)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2173\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Ac salpingo-oophoritis\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.LONG_TITLE = 'Ac salpingo-oophoritis' AND DEMOGRAPHIC.ADMITYEAR < 2173;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses DIAGNOSES.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE and uses COUNT(*) instead of COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID). These are mistakes confined to specific clauses (SELECT and WHERE) and do not alter the overall join structure or logic of the query.",
            "error_type": "Local: column reference and aggregation error"
        },
        "ae3c7627e3ba7750105510476460e8e4": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "What is the age and primary disease of Steven Sepulveda?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"AGE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Steven Sepulveda\"",
            "pred_sql": "SELECT D.AGE, Diag.LONG_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS Diag ON D.SUBJECT_ID = Diag.SUBJECT_ID AND D.HADM_ID = Diag.HADM_ID WHERE D.NAME = 'Steven Sepulveda';",
            "gold_answer": "[['64.0', 'CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA']]",
            "pred_answer": "[['64.0', 'Atrial fibrillation'], ['64.0', 'Cardiac complications, not elsewhere classified'], ['64.0', 'Coronary atherosclerosis of native coronary artery'], ['64.0', 'Other and unspecified hyperlipidemia'], ['64.0', 'Pulmonary collapse'], ['64.0', 'Unspecified essential hypertension'], ['64.0', 'Unspecified sleep apnea']]",
            "is_correct": false,
            "reasoning": "The predicted query introduces a JOIN to the DIAGNOSES table and selects LONG_TITLE, returning multiple diagnosis rows instead of using the single DIAGNOSIS field in DEMOGRAPHIC. This alters the overall logic and structure of the query by changing the source of the primary disease and producing multiple records rather than the one expected. Fixing this requires removing the JOIN and selecting from a different table, which is more than a local correction.",
            "error_type": "Global: incorrect-join"
        },
        "9fbf9e43a2cf665e8cf12a1fddb6f556": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "specify the number of patients who were admitted in the year less that 2187 and had  (aorto)coronary bypass of one coronary artery (Values: (Aorto)coronary bypass of one coronary artery)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2187\" AND PROCEDURES.\"LONG_TITLE\" = \"(Aorto)coronary bypass of one coronary artery\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM PROCEDURES P INNER JOIN DEMOGRAPHIC D ON P.SUBJECT_ID = D.SUBJECT_ID AND P.HADM_ID = D.HADM_ID WHERE P.LONG_TITLE = '(aorto)coronary bypass of one coronary artery' AND D.ADMITYEAR < 2187;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses a lowercase string '(aorto)coronary bypass of one coronary artery' in the WHERE clause, whereas the actual LONG_TITLE value in the data is '(Aorto)coronary bypass of one coronary artery' (with an uppercase 'A'). SQLite string comparisons are case-sensitive by default, so this mismatch causes the predicted query to return zero rows. The overall query structure, joins, and logic are correct; only the literal value is incorrectly formatted.",
            "error_type": "Local: value parsing error"
        },
        "ad68c7f37e76a07bb9df8bbd457cb210": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients year of birth is before 1821 with lab test lactate dehydrogenase csf? (Values: Lactate Dehydrogenase, CSF)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"1821\" AND LAB.\"LABEL\" = \"Lactate Dehydrogenase, CSF\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) nFROM DEMOGRAPHIC D nJOIN LAB L ON D.SUBJECT_ID = L.SUBJECT_ID AND D.HADM_ID = L.HADM_ID nWHERE D.DOB_YEAR < 1821 AND L.LABEL = 'Lactate Dehydrogenase' AND L.CATEGORY = 'CSF';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has two localized mistakes: a syntax error where newline markers (\u2018nFROM\u2019, \u2018nJOIN\u2019, \u2018nWHERE\u2019) are run together causing a parsing failure, and an incorrect WHERE predicate that splits the lab label into L.LABEL and L.CATEGORY instead of matching the single string \u201cLactate Dehydrogenase, CSF\u201d. Both issues are contained within a single clause and do not require restructuring the entire query.",
            "error_type": "Local: syntax and predicate error"
        },
        "0a1cd1f71bf51049de78aa0501e9fa78": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "What is the number of inpatient hospital admission patients who are less than 59 years old? (Values: TRANSFER FROM HOSP/EXTRAM)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRANSFER FROM HOSP/EXTRAM\" AND DEMOGRAPHIC.\"AGE\" < \"59\"",
            "pred_sql": "SELECT COUNT(DISTINCT HADM_ID) FROM DEMOGRAPHIC WHERE AGE < 59;",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['49.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the condition on ADMISSION_LOCATION (TRANSFER FROM HOSP/EXTRAM) that is required by the question. This is a mistake confined to the WHERE clause\u2014a missing filter\u2014without changing the overall query structure.",
            "error_type": "Local: missing WHERE filter"
        },
        "0be50e80b5be87e0c4395c5608c9d39f": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Look for the number of patients with the primary disease syncope telemetry who died in or before 2158. (Values: SYNCOPE;TELEMETRY, 2158.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"SYNCOPE;TELEMETRY\" AND DEMOGRAPHIC.\"DOD_YEAR\" <= \"2158.0\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN DIAGNOSES nON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID nAND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID nWHERE (DIAGNOSES.SHORT_TITLE LIKE '%SYNCOPE%' OR DIAGNOSES.LONG_TITLE LIKE '%TELEMETRY%') nAND DEMOGRAPHIC.EXPIRE_FLAG = 1 nAND DEMOGRAPHIC.DOD_YEAR <= 2158;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains a simple syntax mistake: the alias prefix 'n' is inadvertently glued to keywords ('nFROM', 'nJOIN', 'nWHERE'), causing a parse error near 'DEMOGRAPHIC'. This is a local issue within the SELECT/JOIN syntax and can be fixed by inserting proper whitespace or removing the stray alias markers, without changing the overall query logic or structure.",
            "error_type": "Local: syntax error"
        },
        "819886e34e7e0c9516caa63edcf8fa0d": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "count the number of patients born before 2065 who had prophylactic administration of vaccine against other diseases. (Values: Prophylactic administration of vaccine against other diseases)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2065\" AND PROCEDURES.\"LONG_TITLE\" = \"Prophylactic administration of vaccine against other diseases\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN PROCEDURES ON DEMOGRAPHIC.SUBJECT_ID = PROCEDURES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID nWHERE DEMOGRAPHIC.DOB_YEAR < 2065 nAND PROCEDURES.LONG_TITLE = 'Prophylactic administration of vaccine against other diseases';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces spurious \u2018n\u2019 prefixes (e.g., nFROM, nJOIN, nWHERE) instead of proper newlines or spaces, causing a SQLite syntax error near \u201cDEMOGRAPHIC\u201d. This mistake is confined to the FROM/JOIN clause formatting and does not alter the overall query logic or relationships. It can be fixed by correcting the syntax without restructuring the query.",
            "error_type": "Local: syntax error"
        },
        "aa81098d9bbd712002641a023ad9c2ed": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the number of government health insurance patients who had laryngoscopy and other tracheoscopy procedure? (Values: Government, Laryngoscopy and other tracheoscopy)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Government\" AND PROCEDURES.\"LONG_TITLE\" = \"Laryngoscopy and other tracheoscopy\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) nFROM DEMOGRAPHIC D nJOIN PROCEDURES P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID nWHERE D.INSURANCE LIKE '%Government%' nAND P.LONG_TITLE LIKE '%Laryngoscopy and other tracheoscopy%';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to execute due to a syntax error: it includes 'nFROM' and 'nJOIN' instead of proper newline or space before FROM and JOIN, causing an immediate parse error near 'DEMOGRAPHIC'. This is a localized formatting/syntax mistake in the SELECT/JOIN clauses, not a fundamental mis\u2010join or logic flaw.",
            "error_type": "Local: syntax error"
        },
        "8404478c3a267757514125bb690cca23": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients admitted after year 2146 had ruq pain? (Values: RUQ PAIN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"RUQ PAIN\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2146\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN DIAGNOSES DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.ADMITYEAR > 2146 AND DI.LONG_TITLE LIKE '%RUQ Pain%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the intended logic by introducing a JOIN to the DIAGNOSES table (where the ground truth simply filters the DEMOGRAPHIC.DIAGNOSIS column). It also reverses the comparison on ADMITYEAR (uses > instead of <) and uses a LIKE filter on DI.LONG_TITLE rather than an exact match on DEMOGRAPHIC.DIAGNOSIS. These alterations span multiple clauses and require restructuring the query\u2019s logic, not just a minor fix in one clause.",
            "error_type": "Global: flawed join strategy"
        },
        "871de8f4f83e7d3c523502445f3264de": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the number of patients less than 56 years who have hyperglycemia primary disease? (Values: HYPERGLYCEMIA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"HYPERGLYCEMIA\" AND DEMOGRAPHIC.\"AGE\" < \"56\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) FROM DEMOGRAPHIC JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID WHERE DEMOGRAPHIC.AGE < 56 AND DIAGNOSES.LONG_TITLE LIKE '%HYPERGLYCEMIA%';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s structure and logic by joining the DIAGNOSES table and filtering on DIAGNOSES.LONG_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column for the primary disease. This is not a simple syntax or alias issue but a wrong-table usage that changes the meaning of the query, requiring a significant restructuring to fix.",
            "error_type": "Global: incorrect table usage"
        },
        "838ee5f862ddd3c0a19da04e62ab6735": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Calculate the patients who are prescribed additive drugs that have stayed in the hospital for more than 5 days. (Values: ADDITIVE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"5\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"ADDITIVE\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM PRESCRIPTIONS nJOIN DEMOGRAPHIC ON PRESCRIPTIONS.SUBJECT_ID = DEMOGRAPHIC.SUBJECT_ID nAND PRESCRIPTIONS.HADM_ID = DEMOGRAPHIC.HADM_ID nWHERE PRESCRIPTIONS.DRUG_TYPE = 'ADDITIVE' nAND DEMOGRAPHIC.DAYS_STAY > 5",
            "gold_answer": "[['4.0']]",
            "pred_answer": "near \"PRESCRIPTIONS\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax mistake caused by missing spaces/newlines before the FROM and JOIN keywords (e.g., \u2018nFROM PRESCRIPTIONS nJOIN \u2026\u2019), which breaks the parser. This is a simple, localized syntax error in the query formatting rather than a logical or structural issue with table relationships or overall query intent. It can be fixed by adding the proper spaces/newlines around FROM/JOIN without altering the query\u2019s logic.",
            "error_type": "Local: syntax error"
        },
        "80493e9bb6cabefb1d4a01cb9851ea70": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "provide me the number of patients who have been diagnosed with chordae tendinae rupture. (Values: Chordae tendinae rupture)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Chordae tendinae rupture\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE LONG_TITLE = 'Chordae tendinae rupture';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the LONG_TITLE column instead of the correct SHORT_TITLE column to filter for \u201cChordae tendinae rupture.\u201d This is a mistake in a specific part of the WHERE clause (column reference), and it does not alter the overall structure or join logic of the query. Hence it is a localized error.",
            "error_type": "Local: column-reference error"
        },
        "05343cc69204da73ce2ac4ac4d8295bb": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Show me the number of black /haitian patients who were diagnosed under the diagnoses cicd9 code 2948. (Values: BLACK/HAITIAN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"BLACK/HAITIAN\" AND DIAGNOSES.\"ICD9_CODE\" = \"2948\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) nFROM DEMOGRAPHIC d nJOIN DIAGNOSES diag nON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID nWHERE diag.ICD9_CODE = '2948' nAND d.ETHNICITY = 'BLACK/HAITIAN';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error due to the literal \u201cnFROM\u201d (and similar \u201cnJOIN\u201d) instead of a proper newline or space before FROM/JOIN, causing \"near \\\"DEMOGRAPHIC\\\": syntax error\". This is a localized syntax/formatting mistake and does not change the overall join logic or query structure.",
            "error_type": "Local: syntax error"
        },
        "0332dda726da118eaa7ca1bd76d407df": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the number of patients whose hospital stay is above 7 days and diagnosed with single ib in-hosp w cs? (Values: Single lb in-hosp w cs)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"7\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Single lb in-hosp w cs\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES ON DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.DAYS_STAY > 7 AND DIAGNOSES.LONG_TITLE = 'Single lb in-hosp w cs'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL errs only in the SELECT and WHERE clauses: it filters by DIAGNOSES.LONG_TITLE instead of the correct SHORT_TITLE and uses COUNT(*) instead of COUNT(DISTINCT SUBJECT_ID). These mistakes are confined to specific column references and aggregation functions and do not alter the overall join structure or query logic. Thus this is a Local error.",
            "error_type": "Local: column reference error"
        },
        "015ae8de2f7597f3308e719345bd99de": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients are admitted under emergency room and diagnosed with mixed acid-base bal dis? (Values: EMERGENCY ROOM ADMIT, Mixed acid-base bal dis)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"EMERGENCY ROOM ADMIT\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Mixed acid-base bal dis\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D INNER JOIN DIAGNOSES DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.ADMISSION_TYPE = 'EMERGENCY ROOM ADMIT' AND DI.LONG_TITLE = 'Mixed acid-base bal dis'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong columns in the WHERE clause: it filters on D.ADMISSION_TYPE instead of DEMOGRAPHIC.ADMISSION_LOCATION, and on DI.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE. The overall query structure (JOIN, COUNT DISTINCT) remains correct, so these are isolated column reference mistakes that can be fixed without altering the query\u2019s logic.",
            "error_type": "Local: table/column reference errors"
        },
        "809481545e7a77fa4c823f05ba51741b": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "specify the total number of patients who had icd9 code 4105",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"ICD9_CODE\" = \"4105\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE ICD9_CODE = '4105';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table instead of the PROCEDURES table and omits the join to DEMOGRAPHIC. This is not a simple typo or missing condition in one clause\u2014it changes the core tables and relationships used in the query, altering the intended logic of counting patients who had the ICD9 procedure code. Such an error affects the overall structure and meaning of the query and requires reworking multiple parts (table selection and joins).",
            "error_type": "Global: incorrect table used"
        },
        "0be089824bb2caf402523cf69d0356a8": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Tell me the procedure icd9 code and the primary disease for the patient Brian Brock.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",PROCEDURES.\"ICD9_CODE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Brian Brock\"",
            "pred_sql": "SELECT P.ICD9_CODE, D.LONG_TITLE n FROM DEMOGRAPHIC AS DEM n JOIN PROCEDURES AS P ON DEM.SUBJECT_ID = P.SUBJECT_ID AND DEM.HADM_ID = P.HADM_ID n JOIN DIAGNOSES AS D ON DEM.SUBJECT_ID = D.SUBJECT_ID AND DEM.HADM_ID = D.HADM_ID n WHERE DEM.NAME = 'Brian Brock';",
            "gold_answer": "[['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3613.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3615.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3893.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3961.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '8872.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '9604.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '9671.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '9904.0']]",
            "pred_answer": "near \"n\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL includes stray 'n' tokens after aliases and column selections (e.g., 'DEM n', 'D.LONG_TITLE n'), causing a syntax error near 'n'. This is a localized mistake in aliasing/syntax rather than a logical issue spanning multiple clauses or tables.",
            "error_type": "Local: syntax error"
        },
        "8e72236c9b6a09cbcea0029a69a799d3": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the drug route of Ascorbic acid? (Values: Ascorbic Acid)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Ascorbic Acid\"",
            "pred_sql": "SELECT ROUTE FROM PRESCRIPTIONS WHERE DRUG = 'Ascorbic acid';",
            "gold_answer": "[['PO'], ['PO'], ['PO'], ['PO'], ['PO'], ['PO/NG'], ['PO/NG']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL has the correct structure and references but uses the literal 'Ascorbic acid' (lowercase 'a') instead of the exact value 'Ascorbic Acid'. This causes a case-sensitive mismatch in the WHERE clause, returning no rows. The error is confined to the literal value and does not affect the query\u2019s overall logic or table relationships.",
            "error_type": "Local: value parsing error"
        },
        "a84dd2cf945be1cd6281de749bbe8da4": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "give  the number of patients whose primary disease is coronary artery disease/coronary artery bypass graft/sda and who were aged below 89 years. (Values: CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS GRAFT /SDA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS GRAFT /SDA\" AND DEMOGRAPHIC.\"AGE\" < \"89\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE DIAGNOSIS IN ('CORONARY ARTERY DISEASE', 'CORONARY ARTERY BYPASS GRAFT', 'SDA') AND AGE < 89",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses DIAGNOSIS IN ('CORONARY ARTERY DISEASE', 'CORONARY ARTERY BYPASS GRAFT', 'SDA') instead of matching the exact string 'CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS GRAFT /SDA'. This is a mistake in the WHERE clause filter values, a localized error in the condition, not affecting the overall query structure or joins.",
            "error_type": "Local: incorrect value filter"
        },
        "8232941afbee51d5e20e7dcf879ee83f": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "tell me the number of patients who were born in or before 2132. (Values: 2132.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DOD_YEAR\" <= \"2132.0\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE DOB_YEAR <= 2132",
            "gold_answer": "[['8.0']]",
            "pred_answer": "[['77.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a different column (DOB_YEAR) than the ground truth (DOD_YEAR) and also omits the DISTINCT keyword in the COUNT. Both mistakes are confined to the SELECT/WHERE clause and do not affect the overall query structure or table relationships. Therefore, this is a localized error in column reference and aggregation.",
            "error_type": "Local: column reference error"
        },
        "9ead4702111aae4baa6cdeb38d1d16c9": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "which patients are suffering from pneumonia primary disease? (Values: PNEUMONIA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"PNEUMONIA\"",
            "pred_sql": "SELECT DISTINCT SUBJECT_ID FROM DIAGNOSES WHERE LONG_TITLE LIKE '%PNEUMONIA%';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['14755.0'], ['16438.0'], ['25167.0'], ['28588.0'], ['4333.0'], ['4333.0'], ['62296.0'], ['813.0'], ['81923.0'], ['83678.0'], ['9271.0'], ['94762.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table and matches on LONG_TITLE, whereas the ground truth query counts distinct SUBJECT_IDs in the DEMOGRAPHIC table filtered by the primary disease column DEMOGRAPHIC.DIAGNOSIS = 'PNEUMONIA'. The predicted query not only references the wrong table and column (DIAGNOSES.LONG_TITLE instead of DEMOGRAPHIC.DIAGNOSIS) but also changes the intended output from a count to a list of SUBJECT_IDs. This is a structural/logic-level error that alters the fundamental meaning of the query, not a minor localized mistake in a single clause.",
            "error_type": "Global: wrong-table-and-logic error"
        }
    },
    "o4-mini": {
        "a619d5854b9eb9d0df34539f613cafc4": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "give me the number of emergency hospital admission patients who had insertion of 2 vascular stents. (Values: EMERGENCY, Insert 2 vascular stents)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND PROCEDURES.\"SHORT_TITLE\" = \"Insert 2 vascular stents\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) AS num_patients FROM demographic d JOIN procedures p ON d.subject_id = p.subject_id AND d.hadm_id = p.hadm_id WHERE d.admission_type = 'EMERGENCY' AND ( UPPER(p.long_title) LIKE '%INSERT%' AND UPPER(p.long_title) LIKE '%2%' AND UPPER(p.long_title) LIKE '%STENT%' );",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in the WHERE clause\u2019s filter on the procedures table: it uses UPPER(p.long_title) LIKE conditions (and omits \u201cvascular\u201d) instead of matching PROCEDURES.SHORT_TITLE = 'Insert 2 vascular stents'. The join, aggregation, and table references are correct. This error is confined to a single clause (the filter), so it is a local error.",
            "error_type": "Local: incorrect WHERE clause filter"
        },
        "8e3b5d9cb36613efab2204fe93749087": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "for patient id 30011, specify the icd9 code and procedural long title.",
            "gold_sql": "SELECT DIAGNOSES.\"LONG_TITLE\",PROCEDURES.\"LONG_TITLE\" FROM DIAGNOSES INNER JOIN PROCEDURES on DIAGNOSES.HADM_ID = PROCEDURES.HADM_ID WHERE DIAGNOSES.\"SUBJECT_ID\" = \"30011\"",
            "pred_sql": "SELECT ICD9_CODE, LONG_TITLE FROM PROCEDURES WHERE SUBJECT_ID = 30011;",
            "gold_answer": "[['Cerebral aneurysm, nonruptured', 'Arteriography of cerebral arteries'], ['Cerebral aneurysm, nonruptured', 'Endovascular (total) embolization or occlusion of head and neck vessels'], ['Osteoporosis, unspecified', 'Arteriography of cerebral arteries'], ['Osteoporosis, unspecified', 'Endovascular (total) embolization or occlusion of head and neck vessels'], ['Other and unspecified hyperlipidemia', 'Arteriography of cerebral arteries'], ['Other and unspecified hyperlipidemia', 'Endovascular (total) embolization or occlusion of head and neck vessels'], ['Pure hypercholesterolemia', 'Arteriography of cerebral arteries'], ['Pure hypercholesterolemia', 'Endovascular (total) embolization or occlusion of head and neck vessels'], ['Unspecified essential hypertension', 'Arteriography of cerebral arteries'], ['Unspecified essential hypertension', 'Endovascular (total) embolization or occlusion of head and neck vessels'], ['Unspecified gastritis and gastroduodenitis, without mention of hemorrhage', 'Arteriography of cerebral arteries'], ['Unspecified gastritis and gastroduodenitis, without mention of hemorrhage', 'Endovascular (total) embolization or occlusion of head and neck vessels'], ['Urinary tract infection, site not specified', 'Arteriography of cerebral arteries'], ['Urinary tract infection, site not specified', 'Endovascular (total) embolization or occlusion of head and neck vessels']]",
            "pred_answer": "[['3972.0', 'Endovascular (total) embolization or occlusion of head and neck vessels'], ['8841.0', 'Arteriography of cerebral arteries']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only queries the PROCEDURES table and selects its ICD9_CODE and LONG_TITLE, completely omitting the DIAGNOSES table and the necessary join on HADM_ID. Correctly answering the question requires combining information from DIAGNOSES and PROCEDURES (a multi-table join). This mistake affects the overall logic and structure of the query, not just a single clause.",
            "error_type": "Global: missing join/table logic error"
        },
        "9599d62908a1ba42d4b04b1abcdbe094": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "How many patients have a diphenhydramine prescription? (Values: DiphenhydrAMINE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"DiphenhydrAMINE\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS num_patients FROM PRESCRIPTIONS WHERE UPPER(DRUG) LIKE '%DIPHENHYDRAMINE%';",
            "gold_answer": "[['12.0']]",
            "pred_answer": "[['17.0']]",
            "is_correct": false,
            "reasoning": "The only difference is in the WHERE clause. The ground truth uses an exact match PRESCRIPTIONS.DRUG = \"DiphenhydrAMINE\", whereas the prediction uses UPPER(DRUG) LIKE '%DIPHENHYDRAMINE%', pulling in extra rows. The overall join and counting logic is fine; this is a localized mistake in the filter condition.",
            "error_type": "Local: value parsing error"
        },
        "895ca1f9dc05114c51436d4c28f1dbaa": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "specify complete details of icd9 code 4591",
            "gold_sql": "SELECT PROCEDURES.\"SHORT_TITLE\",PROCEDURES.\"LONG_TITLE\" FROM PROCEDURES WHERE PROCEDURES.\"ICD9_CODE\" = \"4591\"",
            "pred_sql": "SELECT * FROM DIAGNOSES WHERE ICD9_CODE = '4591';",
            "gold_answer": "[['Sm-to-sm bowel anastom', 'Small-to-small intestinal anastomosis']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the DIAGNOSES table instead of the PROCEDURES table specified by the question. This is a simple table reference error: changing the table name to PROCEDURES would fix the query without altering its overall structure or logic.",
            "error_type": "Local: table reference error"
        },
        "aaaa09cf4ad17119ae748e7b44f1d7c6": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "how many patients had the procedure under icd9 code 41?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"ICD9_CODE\" = \"41\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS num_patients FROM PROCEDURES WHERE ICD9_CODE LIKE '41%';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s error lies solely in the WHERE clause: it uses LIKE '41%' to match any code starting with 41, instead of an exact match = '41'. This overcounts and produces the wrong result. No overall query structure or joins need to change; it\u2019s a localized mistake in the condition.",
            "error_type": "Local: value-parsing error"
        },
        "a74c0d773d66fce959ed3490677faac0": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "which patients had open and other left hemicolectomy? (Values: Opn lft hemicolectmy NEC)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"SHORT_TITLE\" = \"Opn lft hemicolectmy NEC\"",
            "pred_sql": "SELECT DISTINCT d.SUBJECT_ID, d.HADM_ID, d.NAME FROM PROCEDURES p JOIN DEMOGRAPHIC d ON p.SUBJECT_ID = d.SUBJECT_ID AND p.HADM_ID = d.HADM_ID WHERE p.SHORT_TITLE = 'Opn lft hemicolectmy NEC';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['81085.0', '169845.0', 'Thomas Jeanbaptiste']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the correct tables, JOIN keys, and WHERE filter but returns individual SUBJECT_ID, HADM_ID, and NAME instead of the COUNT(DISTINCT SUBJECT_ID) required. The mistake is isolated to the SELECT clause (wrong aggregation and columns), without altering the overall query structure or relationships.",
            "error_type": "Local: incorrect-select error"
        },
        "085c097c30ee27375d301af1413d2f6a": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "out of total number of patients who had endoscopic insertion of stent (tube) into bile duct, calculate the number of those aged below 45 (Values: Endoscopic insertion of stent (tube) into bile duct)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"45\" AND PROCEDURES.\"LONG_TITLE\" = \"Endoscopic insertion of stent (tube) into bile duct\"",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) AS total_patients_with_stent, COUNT(DISTINCT CASE WHEN d.age < 45 THEN p.subject_id END) AS patients_below_45 FROM procedures p JOIN demographic d ON p.subject_id = d.subject_id AND p.hadm_id = d.hadm_id WHERE p.long_title = 'Endoscopic insertion of stent (tube) into bile duct';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['1.0', '0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the correct join and filtering logic for the procedure and age, but it returns two aggregate columns (total patients and patients below 45) instead of the single count of patients below age 45 as requested. This is a mistake confined to the SELECT clause and does not change the overall query structure or table relationships.",
            "error_type": "Local: select-clause error"
        },
        "86f305efdfde01b908615d3b271a35ad": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "how many patients were treated with diltiazem? (Values: Diltiazem)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Diltiazem\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS num_patients FROM PRESCRIPTIONS WHERE LOWER(DRUG) LIKE '%diltiazem%';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['5.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the WHERE clause: it uses LOWER(DRUG) LIKE '%diltiazem%' instead of an exact match on DRUG = 'Diltiazem'. This is a localized mistake in the filter condition, not affecting the overall structure or joins in the query. Thus, it is a Local error (value parsing/filter condition error).",
            "error_type": "Local: value parsing error"
        },
        "04221395dc9c6a9e5c880d1dc78a5031": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "how many patients are discharged from disch tran to psych hosp that are under age 77? (Values: DISCH-TRAN TO PSYCH HOSP)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"DISCH-TRAN TO PSYCH HOSP\" AND DEMOGRAPHIC.\"AGE\" < \"77\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS num_patients FROM DEMOGRAPHIC WHERE DISCHARGE_LOCATION LIKE 'DISC-TRAN%PSYCH%' AND AGE < 77;",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the ground truth and the predicted SQL is in the WHERE clause\u2019s string matching: the ground truth uses an exact match on DISCHARGE_LOCATION = 'DISCH-TRAN TO PSYCH HOSP', while the predicted SQL uses a LIKE pattern 'DISC-TRAN%PSYCH%'. This incorrect pattern causes no rows to match. The issue is confined to the value literal and matching method in the WHERE clause and does not alter the overall query structure or joins. Therefore, it is a localized error in the WHERE clause.",
            "error_type": "Local: value parsing/matching error"
        },
        "02984008762913113973753bce29760d": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "which patients have prescription for spironolactone medication? (Values: Spironolactone)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Spironolactone\"",
            "pred_sql": "SELECT DISTINCT d.SUBJECT_ID, d.HADM_ID, d.NAME FROM PRESCRIPTIONS p JOIN DEMOGRAPHIC d ON p.SUBJECT_ID = d.SUBJECT_ID AND p.HADM_ID = d.HADM_ID WHERE LOWER(p.DRUG) LIKE '%spironolactone%';",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['18480.0', '156404.0', 'Brian Taylor'], ['6983.0', '173511.0', 'Edward Castillo'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman'], ['91588.0', '173731.0', 'Elizabeth Bateman']]",
            "is_correct": false,
            "reasoning": "The predicted SQL returns SUBJECT_ID, HADM_ID, and NAME instead of a single COUNT of distinct SUBJECT_ID as required. It also uses a LIKE filter rather than an exact match and thus completely changes the query\u2019s structure and intended output. Fixing this is not a localized tweak but requires rewriting the SELECT and WHERE clauses and adding the aggregation, so it is a global structural error.",
            "error_type": "Global"
        },
        "060151fe7a66f01cf5811b6efdc73538": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "find the number of patients who were ordered alanine aminotransferase (alt) lab test. (Values: Alanine Aminotransferase (ALT))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"LABEL\" = \"Alanine Aminotransferase (ALT)\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS num_patients FROM LAB WHERE LABEL ILIKE '%alanine aminotransferase%';",
            "gold_answer": "[['51.0']]",
            "pred_answer": "near \"ILIKE\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to the use of the ILIKE operator, which is not supported in SQLite, resulting in a near \"ILIKE\": syntax error. This is a localized syntax issue within the WHERE clause and does not affect the overall query structure or intended relationships between tables.",
            "error_type": "Local: syntax error"
        },
        "94d03397b2b5cc027c508ab22c8e216e": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "how many patients born before the year 2104 were given the drug vancomycin? (Values: Vancomycin)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2104\" AND PRESCRIPTIONS.\"DRUG\" = \"Vancomycin\"",
            "pred_sql": "SELECT COUNT(DISTINCT p.SUBJECT_ID) AS num_patients FROM DEMOGRAPHIC d JOIN PRESCRIPTIONS p ON d.SUBJECT_ID = p.SUBJECT_ID AND d.HADM_ID = p.HADM_ID WHERE d.DOB_YEAR < 2104 AND UPPER(p.DRUG) LIKE '%VANCOMYCIN%';",
            "gold_answer": "[['18.0']]",
            "pred_answer": "[['22.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistake lies in the WHERE clause: it uses UPPER(p.DRUG) LIKE '%VANCOMYCIN%' instead of an exact match p.DRUG = 'Vancomycin'. This causes over\u2010matching (e.g., 'Vancomycin Phosphate') and inflates the count. The overall query structure (joins, aggregation) remains correct. This is a localized condition error that can be fixed by changing the comparison operator, so it\u2019s a Local error.",
            "error_type": "Local: incorrect value condition"
        },
        "a7f0f51a1b7298fc9b7c2f8214ee4b19": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "how many patients diagnosed with chronic diastolic heart failure were hospitalized for more than 30 days? (Values: Chr diastolic hrt fail)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"30\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Chr diastolic hrt fail\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) AS num_patients FROM DEMOGRAPHIC d JOIN DIAGNOSES dx ON d.SUBJECT_ID = dx.SUBJECT_ID AND d.HADM_ID = dx.HADM_ID WHERE d.DAYS_STAY > 30 AND dx.LONG_TITLE ILIKE '%chronic diastolic heart failure%';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"ILIKE\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the ILIKE operator, which is not supported in SQLite, leading to a syntax error. This mistake is localized to the WHERE clause (an operator/syntax issue) and can be fixed without restructuring the overall query, thus it is a local error.",
            "error_type": "Local: syntax error"
        },
        "92fb353d2314296f863997a8c3222e89": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "specify the admission type and icd9 code of patient id 91588",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"91588\"",
            "pred_sql": "SELECT d.ADMISSION_TYPE, diag.ICD9_CODE FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.SUBJECT_ID = 91588;",
            "gold_answer": "[['EMERGENCY', 'Ac on chr syst hrt fail'], ['EMERGENCY', 'CHF NOS'], ['EMERGENCY', 'Chronic sinusitis NOS'], ['EMERGENCY', 'Cyclic neutropenia'], ['EMERGENCY', 'Encephalopathy NOS'], ['EMERGENCY', 'Enterococcus group d'], ['EMERGENCY', 'Fever in other diseases'], ['EMERGENCY', 'Hypothyroidism NOS'], ['EMERGENCY', 'Myelodysplastic synd NOS'], ['EMERGENCY', 'Oth lymp unsp xtrndl org'], ['EMERGENCY', 'Pericardial disease NOS'], ['EMERGENCY', 'Shock NOS'], ['EMERGENCY', 'Thrombocytopenia NOS'], ['EMERGENCY', 'Trnspl status-bne marrow'], ['EMERGENCY', 'Urin tract infection NOS']]",
            "pred_answer": "[['EMERGENCY', '20280.0'], ['EMERGENCY', '23875.0'], ['EMERGENCY', '2449.0'], ['EMERGENCY', '2875.0'], ['EMERGENCY', '28802.0'], ['EMERGENCY', '34830.0'], ['EMERGENCY', '4104.0'], ['EMERGENCY', '4239.0'], ['EMERGENCY', '4280.0'], ['EMERGENCY', '42823.0'], ['EMERGENCY', '4739.0'], ['EMERGENCY', '5990.0'], ['EMERGENCY', '78061.0'], ['EMERGENCY', '78550.0'], ['EMERGENCY', 'V4281']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly joins DEMOGRAPHIC and DIAGNOSES on SUBJECT_ID and HADM_ID and filters by the correct patient ID. The only mistake is in the SELECT clause: it returns ICD9_CODE instead of the required SHORT_TITLE. This is a localized error in selecting the wrong column, without affecting the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "06309c2f77f11d404ae955cf4e39d47b": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "provide the number of patients whose diagnosis short title is dmi keto nt st uncontrolled and lab test category is blood gas. (Values: DMI keto nt st uncntrld, Blood Gas)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"DMI keto nt st uncntrld\" AND LAB.\"CATEGORY\" = \"Blood Gas\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) AS num_patients FROM DIAGNOSES d JOIN LAB l ON d.SUBJECT_ID = l.SUBJECT_ID AND d.HADM_ID = l.HADM_ID WHERE d.SHORT_TITLE = 'DMI KETO NT ST UNCNTRLD' AND l.CATEGORY = 'Blood Gas';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is structurally correct\u2014it joins the same tables (DIAGNOSES and LAB) on the correct keys and counts distinct SUBJECT_ID. The only difference is that the predicted query uses a differently cased string literal for SHORT_TITLE ('DMI KETO NT ST UNCNTRLD' instead of 'DMI keto nt st uncntrld'), causing no rows to match under SQLite\u2019s default case-sensitive comparison. This is a localized issue in the WHERE clause and can be fixed by adjusting the literal. Therefore it is a Local error.",
            "error_type": "Local: value parsing error"
        },
        "099eef47c1de13d69881354a8d6bcf98": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "give me the number of patients whose death status is 0 and diagnoses title is bph w/o urinaryobs/luts. (Values: BPH w/o urinary obs/LUTS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DIAGNOSES.\"SHORT_TITLE\" = \"BPH w/o urinary obs/LUTS\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) AS num_patients FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.EXPIRE_FLAG = 0 AND LOWER(diag.SHORT_TITLE) = 'bph w/o urinaryobs/luts';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The only mistake in the predicted SQL is in the WHERE clause literal: they used LOWER(diag.SHORT_TITLE) = 'bph w/o urinaryobs/luts' (missing the space between 'urinary' and 'obs') so it doesn\u2019t match the actual SHORT_TITLE value 'BPH w/o urinary obs/LUTS'. Everything else (JOIN conditions, COUNT, filter on EXPIRE_FLAG) follows the intended structure. This is a localized error in the value filter, not a structural or logical join mistake.",
            "error_type": "Local: value parsing error"
        },
        "9dc0838fea8ceebde01fd608c434210c": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "which patients have buccal route of drug administration? (Values: BUCCAL)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"ROUTE\" = \"BUCCAL\"",
            "pred_sql": "SELECT DISTINCT d.SUBJECT_ID, d.HADM_ID, d.NAME FROM PRESCRIPTIONS p JOIN DEMOGRAPHIC d ON p.SUBJECT_ID = d.SUBJECT_ID AND p.HADM_ID = d.HADM_ID WHERE p.ROUTE = 'BUCCAL';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3623.0', '178444.0', 'Joseph Dillman'], ['3623.0', '178444.0', 'Joseph Dillman'], ['3623.0', '178444.0', 'Joseph Dillman'], ['3623.0', '178444.0', 'Joseph Dillman']]",
            "is_correct": false,
            "reasoning": "The predicted SQL returns individual patient-admission rows (including NAME and HADM_ID) instead of the count of distinct patients as required. It omits the COUNT(DISTINCT) aggregation and selects the wrong columns, fundamentally altering the query\u2019s purpose. This mistake spans both the SELECT clause and absence of aggregation, requiring more than a localized fix.",
            "error_type": "Global: incorrect aggregation and select columns"
        }
    },
    "dinsql": {
        "8434959e3330dc710ffe491da1f8652a": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "familiarize me with the death status and primary disease of the patient joseph dillman. (Values: Joseph Dillman)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"EXPIRE_FLAG\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Joseph Dillman\"",
            "pred_sql": "SELECT D.EXPIRE_FLAG, DI.LONG_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.NAME = 'Joseph Dillman'",
            "gold_answer": "[['1.0', 'PNEUMONIA']]",
            "pred_answer": "[['1.0', 'Acute kidney failure, unspecified'], ['1.0', 'Acute respiratory failure'], ['1.0', 'Acute venous embolism and thrombosis of deep vessels of proximal lower extremity'], ['1.0', 'Aftercare for healing traumatic fracture of lower leg'], ['1.0', 'Anxiety state, unspecified'], ['1.0', 'Asthma, unspecified type, unspecified'], ['1.0', 'Constipation, unspecified'], ['1.0', 'Depressive disorder, not elsewhere classified'], ['1.0', 'Diaphragmatic hernia without mention of obstruction or gangrene'], ['1.0', 'Enthesopathy of hip region'], ['1.0', 'Neurogenic bladder NOS'], ['1.0', 'Other chronic pain'], ['1.0', 'Other pulmonary insufficiency, not elsewhere classified'], ['1.0', 'Other specified disorders of esophagus'], ['1.0', 'Paraplegia'], ['1.0', 'Personal history, urinary (tract) infection'], ['1.0', 'Pneumonitis due to inhalation of food or vomitus'], ['1.0', 'Polyp of nasal cavity'], ['1.0', 'Postinflammatory pulmonary fibrosis'], ['1.0', 'Pressure ulcer, buttock'], ['1.0', 'Pressure ulcer, hip'], ['1.0', 'Pressure ulcer, lower back'], ['1.0', 'Pressure ulcer, stage I'], ['1.0', 'Pressure ulcer, stage IV'], ['1.0', 'Pressure ulcer, unstageable'], ['1.0', 'Unspecified osteomyelitis, other specified sites'], ['1.0', 'Unspecified protein-calorie malnutrition']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with the DIAGNOSES table and selects DI.LONG_TITLE, thereby returning all diagnosis records for Joseph Dillman rather than the single primary disease stored in the DEMOGRAPHIC.DIAGNOSIS column. This changes the overall query logic and structure, producing multiple rows and incorrect results instead of the intended one-row result. The error is not a minor typo or syntax issue; it fundamentally alters the query\u2019s meaning by joining the wrong table and selecting the wrong field.",
            "error_type": "Global: incorrect join logic"
        },
        "a6cecf07aa5066f7bc97d4afef5e4767": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the minimum days for which patients with brain mass; intracranial hemorrhage were hospitalized? (Values: BRAIN MASS;INTRACRANIAL HEMORRHAGE)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BRAIN MASS;INTRACRANIAL HEMORRHAGE\"",
            "pred_sql": "SELECT MIN(D.DAYS_STAY) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.HADM_ID = DI.HADM_ID WHERE DI.LONG_TITLE = 'BRAIN MASS",
            "gold_answer": "[['4.0']]",
            "pred_answer": "unrecognized token: \"'BRAIN MASS\"",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to parse due to a malformed string literal in the WHERE clause (missing closing quote and incomplete value) leading to an unrecognized token error. This is a localized syntax/value parsing mistake that does not require restructuring the overall query logic. It can be fixed by correctly quoting and completing the literal.",
            "error_type": "Local: value parsing error"
        },
        "abd26d62b8557ca1e9111eb9ffe88c4a": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many male patients had the lab test name potassium, whole blood? (Values: M, Potassium, Whole Blood)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND LAB.\"LABEL\" = \"Potassium, Whole Blood\"",
            "pred_sql": "SELECT COUNT(DISTINCT L.SUBJECT_ID) FROM LAB AS L JOIN DEMOGRAPHIC AS D ON L.SUBJECT_ID = D.SUBJECT_ID AND L.HADM_ID = D.HADM_ID WHERE D.GENDER = 'M' AND L.LABEL = 'Potassium' AND L.FLUID = 'Whole Blood'",
            "gold_answer": "[['24.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly splits the lab test name into two conditions (LABEL = 'Potassium' AND FLUID = 'Whole Blood') instead of matching the single LABEL value 'Potassium, Whole Blood'. This is a mistake confined to the WHERE clause value parsing and does not change the overall query structure or join logic.",
            "error_type": "Local: value parsing error"
        },
        "977ecca508723a7bb321e261d6c47f72": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the diagnosis of Tracy Farmer?",
            "gold_sql": "SELECT DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Tracy Farmer\"",
            "pred_sql": "SELECT DISTINCT d.LONG_TITLE FROM DEMOGRAPHIC dem JOIN DIAGNOSES d ON dem.SUBJECT_ID = d.SUBJECT_ID AND dem.HADM_ID = d.HADM_ID WHERE dem.NAME = 'Tracy Farmer'",
            "gold_answer": "[['Infant diabet mother syn'], ['Need prphyl vc vrl hepat'], ['Single lb in-hosp w cs'], ['Ventricular sept defect']]",
            "pred_answer": "[['Need for prophylactic vaccination and inoculation against viral hepatitis'], ['Single liveborn, born in hospital, delivered by cesarean section'], ['Syndrome of \"infant of a diabetic mother\"'], ['Ventricular septal defect']]",
            "is_correct": false,
            "reasoning": "The predicted query differs only in the SELECT clause: it retrieves LONG_TITLE instead of SHORT_TITLE. The JOIN and WHERE conditions correctly mirror the ground truth logic. Since the error is limited to the chosen column in SELECT and does not affect the overall query structure or table relationships, it is a local error.",
            "error_type": "Local: incorrect column reference"
        },
        "03ad1ed58197cc476c639d7af7c662a3": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "Calculate the total number of patients who had transient ischemic attack as their primary disease. (Values: TRANSIENT ISCHEMIC ATTACK)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"TRANSIENT ISCHEMIC ATTACK\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE LONG_TITLE LIKE '%TRANSIENT ISCHEMIC ATTACK%' OR SHORT_TITLE LIKE '%TRANSIENT ISCHEMIC ATTACK%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the DIAGNOSES table instead of the DEMOGRAPHIC table, thereby using the wrong source for the 'primary disease' field. This is a localized mistake in table reference rather than a fundamental restructuring of the query logic.",
            "error_type": "Local: table reference error"
        },
        "ac68bab7e7f4cd79ac543b18d76d24d3": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "Calulate the total number of patients who had icd9 code 5771",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"ICD9_CODE\" = \"5771\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE ICD9_CODE = '5771'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table instead of the PROCEDURES table as in the ground truth. This is not a simple typo but changes the entire logic by querying the wrong source of ICD9 codes, resulting in a different patient count. Fixing this requires altering the overall query structure to join the correct table, so it\u2019s a global error.",
            "error_type": "Global: wrong-table logic"
        },
        "04e471a143d212d6e201d787eea68d7d": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients used the drug name capso fungin? (Values: Caspofungin)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Caspofungin\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PRESCRIPTIONS WHERE DRUG = 'Capso fungin'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL misuses the drug name literal (\u201cCapso fungin\u201d instead of the exact value \u201cCaspofungin\u201d required by the question). This is a localized mistake in the WHERE clause value rather than a change to the overall query structure or logic. Therefore it\u2019s a local error of incorrect literal value parsing.",
            "error_type": "Local: value parsing error"
        },
        "9638fb767e2ce81c243568cce6a0d6fe": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "Which patients are medicated via tp route? (Values: TP)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"ROUTE\" = \"TP\"",
            "pred_sql": "SELECT DISTINCT SUBJECT_ID, HADM_ID FROM PRESCRIPTIONS WHERE ROUTE = 'TP'",
            "gold_answer": "[['24.0']]",
            "pred_answer": "[['14755.0', '151669.0'], ['14755.0', '151669.0'], ['17570.0', '183034.0'], ['17570.0', '183034.0'], ['17570.0', '183034.0'], ['22377.0', '120859.0'], ['28588.0', '141664.0'], ['29541.0', '103957.0'], ['29541.0', '103957.0'], ['29961.0', '196409.0'], ['29961.0', '196409.0'], ['29961.0', '196409.0'], ['29961.0', '196409.0'], ['3623.0', '178444.0'], ['3623.0', '178444.0'], ['3623.0', '178444.0'], ['3623.0', '178444.0'], ['3623.0', '178444.0'], ['4333.0', '155027.0'], ['4333.0', '155027.0'], ['4333.0', '155027.0'], ['4333.0', '155027.0'], ['4333.0', '155027.0'], ['4333.0', '155027.0'], ['4589.0', '101114.0'], ['4589.0', '101114.0'], ['4589.0', '101114.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['52012.0', '146937.0'], ['53707.0', '195809.0'], ['62296.0', '193576.0'], ['62296.0', '193576.0'], ['62296.0', '193576.0'], ['65982.0', '141119.0'], ['65982.0', '141119.0'], ['65982.0', '141119.0'], ['66411.0', '178264.0'], ['67965.0', '145657.0'], ['67965.0', '145657.0'], ['71798.0', '122790.0'], ['73843.0', '158983.0'], ['73843.0', '158983.0'], ['83678.0', '172089.0'], ['84129.0', '161253.0'], ['84129.0', '161253.0'], ['8440.0', '137021.0'], ['8440.0', '137021.0'], ['8440.0', '137021.0'], ['8990.0', '113207.0'], ['8990.0', '113207.0'], ['8990.0', '113207.0'], ['8990.0', '113207.0'], ['91588.0', '173731.0'], ['91588.0', '173731.0'], ['94762.0', '160707.0'], ['94762.0', '160707.0'], ['94762.0', '160707.0'], ['98220.0', '189416.0'], ['98220.0', '189416.0'], ['98220.0', '189416.0']]",
            "is_correct": false,
            "reasoning": "The predicted query selects SUBJECT_ID and HADM_ID directly from PRESCRIPTIONS without joining DEMOGRAPHIC and without performing the COUNT DISTINCT aggregation on SUBJECT_ID. This is a structural change: instead of returning a single count of unique patients, it returns many rows of subject\u2010hadm combinations. Fixing this is not a localized tweak (e.g., correcting a column name or syntax); it requires adding the join, changing the SELECT to COUNT(DISTINCT ...), and removing HADM_ID. Therefore, it\u2019s a global error in query structure and logic.",
            "error_type": "Global: structural aggregation error"
        },
        "a95ae3ec84a4478fe2ba16078b12904a": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "find the female patients who have procedure icd9 code 3727. (Values: F)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND PROCEDURES.\"ICD9_CODE\" = \"3727\"",
            "pred_sql": "SELECT DISTINCT D.SUBJECT_ID, D.NAME FROM DEMOGRAPHIC AS D INNER JOIN PROCEDURES AS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.GENDER = 'F' AND P.ICD9_CODE = '3727'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['9575.0', 'Melinda Miers']]",
            "is_correct": false,
            "reasoning": "The predicted query uses SELECT DISTINCT D.SUBJECT_ID, D.NAME instead of SELECT COUNT(DISTINCT SUBJECT_ID). The JOIN and WHERE conditions are correct, but the projection/aggregation is wrong. This is an error confined to the SELECT clause and does not require changing the overall query structure or logical flow.",
            "error_type": "Local: select clause error"
        },
        "934102ea4b2c1a74fa970bf8adf637c9": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "count the number of patients whose item id is 51256.",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"51256\"",
            "pred_sql": "COUNT(DISTINCT SUBJECT_ID) FROM LAB WHERE ITEMID = 51256",
            "gold_answer": "[['74.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the SELECT keyword and so is syntactically invalid (it simply starts with COUNT(\u2026) without a SELECT). This is a minor, localized syntax mistake in the SELECT clause and does not affect the overall query structure or table relationships. It can be fixed by adding the missing SELECT, so it\u2019s a Local error.",
            "error_type": "Local: syntax error"
        },
        "075dfde202da217a38664c06dfb7fc1d": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the maximum number of days of hospital stay for patients with hypertension but not coronary artery disease/cardiac cath? (Values: HYPERTENSION;RULE OUT CORONARY ARTERY DISEASE\\CARDIAC CATH)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"HYPERTENSION;RULE OUT CORONARY ARTERY DISEASE\\CARDIAC CATH\"",
            "pred_sql": "SELECT MAX(D.DAYS_STAY) AS max_days_stay FROM DEMOGRAPHIC D WHERE D.SUBJECT_ID IN ( SELECT DISTINCT SUBJECT_ID FROM DIAGNOSES WHERE LONG_TITLE LIKE '%hypertension%' ) AND D.SUBJECT_ID NOT IN ( SELECT DISTINCT SUBJECT_ID FROM DIAGNOSES WHERE LONG_TITLE LIKE '%coronary artery disease%' OR LONG_TITLE LIKE '%cardiac cath%' )",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['69.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely restructures the logic by querying the DIAGNOSES table and using SUBJECT_ID-level LIKE filters instead of directly filtering the DEMOGRAPHIC.DIAGNOSIS column for the exact composite value. It spans multiple clauses (subqueries on DIAGNOSES, SUBJECT_ID filtering) and fundamentally changes the intended meaning of the query. This requires restructuring the query flow, not just fixing a local mistake.",
            "error_type": "Global: join/logic error"
        },
        "0709ceb7a5a37d731d4c3fb50129fc8b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "tell me the time of admission and primary disease of patient bessie giordano. (Values: Bessie Giordano)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DEMOGRAPHIC.\"ADMITTIME\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Bessie Giordano\"",
            "pred_sql": "SELECT T1.ADMITTIME, T2.LONG_TITLE FROM DEMOGRAPHIC AS T1 INNER JOIN DIAGNOSES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID AND T1.HADM_ID = T2.HADM_ID WHERE T1.NAME = 'Bessie Giordano'",
            "gold_answer": "[['NEWBORN', '2123-02-24 17:12:00']]",
            "pred_answer": "[['2123-02-24 17:12:00', 'Need for prophylactic vaccination and inoculation against viral hepatitis'], ['2123-02-24 17:12:00', 'Observation for suspected genetic or metabolic condition'], ['2123-02-24 17:12:00', 'Observation for suspected infectious condition'], ['2123-02-24 17:12:00', 'Single liveborn, born in hospital, delivered by cesarean section']]",
            "is_correct": false,
            "reasoning": "The user\u2019s question requests the admission time and the primary disease, which in this schema is stored in the DEMOGRAPHIC.DIAGNOSIS column. The ground truth query selects DEMOGRAPHIC.DIAGNOSIS and DEMOGRAPHIC.ADMITTIME directly. The predicted query instead joins to DIAGNOSES and selects DIAGNOSES.LONG_TITLE, yielding multiple rows of secondary diagnoses rather than the single primary diagnosis field. This is not a simple column reference mistake but a fundamentally incorrect join strategy that changes the query\u2019s logic and intended result. It requires restructuring the query to remove the join and select the correct column from DEMOGRAPHIC, so it is a Global error.",
            "error_type": "Global: join error"
        },
        "9268ab37a2d7fdb001795bef1e30dbe2": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "tell me the number of patients with coronary artery primary disease. (Values: CORONARY ARTERY DISEASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"CORONARY ARTERY DISEASE\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DEMOGRAPHIC WHERE DIAGNOSIS LIKE '%CORONARY ARTERY DISEASE%'",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['7.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s error lies solely in the WHERE clause: it uses a LIKE '%CORONARY ARTERY DISEASE%' pattern match instead of the exact equality comparison required. This mistake broadens the match and inflates the count but does not alter the query\u2019s overall structure, joins, or logic. It can be corrected by replacing LIKE with =, making it a localized fix.",
            "error_type": "Local: incorrect operator in WHERE clause"
        },
        "0cb83c2bc10e573ed3e25a658d8318e4": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what number of patients have been diagnosed with hyperglycemia? (Values: HYPERGLYCEMIA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"HYPERGLYCEMIA\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE LONG_TITLE LIKE '%HYPERGLYCEMIA%'",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table and filters on LONG_TITLE with a LIKE condition, while the ground truth uses the DEMOGRAPHIC table\u2019s DIAGNOSIS column for an exact match. This is not a simple fix within a clause\u2014it fundamentally changes which table is queried and how diagnoses are counted, altering the overall query logic.",
            "error_type": "Global: incorrect table selection"
        },
        "0a7ebbf361bf96cd1fcfb073368908b5": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "find the primary disease of robert clayton. (Values: Robert Clayton)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Robert Clayton\"",
            "pred_sql": "SELECT d.LONG_TITLE FROM DEMOGRAPHIC dem JOIN DIAGNOSES d ON dem.SUBJECT_ID = d.SUBJECT_ID AND dem.HADM_ID = d.HADM_ID WHERE dem.NAME = 'Robert Clayton' GROUP BY d.LONG_TITLE ORDER BY COUNT(*) DESC LIMIT 1",
            "gold_answer": "[['ATRIAL FIBRILLATION\\\\THORACOSCOPIC MAZE PROCEDURE BILATERAL/SDA']]",
            "pred_answer": "[['Personal history of surgery to other organs']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table and aggregates LONG_TITLE via a JOIN and GROUP BY, whereas the ground truth simply selects the DIAGNOSIS column from the DEMOGRAPHIC table. This is not a minor syntax or column reference mistake but a fundamentally different strategy: it joins an unrelated table, groups results, and picks the most frequent LONG_TITLE rather than directly retrieving the DEMOGRAPHIC.DIAGNOSIS. This alters the intended logic and table relationships, requiring a restructuring of the query beyond a single clause fix.",
            "error_type": "Global: join error"
        },
        "8e58c53ed2c7d9da3e3d7bb4d73cfdbe": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "find out mor10l medications type and route of administration. (Values: MOR10L)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"DRUG_TYPE\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"MOR10L\"",
            "pred_sql": "SELECT DISTINCT DRUG_TYPE, ROUTE FROM PRESCRIPTIONS WHERE DRUG = 'MOR10L'",
            "gold_answer": "[['MAIN', 'PO'], ['MAIN', 'PO'], ['MAIN', 'PO/NG']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DRUG column in the WHERE clause instead of the FORMULARY_DRUG_CD column specified by the question. This is a column reference error in a single clause and does not require changing the overall structure or logic of the query.",
            "error_type": "Local: table/column reference error"
        },
        "9b7f5c51ee9fdcfbba316fb48e8e6722": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the drug route and drug code of the drug succinylcoline? (Values: Succinylcholine)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Succinylcholine\"",
            "pred_sql": "SELECT ROUTE, FORMULARY_DRUG_CD FROM PRESCRIPTIONS WHERE DRUG = 'Succinylcoline'",
            "gold_answer": "[['SUCC20I', 'IV']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong literal value 'Succinylcoline' instead of the correct 'Succinylcholine'. This is a localized error in the WHERE clause (a value parsing/literal mismatch) and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "844e56db40ebdffe688da456b2860342": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "Provide me the number of patients born in or before 2179 who have left internal jugular vein thrombosis; left arm edema primary disease. (Values: LEFT INTERNAL JUGULAR VEIN THROMBOSIS;LEFT ARM EDEMA, 2179.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"LEFT INTERNAL JUGULAR VEIN THROMBOSIS;LEFT ARM EDEMA\" AND DEMOGRAPHIC.\"DOD_YEAR\" <= \"2179.0\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D INNER JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.DOB_YEAR <= 2179 AND DI.LONG_TITLE = 'LEFT INTERNAL JUGULAR VEIN THROMBOSIS",
            "gold_answer": "[['1.0']]",
            "pred_answer": "unrecognized token: \"'LEFT INTERNAL JUGULAR VEIN THROMBOSIS\"",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to an unclosed string literal in the WHERE clause (`DI.LONG_TITLE = 'LEFT INTERNAL JUGULAR VEIN THROMBOSIS`), leading to a syntax error. This mistake is confined to a specific part of the query and doesn\u2019t require changing the overall structure or logic.",
            "error_type": "Local: syntax error"
        },
        "0039685df6e3097e7f1e2742a86d106a": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients below 51 years of age are diagnosed with preterm nec 2500+g? (Values: Preterm NEC 2500+g)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"51\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Preterm NEC 2500+g\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.AGE < 51 AND diag.LONG_TITLE = 'Preterm NEC 2500+g'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only differs in a specific part of the WHERE clause: it filters on diag.LONG_TITLE instead of diag.SHORT_TITLE. It also uses an additional SUBJECT_ID join condition (which is equivalent given the schema) and omits quotes around the numeric literal, but these do not change the overall query structure or logical flow. The main mistake is selecting the wrong column for the diagnosis filter, a localized column reference error.",
            "error_type": "Local: column reference error"
        },
        "8647b4ed3acbcdf65b6a3e4a003a5b5b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the number of patients less than 89 years who are suffering from femoral artery thrombosis primaray disease? (Values: FEMORAL ARTERY THROMBOSIS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"FEMORAL ARTERY THROMBOSIS\" AND DEMOGRAPHIC.\"AGE\" < \"89\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.AGE < 89 AND DI.LONG_TITLE = 'FEMORAL ARTERY THROMBOSIS'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with the DIAGNOSES table and filters on DI.LONG_TITLE, whereas the ground truth uses the DEMOGRAPHIC.DIAGNOSIS column to capture the primary disease. This change affects the overall logic of where the diagnosis is sourced and alters the intended meaning of querying primary diagnoses. It is not a simple typo or localized fix but a structural change in table relationships and data source.",
            "error_type": "Global: incorrect join logic"
        },
        "885fc05dd9e2816d96606e43d38a8e31": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the primary disease of the patient name jerry deberry? (Values: Jerry Deberry)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Jerry Deberry\"",
            "pred_sql": "SELECT T2.LONG_TITLE FROM DEMOGRAPHIC AS T1 INNER JOIN DIAGNOSES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID AND T1.HADM_ID = T2.HADM_ID WHERE T1.NAME = 'Jerry Deberry' LIMIT 1",
            "gold_answer": "[['S/P FALL']]",
            "pred_answer": "[['Abdominal pain, other specified site']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC to DIAGNOSES and selects DIAGNOSES.LONG_TITLE instead of directly retrieving the DEMOGRAPHIC.DIAGNOSIS column. This is not a simple syntax or alias mistake but a fundamental change in data source and query logic, mistakenly using the diagnoses table rather than the primary disease field in DEMOGRAPHIC. Fixing this would require restructuring the query to remove the join and select the correct column.",
            "error_type": "Global: join error"
        },
        "9e3324b9e4876efdcca381640e060400": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients are diagnosed with primary disease sepsis and admitted before the year 2187? (Values: SEPSIS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"SEPSIS\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2187\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DIAGNOSES d JOIN DEMOGRAPHIC dem ON d.SUBJECT_ID = dem.SUBJECT_ID AND d.HADM_ID = dem.HADM_ID WHERE d.LONG_TITLE LIKE '%sepsis%' AND dem.ADMITYEAR < 2187",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['8.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by joining the DIAGNOSES table and using a LIKE on LONG_TITLE rather than checking the primary disease column DEMOGRAPHIC.DIAGNOSIS. This changes the scope from primary diagnosis to any recorded diagnosis containing 'sepsis' and requires restructuring the query beyond a simple fix.",
            "error_type": "Global: flawed join strategy"
        },
        "94e979800c12fea24b8722c512dd7405": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients are diagnosed with primary disease bladder cancer/sda? (Values: BLADDER CANCER/SDA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BLADDER CANCER/SDA\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE LONG_TITLE = 'BLADDER CANCER/SDA'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query references the DIAGNOSES table and its LONG_TITLE column, but the question asks for the primary disease stored in the DEMOGRAPHIC table\u2019s DIAGNOSIS column. This is a mistake in table/column selection and does not affect the overall query structure or flow.",
            "error_type": "Local: table/column reference error"
        },
        "9fc4764cc38d024d1682c51ce690b975": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "calculate the maximum age of unmarried male patients. (Values: SINGLE, M)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND DEMOGRAPHIC.\"GENDER\" = \"M\"",
            "pred_sql": "MAX(AGE) FROM DEMOGRAPHIC WHERE MARITAL_STATUS = 'SINGLE' AND GENDER = 'M'",
            "gold_answer": "[['72.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the SELECT keyword, making it syntactically invalid. This is a localized syntax issue in the SELECT clause and does not affect the overall query logic or table relationships.",
            "error_type": "Local: syntax error"
        },
        "0b5fc111d80c2e60438458ddb5979cf1": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "calculate the number of patients with personality disorder nos who had a hospital stay for more than 5 days. (Values: Personality disorder NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"5\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Personality disorder NOS\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE DI.LONG_TITLE = 'Personality disorder NOS' AND D.DAYS_STAY > 5",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the LONG_TITLE column instead of the SHORT_TITLE column to filter for 'Personality disorder NOS'. This is a localized column reference error and does not require restructuring the overall query logic or joins.",
            "error_type": "Local: column reference error"
        },
        "aa3651ed4202392f2f09a7769737f61b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "provide the number of patients less than 83 years of age who were diagnosed with pneumococcal pneumonia. (Values: Pneumococcal pneumonia)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"83\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Pneumococcal pneumonia\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.AGE < 83 AND DI.LONG_TITLE = 'Pneumococcal pneumonia'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the correct tables and applies the age filter correctly, matching the overall structure of the ground truth. The only difference is that it filters on DI.LONG_TITLE instead of DI.SHORT_TITLE, causing it to miss the intended records. This is a localized mistake in the WHERE clause and does not affect the overall query logic or table relationships.",
            "error_type": "Local: column reference error"
        },
        "0d831e485ec42f39a9ea116cfdbc75d8": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "specify primary disease and icd9 code of patient id 18480",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",PROCEDURES.\"ICD9_CODE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"18480\"",
            "pred_sql": "SELECT SHORT_TITLE, ICD9_CODE FROM DIAGNOSES WHERE SUBJECT_ID = 18480 GROUP BY ICD9_CODE",
            "gold_answer": "[['VENTRICULAR TACHYCARDIA', '8949.0']]",
            "pred_answer": "[['Asthma NOS', '49390.0'], ['Await organ transplnt st', 'V4983'], ['CHF NOS', '4280.0'], ['Long-term use anticoagul', 'V5861'], ['Parox ventric tachycard', '4271.0'], ['Polycythemia vera', '2384.0'], ['Prim cardiomyopathy NEC', '4254.0'], ['Sarcoidosis', '135.0'], ['Status autm crd dfbrltr', 'V4502']]",
            "is_correct": false,
            "reasoning": "The predicted query selects from the DIAGNOSES table and groups by ICD9_CODE, whereas the correct query should retrieve the primary disease from the DEMOGRAPHIC table and the ICD9_CODE from the PROCEDURES table via a join on HADM_ID. This mistake affects the overall logic and table relationships, producing completely different results and requiring restructuring the query. Therefore, it is a Global error.",
            "error_type": "Global: wrong table/join usage"
        },
        "a9e960146f15dc78dfdf7778cf79c3ba": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the primary disease of the patient id 3343?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"3343\"",
            "pred_sql": "SELECT SHORT_TITLE, LONG_TITLE FROM DIAGNOSES WHERE SUBJECT_ID = 3343 ORDER BY HADM_ID LIMIT 1",
            "gold_answer": "[['NEWBORN']]",
            "pred_answer": "[['Need prphyl vc vrl hepat', 'Need for prophylactic vaccination and inoculation against viral hepatitis']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls from the DIAGNOSES table (SHORT_TITLE, LONG_TITLE) instead of selecting the DIAGNOSIS column from the DEMOGRAPHIC table, which is where the primary disease field resides. This mistake fundamentally alters the query\u2019s intent by using the wrong table, requiring restructuring of the query rather than a simple fix in one clause.",
            "error_type": "Global: wrong table reference"
        },
        "9e33f7987cb3bdf5b47e8b870d28d06e": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients were diagnosed with urinary incontinence nos? (Values: Urinary incontinence NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Urinary incontinence NOS\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE LONG_TITLE = 'Urinary incontinence NOS'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL follows the correct overall logic\u2014counting distinct patients with the specified diagnosis\u2014but it filters on the wrong column (LONG_TITLE instead of SHORT_TITLE). This is a localized mistake in the WHERE clause and does not require restructuring joins or changing the query\u2019s overall flow.",
            "error_type": "Local: column reference error"
        },
        "9b805afb13a94b6007d685fa0474e38f": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the diagnosis of the diagnosis icd9 code 55221?",
            "gold_sql": "SELECT DIAGNOSES.\"LONG_TITLE\" FROM DIAGNOSES WHERE DIAGNOSES.\"ICD9_CODE\" = \"55221\"",
            "pred_sql": "SELECT SHORT_TITLE, LONG_TITLE FROM DIAGNOSES WHERE ICD9_CODE = '55221'",
            "gold_answer": "[['Incisional ventral hernia with obstruction']]",
            "pred_answer": "[['Obstr incisional hernia', 'Incisional ventral hernia with obstruction']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the SELECT clause: it returns both SHORT_TITLE and LONG_TITLE instead of only LONG_TITLE as required by the question. This is a localized error in the SELECT list and does not affect joins, filters, or overall query structure.",
            "error_type": "Local: select-clause error"
        },
        "1017466aa786d9466aa74af6bd0700ec": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "provide me the admission type and primary disease of patient travis hofman. (Values: Travis Hofman)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Travis Hofman\"",
            "pred_sql": "SELECT D.ADMISSION_TYPE, DIAG.SHORT_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DIAG ON D.SUBJECT_ID = DIAG.SUBJECT_ID AND D.HADM_ID = DIAG.HADM_ID WHERE D.NAME = 'Travis Hofman'",
            "gold_answer": "[['EMERGENCY', 'GASTROINTESTINAL BLEED']]",
            "pred_answer": "[['EMERGENCY', 'AMI inferior wall, init'], ['EMERGENCY', 'Atrial fibrillation'], ['EMERGENCY', 'CHF NOS'], ['EMERGENCY', 'Crnry athrscl natve vssl'], ['EMERGENCY', 'Dvrtclo colon w/o hmrhg'], ['EMERGENCY', 'Gastrointest hemorr NOS'], ['EMERGENCY', 'Malig neo ascend colon'], ['EMERGENCY', 'Pneumonia, organism NOS'], ['EMERGENCY', 'Surg compl-heart']]",
            "is_correct": false,
            "reasoning": "The predicted query joins the DIAGNOSES table and selects DIAGNOSES.SHORT_TITLE, returning all diagnosis records for the patient, instead of using the DEMOGRAPHIC.DIAGNOSIS column (the primary disease). The incorrect table/column reference is a localized mistake in the SELECT and JOIN; it does not require restructuring the overall query logic beyond fixing the referenced table/column.",
            "error_type": "Local: incorrect table/column reference"
        },
        "8769210e7e36fb15bd7f3441ac74704f": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "tell me the admission type and primary disease of subject id 2560.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT D.ADMISSION_TYPE, G.LONG_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS G ON D.SUBJECT_ID = G.SUBJECT_ID AND D.HADM_ID = G.HADM_ID WHERE D.SUBJECT_ID = 2560 LIMIT 1",
            "gold_answer": "[['NEWBORN', 'NEWBORN']]",
            "pred_answer": "[['NEWBORN', 'Congenital pigmentary anomalies of skin']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the DIAGNOSES table and selects LONG_TITLE instead of using the DIAGNOSIS column from the DEMOGRAPHIC table, fundamentally changing the query\u2019s logic. This is not a minor syntax issue but a flawed join strategy that retrieves the wrong field and requires restructuring the query to remove the join and use the correct table/column.",
            "error_type": "Global: incorrect join"
        },
        "a41c0a71e44265cf13545eae050408e0": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "specify the primary disease of james cepeda and where he was admitted (Values: James Cepeda)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_LOCATION\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"James Cepeda\"",
            "pred_sql": "SELECT D2.LONG_TITLE AS Primary_Disease, D1.ADMISSION_LOCATION FROM DEMOGRAPHIC AS D1 JOIN DIAGNOSES AS D2 ON D1.SUBJECT_ID = D2.SUBJECT_ID AND D1.HADM_ID = D2.HADM_ID WHERE D1.NAME = 'James Cepeda' LIMIT 1",
            "gold_answer": "[['PHYS REFERRAL/NORMAL DELI', 'CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT WITH MVR; ? MAZE']]",
            "pred_answer": "[['Atrial fibrillation', 'PHYS REFERRAL/NORMAL DELI']]",
            "is_correct": false,
            "reasoning": "The predicted SQL still retrieves James Cepeda\u2019s admission location and links to a diagnosis table, but it selects DIAGNOSES.LONG_TITLE instead of the DEMOGRAPHIC.DIAGNOSIS column that stores the primary disease. The join condition itself is valid, and the overall intent (getting admission location and a disease field) remains unchanged. Only the column reference for the disease is incorrect, which can be fixed locally without altering the query\u2019s structure or join logic.",
            "error_type": "Local: column reference error"
        },
        "07dd887d78325747ddf8d7f6a4cc952b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "For how long the patients admitted to clinic referral (premature) stayed in the hospital? (Values: CLINIC REFERRAL/PREMATURE)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"CLINIC REFERRAL/PREMATURE\"",
            "pred_sql": "SELECT DAYS_STAY FROM DEMOGRAPHIC WHERE ADMISSION_LOCATION = 'CLINIC REFERRAL/PREMATURE'",
            "gold_answer": "[['30.0']]",
            "pred_answer": "[['0.0'], ['1.0'], ['11.0'], ['11.0'], ['15.0'], ['16.0'], ['2.0'], ['2.0'], ['2.0'], ['20.0'], ['23.0'], ['30.0'], ['4.0'], ['4.0'], ['4.0'], ['4.0'], ['5.0'], ['6.0'], ['7.0'], ['7.0'], ['8.0'], ['9.0']]",
            "is_correct": false,
            "reasoning": "The predicted query selects all DAYS_STAY values instead of computing the maximum as required by the question. The mistake lies solely in the SELECT clause (omitting the MAX aggregation) without altering joins, tables, or overall structure.",
            "error_type": "Local: missing-aggregation"
        },
        "0feb6c94275dae4ee5dc08799c8e771b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many male patients had guillain barre syndrome as their primary disease? (Values: M, GUILLAIN BARRE SYNDROME)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"GUILLAIN BARRE SYNDROME\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.GENDER = 'M' AND diag.LONG_TITLE = 'GUILLAIN BARRE SYNDROME'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the source of the disease information by joining to the DIAGNOSES table and filtering on diag.LONG_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column which represents the primary disease. This alters the overall logic of the query\u2014shifting from primary diagnosis in DEMOGRAPHIC to any occurrence in DIAGNOSES\u2014and thus cannot be fixed by a small local tweak.",
            "error_type": "Global: incorrect table join altering query logic"
        },
        "a601dcef90267006aea6385d22fd36d3": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "give the number of patients whose procedure short title is cardiac rhythm conv nec. (Values: Cardiac rhythm conv NEC)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"SHORT_TITLE\" = \"Cardiac rhythm conv NEC\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PROCEDURES WHERE SHORT_TITLE = 'Cardiac rhythm conv nec'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the WHERE clause literal: it uses 'Cardiac rhythm conv nec' (lowercase 'nec') instead of 'Cardiac rhythm conv NEC'. In SQLite, string comparisons are case-sensitive by default (BINARY collation), so the predicted query fails to match the intended rows. This is a localized issue in the WHERE clause value, not affecting the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "88918368bee11a289ddd9a04f1958b6b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "get me the number of elective hospital admission patients who had coronary artery primary disease. (Values: ELECTIVE, CORONARY ARTERY DISEASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"ELECTIVE\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"CORONARY ARTERY DISEASE\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE ADMISSION_TYPE = 'ELECTIVE' AND DIAGNOSIS LIKE '%CORONARY ARTERY DISEASE%'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['24.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with the DIAGNOSES table and uses COUNT(*) instead of COUNT(DISTINCT SUBJECT_ID), fundamentally altering the query\u2019s logic. The original question only requires filtering on the DEMOGRAPHIC table\u2019s DIAGNOSIS column and counting distinct patients. By joining DIAGNOSES, the predicted query creates extra row duplication and changes the source of the diagnosis filter, leading to an incorrect result. Fixing this requires removing the join and restoring the distinct count, which is more than a localized tweak.",
            "error_type": "Global: incorrect join strategy"
        },
        "9de792fe2a88120b9c4b104e3ef20a27": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "mention the disease that jerry deberry is primarily suffering from and his date of birth as well. (Values: Jerry Deberry)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DOB\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Jerry Deberry\"",
            "pred_sql": "SELECT D.DOB, DI.LONG_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.NAME = 'Jerry Deberry' ORDER BY DI.HADM_ID ASC LIMIT 1",
            "gold_answer": "[['2043-02-28 00:00:00', 'S/P FALL']]",
            "pred_answer": "[['2043-02-28 00:00:00', 'Abdominal pain, other specified site']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with the DIAGNOSES table and selects LONG_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column as intended. This changes the overall logic of the query\u2014bringing in unrelated diagnosis records and fundamentally altering the result. Fixing this requires removing the JOIN and reverting to the DEMOGRAPHIC table\u2019s DIAGNOSIS column, which is a structural change beyond a simple typo or local clause correction.",
            "error_type": "Global: join error"
        },
        "a772fe669af1ccf30732303ee43d3e8e": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "What is the number of patients with abdominal pain as their primary disease who were discharged to snf? (Values: SNF, ABDOMINAL PAIN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"SNF\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"ABDOMINAL PAIN\"",
            "pred_sql": "SELECT COUNT(DISTINCT D1.SUBJECT_ID) FROM DIAGNOSES AS D1 INNER JOIN DEMOGRAPHIC AS D2 ON D1.HADM_ID = D2.HADM_ID WHERE D1.LONG_TITLE LIKE '%abdominal pain%' AND D2.DISCHARGE_LOCATION = 'SNF'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by pulling data from the DIAGNOSES table and using a LIKE filter on LONG_TITLE instead of using the primary DIAGNOSIS field in the DEMOGRAPHIC table. It also introduces a join that was not in the original intent, thereby altering the meaning of the question (primary disease vs any recorded diagnosis). This error spans multiple clauses and changes the overall structure and intended relationships of the query.",
            "error_type": "Global: incorrect table and join logic"
        },
        "034ba91dc253e7717c3102b2a97d2f49": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what number of patients with abdominal abscess had drug type as main? (Values: ABDOMINAL ABSCESS, MAIN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"ABDOMINAL ABSCESS\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"MAIN\"",
            "pred_sql": "SELECT COUNT(DISTINCT P.SUBJECT_ID) FROM DIAGNOSES AS D JOIN PRESCRIPTIONS AS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.LONG_TITLE = 'Abdominal abscess' AND P.DRUG_TYPE = 'MAIN'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table and filters on D.LONG_TITLE rather than using the DEMOGRAPHIC table\u2019s DIAGNOSIS field. This changes the table relationship and the source of the diagnosis filter, leading to a fundamentally different result (0 vs 1). This is not a simple typo or localized mistake but a structural error in how tables are joined and which table provides the diagnosis data. Therefore, it is a global error.",
            "error_type": "Global: incorrect join"
        },
        "a75e1aa8b393a93c5a51986d68dc15a6": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "Give the number of patients whose primary disease is cerebral aneurysm/sda and were admitted before the year 2200. (Values: CEREBRAL ANEURYSM/SDA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"CEREBRAL ANEURYSM/SDA\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2200\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DIAGNOSES d JOIN DEMOGRAPHIC dem ON d.SUBJECT_ID = dem.SUBJECT_ID AND d.HADM_ID = dem.HADM_ID WHERE d.LONG_TITLE = 'CEREBRAL ANEURYSM/SDA' AND dem.ADMITYEAR < 2200",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The user\u2019s intent is to count patients whose primary disease (stored in DEMOGRAPHIC.DIAGNOSIS) equals \u201cCEREBRAL ANEURYSM/SDA\u201d and were admitted before 2200. The ground truth query reads directly from the DEMOGRAPHIC table. The predicted query instead joins to the DIAGNOSES table on SUBJECT_ID and HADM_ID and filters on DIAGNOSES.LONG_TITLE. This not only uses the wrong source for \u201cprimary disease\u201d but also changes the join logic and result set fundamentally. It is not a simple typo or missing alias; it alters the overall query structure and relationships between tables. Thus this is a global logical/structural error (using an incorrect table and join strategy) rather than a local fixable mistake.",
            "error_type": "Global: incorrect join"
        },
        "95729f0dfd914c9590bd8c1e29b1c9b9": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "What is the number of patients less than 85 years who have liver transplant as their primary disease? (Values: LIVER TRANSPLANT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"LIVER TRANSPLANT\" AND DEMOGRAPHIC.\"AGE\" < \"85\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.AGE < 85 AND DI.LONG_TITLE LIKE '%LIVER TRANSPLANT%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by joining to the DIAGNOSES table and filtering on DIAGNOSES.LONG_TITLE, rather than using the primary disease stored in DEMOGRAPHIC.DIAGNOSIS. This incorrect join and table choice alters the meaning of the query and cannot be fixed by a small tweak in a single clause\u2014it requires restructuring the query to use the DEMOGRAPHIC table\u2019s DIAGNOSIS column directly.",
            "error_type": "Global: incorrect join/table selection"
        },
        "9c185af46be35cab5c7f0bc760c202b5": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients have been diagnosed with unspecified pseudomonas infection? (Values: Pseudomonas infect NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Pseudomonas infect NOS\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE LONG_TITLE = 'Pseudomonas infect NOS'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on DIAGNOSES.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE. The ground truth uses SHORT_TITLE = 'Pseudomonas infect NOS' to match the exact wording; using LONG_TITLE yields no rows. Everything else (counting distinct SUBJECT_ID and table usage) is structurally fine. This is a localized mistake in the column reference within the WHERE clause.",
            "error_type": "Local: incorrect column reference"
        },
        "04f6025046849b72bcf92ac808479159": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients were discharged to home after skin closure nec? (Values: HOME, Skin closure NEC)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"HOME\" AND PROCEDURES.\"SHORT_TITLE\" = \"Skin closure NEC\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) FROM DEMOGRAPHIC JOIN PROCEDURES ON DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID AND DEMOGRAPHIC.SUBJECT_ID = PROCEDURES.SUBJECT_ID WHERE DEMOGRAPHIC.DISCHARGE_LOCATION = 'HOME' AND PROCEDURES.LONG_TITLE = 'Skin closure NEC'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses PROCEDURES.LONG_TITLE instead of PROCEDURES.SHORT_TITLE to filter on 'Skin closure NEC', which is a wrong column choice. This misreference causes the mismatch in results and can be fixed by changing LONG_TITLE to SHORT_TITLE without altering the overall query structure or join logic. Therefore, it is a localized column reference error.",
            "error_type": "Local: column reference error"
        },
        "0c99cbdfd068bcf1ca99f14d7e69b1fc": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "What is average age of patients that survived and had a primary disease of COPD exacerbation? (Values: 0, COPD EXACERBATION)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"COPD EXACERBATION\"",
            "pred_sql": "SELECT AVG(AGE) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.EXPIRE_FLAG = 0 AND DI.LONG_TITLE = 'COPD Exacerbation'",
            "gold_answer": "[['50.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic by introducing a JOIN to the DIAGNOSES table and filtering on DIAGNOSES.LONG_TITLE instead of using the DEMOGRAPHIC.DIAGNOSIS column as the primary disease. This isn\u2019t a minor syntax or reference mistake but a structural/semantic mistake that alters the intended meaning of the query (joining an extra table and filtering on the wrong column), resulting in no matching rows. This requires a significant logical restructuring to correct, so it is a Global error.",
            "error_type": "Global"
        }
    }
}