{
    "codes": {
        "8c6bcade1f9fdf7786a7a64151da5b73": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients uses the drug code hepa10syr? (Values: HEPA10SYR)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"HEPA10SYR\"",
            "pred_sql": "SELECT count(hadm_id) FROM prescriptions WHERE formulary_drug_cd = 'HEPA10SYR'",
            "gold_answer": "[['15.0']]",
            "pred_answer": "[['24.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply counts rows of PRESCRIPTIONS (using HADM_ID) rather than counting distinct patients (SUBJECT_ID) as required. Fixing this is not a minor syntax or reference tweak; it requires changing the aggregation and adding (or using) the correct patient identifier, and potentially joining DEMOGRAPHIC. This impacts the core logic and intended meaning of the query.",
            "error_type": "Global: incorrect aggregation and missing DISTINCT on SUBJECT_ID"
        },
        "04f70250fa8b8c71fcf64afe5f04935c": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find .the number of patients who are female and were diagnosed with transient ischemic attack? (Values: F, TRANSIENT ISCHEMIC ATTACK)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"TRANSIENT ISCHEMIC ATTACK\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.gender = 'F' AND diagnoses.short_title = 'TRANSIENT ISCHEMIC ATTACK'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN to the DIAGNOSES table and filters on diagnoses.short_title, whereas the correct diagnosis value resides in DEMOGRAPHIC.DIAGNOSIS. This join changes the query\u2019s logic and results, as it looks for the diagnosis in the wrong table rather than simply filtering DEMOGRAPHIC.DIAGNOSIS. Fixing this would require removing the join and correcting the column reference, which is a structural change affecting overall query logic rather than a minor clause tweak.",
            "error_type": "Global: incorrect join"
        },
        "acc06fbcc04ac4f4e727cebaf4062a9f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Tell me the type of admission and status of death of the patient with patient id 10317.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DEMOGRAPHIC.\"EXPIRE_FLAG\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"10317\"",
            "pred_sql": "SELECT admission_type, discharge_location FROM demographic WHERE subject_id = 10317",
            "gold_answer": "[['EMERGENCY', '0.0']]",
            "pred_answer": "[['EMERGENCY', 'HOME']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects the wrong second column (discharge_location) instead of the intended expire_flag (status of death). This is a column reference error localized within the SELECT clause and does not require restructuring the query or altering joins.",
            "error_type": "Local: column reference error"
        },
        "82fe69e357204798f6fa15a9b66761ff": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients younger than 70 years have got the cerebrospinal fluid (csf) lab test done? (Values: Cerebrospinal Fluid (CSF))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"70\" AND LAB.\"FLUID\" = \"Cerebrospinal Fluid (CSF)\"",
            "pred_sql": "SELECT count(DISTINCT lab.subject_id) FROM lab INNER JOIN demographic ON lab.subject_id = demographic.subject_id WHERE demographic.age < 70 AND lab.label = 'Cerebrospinal Fluid (CSF)'",
            "gold_answer": "[['6.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on lab.label instead of lab.fluid, so it never matches \u2018Cerebrospinal Fluid (CSF)\u2019 and returns zero. This is a localized mistake in the WHERE clause (wrong column reference), and does not require changing the overall query structure or logic. Hence it is a local error.",
            "error_type": "Local: column reference error"
        },
        "8620eaa43239b46cc9bde99b6286773a": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "For how many days Stephanie Sechan stayed in the hospital? (Values: Stephanie Suchan)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Stephanie Suchan\"",
            "pred_sql": "SELECT days_stay FROM demographic WHERE name = 'Stephanie Sechan'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL has the correct structure and references the correct table and column, but the literal in the WHERE clause is misspelled (\u2018Stephanie Sechan\u2019 instead of the required \u2018Stephanie Suchan\u2019). This is a localized error in the value rather than a change in the overall query logic or structure.",
            "error_type": "Local: value parsing error"
        },
        "80d0e73a185289625d60921e65b0f2d8": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Among the patients admitted before the year 2172, how many had contact dermatitis and other eczema with an unspecified cause? (Values: Contact dermatitis and other eczema, unspecified cause)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2172\" AND DIAGNOSES.\"LONG_TITLE\" = \"Contact dermatitis and other eczema, unspecified cause\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.admittime < '2172-01-01' AND diagnoses.short_title LIKE 'Contact dermatitis and other eczema, unspecified cause'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s overall structure remains the same\u2014it performs a JOIN, applies filters, and counts patients. However, it uses the wrong join key (demographic.subject_id instead of hadm_id), the wrong date column (admittime instead of admityear), and the wrong diagnosis column (short_title with LIKE instead of long_title with equality). These are localized reference errors in the JOIN and WHERE clauses that can be fixed without restructuring the entire query logic.",
            "error_type": "Local: join/column reference error"
        },
        "8a8314389b88631016bc7ce9cefe7351": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the average age of male patients with medicaid insurance? (Values: M, Medicaid)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND DEMOGRAPHIC.\"INSURANCE\" = \"Medicaid\"",
            "pred_sql": "SELECT avg(demographic.age) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE demographic.insurance = 'Medicaid' AND demographic.gender = 'M'",
            "gold_answer": "[['20.833']]",
            "pred_answer": "[['47.884']]",
            "is_correct": false,
            "reasoning": "The predicted query introduces an INNER JOIN with the PRESCRIPTIONS table on SUBJECT_ID, even though the question only asks for demographics. This join filters out male Medicaid patients who have no prescriptions and duplicates rows for those with multiple prescriptions, skewing the average age. Fixing this requires removing the join and changing the overall query structure, which affects the query\u2019s logic across clauses. Hence, this is a global error.",
            "error_type": "Global: join error"
        },
        "aa3c4803c0aca1c991037d46ab8891a2": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "for patient sheila riley, specify the duration of hospital stay (Values: Sheila Riley)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Sheila Riley\"",
            "pred_sql": "SELECT sum(demographic.days_stay) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.name = 'Sheila Riley'",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['56.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN to the DIAGNOSES table (and sums DAYS_STAY), which duplicates the DAYS_STAY row for each diagnosis record and then sums them, rather than just selecting the single DAYS_STAY value. This join error changes the overall logic and requires restructuring beyond a single clause. Therefore it is a global error.",
            "error_type": "Global: join error"
        },
        "b9b1e272af0cd9a909d5b78a51ebf792": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many patients with drug code beth5 died in or before the year 2122? (Values: 2122.0, BETH5)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DOD_YEAR\" <= \"2122.0\" AND PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"BETH5\"",
            "pred_sql": "SELECT count(DISTINCT demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE prescriptions.drug = 'BETH5' AND strftime('%Y', demographic.dod) < '2122'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL makes several mistakes confined to specific clauses rather than altering the overall query structure. 1) It uses prescriptions.drug instead of PRESCRIPTIONS.FORMULARY_DRUG_CD to filter on the drug code BETH5 (column reference error). 2) It applies strftime('%Y', demographic.dod) < '2122' instead of checking DOD_YEAR <= 2122.0 and also omits the equality, excluding deaths in 2122 (where\u2010clause error). 3) It joins on demographic.subject_id = prescriptions.subject_id instead of hadm_id, but this is still a valid join and doesn\u2019t change the overall structure. All errors are localized to SELECT/WHERE clauses and can be fixed without restructuring the query.",
            "error_type": "Local: column reference error"
        }
    },
    "dinsql": {
        "8434959e3330dc710ffe491da1f8652a": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "familiarize me with the death status and primary disease of the patient joseph dillman. (Values: Joseph Dillman)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"EXPIRE_FLAG\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Joseph Dillman\"",
            "pred_sql": "SELECT D.EXPIRE_FLAG, DI.LONG_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.NAME = 'Joseph Dillman'",
            "gold_answer": "[['1.0', 'PNEUMONIA']]",
            "pred_answer": "[['1.0', 'Acute kidney failure, unspecified'], ['1.0', 'Acute respiratory failure'], ['1.0', 'Acute venous embolism and thrombosis of deep vessels of proximal lower extremity'], ['1.0', 'Aftercare for healing traumatic fracture of lower leg'], ['1.0', 'Anxiety state, unspecified'], ['1.0', 'Asthma, unspecified type, unspecified'], ['1.0', 'Constipation, unspecified'], ['1.0', 'Depressive disorder, not elsewhere classified'], ['1.0', 'Diaphragmatic hernia without mention of obstruction or gangrene'], ['1.0', 'Enthesopathy of hip region'], ['1.0', 'Neurogenic bladder NOS'], ['1.0', 'Other chronic pain'], ['1.0', 'Other pulmonary insufficiency, not elsewhere classified'], ['1.0', 'Other specified disorders of esophagus'], ['1.0', 'Paraplegia'], ['1.0', 'Personal history, urinary (tract) infection'], ['1.0', 'Pneumonitis due to inhalation of food or vomitus'], ['1.0', 'Polyp of nasal cavity'], ['1.0', 'Postinflammatory pulmonary fibrosis'], ['1.0', 'Pressure ulcer, buttock'], ['1.0', 'Pressure ulcer, hip'], ['1.0', 'Pressure ulcer, lower back'], ['1.0', 'Pressure ulcer, stage I'], ['1.0', 'Pressure ulcer, stage IV'], ['1.0', 'Pressure ulcer, unstageable'], ['1.0', 'Unspecified osteomyelitis, other specified sites'], ['1.0', 'Unspecified protein-calorie malnutrition']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with the DIAGNOSES table and selects DI.LONG_TITLE, thereby returning all diagnosis records for Joseph Dillman rather than the single primary disease stored in the DEMOGRAPHIC.DIAGNOSIS column. This changes the overall query logic and structure, producing multiple rows and incorrect results instead of the intended one-row result. The error is not a minor typo or syntax issue; it fundamentally alters the query\u2019s meaning by joining the wrong table and selecting the wrong field.",
            "error_type": "Global: incorrect join logic"
        },
        "a6cecf07aa5066f7bc97d4afef5e4767": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the minimum days for which patients with brain mass; intracranial hemorrhage were hospitalized? (Values: BRAIN MASS;INTRACRANIAL HEMORRHAGE)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BRAIN MASS;INTRACRANIAL HEMORRHAGE\"",
            "pred_sql": "SELECT MIN(D.DAYS_STAY) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.HADM_ID = DI.HADM_ID WHERE DI.LONG_TITLE = 'BRAIN MASS",
            "gold_answer": "[['4.0']]",
            "pred_answer": "unrecognized token: \"'BRAIN MASS\"",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to parse due to a malformed string literal in the WHERE clause (missing closing quote and incomplete value) leading to an unrecognized token error. This is a localized syntax/value parsing mistake that does not require restructuring the overall query logic. It can be fixed by correctly quoting and completing the literal.",
            "error_type": "Local: value parsing error"
        },
        "abd26d62b8557ca1e9111eb9ffe88c4a": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many male patients had the lab test name potassium, whole blood? (Values: M, Potassium, Whole Blood)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND LAB.\"LABEL\" = \"Potassium, Whole Blood\"",
            "pred_sql": "SELECT COUNT(DISTINCT L.SUBJECT_ID) FROM LAB AS L JOIN DEMOGRAPHIC AS D ON L.SUBJECT_ID = D.SUBJECT_ID AND L.HADM_ID = D.HADM_ID WHERE D.GENDER = 'M' AND L.LABEL = 'Potassium' AND L.FLUID = 'Whole Blood'",
            "gold_answer": "[['24.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly splits the lab test name into two conditions (LABEL = 'Potassium' AND FLUID = 'Whole Blood') instead of matching the single LABEL value 'Potassium, Whole Blood'. This is a mistake confined to the WHERE clause value parsing and does not change the overall query structure or join logic.",
            "error_type": "Local: value parsing error"
        },
        "977ecca508723a7bb321e261d6c47f72": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the diagnosis of Tracy Farmer?",
            "gold_sql": "SELECT DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Tracy Farmer\"",
            "pred_sql": "SELECT DISTINCT d.LONG_TITLE FROM DEMOGRAPHIC dem JOIN DIAGNOSES d ON dem.SUBJECT_ID = d.SUBJECT_ID AND dem.HADM_ID = d.HADM_ID WHERE dem.NAME = 'Tracy Farmer'",
            "gold_answer": "[['Infant diabet mother syn'], ['Need prphyl vc vrl hepat'], ['Single lb in-hosp w cs'], ['Ventricular sept defect']]",
            "pred_answer": "[['Need for prophylactic vaccination and inoculation against viral hepatitis'], ['Single liveborn, born in hospital, delivered by cesarean section'], ['Syndrome of \"infant of a diabetic mother\"'], ['Ventricular septal defect']]",
            "is_correct": false,
            "reasoning": "The predicted query differs only in the SELECT clause: it retrieves LONG_TITLE instead of SHORT_TITLE. The JOIN and WHERE conditions correctly mirror the ground truth logic. Since the error is limited to the chosen column in SELECT and does not affect the overall query structure or table relationships, it is a local error.",
            "error_type": "Local: incorrect column reference"
        }
    },
    "deepseek": {
        "bba6972bfb39756cdb923e4518ec3895": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients whose discharge location is short term hospital and who were admitted to the hospital before year 2182. (Values: SHORT TERM HOSPITAL)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"SHORT TERM HOSPITAL\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2182\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE DISCHARGE_LOCATION = 'SHORT TERM HOSPITAL' AND EXTRACT(YEAR FROM ADMITTIME) < 2182;",
            "gold_answer": "[['2.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to the use of the EXTRACT(YEAR FROM ADMITTIME) function, which is not supported by SQLite and causes a syntax error in the WHERE clause. This is a localized issue within the WHERE clause that does not affect the overall query structure or logic\u2014only the function call needs correction (e.g., using ADMITYEAR or a supported date function). Therefore, it is a local error.",
            "error_type": "Local: syntax error"
        },
        "8df86af35155206db971854cf41e6139": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "specify the icd9 code for patient id 18480",
            "gold_sql": "SELECT PROCEDURES.\"SHORT_TITLE\" FROM PROCEDURES WHERE PROCEDURES.\"SUBJECT_ID\" = \"18480\"",
            "pred_sql": "SELECT ICD9_CODE FROM DIAGNOSES WHERE SUBJECT_ID = 18480;",
            "gold_answer": "[['AICD check']]",
            "pred_answer": "[['135.0'], ['2384.0'], ['4254.0'], ['4271.0'], ['4280.0'], ['49390.0'], ['V4502'], ['V4983'], ['V5861']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls ICD9_CODEs from the DIAGNOSES table, whereas the ground truth and the question context require retrieving data from the PROCEDURES table. This is not a simple typo or minor syntax fix; it fundamentally alters the source of the data and thus the overall logic of the query. Correcting it requires changing the table (and column) in the FROM clause, which is a structural change affecting the query\u2019s meaning.",
            "error_type": "Global: incorrect table selection"
        }
    },
    "llm2sql": {
        "8bec95e32428f06d6b8734cae16910d0": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "tell me the insurance and primary disease of james sloan. (Values: James Sloan)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"INSURANCE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"James Sloan\"",
            "pred_sql": "SELECT D.INSURANCE, DIAG.SHORT_TITLE FROM DEMOGRAPHIC D JOIN DIAGNOSES DIAG ON D.HADM_ID = DIAG.HADM_ID WHERE D.NAME = 'James Sloan';",
            "gold_answer": "[['Private', 'NEWBORN']]",
            "pred_answer": "[['Private', 'Laryngotrach anomaly NEC'], ['Private', 'Need prphyl vc vrl hepat'], ['Private', 'Resp prob after brth NEC'], ['Private', 'Routin child health exam'], ['Private', 'Routine circumcision'], ['Private', 'Single lb in-hosp w/o cs']]",
            "is_correct": false,
            "reasoning": "The ground truth query retrieves the 'DIAGNOSIS' directly from the DEMOGRAPHIC table, which holds the primary disease. The predicted query instead joins to the DIAGNOSES table and selects SHORT_TITLE, returning multiple diagnosis rows per admission. This incorrect join fundamentally alters the intended result (it changes both the source of the disease information and the result cardinality), so it is not a localized fix in a single clause but a flaw in the overall query structure and logic.",
            "error_type": "Global: incorrect join"
        },
        "804d61ea251779e8ec9d8f758e134a6d": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients aged below 45 years had celo-vessicle fistula? (Values: CELO-VESSICLE FISTULA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"CELO-VESSICLE FISTULA\" AND DEMOGRAPHIC.\"AGE\" < \"45\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID nWHERE DEMOGRAPHIC.AGE < 45 AND DIAGNOSES.LONG_TITLE = 'Celo-vessicle fistula';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is failing due to a syntax error in the SELECT/FROM clause: the alias \u2018n\u2019 is run together with the FROM keyword as \u2018nFROM\u2019, causing the parser to choke at \u2018DEMOGRAPHIC\u2019. This is a localized issue in the query\u2019s syntax and can be fixed by inserting the missing space or correcting the alias placement. The overall query structure and intended logic remain intact, so this is a local error.",
            "error_type": "Local: syntax error"
        }
    },
    "macsql": {
        "8eb82af011a6e0a42adfe3d93215d014": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "give me the marital status and date of birth of subject id 21796.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DEMOGRAPHIC.\"DOB\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"21796\"",
            "pred_sql": "SELECT `MARITAL_STATUS`, `DOB` FROM `DEMOGRAPHIC` WHERE `SUBJECT_ID` = 21796 AND `MARITAL_STATUS` IS NOT NULL AND `DOB` IS NOT NULL",
            "gold_answer": "[['None', '2197-01-15 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs locally by adding extra WHERE conditions `AND MARITAL_STATUS IS NOT NULL AND DOB IS NOT NULL`. These additional filters are not required by the question and cause no rows to be returned, but they do not alter the overall query structure or table relationships. This is therefore a localized mistake in the WHERE clause.",
            "error_type": "Local: where-clause error"
        },
        "979a734ab882143afcde05bf1b13cbd9": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many of the patients with bradycardia had abnormal lab test results? (Values: BRADYCARDIA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"BRADYCARDIA\" AND LAB.\"FLAG\" = \"abnormal\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `SHORT_TITLE` = 'Bradycardia' OR `LONG_TITLE` LIKE '%Bradycardia%' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM LAB WHERE `FLAG` = 'abnormal' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table (with SHORT_TITLE/LONG_TITLE filters) instead of the DEMOGRAPHIC table\u2019s DIAGNOSIS column as specified by the question and assumptions. This is a wrong table/column reference in the FROM clause and can be fixed by referencing the correct table/column; it does not require restructuring the entire query logic or joins beyond that correction.",
            "error_type": "Local: table reference error"
        },
        "86d07eae693396f6aead63e691ef2627": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "which language did patient anna johnson speak? (Values: Anna Johnson)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"LANGUAGE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Anna Johnson\"",
            "pred_sql": "SELECT LANGUAGE FROM DEMOGRAPHIC WHERE NAME = 'Anna Johnson' AND LANGUAGE IS NOT NULL",
            "gold_answer": "[['None']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds an extra WHERE condition \u201cAND LANGUAGE IS NOT NULL,\u201d which filters out the row that the ground truth returns (where LANGUAGE is NULL). This mistake is confined to the WHERE clause and does not require changing the overall query structure, joins, or logic. Hence, it is a localized error in the filtering condition.",
            "error_type": "Local: where-condition error"
        }
    }
}