{
    "codes": {
        "804ca29d119c7ae443c69c0e0f4bee74": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what number of patients under the age of 51 were given the drug metoprolol tartrate? (Values: Metoprolol Tartrate)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"51\" AND PRESCRIPTIONS.\"DRUG\" = \"Metoprolol Tartrate\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE prescriptions.drug = 'Metoprolol Tartrate' AND demographic.age < 51",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['16.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC and PRESCRIPTIONS on SUBJECT_ID instead of HADM_ID, and it omits COUNT(DISTINCT ...) so it counts duplicate patient entries across multiple admissions. This wrong join key and missing DISTINCT fundamentally alters the patient-level aggregation and the logical relationship between tables, leading to an inflated count. Fixing this requires changing the join condition and aggregation approach, not just a local tweak.",
            "error_type": "Global: incorrect join condition"
        },
        "a6a732ecc77a93f6fac9453d69cce42b": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the insurance and diagnosis icd9 code of subject id 21796?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"INSURANCE\",DIAGNOSES.\"ICD9_CODE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"21796\"",
            "pred_sql": "SELECT DISTINCT demographic.insurance, demographic.diagnosis FROM demographic INNER JOIN diagnoses ON demographic.hadm_id = diagnoses.hadm_id WHERE demographic.subject_id = 21796",
            "gold_answer": "[['Medicaid', '75261.0'], ['Medicaid', '76517.0'], ['Medicaid', 'V3101']]",
            "pred_answer": "[['Medicaid', 'NEWBORN'], ['Medicaid', 'NEWBORN'], ['Medicaid', 'NEWBORN']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects demographic.diagnosis instead of diagnoses.icd9_code. The JOIN and filter are correct, but the wrong column is chosen for the ICD9 code, leading to incorrect values. This is a localized column reference error and does not affect the overall query structure or logic.",
            "error_type": "Local: incorrect-column-reference"
        },
        "827f8123c25ca3f714d96faaa00d1b84": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many pateints aged below 77 had undergone the procedure with icd9 code 4632?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"77\" AND PROCEDURES.\"ICD9_CODE\" = \"4632\"",
            "pred_sql": "SELECT count(procedures.subject_id) FROM procedures INNER JOIN demographic ON procedures.subject_id = demographic.subject_id WHERE procedures.icd9_code = 4632 AND demographic.age < 77",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses an incorrect join condition and aggregation, joining PROCEDURES to DEMOGRAPHIC on SUBJECT_ID rather than HADM_ID, and then counting all procedure records rather than distinct patients. This fundamentally alters the query logic and leads to overcounting. Fixing it requires changing the join key and aggregation approach, which affects multiple clauses and the overall structure.",
            "error_type": "Global: incorrect join key and aggregation"
        },
        "81b5be8c3f3f6d80a55a59f385e43659": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients whose drug route is tp and admission year is before 2167? (Values: TP)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2167\" AND PRESCRIPTIONS.\"ROUTE\" = \"TP\"",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE route = 'TP' AND strftime('%Y', admittime) < '2167'",
            "gold_answer": "[['17.0']]",
            "pred_answer": "no such column: admittime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'admittime' in the PRESCRIPTIONS table and fails to join with DEMOGRAPHIC to filter by admission year. The core mistake is a column reference error (using a column that doesn\u2019t exist in the referenced table), which is localized to the WHERE clause and does not require restructuring the entire query logic.",
            "error_type": "Local: table/column reference error"
        },
        "94b7c34d08a1a683e0a44fc75d03d1ed": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the discharge location and icu stay id of subject id 42820?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DISCHARGE_LOCATION\",PRESCRIPTIONS.\"ICUSTAY_ID\" FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"42820\"",
            "pred_sql": "SELECT discharge_location, icustay_id FROM demographic WHERE subject_id = 42820",
            "gold_answer": "[['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', '203692.0'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None'], ['HOME', 'None']]",
            "pred_answer": "no such column: icustay_id",
            "is_correct": false,
            "reasoning": "The predicted SQL tries to select icustay_id directly from the DEMOGRAPHIC table without joining PRESCRIPTIONS, but icustay_id is not a column in DEMOGRAPHIC. This is a column reference error in the SELECT clause that can be fixed by adding the correct JOIN; it does not require restructuring the entire query logic.",
            "error_type": "Local: table/column reference error"
        },
        "0e6c9aa4ec70962176e94cd1b787f404": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "get me the number of private insurance patients who had phys referral/normal deli admission. (Values: Private, PHYS REFERRAL/NORMAL DELI)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Private\" AND DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"PHYS REFERRAL/NORMAL DELI\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN diagnoses ON demographic.subject_id = diagnoses.subject_id WHERE demographic.insurance = 'Private' AND diagnoses.short_title = 'PHYSIO/ENT REFERRAL' AND diagnoses.icd9_code = 3140 AND demographic.admission_type = 'PHYS REFERRAL/NORMAL DELI'",
            "gold_answer": "[['27.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN to the DIAGNOSES table and applies filters on diagnoses.short_title and diagnoses.icd9_code, none of which are relevant to counting patients by insurance and admission location. It also filters demographic.admission_type instead of demographic.admission_location. These mistakes span the FROM and WHERE clauses, fundamentally altering the query\u2019s logic and requiring restructuring (removing the join, changing columns, and dropping irrelevant conditions).",
            "error_type": "Global: incorrect join and filter logic"
        },
        "93e5a26d56fc80b963506bc71b380d70": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "give me the number of government health insurance patients who had elective hospital admission. (Values: ELECTIVE, Government)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"ELECTIVE\" AND DEMOGRAPHIC.\"INSURANCE\" = \"Government\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE demographic.admission_type = 'ELECTIVE'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['73.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN on the PROCEDURES table and omits the required INSURANCE = 'Government' filter. This join alters the intended data relationships (restricting to patients with procedures) and fundamentally changes the query logic. Fixing this would require removing the join and adding the missing WHERE condition, not just tweaking a single clause. Therefore, it\u2019s a global logical/structural error.",
            "error_type": "Global: incorrect join logic"
        },
        "8314ff25189b1cf1389172288628eccb": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "count the number of emergency hospital admission patients who are taking main drug type prescription. (Values: EMERGENCY, MAIN)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"MAIN\"",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM prescriptions INNER JOIN demographic ON prescriptions.hadm_id = demographic.hadm_id WHERE demographic.admission_type = 'EMERGENCY' AND prescriptions.drug_type = 'MAIN'",
            "gold_answer": "[['53.0']]",
            "pred_answer": "[['4092.0']]",
            "is_correct": false,
            "reasoning": "Both queries join DEMOGRAPHIC and PRESCRIPTIONS correctly and apply the same WHERE filters on ADMISSION_TYPE and DRUG_TYPE. The only difference is in the SELECT clause: the ground truth uses COUNT(DISTINCT demographic.subject_id) to count unique patients, whereas the predicted query uses COUNT(prescriptions.subject_id), counting every prescription record (including duplicates for the same patient). This is a localized mistake in the aggregation function and does not alter the overall structure or table relationships.",
            "error_type": "Local: missing DISTINCT"
        },
        "0db846be36da67fce1f489804cad086f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "get the number of jehovah's witness religious background patients who are less than 71 years of age. (Values: JEHOVAH'S WITNESS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"RELIGION\" = \"JEHOVAH'S WITNESS\" AND DEMOGRAPHIC.\"AGE\" < \"71\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE religion = 'JEHOVAH' AND age < 71",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses religion = 'JEHOVAH' instead of the exact value 'JEHOVAH'S WITNESS'. This is a mistake in the literal value used in the WHERE clause and does not involve changing joins or overall query structure. Hence it is a localized value parsing error.",
            "error_type": "Local: value parsing error"
        }
    },
    "macsql": {
        "9374426f7a7a5514195f42d67b10ae3a": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "which patients have lab test item id 51200?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"51200\"",
            "pred_sql": "SELECT DISTINCT T1.`SUBJECT_ID`, T2.`NAME`, T2.`AGE`, T2.`GENDER` FROM LAB AS T1 INNER JOIN DEMOGRAPHIC AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` AND T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`ITEMID` = 51200",
            "gold_answer": "[['74.0']]",
            "pred_answer": "[['10317.0', 'Wayne Shelton', '54.0', 'M'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['11221.0', 'Herlinda Thomson', '0.0', 'F'], ['14680.0', 'Roxanna Weaver', '0.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['14755.0', 'Bessie Wood', '47.0', 'F'], ['15061.0', 'Lue White', '55.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['16438.0', 'Sheila Riley', '70.0', 'F'], ['17519.0', 'Shirley Mckenney', '80.0', 'F'], ['17570.0', 'Paul Dunn', '68.0', 'M'], ['17570.0', 'Paul Dunn', '68.0', 'M'], ['17570.0', 'Paul Dunn', '68.0', 'M'], ['17772.0', 'Jesse Ran', '0.0', 'M'], ['17787.0', 'Charley Pescatore', '0.0', 'M'], ['18112.0', 'Anna Johnson', '0.0', 'F'], ['18112.0', 'Anna Johnson', '0.0', 'F'], ['18112.0', 'Anna Johnson', '0.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18351.0', 'Andrea Capone', '30.0', 'F'], ['18372.0', 'Billy Brewer', '0.0', 'M'], ['18480.0', 'Brian Taylor', '48.0', 'M'], ['19187.0', 'David Trammell', '0.0', 'M'], ['19620.0', 'Dawn Cooper', '43.0', 'F'], ['2110.0', 'Chandra Schulman', '0.0', 'F'], ['21796.0', 'Erik Dickerson', '0.0', 'M'], ['22999.0', 'Orville Ford', '0.0', 'M'], ['24425.0', 'Wilbur Braatz', '0.0', 'M'], ['25167.0', 'Cynthia Robinson', '86.0', 'F'], ['25167.0', 'Cynthia Robinson', '86.0', 'F'], ['25543.0', 'Michael Witt', '0.0', 'M'], ['2560.0', 'Shawn Green', '0.0', 'M'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['28588.0', 'Theresa Vedovelli', '89.0', 'F'], ['29541.0', 'Estrella Carroll', '58.0', 'F'], ['29767.0', 'Warren Baldwin', '50.0', 'M'], ['29961.0', 'Jerry Deberry', '72.0', 'M'], ['29961.0', 'Jerry Deberry', '72.0', 'M'], ['29961.0', 'Jerry Deberry', '72.0', 'M'], ['32418.0', 'Michelle Skidmore', '0.0', 'F'], ['3284.0', 'Kevin Jones', '0.0', 'M'], ['3343.0', 'Bessie Giordano', '0.0', 'F'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['3623.0', 'Joseph Dillman', '63.0', 'M'], ['42820.0', 'Evelyn Lawrence', '74.0', 'F'], ['42963.0', 'Marilyn Norvell', '51.0', 'F'], ['43220.0', 'Thomas Nazario', '82.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4333.0', 'Kenneth Koeller', '77.0', 'M'], ['4342.0', 'Charles Deshay', '0.0', 'M'], ['4342.0', 'Charles Deshay', '0.0', 'M'], ['4342.0', 'Charles Deshay', '0.0', 'M'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['4589.0', 'Tina Henderson', '59.0', 'F'], ['52012.0', 'Jane Dillard', '45.0', 'F'], ['52012.0', 'Jane Dillard', '45.0', 'F'], ['52012.0', 'Jane Dillard', '45.0', 'F'], ['53707.0', 'Stephanie Suchan', '56.0', 'F'], ['5506.0', 'Gonzalo Wasson', '83.0', 'M'], ['55094.0', 'Lawrence Wibbens', '56.0', 'M'], ['55094.0', 'Lawrence Wibbens', '56.0', 'M'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['62296.0', 'Morgan Mcgee', '83.0', 'F'], ['64208.0', 'Darlene Martin', '81.0', 'F'], ['65652.0', 'Troy Friedman', '20.0', 'M'], ['65652.0', 'Troy Friedman', '20.0', 'M']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the correct tables and join conditions and applies the correct WHERE filter on ITEMID = 51200, so the overall structure and relationships are fine. Its mistake is confined to the SELECT clause: it returns raw subject_id, name, age, gender rows (with duplicates) instead of the count of distinct SUBJECT_IDs as specified by the ground truth. Fixing this is a matter of changing the SELECT clause to COUNT(DISTINCT SUBJECT_ID), without altering the joins or WHERE logic.",
            "error_type": "Local: incorrect projection/aggregation"
        },
        "0dd083c0247abcf28d92f74623eae302": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many patients who stayed in the hospital for more than 3 days had procedure icd9 code 5451?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"3\" AND PROCEDURES.\"ICD9_CODE\" = \"5451\"",
            "pred_sql": "SELECT COUNT(DISTINCT `SUBJECT_ID`) AS num_patients FROM DEMOGRAPHIC AS D INNER JOIN PROCEDURES AS P ON D.`HADM_ID` = P.`HADM_ID` WHERE D.`DAYS_STAY` > 3 AND P.`ICD9_CODE` = 5451",
            "gold_answer": "[['1.0']]",
            "pred_answer": "ambiguous column name: SUBJECT_ID",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with an 'ambiguous column name: SUBJECT_ID' error because the SELECT clause references SUBJECT_ID without qualifying it by table alias. Both DEMOGRAPHIC and PROCEDURES have a SUBJECT_ID column, so the parser cannot resolve which one is intended. This is a localized issue in the column reference and can be fixed simply by prefixing SUBJECT_ID with the correct alias (e.g. D.SUBJECT_ID). The overall query structure, joins, and logic remain correct.",
            "error_type": "Local: column reference error"
        },
        "ade3038af3cee4bf7d6dad698f9a4b7b": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many patients are diagnosed with opioid type dependence (unspecified) with td drug route? (Values: Opioid type dependence, unspecified, TD)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DIAGNOSES.\"LONG_TITLE\" = \"Opioid type dependence, unspecified\" AND PRESCRIPTIONS.\"ROUTE\" = \"TD\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `ICD9_CODE` = '30400' -- Assuming '30400' is the ICD-9 code for \"Opioid type dependence, unspecified\" ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM PRESCRIPTIONS WHERE `ROUTE` = 'TD' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['0.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL violates the assumption that the exact LONG_TITLE value (\"Opioid type dependence, unspecified\") must be used. Instead it filters on ICD9_CODE = '30400'. This is a localized mistake in the WHERE clause (wrong column/value) rather than a fundamental restructuring of the query. Therefore it is a Local error (incorrect column reference in the filter).",
            "error_type": "Local: incorrect-column-in-WHERE"
        }
    },
    "dinsql": {
        "04f6025046849b72bcf92ac808479159": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients were discharged to home after skin closure nec? (Values: HOME, Skin closure NEC)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"HOME\" AND PROCEDURES.\"SHORT_TITLE\" = \"Skin closure NEC\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) FROM DEMOGRAPHIC JOIN PROCEDURES ON DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID AND DEMOGRAPHIC.SUBJECT_ID = PROCEDURES.SUBJECT_ID WHERE DEMOGRAPHIC.DISCHARGE_LOCATION = 'HOME' AND PROCEDURES.LONG_TITLE = 'Skin closure NEC'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses PROCEDURES.LONG_TITLE instead of PROCEDURES.SHORT_TITLE to filter on 'Skin closure NEC', which is a wrong column choice. This misreference causes the mismatch in results and can be fixed by changing LONG_TITLE to SHORT_TITLE without altering the overall query structure or join logic. Therefore, it is a localized column reference error.",
            "error_type": "Local: column reference error"
        },
        "0c99cbdfd068bcf1ca99f14d7e69b1fc": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "What is average age of patients that survived and had a primary disease of COPD exacerbation? (Values: 0, COPD EXACERBATION)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"COPD EXACERBATION\"",
            "pred_sql": "SELECT AVG(AGE) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.EXPIRE_FLAG = 0 AND DI.LONG_TITLE = 'COPD Exacerbation'",
            "gold_answer": "[['50.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic by introducing a JOIN to the DIAGNOSES table and filtering on DIAGNOSES.LONG_TITLE instead of using the DEMOGRAPHIC.DIAGNOSIS column as the primary disease. This isn\u2019t a minor syntax or reference mistake but a structural/semantic mistake that alters the intended meaning of the query (joining an extra table and filtering on the wrong column), resulting in no matching rows. This requires a significant logical restructuring to correct, so it is a Global error.",
            "error_type": "Global"
        }
    },
    "llm2sql": {
        "8e72236c9b6a09cbcea0029a69a799d3": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the drug route of Ascorbic acid? (Values: Ascorbic Acid)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Ascorbic Acid\"",
            "pred_sql": "SELECT ROUTE FROM PRESCRIPTIONS WHERE DRUG = 'Ascorbic acid';",
            "gold_answer": "[['PO'], ['PO'], ['PO'], ['PO'], ['PO'], ['PO/NG'], ['PO/NG']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL has the correct structure and references but uses the literal 'Ascorbic acid' (lowercase 'a') instead of the exact value 'Ascorbic Acid'. This causes a case-sensitive mismatch in the WHERE clause, returning no rows. The error is confined to the literal value and does not affect the query\u2019s overall logic or table relationships.",
            "error_type": "Local: value parsing error"
        },
        "a84dd2cf945be1cd6281de749bbe8da4": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "give  the number of patients whose primary disease is coronary artery disease/coronary artery bypass graft/sda and who were aged below 89 years. (Values: CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS GRAFT /SDA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS GRAFT /SDA\" AND DEMOGRAPHIC.\"AGE\" < \"89\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE DIAGNOSIS IN ('CORONARY ARTERY DISEASE', 'CORONARY ARTERY BYPASS GRAFT', 'SDA') AND AGE < 89",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses DIAGNOSIS IN ('CORONARY ARTERY DISEASE', 'CORONARY ARTERY BYPASS GRAFT', 'SDA') instead of matching the exact string 'CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS GRAFT /SDA'. This is a mistake in the WHERE clause filter values, a localized error in the condition, not affecting the overall query structure or joins.",
            "error_type": "Local: incorrect value filter"
        },
        "8232941afbee51d5e20e7dcf879ee83f": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "tell me the number of patients who were born in or before 2132. (Values: 2132.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DOD_YEAR\" <= \"2132.0\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE DOB_YEAR <= 2132",
            "gold_answer": "[['8.0']]",
            "pred_answer": "[['77.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a different column (DOB_YEAR) than the ground truth (DOD_YEAR) and also omits the DISTINCT keyword in the COUNT. Both mistakes are confined to the SELECT/WHERE clause and do not affect the overall query structure or table relationships. Therefore, this is a localized error in column reference and aggregation.",
            "error_type": "Local: column reference error"
        },
        "9ead4702111aae4baa6cdeb38d1d16c9": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "which patients are suffering from pneumonia primary disease? (Values: PNEUMONIA)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"PNEUMONIA\"",
            "pred_sql": "SELECT DISTINCT SUBJECT_ID FROM DIAGNOSES WHERE LONG_TITLE LIKE '%PNEUMONIA%';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['14755.0'], ['16438.0'], ['25167.0'], ['28588.0'], ['4333.0'], ['4333.0'], ['62296.0'], ['813.0'], ['81923.0'], ['83678.0'], ['9271.0'], ['94762.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table and matches on LONG_TITLE, whereas the ground truth query counts distinct SUBJECT_IDs in the DEMOGRAPHIC table filtered by the primary disease column DEMOGRAPHIC.DIAGNOSIS = 'PNEUMONIA'. The predicted query not only references the wrong table and column (DIAGNOSES.LONG_TITLE instead of DEMOGRAPHIC.DIAGNOSIS) but also changes the intended output from a count to a list of SUBJECT_IDs. This is a structural/logic-level error that alters the fundamental meaning of the query, not a minor localized mistake in a single clause.",
            "error_type": "Global: wrong-table-and-logic error"
        }
    },
    "deepseek": {
        "95c9f21405c27dccc93ef6c419bf3626": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Which patients have an abnormal test with lab test item id 51255?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"51255\" AND LAB.\"FLAG\" = \"abnormal\"",
            "pred_sql": "SELECT DISTINCT D.NAME FROM DEMOGRAPHIC D INNER JOIN LAB L ON D.SUBJECT_ID = L.SUBJECT_ID AND D.HADM_ID = L.HADM_ID WHERE L.ITEMID = 51255 AND L.FLAG IS NOT NULL;",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['Albert Bauer'], ['Charley Pescatore'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Elizabeth Bateman'], ['Glen Tanner'], ['Glen Tanner'], ['Glen Tanner'], ['Jerry Deberry'], ['Kenneth Koeller'], ['Wilbur Braatz']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the intended logic in multiple ways. First, it filters on L.FLAG IS NOT NULL instead of explicitly requiring L.FLAG = 'abnormal'. Second, it selects patient names rather than counting distinct subject IDs as the question requests. These errors alter both the filtering criterion and the overall shape/purpose of the query, so the mistake spans multiple clauses and changes the query\u2019s fundamental logic.",
            "error_type": "Global: incorrect filter and projection"
        },
        "94fac89f564ff4ef5cafb4d446f7035d": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of patients whose admission type is emergency and born before 2066? (Values: EMERGENCY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2066\"",
            "pred_sql": "SELECT COUNT(*) AS patient_count FROM DEMOGRAPHIC WHERE ADMISSION_TYPE = 'EMERGENCY' AND YEAR(DOB) < 2066;",
            "gold_answer": "[['18.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the YEAR(DOB) function to extract the year from the DOB column, but SQLite does not support a built-in YEAR() function. This is a localized error within the WHERE clause rather than a flaw in the overall query structure or logic.",
            "error_type": "Local: other-temporal error"
        }
    }
}