{
    "codes": {
        "9960a0660d0754d95bd4252a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the number of times patient 10017492 had a lab test for sodium since 03/2100.",
            "gold_sql": "SELECT COUNT(*) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'sodium' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10017492 ) AND strftime('%Y-%m',labevents.charttime) >= '2100-03'",
            "pred_sql": "SELECT count(row_id) FROM labevents WHERE subject_id = 10017492 AND itemid = 50992 AND strftime('%Y-%m-%d', charttime) >= '2021-03-01'",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistakes are confined to the WHERE clause: it hard-codes an incorrect itemid instead of looking up d_labitems to find sodium, uses the wrong date literal (\u20182021-03-01\u2019 instead of filtering from \u20182100-03\u2019), and doesn\u2019t follow the assumption to filter by hadm_id via admissions. These are localized errors in value references and literals, not a fundamental join or structural logic issue.",
            "error_type": "Local: value parsing error"
        },
        "dfd3a721ed94f94f630146ab": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the top four most commonly prescribed medications within 2 months to patients in their 20s who are diagnosed with other postprocedural status this year.",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other postprocedural status' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.age BETWEEN 20 AND 29 AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT prescriptions.drug_name FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE strftime('%Y', admissions.admittime) = '2100' AND strftime('%Y', prescriptions.starttime) = '2100' AND strftime('%Y', patients.dob) = '2100' AND strftime('%Y', admissions.admittime) - strftime('%Y', patients.dob) >= 20 AND prescriptions.drug_name IS NOT NULL GROUP BY prescriptions.drug_name ORDER BY count(prescriptions.drug_name) DESC LIMIT 4",
            "gold_answer": "[['5% dextrose'], ['acetaminophen'], ['acetaminophen iv'], ['albumin 5% (25g / 500ml)'], ['aspirin ec'], ['bisacodyl'], ['docusate'], ['furosemide'], ['iso-osmotic dextrose'], ['ketorolac'], ['metoclopramide'], ['metoprolol tartrate'], ['milk of magnesia'], ['polyethylene glycol'], ['senna'], ['sodium chloride 0.9%  flush'], ['syringe']]",
            "pred_answer": "no such column: prescriptions.drug_name",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column `prescriptions.drug_name`, causing a column reference error. This is a localized issue that can be fixed by using the correct column name (`prescriptions.drug`), without needing to alter the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "e0f2d92c8357f7bb2d8b8b14": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the lab test that was last given to patient 10002495 since 06/2100?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT labevents.itemid FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10002495 ) AND strftime('%Y-%m',labevents.charttime) >= '2100-06' ORDER BY labevents.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT microbiologyevents.test_name FROM labevents INNER JOIN microbiologyevents ON labevents.row_id = microbiologyevents.row_id WHERE labevents.subject_id = 10002495 AND labevents.charttime >= '2100-06-01'",
            "gold_answer": "[['ptt']]",
            "pred_answer": "[['mrsa screen'], ['mrsa screen']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally alters the intended logic by joining labevents with microbiologyevents and retrieving microbiology test names instead of lab test labels from d_labitems. It also uses subject_id directly on labevents rather than filtering by hadm_id from admissions, and never orders or limits to get the last event. These mistakes span multiple clauses and change the overall structure and meaning of the query.",
            "error_type": "Global: incorrect join/structure error"
        },
        "5bf8a233011a0df867e45191": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was patient 10018501's last alcohol detoxification treatment on the last hospital visit?",
            "gold_sql": "SELECT procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'alcohol detoxification' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018501 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.charttime FROM chartevents INNER JOIN admissions ON chartevents.hadm_id = admissions.hadm_id INNER JOIN transfers ON admissions.hadm_id = transfers.hadm_id WHERE transfers.subject_id = 10018501 ORDER BY chartevents.charttime DESC LIMIT 1",
            "gold_answer": "[['2100-08-11 00:00:00']]",
            "pred_answer": "[['2100-08-11 22:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses chartevents and transfers tables instead of procedures_icd and admissions to filter for the 'alcohol detoxification' procedure on the last hospital visit. It omits the necessary filter on d_icd_procedures.long_title and uses the wrong table for charttime, fundamentally altering the query logic and the intended result. This error spans multiple clauses (FROM, JOIN, WHERE) and changes the overall meaning of the query, not just a minor syntax issue.",
            "error_type": "Global: incorrect table and join logic"
        },
        "4d7de56400e4472e829c0dc3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the name of the output patient 10015931 had first on this month/09?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT outputevents.itemid FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015931 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') AND strftime('%d',outputevents.charttime) = '09' ORDER BY outputevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT outputevents.value FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN outputevents ON admissions.hadm_id = outputevents.hadm_id WHERE patients.subject_id = 10015931 AND strftime('%Y-%m',outputevents.charttime) = '2100-09' ORDER BY outputevents.charttime ASC LIMIT 1",
            "gold_answer": "[['foley']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails to retrieve the item name (d_items.label), instead selecting outputevents.value without joining d_items. It also uses the wrong date filter (strftime('%Y-%m',\u2026) = '2100-09' instead of filtering to day '09' of the current month) and joins on admissions.hadm_id rather than restricting to the ICU stay_id. These mistakes alter the overall logic and structure, not just a minor clause fix. Therefore, this is a global error in the query\u2019s join and filtering strategy.",
            "error_type": "Global: incorrect join and filter logic"
        },
        "65fa1808fd08f18ce3e040db": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since 11 months ago, what was patient 10038081's last weight measurement?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038081 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-11 month') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.valuenum FROM chartevents INNER JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.subject_id = 10038081 AND d_items.label = 'daily weight' AND strftime('%Y-%m', chartevents.charttime) > strftime('%Y-%m', 'now', '-11 month') AND strftime('%Y-%m', chartevents.charttime) < strftime('%Y-%m', 'now', '-10 month') ORDER BY chartevents.charttime DESC LIMIT 1",
            "gold_answer": "[['67.3']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main issue is in the WHERE clause time filtering: it uses strftime('%Y-%m', ..., 'now') instead of the fixed '2100-12-31 23:59:00' reference and restricts results to the narrow window between 11 and 10 months ago, rather than all measurements since 11 months ago. This is a localized error in the date condition and does not require restructuring the entire query.",
            "error_type": "Local: other-temporal error"
        },
        "c8d18210963921f83f907052": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What are the top three most commonly diagnosed conditions since 1 year ago?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d_icd_diagnoses.long_title FROM diagnoses_icd INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE diagnoses_icd.charttime >= datetime('now','1 year') GROUP BY d_icd_diagnoses.long_title ORDER BY count(d_icd_diagnoses.long_title) DESC LIMIT 3",
            "gold_answer": "[['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['unspecified essential hypertension']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main fault is in the WHERE clause\u2019s date filter: it uses datetime('now','1 year') instead of the required fixed timestamp '2100-12-31 23:59:00' and a '-1 year' offset. This violates the temporal assumptions but does not change the overall query structure or join logic. Hence, it\u2019s a localized temporal value error.",
            "error_type": "Local: other-temporal error"
        },
        "ba881a6d357833bd3eadf250": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the total volume of input for patient 10035185 on 05/19/this year.",
            "gold_sql": "SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10035185 ) ) AND datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',inputevents.starttime) = '05-19'",
            "pred_sql": "SELECT sum(amount) FROM inputevents WHERE subject_id = 10035185 AND amount IS NOT NULL AND itemid IN (211, 220739, 225312) AND strftime('%Y-%m-%d', starttime) = '2100-05-19'",
            "gold_answer": "[['2300.0']]",
            "pred_answer": "no such column: amount",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'amount' instead of the correct column 'totalamount'. This is a simple column reference mistake that does not alter the overall query logic or require restructuring of joins or subqueries. It can be fixed locally by using the correct column name.",
            "error_type": "Local: table/column reference error"
        },
        "3739c9896a4cfc33d62e978b": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the last occasion that patient 10021118 underwent a excision of right saphenous vein, percutaneous endoscopic approach procedure on the first hospital visit?",
            "gold_sql": "SELECT procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of right saphenous vein, percutaneous endoscopic approach' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021118 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT procedures_icd.charttime FROM procedures_icd INNER JOIN icustays ON procedures_icd.hadm_id = icustays.hadm_id WHERE procedures_icd.subject_id = 10021118 AND procedures_icd.icd_code = '41614100' ORDER BY icustays.intime LIMIT 1",
            "gold_answer": "[['2100-12-04 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally deviates from the intended logic. It joins procedures_icd to icustays instead of filtering procedures by the patient\u2019s first hospital admission via the admissions table. It also hardcodes the ICD code rather than looking it up and never restricts to the first visit. Moreover, it orders by icustays.intime instead of the procedure charttime and returns the earliest ICU stay rather than the last procedure on the first hospital visit. These issues span multiple clauses and disrupt the overall query structure, not just a single clause.",
            "error_type": "Global: incorrect join and missing first visit filter"
        },
        "13bd5a624734c3047cfd61ff": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the first sao2 value that patient 10039831 had today?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT chartevents.value FROM chartevents INNER JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.hadm_id = 10039831 AND d_items.label = 'SaO2' AND chartevents.charttime LIKE '2100-03-01%' ORDER BY chartevents.charttime LIMIT 1",
            "gold_answer": "[['97.0']]",
            "pred_answer": "no such column: chartevents.value",
            "is_correct": false,
            "reasoning": "The predicted query fails due to a column reference error: it selects chartevents.value, but the correct column name is valuenum. This is a localized mistake in the SELECT clause and does not require reworking the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "b73cc134ff0fb035aea49ce3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since 1 year ago, what are the top five most commonly taken specimens for patients within the same hospital visit after the diagnosis of disruption of external operation (surgical) wound?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'disruption of external operation (surgical) wound' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT inputevents.itemid FROM inputevents INNER JOIN icustays ON inputevents.hadm_id = icustays.hadm_id WHERE icustays.row_id = 1 AND inputevents.itemid IN (30046, 30049, 30015, 30227) AND inputevents.starttime > icustays.intime AND icustays.los >= 1 AND icustays.los <= 14 GROUP BY icustays.hadm_id ORDER BY count(icustays.hadm_id) DESC LIMIT 5",
            "gold_answer": "[['blood culture'], ['catheter tip-iv'], ['pleural fluid'], ['swab'], ['tissue']]",
            "pred_answer": "no such column: icustays.los",
            "is_correct": false,
            "reasoning": "The predicted SQL uses entirely the wrong tables and columns (inputevents and icustays with a non\u2010existent los column) instead of microbiologyevents and spec_type_desc. This is not a small typo or local reference fix: it fundamentally alters the query\u2019s logic and intended meaning, joining unrelated tables and filtering by incorrect fields. Hence it is a global structural/join error.",
            "error_type": "Global: join error"
        },
        "16cba15cbeaf0d1e2eb9b512": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last height record for patient 10003046?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10003046 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'height (cm)' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT charttime FROM chartevents WHERE subject_id = 10003046 AND itemid = 223835 ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['173.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL commits errors confined to specific clauses without altering the overall query structure. It selects the wrong column (charttime instead of valuenum) and uses a hard-coded itemid rather than joining through d_items to filter 'height (cm)'. These are localized reference mistakes in the SELECT and WHERE clauses that can be corrected without restructuring the query logic.",
            "error_type": "Local: column reference error"
        },
        "3721cde992fe90dd17278b65": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours have passed since the patient 10021487 was admitted to the hospital?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', admissions.admittime) ) FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL",
            "pred_sql": "SELECT strftime('%J', admissions.admittime) - strftime('%J', admissions.dischtime) AS days FROM admissions WHERE admissions.subject_id = 10021487",
            "gold_answer": "[['599.6']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL stays within the same admissions table and does not alter the overall query structure or logical flow (no additional joins or subqueries). Its mistakes are confined to specific parts: it subtracts admissions.dischtime (instead of using the fixed current time) from admissions.admittime, fails to filter for the current admission (dischtime IS NULL), and omits multiplying the day difference by 24 to get hours. These are localized errors in column reference, filtering, and calculation, not a structural or multi-clause logic flaw.",
            "error_type": "Local: column reference and calculation errors"
        },
        "6a5ef8d7e70eca0770704d97": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of patients who were tested this year for absolute basophil count.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'absolute basophil count' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') )",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM microbiologyevents WHERE strftime('%Y', charttime) = '2100' AND test_name = 'Absolute basophil count (abs)'",
            "gold_answer": "[['18.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the microbiologyevents table and the test_name column to find 'Absolute basophil count (abs)', whereas the question and ground truth require checking labevents.itemid via d_labitems.label for 'absolute basophil count'. This is not a simple typo or missing alias but a completely incorrect table and column choice that changes the entire logic of the query.",
            "error_type": "Global: incorrect-table selection"
        },
        "90e45765e5cde71e74fdd56f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "among patients in their 50s , what are the top three prescribed drugs?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 50 AND 59 ) GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT prescriptions.drug FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE strftime('%Y', patients.dob) > strftime('%Y', 'now') - 50 ORDER BY prescriptions.starttime DESC LIMIT 3",
            "gold_answer": "[['0.9% sodium chloride'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['glucagon'], ['insulin'], ['vial']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets both the age filter and the ranking requirement. It uses a birth\u2010year comparison instead of filtering admissions.age between 50 and 59, and it orders by prescription start time rather than counting and ranking drugs by frequency. This is not a small fix within a single clause but a pervasive logic error affecting the overall structure and intended meaning of the query.",
            "error_type": "Global: incorrect age filter and ranking logic"
        },
        "c7cc751a4a76814634ed9b9a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What does a procedure under the name excision of axillary lymph node cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'procedures_icd' AND cost.event_id IN ( SELECT procedures_icd.row_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of axillary lymph node' ) )",
            "pred_sql": "SELECT procedures_icd.icd_code FROM d_icd_procedures INNER JOIN procedures_icd ON d_icd_procedures.icd_code = procedures_icd.icd_code WHERE d_icd_procedures.long_title = 'Excision of axillary lymph node'",
            "gold_answer": "[['8.16']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the cost table and the filtering on cost.event_type, selecting only the ICD code instead of retrieving the cost. Fixing this would require adding the cost table, its WHERE clause, and the nested subquery logic, which alters the overall structure and purpose of the query rather than a minor clause adjustment.",
            "error_type": "Global"
        },
        "425379e15087e80b17ca3cdd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the first time that patient 10002495 had dextrose 5% input on the first icu visit?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10002495 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'dextrose 5%' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime ASC LIMIT 1",
            "pred_sql": "SELECT inputevents.starttime FROM inputevents INNER JOIN icustays ON inputevents.stay_id = icustays.stay_id WHERE inputevents.subject_id = 10002495 AND inputevents.itemid IN (221906, 226559) LIMIT 1",
            "gold_answer": "[['2100-06-01 20:18:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the key requirement of restricting to the patient\u2019s first ICU visit. Instead of identifying the first icustay via ordering and then filtering inputevents for that specific stay, it simply joins icustays without filtering by the earliest intime or ensuring outtime IS NOT NULL. It also fails to map the 'dextrose 5%' label to itemid via d_items. These omissions fundamentally alter the query\u2019s logic and intended result, not just a minor syntax or reference fix.",
            "error_type": "Global: incorrect join/filter logic"
        },
        "27a8bcc8f3251601e02a710f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Can you give me patient 10029291's daily maximum respiratory rate on the first icu visit?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN icustays ON chartevents.stay_id = icustays.stay_id WHERE chartevents.itemid IN (618, 220179) AND icustays.subject_id = 10029291",
            "gold_answer": "[['24.0'], ['25.0'], ['27.0'], ['28.0'], ['28.0'], ['36.0'], ['36.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the subquery logic that isolates the first ICU stay for patient 10029291 (filtering on icustays.outtime IS NOT NULL, ordering by intime, and LIMIT 1), and it also fails to group results by day. Instead, it simply joins chartevents and icustays for all ICU stays of the patient and returns a single maximum value (which even yields None). This error affects the overall structure and intent of the query\u2014the core logic of isolating the first ICU visit and grouping by date is lost\u2014requiring a top\u2010to\u2010bottom rewrite rather than a minor local fix.",
            "error_type": "Global: incorrect join and filtering logic"
        },
        "a367fc08b430041367378cb8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours have there been since patient 10021487's admission to the hospital?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', admissions.admittime) ) FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL",
            "pred_sql": "SELECT strftime('%J', cost.chargetime) - strftime('%J', admissions.admittime) AS days_since_admission FROM admissions INNER JOIN cost ON admissions.hadm_id = cost.hadm_id WHERE admissions.subject_id = 10021487",
            "gold_answer": "[['599.6']]",
            "pred_answer": "[['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.046'], ['0.046'], ['0.046'], ['0.046'], ['0.046'], ['0.067'], ['0.067'], ['0.067'], ['0.067'], ['0.116'], ['0.151'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic: instead of using the fixed current time ('2100-12-31 23:59:00') and multiplying the day difference by 24, it joins the cost table on hadm_id, uses cost.chargetime as the reference time, and fails to filter for the current admission (dischtime IS NULL). This is not a minor syntax or column reference error but a flaw in the overall query structure and data relationships, causing multiple rows of incorrect day differences rather than a single correct hour count.",
            "error_type": "Global: join logic error"
        },
        "cecffd7d3bfbe9c0797c0931": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since patient 10018081's admission to the icu, how many hours have passed?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', icustays.intime) ) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL",
            "pred_sql": "SELECT strftime('%J', icustays.outtime) - strftime('%J', icustays.intime) AS days FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10018081",
            "gold_answer": "[['126.817']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly uses icustays.outtime instead of the fixed current timestamp ('2100-12-31 23:59:00'), fails to filter for ongoing ICU stays (outtime IS NULL), and omits the multiplication by 24 that converts the day difference into hours. These are mistakes in specific parts of the SELECT/WHERE logic (wrong column reference/time calculation) and can be fixed locally without restructuring the entire query.",
            "error_type": "Local: value parsing/column reference error"
        },
        "5e679fe0b5ad3749c2fca7de": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "In 2100, when was the last time that patient 10022017 was diagnosed with ulcerative colitis, unspecified, without complications?",
            "gold_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'ulcerative colitis, unspecified, without complications' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022017 ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd INNER JOIN patients ON diagnoses_icd.subject_id = patients.subject_id WHERE patients.subject_id = 10022017 AND diagnoses_icd.icd_code = 'icd9|416' GROUP BY diagnoses_icd.charttime ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "gold_answer": "[['2100-10-02 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth in its WHERE clause: it hardcodes an incorrect ICD code ('icd9|416') instead of looking up the code by long_title, omits the year filter (strftime('%Y') = '2100'), and unnecessarily groups by charttime. These mistakes do not require reworking the overall JOIN and ordering structure of the query\u2014only corrections within the WHERE clause are needed. Thus, this is a localized filter error.",
            "error_type": "Local: where-clause filter error"
        },
        "8b816a044e4c20979a4ae614": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients stayed on vascular this year?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT transfers.hadm_id FROM transfers WHERE transfers.careunit = 'vascular' AND datetime(transfers.intime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') )",
            "pred_sql": "SELECT count(icustays.subject_id) FROM chartevents INNER JOIN icustays ON chartevents.stay_id = icustays.stay_id WHERE strftime('%Y', chartevents.charttime) = '2100' AND d_items.label = 'vascular'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "no such column: d_items.label",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it references d_items.label without including the d_items table in the FROM/JOIN clause. This is a column reference error\u2014a localized mistake that does not affect the overall query logic; it is simply a missing table/column reference.",
            "error_type": "Local: table/column reference error"
        },
        "43f5aab1de5e20213e7c6581": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Show me the total amount of patient 10020187's output on 02/04/2100.",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020187 ) ) AND strftime('%Y-%m-%d',outputevents.charttime) = '2100-02-04'",
            "pred_sql": "SELECT sum(outputevents.value) FROM outputevents INNER JOIN patients ON outputevents.subject_id = patients.subject_id WHERE patients.subject_id = 10020187 AND strftime('%Y-%m-%d', outputevents.charttime) = '2010-02-04'",
            "gold_answer": "[['2275.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main mistake is using the wrong date literal: it filters on '2010-02-04' instead of '2100-02-04'. This is a localized error in the WHERE clause value, not a structural or multi-clause logical issue.",
            "error_type": "Local: other-temporal error"
        },
        "df83a028a4e46a89dbf24e19": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Reveal the drug name that was first prescribed to patient 10004733 via the pr route on the current hospital visit?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND prescriptions.route = 'pr' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.drug FROM prescriptions INNER JOIN transfers ON prescriptions.subject_id = transfers.subject_id INNER JOIN admissions ON transfers.hadm_id = admissions.hadm_id WHERE prescriptions.subject_id = 10004733 AND prescriptions.route = 'pr' AND admissions.dischtime IS NULL",
            "gold_answer": "[['bisacodyl']]",
            "pred_answer": "[['bisacodyl'], ['bisacodyl'], ['bisacodyl'], ['bisacodyl']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended query logic by introducing unnecessary JOINs with the transfers table and failing to restrict to a single result via ORDER BY and LIMIT. Instead of selecting the patient\u2019s current admission directly from admissions and then finding the earliest pr prescription, it joins through transfers (using subject_id rather than hadm_id) and omits ordering and limiting. This is not a simple syntax or value error but a structural flaw in how tables are related, requiring a rework of the query\u2019s join strategy.",
            "error_type": "Global: incorrect-join error"
        },
        "b9a2ff88ca010b7574afeae1": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Show me patient 10015272's daily maximum weight since 1 year ago.",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015272 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE patients.subject_id = 10015272 AND chartevents.itemid IN (226739) AND chartevents.valuenum IS NOT NULL AND chartevents.charttime >= datetime('now', 'start of day', '-1 year')",
            "gold_answer": "[['53.2']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in local parts of the query: it fails to GROUP BY day to produce daily maxima, uses the wrong datetime reference (\u2018now\u2019 instead of the fixed '2100-12-31 23:59:00'), and joins unnecessarily on patients.subject_id rather than filtering stays. None of these require reworking the overall query structure or table relationships\u2014they are localized mistakes in the SELECT, WHERE, and GROUP BY logic.",
            "error_type": "Local: missing-group-by error"
        },
        "ee927aa21353efb9824a2eec": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the name of the last medication that was prescribed to patient 10021312 on the first hospital visit?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT prescriptions.drug FROM patients INNER JOIN prescriptions ON patients.subject_id = prescriptions.subject_id WHERE patients.subject_id = 10021312 ORDER BY prescriptions.starttime LIMIT 1",
            "gold_answer": "[['morphine sulfate (oral solution) 2 mg/ml']]",
            "pred_answer": "[['alprazolam']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict prescriptions to the first hospital visit and omits the necessary join or subquery on the admissions table to identify that visit. Instead, it simply joins patients to prescriptions by subject_id and orders by starttime (ascending by default), returning the patient\u2019s very first prescription across all admissions. This is a structural flaw affecting multiple clauses (missing admissions filter, wrong ORDER BY direction) and changes the intended logical flow of the query. Therefore, it is a global error.",
            "error_type": "Global: incorrect join logic"
        },
        "a17e722e5ac11cdc8a2d53bb": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10003400 come for the last time to the hospital?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10003400 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT admissions.dischtime FROM transfers INNER JOIN admissions ON transfers.hadm_id = admissions.hadm_id WHERE transfers.subject_id = 10003400 ORDER BY transfers.intime DESC LIMIT 1",
            "gold_answer": "[['2100-06-14 02:25:00']]",
            "pred_answer": "[['2100-06-15 15:05:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the transfers table and its intime field rather than the admissions table and the admittime field. It also omits the filter to exclude ongoing admissions (dischtime IS NOT NULL). By joining the wrong table and selecting the wrong timestamp, it fundamentally alters the query logic and intended meaning. Fixing this requires restructuring the query (changing the FROM table, JOIN, selected column, and WHERE condition), not just a minor tweak to one clause.",
            "error_type": "Global: join error"
        },
        "1197269430f8f6a19263132a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the last time when patient 10011398 was prescribed simvastatin and aspirin at the same time on the first hospital visit?",
            "gold_sql": "SELECT T1.starttime FROM ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10011398 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND prescriptions.drug = 'simvastatin' ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10011398 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND prescriptions.drug = 'aspirin' ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) ORDER BY T1.starttime DESC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM prescriptions INNER JOIN transfers ON transfers.subject_id = prescriptions.subject_id WHERE prescriptions.subject_id = 10011398 AND prescriptions.drug IN ('simvastatin', 'aspirin') ORDER BY transfers.intime DESC LIMIT 1",
            "gold_answer": "[['2100-12-27 10:00:00']]",
            "pred_answer": "[['2100-12-26 10:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets the question\u2019s logic. It joins the prescriptions table to the transfers table (irrelevant for determining the first hospital visit from admissions), fails to restrict to the first hospital admission, and uses a simple IN clause rather than ensuring simvastatin and aspirin were prescribed at the exact same time. Fixing this would require restructuring the query with subqueries or self-joins on prescriptions and correctly targeting admissions, not just a minor tweak in a single clause.",
            "error_type": "Global: join error"
        },
        "ff6f325e6403ec05870aa381": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Is the mcv value second measured on the first hospital visit in patient 10006053 less than the value from the first measurement on the first hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006053 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mcv' ) ORDER BY labevents.charttime ASC LIMIT 1 OFFSET 1 ) < ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006053 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mcv' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT CASE WHEN sum(CASE WHEN itemid = 723 AND valuenum < 0 THEN 1 ELSE 0 END) > 0 THEN 'Yes' ELSE 'No' END FROM labevents WHERE subject_id = 10006053 AND valuenum < 0",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['No']]",
            "is_correct": false,
            "reasoning": "The predicted SQL does not address the comparison between the second and first MCV measurements on the first hospital visit. Instead, it only checks whether any negative valuenum exists for a hard-coded itemid (723), completely ignoring the required ordering, the d_labitems lookup for 'mcv', and the visit selection logic. This error spans the entire query logic, not just a small syntactic mistake, and thus requires a fundamental restructuring to match the intended meaning.",
            "error_type": "Global: logical inconsistency"
        },
        "398bf7703c6f1d987464d94f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Find out how many patients were discharged from the hospital this year.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime is NOT NULL AND datetime(admissions.dischtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "pred_sql": "SELECT count(subject_id) FROM admissions WHERE dischtime BETWEEN '2100-1-1 00:00:00' AND '2100-12-31 23:59:59'",
            "gold_answer": "[['90.0']]",
            "pred_answer": "[['38.0']]",
            "is_correct": false,
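            "reasoning": "The predicted SQL uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id), so it counts admissions rather than unique patients. That alone cannot explain the result, however: the predicted count (38) is lower than the gold count (90), whereas a missing DISTINCT can only inflate a count. The lower bound '2100-1-1 00:00:00' is not zero-padded, so when the timestamps are compared as text (as SQLite does for TEXT values) every dischtime from '2100-01' through '2100-09' sorts before it and is excluded. Both mistakes are local: zero-padding the date literal (or using the gold 'start of year' comparison) and adding DISTINCT fixes the query without changing its overall structure or logic.",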
            "error_type": "Local: missing DISTINCT in COUNT"
        },
        "e3bb0f8bf5aadcee103960b1": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since 223 days ago, what was the average value of the sao2 of patient 10038933?",
            "gold_sql": "SELECT AVG(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038933 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-223 day')",
            "pred_sql": "SELECT avg(outputevents.value) FROM patients INNER JOIN outputevents ON patients.subject_id = outputevents.subject_id WHERE patients.subject_id = 10038933 AND outputevents.itemid IN (50824, 50825) AND outputevents.charttime > (DATETIME('now') - 223)",
            "gold_answer": "[['96.437']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the outputevents table joined to patients, whereas the ground truth correctly uses chartevents (where SpO2 is stored) filtered by stay_id and the appropriate item label. The predicted SQL not only picks the wrong table and columns (outputevents.value instead of chartevents.valuenum) but also misuses the time filter (DATETIME('now') rather than the fixed '2100-12-31 23:59:00') and incorrect item IDs. These mistakes fundamentally change the data source and logic of the query, so fixing them would require restructuring the query beyond a single clause.",
            "error_type": "Global: wrong table usage"
        },
        "ba36ddb9a0893f283433afe3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10018081 experience the last instance where their diastolic blood pressure was less than 44.0 yesterday?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure diastolic' AND d_items.linksto = 'chartevents' ) AND chartevents.valuenum < 44.0 AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.charttime FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE patients.subject_id = 10018081 AND chartevents.itemid IN (51, 456) AND patients.dob = ( SELECT min(patients.dob) FROM patients INNER JOIN chartevents ON patients.subject_id = chartevents.subject_id WHERE patients.subject_id = 10018081 AND chartevents.itemid IN (51, 456) AND strftime('%Y %m %d',chartevents.charttime) = strftime('%Y %m %d',admissions.admittime) - 1 AND chartevents.valuenum1 < 44 AND patients.gender = 'f' )",
            "gold_answer": "[['2100-12-30 14:00:00']]",
            "pred_answer": "no such column: admissions.admittime",
            "is_correct": false,
            "reasoning": "The predicted SQL completely breaks the intended query structure: it joins chartevents directly to patients rather than going through admissions \u2192 icustays, omits the stay_id filter, and even references admissions.admittime without including the admissions table. This is not a small fix within a clause but a fundamentally flawed join strategy and overall logic that fails to capture the patient\u2019s ICU stay and the 'yesterday' date filter correctly.",
            "error_type": "Global: incorrect join relationships"
        },
        "404fb4798c41a903136d9468": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate patient 10004720's length of stay in days for the last icu stay.",
            "gold_sql": "SELECT strftime('%J', icustays.outtime)-strftime('%J', icustays.intime) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1",
            "pred_sql": "SELECT sum(strftime('%J', admissions.dischtime) - strftime('%J', admissions.admittime)) FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10004720",
            "gold_answer": "[['5.056']]",
            "pred_answer": "[['5.02']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by using the admissions table instead of icustays, summing all hospital stays rather than computing the length of the last ICU stay. It also omits the ORDER BY ... DESC LIMIT 1 to pick the last stay and does not restrict to non-null ICU outtimes. These are structural flaws that change the intended meaning and require major query restructuring.",
            "error_type": "Global: incorrect table join and aggregation logic"
        },
        "8f1d7f8d06674bc4fcf77de8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How much is the minimum total hospital cost associated with postprocedural fever this year?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'postprocedural fever' ) ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT sum(cost.cost) FROM cost INNER JOIN procedures_icd ON cost.event_id = procedures_icd.row_id WHERE procedures_icd.icd_code = '294.5' AND strftime('%Y', cost.chargetime) = '2100'",
            "gold_answer": "[['1125.6']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misidentifies the event table and join relationship. Instead of filtering costs for the diagnosis of 'postprocedural fever' via diagnoses_icd and its ICD code lookup, it joins cost to procedures_icd and uses a hard-coded code. This alters the core logic of which hospital costs are aggregated and does not even group by admission to compute a minimum. Fixing this requires restructuring the query to join the correct diagnosis table and apply the proper yearly filter and grouping, not just a minor syntax tweak.",
            "error_type": "Global: join error"
        },
        "83a6c418c954d7546628bd66": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the number of patients who were prescribed olanzapine (disintegrating tablet) since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'olanzapine (disintegrating tablet)' AND strftime('%Y',prescriptions.starttime) >= '2100' )",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE drug = 'olanzapine' AND stoptime > '2100-01-01'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to follow the overall intended logic on multiple fronts: 1) It filters on drug = 'olanzapine' instead of the full name 'olanzapine (disintegrating tablet)', violating the drug-name requirement. 2) It uses stoptime > '2100-01-01' instead of selecting prescriptions by starttime in year \u2265 2100. 3) It counts non\u2010distinct subject_id rows directly from prescriptions, rather than counting distinct patients via admissions.hadm_id. 4) It omits the necessary join or subquery through admissions.subject_id. These errors span several clauses (SELECT, FROM, WHERE) and fundamentally change the query\u2019s meaning, so this is a global logic error.",
            "error_type": "Global: logic error"
        },
        "964ce5a64b7651431e5f28ea": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the number of days since patient 10004733 was first diagnosed with other and unspecified alcohol dependence, unspecified on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', diagnoses_icd.charttime) ) FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other and unspecified alcohol dependence, unspecified' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ORDER BY diagnoses_icd.charttime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%J', admissions.admittime) - strftime('%J', patients.dob)) - (strftime('%J', admissions.dischtime) - strftime('%J', patients.dob)) AS days FROM admissions INNER JOIN patients ON admissions.subject_id = patients.subject_id WHERE admissions.subject_id = 10004733",
            "gold_answer": "[['9.522']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely ignores the diagnosis and current\u2010visit requirements. It never joins or filters on diagnoses_icd or the specific ICD code, and it uses admissions.admittime, admissions.dischtime, and patients.dob in a nonsensical subtraction that bears no relation to \u201cdays since first diagnosis\u201d on the current visit. This is not a minor typo or wrong column reference but a fundamentally incorrect query structure and logic, so it is a global error.",
            "error_type": "Global: incorrect overall logic"
        },
        "da0ea1dc89bd6b674a9f17f2": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the cost of a diagnosis of adverse effect of sulfonamides, initial encounter?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'diagnoses_icd' AND cost.event_id IN ( SELECT diagnoses_icd.row_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'adverse effect of sulfonamides, initial encounter' ) )",
            "pred_sql": "SELECT cost.cost FROM diagnoses_icd INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code INNER JOIN cost ON diagnoses_icd.row_id = cost.event_id WHERE d_icd_diagnoses.long_title = 'adverse effect of sulfonamides, initial encounter'",
            "gold_answer": "[['6.62']]",
            "pred_answer": "[['13.86'], ['6.62'], ['8.08']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins diagnoses_icd to cost via event_id but fails to restrict cost.event_type = 'diagnoses_icd'. Without that filter, cost rows from other event types whose event_id numerically matches a diagnoses_icd.row_id are also returned, producing extra costs (13.86 and 8.08). Adding the WHERE cost.event_type = 'diagnoses_icd' condition would fix it without changing the overall query structure or joins.",
            "error_type": "Local: missing event_type filter"
        },
        "1a7ff2b0da25e36ad3c72ba2": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How is isosorbide dinitrate typically consumed?",
            "gold_sql": "SELECT DISTINCT prescriptions.route FROM prescriptions WHERE prescriptions.drug = 'isosorbide dinitrate'",
            "pred_sql": "SELECT DISTINCT CASE WHEN d_items.label IS NULL THEN NULL ELSE d_items.label END FROM d_items INNER JOIN inputevents_cv WHERE d_items.itemid = inputevents_cv.itemid AND d_items.label IS NOT NULL",
            "gold_answer": "[['po/ng']]",
            "pred_answer": "no such table: inputevents_cv",
            "is_correct": false,
            "reasoning": "The predicted query references a non-existent table inputevents_cv, which is a table reference error. This mistake is confined to one part of the query and does not change the overall logic or require restructuring, so it is a local error.",
            "error_type": "Local: table reference error"
        },
        "4d1ec5f37505470d5d68c654": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10006580 get input for the last time?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006580 ) ) ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT min(charttime) FROM chartevents WHERE subject_id = 10006580",
            "gold_answer": "[['2100-08-20 03:55:00']]",
            "pred_answer": "[['2100-08-19 16:23:00']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong table (chartevents instead of inputevents), applies the wrong aggregation (min instead of max/last), and omits the necessary joins through admissions and icustays. These errors span across multiple clauses and fundamentally change the query\u2019s intent, so they cannot be fixed by a minor local tweak.",
            "error_type": "Global: wrong table and aggregation"
        },
        "8a0fa0a4a8e13bb1dbb8d82f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What's the weight difference of patient 10004720's body last measured on the last hospital visit compared to the value second to last measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "SELECT sum(CASE WHEN charttime = ( SELECT max(charttime) FROM chartevents WHERE itemid IN ( 226559, 226560 ) AND subject_id = 10004720 ) THEN value ELSE 0 END) - sum(CASE WHEN charttime = ( SELECT max(charttime) - 1 FROM chartevents WHERE itemid IN ( 226559, 226560 ) AND subject_id = 10004720 ) THEN value ELSE 0 END) AS weight_difference FROM chartevents WHERE itemid IN ( 226559, 226560 ) AND subject_id = 10004720",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: value",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'value' in the chartevents table instead of using the correct 'valuenum' column. This is a simple column reference error and does not change the overall structure or logic of the query.",
            "error_type": "Local: table/column reference error"
        },
        "f789a1f2bea694b1a95b300a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of current patients whose ages fall within the 20s.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime IS NULL AND admissions.age BETWEEN 20 AND 29",
            "pred_sql": "SELECT count(DISTINCT patients.subject_id) FROM admissions INNER JOIN patients ON admissions.subject_id = patients.subject_id WHERE patients.dob LIKE '20%'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['94.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates only in the WHERE clause by filtering on patients.dob with a LIKE pattern instead of using the admissions.age BETWEEN 20 AND 29 condition. The overall query structure (JOIN, COUNT, DISTINCT) is correct, and no major re-structuring is needed\u2014only the filtering logic in the WHERE clause is wrong.",
            "error_type": "Local: incorrect filter condition"
        },
        "eb2c3da7830258ad243c4ce5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What did patient 10020740 receive on 03/25/this year as their last input?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT inputevents.itemid FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) ) AND datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',inputevents.starttime) = '03-25' ORDER BY inputevents.starttime DESC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT inputevents.itemid FROM inputevents INNER JOIN patients ON inputevents.subject_id = patients.subject_id WHERE patients.subject_id = 10020740 AND strftime('%m/%d/%Y', inputevents.charttime) = '03/25/2100'",
            "gold_answer": "[['insulin - regular']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non\u2010existent column inputevents.charttime (the inputevents table uses starttime, not charttime), causing a column reference error. This is a mistake localized to a specific part of the query and can be corrected without changing the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "1bf033967053a3e88b60ffec": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many current patients are there?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime is null",
            "pred_sql": "SELECT count(DISTINCT icustays.subject_id) FROM icustays INNER JOIN patients ON icustays.subject_id = patients.subject_id WHERE patients.dod IS NULL",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['64.0']]",
            "is_correct": false,
            "reasoning": "The predicted query completely changes the intent by using icustays and filtering on patients.dod IS NULL (alive patients) rather than using admissions.dischtime IS NULL (current hospital admissions). It joins the wrong tables and applies the wrong condition, altering the overall logic of what constitutes a \u201ccurrent patient.\u201d This is not a minor syntax or reference fix but a fundamental misuse of tables and conditions that requires restructuring the query.",
            "error_type": "Global: wrong table and logic"
        },
        "cc60f6274f29d0fdfd7ff5a3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the name of the lab test that patient 10027445 received first on the first hospital visit?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT labevents.itemid FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027445 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT labevents.itemid FROM labevents INNER JOIN d_labitems ON labevents.itemid = d_labitems.itemid WHERE labevents.subject_id = 10027445 ORDER BY labevents.charttime ASC LIMIT 1",
            "gold_answer": "[['chloride']]",
            "pred_answer": "[['50902.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict to the patient's first hospital visit (it uses subject_id instead of hadm_id and omits the admissions/dischtime filter) and also selects the itemid rather than the lab test label. These mistakes are not localized to a single clause but alter the fundamental logic and table relationships of the query, requiring structural changes (adding subqueries, correct joins, and correct SELECT target) to fix.",
            "error_type": "Global: join error"
        },
        "ae2f9d639134a58571aa307a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients had a immunology (cmv) test administered since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT microbiologyevents.hadm_id FROM microbiologyevents WHERE microbiologyevents.spec_type_desc = 'immunology (cmv)' AND strftime('%Y',microbiologyevents.charttime) >= '2100' )",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM chartevents WHERE itemid IN (612, 613, 614, 615, 618)",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the chartevents table with arbitrary itemids and omits any filter on spec_type_desc or charttime, completely ignoring the microbiologyevents table and the time constraint since 2100. This is not a minor fix in a single clause but a fundamental mis\u2010use of the wrong table and logic, altering the intent of the query. Therefore, this is a Global error.",
            "error_type": "Global: incorrect table/logical error"
        },
        "5fd041e7cb3585b0f8296e59": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Can you tell me the maximum total hospital cost if it involves a rbc, pleural lab test in 2100?",
            "gold_sql": "SELECT MAX(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'rbc, pleural' ) ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT sum(cost.cost) AS num FROM cost INNER JOIN labevents ON cost.event_id = labevents.row_id WHERE labevents.itemid IN (40549, 50512)",
            "gold_answer": "[['15682.43']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the query structure: it joins cost to labevents via cost.event_id = labevents.row_id (missing cost.event_type filter), omits the year filter on cost.chargetime, fails to group by hadm_id to compute per-admission totals, and never takes the MAX of those totals. These are not isolated typos but a fundamentally different and incorrect join/aggregation approach, requiring extensive restructuring to fix. Therefore this is a Global error.",
            "error_type": "Global: incorrect join logic"
        },
        "7f27ac59bfdf5715227dfcb0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the total multivitamins amount given for patient 10038933 on the last icu visit?",
            "gold_sql": "SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038933 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'multivitamins' AND d_items.linksto = 'inputevents' )",
            "pred_sql": "SELECT sum(inputevents.totalamount) FROM icustays INNER JOIN inputevents ON icustays.stay_id = inputevents.stay_id WHERE inputevents.subject_id = 10038933 AND inputevents.itemid IN (226559, 226560, 226561, 226562, 226584, 226585, 226586, 226587, 226588, 226589, 226590, 226591, 227510, 227511, 227512, 227513, 227514, 227515, 227517, 227518, 227519, 227520, 227521, 227522, 22752",
            "gold_answer": "[['1001.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the logic to identify the patient\u2019s last ICU stay (no filter on icustays.outtime, no ORDER BY icustays.intime DESC LIMIT 1). Instead it simply joins all stays and filters by subject_id, fundamentally changing the query\u2019s meaning. In addition, the predicted SQL is truncated in the middle of its hard-coded itemid IN-list (the list is never closed), which is what produces the \u201cincomplete input\u201d execution error. Correcting this requires restructuring the query to select the single latest stay, not just fixing a minor clause.",
            "error_type": "Global: join-logic error"
        },
        "4e1182880796183b551f7b41": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last procedure patient 10018081 had since 2100?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT procedures_icd.icd_code FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ORDER BY procedures_icd.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures INNER JOIN procedures_icd ON d_icd_procedures.row_id = procedures_icd.icd_code WHERE procedures_icd.subject_id = 10018081 ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "gold_answer": "[['enteral infusion of concentrated nutritional substances']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL has two localized mistakes: it joins d_icd_procedures.row_id to procedures_icd.icd_code (instead of matching on icd_code) and it omits the WHERE clause filtering procedures since 2100. Both errors are confined to specific clauses (JOIN and WHERE) and do not require restructuring the overall query logic or table relationships, so this is a Local error.",
            "error_type": "Local: table/column reference error"
        },
        "69100638c56508dbe7fa9a28": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate the total number of patients diagnosed with unspecified disease of pericardium who didn't revisit the hospital within the same month this year.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified disease of pericardium' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified disease of pericardium' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 JOIN admissions ON T2.subject_id = admissions.subject_id WHERE T2.charttime < admissions.admittime AND datetime(admissions.admittime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND datetime(T2.charttime,'start of month') = datetime(admissions.admittime,'start of month') )",
            "pred_sql": "SELECT count(icustays.subject_id) FROM icustays INNER JOIN diagnoses_icd ON icustays.hadm_id = diagnoses_icd.hadm_id WHERE diagnoses_icd.icd_code = '25005' AND strftime('%Y-%m', icustays.intime) != strftime('%Y-%m', diagnoses_icd.charttime)",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally departs from the intended logic. The question asks for counting patients diagnosed with \u2018unspecified disease of pericardium\u2019 within this year (using the d_icd_diagnoses lookup) who did not have another hospital admission in the same month of their diagnosis. The ground truth uses admissions and diagnoses_icd, filters by the diagnosis long_title, restricts to this year, and subtracts those with a same-month readmission. The predicted SQL instead: \u2022 Uses icustays rather than admissions to track hospital visits. \u2022 Hard-codes icd_code = '25005' instead of looking up the code by long_title. \u2022 Joins icustays to diagnoses_icd on hadm_id, counting ICU stays not in the same year-month rather than counting patients without a same-month readmission. \u2022 Uses a simple strftime comparison for month inequality, which does not implement the \u201cno revisit in the same month\u201d logic correctly. These errors span multiple clauses (wrong tables, wrong code lookup, wrong joins, and the wrong filtering strategy) and require reworking the entire query structure to align with the question\u2019s intent. This is a Global error.",
            "error_type": "Global: incorrect join and filtering logic"
        },
        "307575c4925d593d6397e60f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What time was patient 10005817 prescribed a drug for the first time this month?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005817 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM patients INNER JOIN prescriptions ON patients.subject_id = prescriptions.subject_id WHERE patients.subject_id = 10005817 AND strftime('%Y-%m', prescriptions.starttime) = '2100-12'",
            "gold_answer": "[['2100-12-20 02:00:00']]",
            "pred_answer": "[['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 03:00:00'], ['2100-12-20 05:00:00'], ['2100-12-20 10:00:00'], ['2100-12-20 11:00:00'], ['2100-12-20 11:00:00'], ['2100-12-20 12:00:00'], ['2100-12-20 17:00:00'], ['2100-12-20 18:00:00'], ['2100-12-20 20:00:00'], ['2100-12-21 08:00:00'], ['2100-12-21 08:00:00'], ['2100-12-21 12:00:00'], ['2100-12-21 13:00:00'], ['2100-12-21 14:00:00'], ['2100-12-21 22:00:00'], ['2100-12-22 08:00:00'], ['2100-12-22 12:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 15:00:00'], ['2100-12-23 15:00:00'], ['2100-12-23 15:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 22:00:00'], ['2100-12-24 03:00:00'], ['2100-12-24 08:00:00'], ['2100-12-24 08:00:00'], ['2100-12-24 09:00:00'], ['2100-12-24 09:00:00'], ['2100-12-24 10:00:00'], ['2100-12-24 11:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 14:00:00'], ['2100-12-24 17:00:00'], ['2100-12-24 17:00:00'], ['2100-12-24 22:00:00'], ['2100-12-25 08:00:00'], ['2100-12-25 08:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 
19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-26 03:00:00'], ['2100-12-26 03:00:00'], ['2100-12-26 04:00:00'], ['2100-12-26 05:00:00'], ['2100-12-26 07:00:00'], ['2100-12-26 08:00:00'], ['2100-12-26 10:00:00'], ['2100-12-26 13:00:00'], ['2100-12-26 14:00:00'], ['2100-12-26 16:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters prescriptions for patient 10005817 in the current month but fails to order the results and apply a LIMIT 1 to return only the first prescription time. This is a mistake localized to the SELECT/ORDER BY/LIMIT clauses and does not alter the overall query structure or relationships.",
            "error_type": "Local: missing-order-limit error"
        },
        "0f28b6cf83f9be5d009b09d5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the name of the medication that patient 10031757 received two or more times in 10/this year?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, COUNT( prescriptions.starttime ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10031757 ) AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',prescriptions.starttime) = '10' GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT drug FROM prescriptions WHERE subject_id = 10031757 AND strftime('%Y', starttime) = '2100' GROUP BY drug HAVING count(drug) > 2",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['bisacodyl'], ['ciprofloxacin iv'], ['docusate sodium'], ['iso-osmotic dextrose'], ['lactated ringers'], ['lidocaine viscous 2%'], ['methylnaltrexone'], ['metronidazole (flagyl)'], ['ns'], ['ondansetron'], ['pantoprazole'], ['phosphorus'], ['phytonadione'], ['potassium chloride replacement (critical care and oncology)'], ['sodium chloride 0.9%'], ['sodium chloride 0.9%  flush'], ['trazodone'], ['vancomycin'], ['vial']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['ciprofloxacin iv'], ['docusate sodium'], ['iso-osmotic dextrose'], ['lactated ringers'], ['methylnaltrexone'], ['metronidazole (flagyl)'], ['ns'], ['ondansetron'], ['pantoprazole'], ['potassium chloride (powder)'], ['potassium chloride replacement (critical care and oncology)'], ['sodium chloride 0.9%'], ['sodium chloride 0.9%  flush'], ['trazodone'], ['vancomycin'], ['vial']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to restrict to October (\u201810\u2019) of the current year and misuses the HAVING clause (count > 2 instead of >= 2). These mistakes are confined to the WHERE and HAVING clauses (temporal and threshold filters) and do not require restructuring the JOINs or overall logic.",
            "error_type": "Local: other-temporal error"
        },
        "e9e653553f144c1fef6b60f3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate the number of patients diagnosed with anoxic brain damage, not elsewhere classified since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'anoxic brain damage, not elsewhere classified' ) AND strftime('%Y',diagnoses_icd.charttime) >= '2100' )",
            "pred_sql": "SELECT count(DISTINCT patients.subject_id) FROM patients INNER JOIN diagnoses_icd ON patients.subject_id = diagnoses_icd.subject_id INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE d_icd_diagnoses.long_title = 'anoxic brain damage'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the time filter (`strftime('%Y', diagnoses_icd.charttime) >= '2100'`) and also uses the wrong diagnosis title ('anoxic brain damage' instead of 'anoxic brain damage, not elsewhere classified'). These mistakes are confined to the WHERE clause\u2019s filter conditions and do not require restructuring the joins or overall query logic.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "8caac1d5d3cc5f0858bd498e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Today, what was the first value of sao2 for patient 10039831?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT DISTINCT sao2 FROM chartevents WHERE subject_id = 10039831 ORDER BY charttime ASC LIMIT 1",
            "gold_answer": "[['97.0']]",
            "pred_answer": "no such column: sao2",
            "is_correct": false,
            "reasoning": "The predicted query references a non-existent column \u201csao2\u201d instead of using the correct schema (e.g., chartevents.valuenum after filtering by d_items.label). This is a simple column reference error that does not alter the overall structure or logic of the query\u2014it\u2019s a misspelled/incorrect column name. Therefore it is a local error.",
            "error_type": "Local: column reference error"
        },
        "74ba8675c1d922c076631bf5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many times was sertraline medication prescribed since 1 year ago?",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.drug = 'sertraline' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(row_id) FROM prescriptions WHERE drug = 'sertraline' AND starttime > datetime('2100 12 31 23:59:00','start of day',' -1 year')",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the assumptions in the WHERE clause by misformatting the datetime literal ('2100 12 31 23:59:00' instead of '2100-12-31 23:59:00') and by injecting an unnecessary 'start of day' modifier, as well as using '>' instead of '>='. These are localized mistakes in the value parsing and comparison condition that do not alter the overall query structure or table relationships.",
            "error_type": "Local: value parsing error"
        },
        "99494ba824b48fe796158434": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Pull up the ids of patients who were diagnosed with other specified forms of hearing loss in 2100.",
            "gold_sql": "SELECT DISTINCT admissions.subject_id FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other specified forms of hearing loss' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' )",
            "pred_sql": "SELECT patients.subject_id FROM patients INNER JOIN diagnoses_icd ON patients.subject_id = diagnoses_icd.subject_id WHERE diagnoses_icd.icd_code IN ( '428.9', '429.3', '430.1', '430.3', '431.0', '431.1', '431.9', '432.0', '432.1', '432.2', '432.3', '432.9', '433.0', '433.1', '433.9', '434.0', '434.1', '434.9', '435.0', '435.1', '435.9', '436.0', '436.1', '436.9', '437.2', '437.3', '437.4', '437.5', '437.6', '437.7",
            "gold_answer": "[['10021666.0']]",
            "pred_answer": "unrecognized token: \"'437.7\"",
            "is_correct": false,
            "reasoning": "The failure occurs at parse time due to a malformed IN\u2010list literal (\u2018437.7 without a closing quote or parenthesis). This is a localized syntax/value parsing mistake in the WHERE clause and does not require restructuring the overall query logic or its joins.",
            "error_type": "Local: value parsing error"
        },
        "3d479874efc2d645f83e1a5e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Give me the details of the time patient 10005348 was admitted to the hospital.",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10005348",
            "pred_sql": "SELECT admissions.admittime, admissions.dischtime FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE icustays.subject_id = 10005348",
            "gold_answer": "[['2100-09-12 08:30:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query introduces an unnecessary INNER JOIN with the icustays table, altering the intended logic. By joining on admissions.hadm_id = icustays.hadm_id and then filtering on icustays.subject_id, the query only returns admissions associated with ICU stays. Since the patient may have no ICU stay, this join filters out the desired record. The error affects the overall structure and logic of the query, not just a single clause, and thus is a global issue.",
            "error_type": "Global: incorrect-join error"
        },
        "d27c81e4091ff4f9207c8f58": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10020187 have an input for the last time on 02/01/this year?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020187 ) ) AND datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',inputevents.starttime) = '02-01' ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT inputevents.starttime FROM inputevents INNER JOIN patients ON inputevents.subject_id = patients.subject_id WHERE patients.subject_id = 10020187 AND strftime('%Y %m %d', inputevents.starttime) = '2100 02 01'",
            "gold_answer": "[['2100-02-01 20:39:00']]",
            "pred_answer": "[['2100-02-01 05:15:00'], ['2100-02-01 14:16:00'], ['2100-02-01 16:26:00'], ['2100-02-01 17:09:00'], ['2100-02-01 19:45:00'], ['2100-02-01 19:45:00'], ['2100-02-01 19:45:00'], ['2100-02-01 20:39:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict inputevents to the patient\u2019s ICU stay (it joins directly to patients instead of filtering by icustays via hadm_id and stay_id) and omits the ORDER BY ... DESC LIMIT 1 needed to return only the last event. This is not a simple typo or isolated condition error but a fundamental flaw in table relationships and overall query structure, altering the intended logic of the query.",
            "error_type": "Global: join and structural logic error"
        },
        "582d2a38c554375225e177ea": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What are the ways to consume sodium chloride?",
            "gold_sql": "SELECT DISTINCT prescriptions.route FROM prescriptions WHERE prescriptions.drug = 'sodium chloride'",
            "pred_sql": "SELECT d_items.label FROM d_items INNER JOIN inputevents ON d_items.itemid = inputevents.itemid WHERE inputevents.value = 'sodium chloride'",
            "gold_answer": "[['po']]",
            "pred_answer": "no such column: inputevents.value",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column inputevents.value. The inputevents table does not have a 'value' column, so this is a simple column reference error. This mistake is localized and does not alter the overall query logic or require restructuring across multiple clauses.",
            "error_type": "Local: column reference error"
        },
        "cb3541829199c9ac184de2b0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Find out how many patients were admitted to the hospital.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions",
            "pred_sql": "SELECT count(DISTINCT hadm_id) FROM admissions WHERE dischtime IS NULL",
            "gold_answer": "[['94.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the query\u2019s intent: it counts distinct admissions (hadm_id) rather than distinct patients (subject_id) and additionally filters only current admissions (dischtime IS NULL). This alters the overall logic and structure, producing a result that does not answer the original question. Fixing this requires changing multiple clauses (SELECT and WHERE), not just a minor typo or syntax issue.",
            "error_type": "Global: wrong aggregation and filtering"
        },
        "99050f13d8de8d68cab8de5a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was patient 10004733's last sao2 on 12/24/this year?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',chartevents.charttime) = '12-24' ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN admissions ON chartevents.hadm_id = admissions.hadm_id WHERE admissions.subject_id = 10004733 AND strftime('%Y-%m-%d', chartevents.charttime) = '2012-12-24' AND chartevents.itemid = 5080",
            "gold_answer": "[['96.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s core mistake is a bad date filter: it hard-codes strftime('%Y-%m-%d',chartevents.charttime) = '2012-12-24' rather than matching month-day against \u2018\u201812-24\u2019\u2019 in the current \u2018\u20182100\u2019\u2019 year as per the assumptions. This is a localized value\u2010parsing/temporal error within the WHERE clause; the rest of the query structure remains intact. Therefore this is a Local error.",
            "error_type": "Local: other-temporal error"
        },
        "3256b9f34e2964c1987aa36e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the 3-month survival rate for chronic kidney disease, stage 3 (moderate) patients who were prescribed tobramycin sulfate after the diagnosis?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T3.charttime) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic kidney disease, stage 3 (moderate)' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'tobramycin sulfate' GROUP BY admissions.subject_id HAVING MAX(prescriptions.starttime) = prescriptions.starttime ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime ) AS T3 JOIN patients ON T3.subject_id = patients.subject_id )",
            "pred_sql": "SELECT cast(sum(CASE WHEN patients.dod IS NULL THEN 1 ELSE 0 END) AS REAL) / count(patients.subject_id) FROM patients INNER JOIN prescriptions ON patients.subject_id = prescriptions.subject_id WHERE patients.dod IS NULL AND prescriptions.drug = 'tobramycin sulfate' AND patients.subject_id IN ( SELECT subject_id FROM diagnoses_icd WHERE icd9_code LIKE '585.3%' OR icd9_code LIKE '585.4%' OR icd9_code LIKE '585.5%' )",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: icd9_code",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non\u2010existent column icd9_code in the diagnoses_icd table (the correct column is icd_code). This is a localized column reference error and does not require restructuring the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "c6bdaeca60ccfe1176848cdb": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "On the current icu visit, what was the maximum value of the systolic blood pressure for patient 10039831?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure systolic' AND d_items.linksto = 'chartevents' )",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN icustays ON chartevents.stay_id = icustays.stay_id WHERE icustays.subject_id = 10039831 AND chartevents.itemid IN (211, 220045)",
            "gold_answer": "[['125.0']]",
            "pred_answer": "[['107.0']]",
            "is_correct": false,
            "reasoning": "The predicted query joins chartevents to icustays on stay_id and filters by icustays.subject_id, but it never restricts to the current ICU stay (missing icustays.outtime IS NULL). It also uses hard-coded itemids (211, 220045) instead of looking up the single correct itemid for 'arterial blood pressure systolic' in d_items. These mistakes alter the overall logic and data scope of the query, producing an incorrect result.",
            "error_type": "Global: flawed join and filtering logic"
        },
        "3761294f8cbb1cb4f643e89c": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients were given senna within the same hospital visit after receiving a extraction of other tooth procedure?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'extraction of other tooth' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'senna' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT count(procedures_icd.subject_id) FROM d_icd_procedures INNER JOIN procedures_icd ON d_icd_procedures.icd_code = procedures_icd.icd_code WHERE d_icd_procedures.long_title = 'extraction of other tooth' AND d_icd_procedures.icd_code = 'icd9|4134'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only counts the number of 'extraction of other tooth' procedures, completely ignoring the requirement to find patients who were given senna afterward in the same hospital visit. It does not join the prescriptions table, lacks the temporal condition (procedure before prescription), and does not restrict to the same hadm_id or use a DISTINCT count on subject_id. These omissions fundamentally break the query\u2019s logic rather than fixable within a single clause.",
            "error_type": "Global: incorrect join and missing temporal logic"
        },
        "73393e923fe780f8a485bb41": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Provide the id list of patients who were diagnosed with multiple sclerosis in 2100.",
            "gold_sql": "SELECT DISTINCT admissions.subject_id FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'multiple sclerosis' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' )",
            "pred_sql": "SELECT diagnoses_icd.subject_id FROM diagnoses_icd INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE d_icd_diagnoses.long_title = 'multiple sclerosis' AND diagnoses_icd.charttime LIKE '2100%' GROUP BY diagnoses_icd.subject_id HAVING count(diagnoses_icd.subject_id) > 1",
            "gold_answer": "[['10001217.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a GROUP BY and HAVING clause to filter patients with more than one 'multiple sclerosis' diagnosis in 2100, whereas the question only asks for patients diagnosed at least once that year. This misuse of aggregation/HAVING is confined to a specific clause and can be corrected by removing or altering that clause without restructuring the whole query. Therefore, it is a Local error.",
            "error_type": "Local: incorrect-aggregation error"
        },
        "8e6c18649967bdf6b995b323": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How frequently were extraction of other tooth procedures performed in 2100?",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'extraction of other tooth' ) AND strftime('%Y',procedures_icd.charttime) = '2100'",
            "pred_sql": "SELECT count(procedures_icd.hadm_id) FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Extraction of other tooth' AND strftime('%Y', procedures_icd.charttime) = '2100'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the two queries is the casing of the procedure name literal: the ground truth uses 'extraction of other tooth' (all lowercase) while the predicted SQL uses 'Extraction of other tooth' (capital 'E'). In SQLite, string equality is binary\u2010sensitive by default, so the join predicate fails and returns zero rows. All other parts of the query (JOIN logic, COUNT usage, year filter) are correct. This is a localized issue with the string literal matching, not the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "f1d174e3c9a7f5b7db8c6d51": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What are the top five most frequent medications prescribed within 2 months to the patients in their 60 or above after they are diagnosed with nephritis and nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'nephritis and nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.age >= 60 ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT prescriptions.drug FROM admissions INNER JOIN patients ON admissions.subject_id = patients.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE admissions.admittime < datetime('now', 'start of month') AND admissions.admittime > datetime('now', '-2 months') AND patients.dod IS NOT NULL AND patients.dod > datetime('now', '-2 months') AND patients.gender = 'f' AND patients.age >= 60 AND admissions.admission_type = 'emergency' AND admissions.discharge_location = 'Nephrology' AND admissions.diagnosis = 'nephritis AND nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney' LIMIT 5",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['albuterol 0.083% neb soln'], ['albuterol inhaler'], ['bisacodyl'], ['docusate sodium'], ['donepezil'], ['ezetimibe'], ['fleet enema'], ['fluticasone-salmeterol diskus (100/50)'], ['glucagon'], ['heparin'], ['insulin'], ['levetiracetam'], ['lidocaine jelly 2% (urojet)'], ['metoprolol tartrate'], ['mirtazapine'], ['multivitamin iv'], ['neutra-phos'], ['nimodipine'], ['ondansetron'], ['polyethylene glycol'], ['potassium chl 20 meq / 1000 ml ns'], ['potassium chloride (powder)'], ['pravastatin'], ['readi-cat 2 (barium sulfate 2% suspension)'], ['senna'], ['sertraline'], ['sodium chloride 0.9%  flush'], ['torsemide']]",
            "pred_answer": "no such column: patients.age",
            "is_correct": false,
            "reasoning": "The predicted SQL immediately fails with \u201cno such column: patients.age,\u201d which indicates a column reference error (patients.age doesn\u2019t exist in the patients table). This is a localized issue within the WHERE clause and does not require restructuring the overall query logic\u2014just fixing the column reference. Therefore, it is a Local error.",
            "error_type": "Local: table/column reference error"
        },
        "fb64b71e8c133b3f07389d03": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many days has it been since patient 10021487 got their first prescription of sodium polystyrene sulfonate on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', prescriptions.starttime) ) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'sodium polystyrene sulfonate' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT julianday(admissions.dischtime) - julianday(admissions.admittime) AS days_since_sodium FROM admissions INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE prescriptions.drug = 'sodium polystyrene sulfonate' AND admissions.subject_id = 10021487",
            "gold_answer": "[['24.416']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to (1) limit to the current hospital visit (no filter admissions.dischtime IS NULL), (2) compute the days since the first prescription (it instead computes dischtime\u2013admittime), and (3) reference the prescription start time and the fixed current date. These are not minor clause fixes but rather a fundamentally incorrect join/filter strategy and date calculation. This error spans multiple clauses and alters the intended logic, so it is a global error.",
            "error_type": "Global: structural logic error"
        },
        "95ef5e98f26a8f7f39540db8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How much does the cholesterol ratio (total/hdl) lab test typically cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'labevents' AND cost.event_id IN ( SELECT labevents.row_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'cholesterol ratio (total/hdl)' ) )",
            "pred_sql": "SELECT cost.cost FROM d_items INNER JOIN labevents ON d_items.itemid = labevents.itemid INNER JOIN cost ON cost.event_id = labevents.row_id WHERE d_items.label = 'cholesterol ratio (total/hdl)'",
            "gold_answer": "[['8.84']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query joins d_items to labevents and then to cost, but cholesterol ratio is a lab test whose labels are stored in d_labitems, not d_items. This is a fundamental join error that breaks the intended table relationships, causing no rows to be returned. Fixing it requires changing the join strategy and table references, not just a minor tweak in a clause.",
            "error_type": "Global: incorrect join"
        },
        "ce4841cfeae7a3eabc232b3f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last height reading for patient 10021487 this month?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'height (cm)' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.valuenum FROM patients INNER JOIN chartevents ON patients.subject_id = chartevents.subject_id WHERE patients.subject_id = 10021487 AND patients.gender = 'm' AND chartevents.itemid = 220277 AND strftime('%Y-%m', chartevents.charttime) = '2017-03' AND chartevents.valuenum > 0 ORDER BY chartevents.charttime DESC LIMIT 1",
            "gold_answer": "[['185.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic and structure: it joins patients to chartevents on subject_id rather than filtering by ICU stays and admission IDs, hard-codes an incorrect itemid instead of looking up 'height (cm)' in d_items, filters by an unrelated gender condition, and uses a fixed 2017-03 month instead of the current month rule. These mistakes span multiple clauses and require restructuring the query\u2019s joins, filters, and date handling, making it a Global error.",
            "error_type": "Global: flawed join strategy"
        },
        "de36ead34dfb7547293729b8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Find the number of current patients in the 40s age group.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime IS NULL AND admissions.age BETWEEN 40 AND 49",
            "pred_sql": "SELECT count(subject_id) FROM patients WHERE strftime('%Y', admissions.admittime) - strftime('%Y', patients.dob) >= 40 AND strftime('%Y', admissions.admittime) - strftime('%Y', patients.dob) <= 49",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: admissions.admittime",
            "is_correct": false,
            "reasoning": "The predicted query simply selects from patients and tries to reference admissions.admittime without including the admissions table or a JOIN. This is a column/table reference mistake localized to the FROM clause and column usage, not a fundamental restructuring or logical flaw across multiple clauses.",
            "error_type": "Local: table/column reference error"
        },
        "c0bd2660fb0bdea2e7a08d76": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the last time that patient 10026255 has been prescribed medication via the po/ng route since 02/2100?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10026255 ) AND prescriptions.route = 'po/ng' AND strftime('%Y-%m',prescriptions.starttime) >= '2100-02' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM prescriptions INNER JOIN transfers ON prescriptions.subject_id = transfers.subject_id WHERE prescriptions.route = 'po/ng' AND prescriptions.subject_id = 10026255 AND transfers.intime LIKE '2100-02%' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "gold_answer": "[['2100-10-20 20:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query logic by joining prescriptions to transfers on subject_id, rather than restricting prescriptions by the patient's hospital admission (hadm_id) and filtering prescriptions.starttime by month. It also uses transfers.intime LIKE '2100-02%' instead of strftime on prescriptions.starttime to capture all dates since February 2100. This incorrect join and date filtering changes the intended relationships and cannot be fixed by a minor, localized edit; it requires restructuring the query logic.",
            "error_type": "Global: join error"
        },
        "fa62adda98cb0b27d5a3c37b": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Get me patient 10020187's total output volume on 02/04/2100.",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020187 ) ) AND strftime('%Y-%m-%d',outputevents.charttime) = '2100-02-04'",
            "pred_sql": "SELECT sum(outputevents.amount) FROM patients INNER JOIN outputevents ON patients.subject_id = outputevents.subject_id WHERE patients.subject_id = 10020187 AND strftime('%Y-%m-%d', outputevents.charttime) = '2021-04-02'",
            "gold_answer": "[['2275.0']]",
            "pred_answer": "no such column: outputevents.amount",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a non\u2010existent column outputevents.amount instead of outputevents.value, causing a column reference error. It also incorrectly filters the date as '2021-04-02' instead of '2100-02-04', but the primary failure is the wrong column name. These are confined to specific parts (SELECT and WHERE) and do not alter the overall query structure or table relationships.",
            "error_type": "Local: table/column reference error"
        },
        "ddda565150df1f4e6bed69dd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10021118 get his/her first prescription of atorvastatin this month?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021118 ) AND prescriptions.drug = 'atorvastatin' AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM prescriptions INNER JOIN patients ON prescriptions.subject_id = patients.subject_id WHERE patients.subject_id = 10021118 AND prescriptions.drug = 'atorvastatin' AND strftime('%Y-%m', prescriptions.starttime) = '2100-11' LIMIT 1",
            "gold_answer": "[['2100-12-01 20:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s main logical error is in the WHERE clause time filter: it uses strftime('%Y-%m') = '2100-11', which filters for November instead of December (\u201cthis month\u201d). This is a misinterpretation of the \u2018this month\u2019 requirement. The rest of the query structure (JOIN, subject_id filter, drug filter, LIMIT) is intact, so fixing the month literal or time expression corrects the query without altering its overall structure or joins. This is therefore a localized error in the temporal filter.",
            "error_type": "Local: temporal filter error"
        },
        "95ce217cd523e13855314bb7": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many prescriptions for miconazole 2% cream were made since 1 year ago?",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.drug = 'miconazole 2% cream' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM prescriptions INNER JOIN ( SELECT prescriptions.subject_id FROM prescriptions WHERE drug = 'miconazole 2% cream' AND strftime('%Y', prescriptions.starttime) < strftime('%Y', 'now', '-1 year') GROUP BY prescriptions.subject_id ) AS t2 ON prescriptions.subject_id = t2.subject_id",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL restructures the query entirely by using an INNER JOIN over a subquery of subject_ids instead of directly filtering prescriptions by starttime. It also misuses strftime('%Y') comparisons with 'now' rather than the fixed current time and the \u2018-1 year\u2019 offset. This is not a simple typo or missing table reference but a fundamental change in how filtering is applied (using a join with an incorrect subquery) and will require rewriting the query logic, so it is a global error.",
            "error_type": "Global: incorrect join logic"
        },
        "2ed3308d7d44c7cbc0bd7cdd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of patients that were admitted to the hospital since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE strftime('%Y',admissions.admittime) >= '2100'",
            "pred_sql": "SELECT count(subject_id) FROM admissions WHERE admittime >= '2100-01-01 00:00:00'",
            "gold_answer": "[['94.0']]",
            "pred_answer": "[['119.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters by admittime >= '2100-01-01' (equivalent to since 2100), but it uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id). This only overcounts multiple admissions per patient rather than the number of unique patients. This is a localized error in the SELECT clause (missing DISTINCT) and does not require restructuring the query or altering table relationships.",
            "error_type": "Local: missing DISTINCT"
        },
        "13192d30de747f4a5bf8cd8a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the name of the input that patient 10022017 had first had?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT inputevents.itemid FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022017 ) ) ORDER BY inputevents.starttime ASC LIMIT 1 )",
            "pred_sql": "SELECT d_items.label FROM d_items INNER JOIN inputevents ON d_items.itemid = inputevents.itemid WHERE inputevents.subject_id = 10022017 ORDER BY inputevents.charttime ASC LIMIT 1",
            "gold_answer": "[['or crystalloid intake']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted query fails because it orders by inputevents.charttime, but the inputevents table has no charttime column (it uses starttime). This is a simple column reference mistake within the ORDER BY clause and does not alter the overall query structure or logic. Thus, it is a local error.",
            "error_type": "Local: table/column reference error"
        },
        "df8e1a5f567fb53013c261d0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the first time on 04/03/this year that patient 10020740 had his/her sao2 checked?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',chartevents.charttime) = '04-03' ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT charttime FROM chartevents WHERE subject_id = '10020740' AND itemid = 220277 AND strftime('%Y-%m-%d', charttime) = '2100-04-03'",
            "gold_answer": "[['2100-04-03 00:00:00']]",
            "pred_answer": "[['2100-04-03 00:00:00'], ['2100-04-03 01:00:00'], ['2100-04-03 02:00:00'], ['2100-04-03 03:00:00'], ['2100-04-03 04:00:00'], ['2100-04-03 05:00:00'], ['2100-04-03 06:00:00'], ['2100-04-03 07:00:00'], ['2100-04-03 08:00:00'], ['2100-04-03 09:00:00'], ['2100-04-03 10:00:00'], ['2100-04-03 11:00:00'], ['2100-04-03 12:00:00'], ['2100-04-03 13:00:00'], ['2100-04-03 14:00:00'], ['2100-04-03 15:00:00'], ['2100-04-03 16:00:00'], ['2100-04-03 17:00:00'], ['2100-04-03 18:00:00'], ['2100-04-03 19:00:00'], ['2100-04-03 20:00:00'], ['2100-04-03 21:00:00'], ['2100-04-03 22:00:00'], ['2100-04-03 23:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL drops the required joins through admissions and ICU stays and omits the lookup of the correct itemid via d_items. Instead it filters only on subject_id and a hard-coded itemid, returning all that patient's sao2 measurements on that date rather than the first ICU measurement. This mistake alters the overall logic and table relationships, not just a localized clause.",
            "error_type": "Global: join error"
        },
        "c6babda1b9f713c49ac00e19": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What drug was patient 10004720 prescribed with after the diagnosis of palmar fascial fibromatosis [dupuytren] within 2 days in 12/2100?",
            "gold_sql": "SELECT T2.drug FROM ( SELECT diagnoses_icd.subject_id, diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'palmar fascial fibromatosis [dupuytren]' ) AND strftime('%Y-%m',diagnoses_icd.charttime) = '2100-12' ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND strftime('%Y-%m',prescriptions.starttime) = '2100-12' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 day')",
            "pred_sql": "SELECT prescriptions.drug FROM diagnoses_icd INNER JOIN patients ON diagnoses_icd.subject_id = patients.subject_id INNER JOIN prescriptions ON prescriptions.subject_id = patients.subject_id WHERE patients.subject_id = 10004720 AND diagnoses_icd.icd_code = 'icd9|41401' AND diagnoses_icd.charttime BETWEEN datetime('2100-12-20') AND datetime('2100-12-22')",
            "gold_answer": "[['0.9% sodium chloride'], ['0.9% sodium chloride'], ['0.9% sodium chloride (mini bag plus)'], ['0.9% sodium chloride (mini bag plus)'], ['0.9% sodium chloride (mini bag plus)'], ['5% dextrose'], ['5% dextrose'], ['artificial tear ointment'], ['bag'], ['bag'], ['bag'], ['chlorhexidine gluconate 0.12% oral rinse'], ['citalopram'], ['divalproex (delayed release)'], ['famotidine'], ['glucagon'], ['glucose gel'], ['insulin'], ['iso-osmotic dextrose'], ['iso-osmotic dextrose'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['midazolam'], ['sodium chloride 0.9%  flush'], ['soln'], ['vancomycin'], ['vancomycin']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic by: 1) Using a hard-coded incorrect ICD code ('icd9|41401') instead of looking up the code for 'palmar fascial fibromatosis [dupuytren]' in d_icd_diagnoses; 2) Failing to join prescriptions via hadm_id or restrict prescriptions to December 2100; 3) Omitting the 2-day window clause entirely and only filtering diagnoses dates; 4) Joining tables solely on subject_id, ignoring admissions context. These changes affect the overall structure and semantics of the query, not just a single clause.",
            "error_type": "Global: join-logic mismatch error"
        },
        "94d58c7def5ec9cdcb2b0467": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the icu visits of patient 10007058 since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT icustays.stay_id ) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 ) AND strftime('%Y',icustays.intime) >= '2100'",
            "pred_sql": "SELECT count(icustays.hadm_id) FROM icustays INNER JOIN transfers ON icustays.hadm_id = transfers.hadm_id WHERE transfers.subject_id = 10007058 AND transfers.intime BETWEEN '2100-01-01 00:00:00' AND '2100-12-31 23:59:59'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by joining icustays to transfers rather than filtering icustays by patient admissions. It also counts hadm_id instead of distinct stay_id and applies the time filter to transfers.intime rather than icustays.intime. These changes span multiple clauses (FROM, JOIN, WHERE) and require a structural rewrite to use the correct tables and filters, so this is not a localized syntax mistake but a flaw in the overall query construction.",
            "error_type": "Global: join error"
        },
        "a60f116185911329f2b6d8b8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the change in creatinine in patient 10007058 last measured on the last hospital visit compared to the first value measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatinine' ) ORDER BY labevents.charttime DESC LIMIT 1 ) - ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatinine' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT CASE WHEN max(chartevents.valuenum) - min(chartevents.valuenum) IS NULL THEN 0 ELSE max(chartevents.valuenum) - min(chartevents.valuenum) END FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE patients.subject_id = 10007058 AND chartevents.itemid = 50961 AND patients.dod IS NOT NULL AND chartevents.valuenum IS NOT NULL",
            "gold_answer": "[['0.1']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by using the chartevents table instead of labevents for lab values, failing to restrict to the patient\u2019s last hospital admission, and adding an irrelevant death filter (patients.dod IS NOT NULL). These issues span multiple clauses (FROM, JOIN, WHERE, aggregation) and alter the intended meaning of comparing first and last creatinine lab measurements within the last admission, making the error structural and global.",
            "error_type": "Global: join error"
        },
        "447895a7f06300c849dd8e20": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many times did patient 10039997 have other computer assisted surgery this year?",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other computer assisted surgery' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039997 ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "pred_sql": "SELECT count(procedures_icd.hadm_id) FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE procedures_icd.subject_id = 10039997 AND d_icd_procedures.long_title LIKE '%other computer-assisted surgery%' AND strftime('%Y', procedures_icd.charttime) = '2100'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses a LIKE pattern with a hyphen ('other computer-assisted surgery') that does not match the actual long_title ('other computer assisted surgery'). This is a localized mistake in the WHERE clause\u2019s string comparison and can be fixed by correcting the literal. The overall query structure (joins, filters by year and patient) remains appropriate, so the error is local.",
            "error_type": "Local: value parsing error"
        },
        "bda21105f3950d0b64ef875d": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the average overall hospital cost that includes cyanocobalamin this year?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'cyanocobalamin' ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT sum(cost.cost) / count(DISTINCT admissions.hadm_id) FROM cost INNER JOIN prescriptions ON cost.event_id = prescriptions.row_id WHERE strftime('%Y', prescriptions.starttime) = '2100' AND prescriptions.drug = 'cyanocobalamin'",
            "gold_answer": "[['1605.733']]",
            "pred_answer": "no such column: admissions.hadm_id",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s error arises from referencing admissions.hadm_id in the COUNT(DISTINCT ...) clause even though the admissions table is never included in the FROM or JOIN clauses. This is a localized column/table reference mistake and can be corrected by adding the appropriate JOIN to admissions without restructuring the overall query logic.",
            "error_type": "Local: column reference error"
        },
        "d7ab6fd0f27ec7c921da3de4": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me patient 10021487's minimum magnesium, urine value since 12 months ago?",
            "gold_sql": "SELECT MIN(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'magnesium, urine' ) AND datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-12 month')",
            "pred_sql": "SELECT itemid, value FROM outputevents WHERE subject_id = 10021487 AND itemid IN (225168, 51301) AND charttime > datetime('now', '-12 month') GROUP BY itemid, value ORDER BY value LIMIT 1",
            "gold_answer": "[['13.8']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the outputevents table instead of the labevents table, selects incorrect itemids, uses the wrong columns (value vs valuenum), filters by subject_id rather than hadm_id via admissions, and employs 'now' instead of the fixed datetime. These errors span multiple clauses and fundamentally alter the intended data source and logic, requiring a full restructuring of the query.",
            "error_type": "Global: wrong table/logic"
        },
        "52fcf1b4e0958500d0e297af": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10004720 have their last chest tube #1 output since 174 days ago?",
            "gold_sql": "SELECT outputevents.charttime FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'chest tube #1' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime) >= datetime('2100-12-31 23:59:00','-174 day') ORDER BY outputevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT outputevents.charttime FROM chartevents INNER JOIN outputevents ON chartevents.row_id = outputevents.row_id WHERE outputevents.subject_id = 10004720 AND chartevents.itemid = 225667 AND outputevents.itemid = 225667 AND outputevents.charttime >= ( SELECT max(charttime) - 174 FROM outputevents WHERE subject_id = 10004720 AND itemid = 225667 )",
            "gold_answer": "[['2100-12-08 18:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses an INNER JOIN between chartevents and outputevents on row_id, which is not a valid relationship and fundamentally changes the query\u2019s logic. It also misuses chartevents rather than outputevents for the chest tube output, applies an incorrect subquery for the date filter (subtracting 174 from a timestamp), and fails to follow the schema\u2019s correct linking via d_items. These issues span multiple clauses and require restructuring the overall query (fixing the join, using the correct table, and properly computing the date filter), so this is not a localized typo but a structural, logical error.",
            "error_type": "Global: join error"
        },
        "be7acaa3733f17d7c078dd3c": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the total dosage of iso-osmotic sodium chloride that patient 10027602 was prescribed last month?",
            "gold_sql": "SELECT SUM(prescriptions.dose_val_rx) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND prescriptions.drug = 'iso-osmotic sodium chloride' AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month')",
            "pred_sql": "SELECT sum(drug) FROM prescriptions WHERE subject_id = 10027602 AND starttime LIKE '2100-11-%'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['12.06']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly sums the drug column (sum(drug)) instead of the numeric dosage column dose_val_rx, and it also omits the filter for drug = 'iso-osmotic sodium chloride' and misuses subject_id vs hadm_id and starttime filtering. These mistakes are confined to column references and WHERE-clause filters and do not require restructuring the overall query logic.",
            "error_type": "Local: column reference error"
        },
        "c85da658022ccba3515bc2f8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of times that patient 10029291 received the performance of urinary filtration, multiple procedure on the last hospital visit.",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'performance of urinary filtration, multiple' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 )",
            "pred_sql": "SELECT count(DISTINCT procedures_icd.hadm_id) FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title LIKE '%urinary filtration%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict to patient 10029291 and the last hospital visit, uses a LIKE filter instead of an exact title match, and counts distinct hadm_id instead of counting each procedure occurrence. These omissions and changes alter the overall query logic and intended scope, not just a localized clause, requiring restructuring of filters and aggregation.",
            "error_type": "Global: incorrect filtering and aggregation"
        },
        "e3cba1c79b43587fbfdbcdfc": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the first time that patient 10003400 had the maximum amount of chloride in 06/this year?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10003400 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'chloride' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '06' ORDER BY labevents.valuenum DESC, labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT min(charttime) FROM chartevents WHERE itemid = 510 AND subject_id = 10003400 AND strftime('%m', charttime) = '06' AND value = ( SELECT max(value) FROM chartevents WHERE itemid = 509 AND subject_id = 10003400 AND strftime('%m', charttime) = '06' )",
            "gold_answer": "[['2100-06-15 05:34:00']]",
            "pred_answer": "no such column: value",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'value' in the chartevents table, causing the execution error 'no such column: value'. This is a localized column reference error and does not require restructuring the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "8010b9081130f60c6d4a482e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What's the cost of a drug known as octreotide acetate?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'prescriptions' AND cost.event_id IN ( SELECT prescriptions.row_id FROM prescriptions WHERE prescriptions.drug = 'octreotide acetate' )",
            "pred_sql": "SELECT cost.cost FROM cost INNER JOIN prescriptions ON cost.row_id = prescriptions.row_id WHERE prescriptions.drug = 'octreotide acetate'",
            "gold_answer": "[['12.44']]",
            "pred_answer": "[['10.06'], ['10.09'], ['11.03'], ['11.16'], ['11.55'], ['8.6'], ['9.32']]",
            "is_correct": false,
            "reasoning": "The predicted query joins cost.row_id to prescriptions.row_id and omits the event_type filter, whereas the correct logic joins cost.event_id to prescriptions.row_id and restricts cost.event_type = 'prescriptions'. This wrong join key and missing filter fundamentally alters the intended mapping between cost entries and prescription events and requires restructuring the query logic rather than a simple local fix.",
            "error_type": "Global: incorrect join"
        },
        "76ee482fdcd9d445253719e5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate the difference between the total input and output volume for patient 10005909 on last month/11.",
            "gold_sql": "SELECT ( SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',inputevents.starttime) = '11' ) - ( SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '11' )",
            "pred_sql": "SELECT sum(inputevents.totalamount) - sum(outputevents.totalamount) FROM patients INNER JOIN inputevents ON patients.subject_id = inputevents.subject_id INNER JOIN outputevents ON patients.subject_id = outputevents.subject_id WHERE patients.subject_id = 10005909 AND strftime('%Y %m',inputevents.starttime) = '2100 11' AND strftime('%Y %m',outputevents.charttime) = '2100 11'",
            "gold_answer": "[['3210.0']]",
            "pred_answer": "no such column: outputevents.totalamount",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column outputevents.totalamount (the correct column for output volume is outputevents.value). This is a simple column reference error and does not require restructuring the query's logic or joins\u2014just fixing the column name.",
            "error_type": "Local: table/column reference error"
        },
        "ba1809d4eafa379093b710bd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours has it been since patient 10021487 stayed in the careunit emergency department on the current hospital visit for the first time?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', transfers.intime) ) FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) AND transfers.careunit = 'emergency department' ORDER BY transfers.intime ASC LIMIT 1",
            "pred_sql": "SELECT strftime('%J', transfers.outtime) - strftime('%J', transfers.intime) AS days FROM transfers INNER JOIN admissions ON transfers.hadm_id = admissions.hadm_id WHERE transfers.careunit = 'emergency' AND admissions.subject_id = 10021487",
            "gold_answer": "[['601.033']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails to restrict to the current hospital admission (missing admissions.dischtime IS NULL), uses the wrong careunit literal ('emergency' instead of 'emergency department'), computes the difference using transfers.outtime instead of the fixed current time, and omits converting days to hours and ordering/limiting to the first occurrence. These issues span multiple clauses (SELECT expression, JOIN/WHERE filters, and overall query structure) and require restructuring the logic, so this is not a minor typo but a fundamental query logic error.",
            "error_type": "Global: incorrect filter and time calculation logic"
        },
        "ea7da99dbb2c6595e989664a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours have passed since the last time patient 10018081 received a lab test of sodium on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', labevents.charttime) ) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'sodium' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 AND admissions.dischtime IS NULL ) ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT round((strftime('%J', labevents.charttime) - strftime('%J', transfers.intime)) * 24) AS hours_since FROM labevents INNER JOIN transfers ON transfers.hadm_id = labevents.hadm_id WHERE labevents.itemid = 50285 AND labevents.subject_id = 10018081",
            "gold_answer": "[['1.867']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses transfers.intime as the reference time instead of the fixed current time ('2100-12-31 23:59:00') and fails to restrict to the current hospital admission (dischtime IS NULL). It also omits ordering/limiting to the last sodium test and misuses the transfers table rather than admissions. These mistakes fundamentally alter the logic and data relationships, requiring restructuring of multiple clauses (JOIN, WHERE, SELECT) to correct, so this is not a localized syntax fix but a core logical error.",
            "error_type": "Global: incorrect join and reference logic"
        },
        "0d1eedad4af1daa77620607a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the price of a diagnosis for nontoxic uninodular goiter?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'diagnoses_icd' AND cost.event_id IN ( SELECT diagnoses_icd.row_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'nontoxic uninodular goiter' ) )",
            "pred_sql": "SELECT DISTINCT cost.cost FROM diagnoses_icd INNER JOIN cost ON diagnoses_icd.row_id = cost.event_id INNER JOIN d_icd_diagnoses ON cost.diagnosis_code = d_icd_diagnoses.icd_code WHERE d_icd_diagnoses.long_title = 'Nontoxic uninodular goiter'",
            "gold_answer": "[['6.17']]",
            "pred_answer": "no such column: cost.diagnosis_code",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column cost.diagnosis_code, which violates the schema. The overall query structure and logic (joining cost to diagnoses and filtering by long_title) remains intact, but the incorrect column name is a localized mistake.",
            "error_type": "Local: table/column reference error"
        },
        "2b067714ed119dcbd0f6f71a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the name of the procedure that was given two or more times to patient 10020740?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, COUNT( procedures_icd.charttime ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 >= 2 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE procedures_icd.subject_id = 10020740 GROUP BY procedures_icd.icd_code HAVING count(procedures_icd.icd_code) > 2",
            "gold_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['insertion of endotracheal tube'], ['other electroshock therapy']]",
            "pred_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['other electroshock therapy']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s logic error is confined to the HAVING clause: it filters for count > 2 (i.e., three or more occurrences) instead of count >= 2 (two or more). This is a localized mistake in the threshold condition and does not require restructuring the query or altering table joins. It can be corrected by changing >2 to >=2.",
            "error_type": "Local: having-count-threshold error"
        },
        "aafda5799cef4cdbb1a8c0e0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What's the count of current patients?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime is null",
            "pred_sql": "SELECT count(subject_id) FROM patients WHERE dod IS NULL",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['81.0']]",
            "is_correct": false,
            "reasoning": "The predicted query counts patients with a NULL dod (alive patients) from the patients table, whereas the question asks for current patients (ongoing admissions) defined by admissions.dischtime IS NULL. The predicted SQL uses the wrong table and criterion, fundamentally altering the query\u2019s logic and intended meaning. This requires changing the query structure and table references, not just a minor fix.",
            "error_type": "Global: incorrect table and logic"
        },
        "35c0173f97865f92a3780f49": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Which are the top four most frequent procedures patients had within the same hospital visit after being diagnosed with pain in left shoulder?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'pain in left shoulder' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 4 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM chartevents INNER JOIN procedures_icd ON chartevents.row_id = procedures_icd.row_id INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Left shoulder pain' GROUP BY d_icd_procedures.long_title ORDER BY count(d_icd_procedures.long_title) DESC LIMIT 4",
            "gold_answer": "[['performance of urinary filtration, multiple']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely misrepresents the intended logic by joining chartevents to procedures_icd on row_id (an unrelated relationship), filtering procedures by a diagnosis title rather than finding procedures after a diagnosis in the same visit, and omitting the temporal and admission-based joins and ranking logic. This is not a minor syntax or column reference mistake but a fundamental restructuring of the query that alters its meaning and yields no results.",
            "error_type": "Global: incorrect join relationship"
        },
        "b1bf14bf9333394d698cdcb5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the last sodium test that patient 10037928 received since 11/2100?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'sodium' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10037928 ) AND strftime('%Y-%m',labevents.charttime) >= '2100-11' ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT charttime FROM chartevents WHERE itemid IN (225464, 220739) AND subject_id = 10037928 AND strftime('%Y %m', charttime) = '2100 11' ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['2100-11-12 23:49:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query pulls from chartevents instead of labevents (the sodium test is a lab event), and it misuses strftime formatting and comparison: it tests strftime('%Y %m', charttime) = '2100 11', which matches only November 2100, whereas the since-11/2100 condition requires strftime('%Y-%m', charttime) >= '2100-11'. The core mistake is using the wrong table reference, a localized error that can be fixed by correcting the table name (and minor formatting), without altering the overall query structure.",
            "error_type": "Local: table reference error"
        },
        "5e11ebb43f301cecc472e7c7": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10036156's last visit to the hospital occur?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10036156 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT admissions.dischtime FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10036156 ORDER BY admissions.dischtime DESC LIMIT 1",
            "gold_answer": "[['2100-07-15 04:52:00']]",
            "pred_answer": "[['2100-07-17 15:08:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only selects the wrong column (discharge time instead of admission time) but also introduces an unnecessary INNER JOIN to icustays, thereby changing the scope of the query to ICU stays rather than all hospital visits. This is a structural change affecting overall query logic and the relationships between tables, rather than a simple local typo or syntax issue.",
            "error_type": "Global: incorrect join and column selection"
        },
        "6ee50f63ab0ecd43a03d1c47": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the daily average input of nitroglycerin for patient 10019172 last month?",
            "gold_sql": "SELECT AVG(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019172 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'nitroglycerin' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') GROUP BY strftime('%Y-%m-%d',inputevents.starttime)",
            "pred_sql": "SELECT sum(inputevents.amount) FROM patients INNER JOIN inputevents ON patients.subject_id = inputevents.subject_id WHERE patients.subject_id = 10019172 AND inputevents.amount > 0 AND strftime('%Y-%m', inputevents.starttime) = '2015-03' AND inputevents.itemid = 62248",
            "gold_answer": "[['250.0'], ['250.0']]",
            "pred_answer": "no such column: inputevents.amount",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to a column reference error: it uses inputevents.amount, which does not exist in the schema (the correct column is totalamount). This is a localized mistake within the SELECT clause and does not impact the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "8d4345c0ed9926353a1fe59a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients had introduction of nutritional substance into central vein, percutaneous approach done in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'introduction of nutritional substance into central vein, percutaneous approach' ) AND strftime('%Y',procedures_icd.charttime) = '2100' )",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM procedures_icd WHERE icd_code LIKE '456.3%' AND charttime LIKE '2100%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only queries the procedures_icd table with a LIKE filter on icd_code and a prefix match on charttime, rather than joining to d_icd_procedures to map the long_title to the exact code and using strftime to extract the year. These mistakes are confined to the WHERE clause (incorrect filter conditions) and do not require restructuring joins or the overall query logic.",
            "error_type": "Local: incorrect WHERE clause"
        },
        "c3eb5302bcf43b5a5d43efcf": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last dose of glucagon since 05/2100, prescribed to patient 10020306?",
            "gold_sql": "SELECT prescriptions.dose_val_rx FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020306 ) AND prescriptions.drug = 'glucagon' AND strftime('%Y-%m',prescriptions.starttime) >= '2100-05' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT dose_val_rx FROM prescriptions WHERE subject_id = 10020306 AND starttime > '2100-05-21 00:00:00' ORDER BY starttime DESC LIMIT 1",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['75.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the drug filter (prescriptions.drug = 'glucagon') and misinterprets the \u201csince 05/2100\u201d condition by using a fixed date of '2100-05-21 00:00:00' instead of filtering by month. These mistakes are confined to the WHERE clause and do not require changing the overall query structure or JOIN logic.",
            "error_type": "Local: where-clause error"
        },
        "2fe27400810c706671948c64": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the microbiology test given to patient 10004720 for the last time since 12/2100?",
            "gold_sql": "SELECT microbiologyevents.test_name FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND strftime('%Y-%m',microbiologyevents.charttime) >= '2100-12' ORDER BY microbiologyevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT test_name FROM microbiologyevents WHERE strftime('%Y %m', charttime) < '2100 12' AND subject_id = 10004720 ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['gram stain']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s mistake lies entirely in the WHERE clause\u2019s date filtering: it uses strftime('%Y %m') with a space instead of '%Y-%m', applies the wrong comparison operator (< instead of >=), and compares against an incorrectly formatted literal ('2100 12' rather than '2100-12'). The rest of the query structure (SELECT, ORDER BY, LIMIT) remains intact, so this is a localized temporal\u2010filter error that does not require restructuring the overall query logic.",
            "error_type": "Local: other-temporal error"
        },
        "c58a5ec41c71bc32e692d0b7": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What would be the monthly average weight of patient 10012552 in 2100?",
            "gold_sql": "SELECT AVG(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10012552 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) AND strftime('%Y',chartevents.charttime) = '2100' GROUP BY strftime('%Y-%m',chartevents.charttime)",
            "pred_sql": "SELECT avg(patients.weight) FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id WHERE patients.subject_id = 10012552 AND admissions.admittime LIKE '2100%'",
            "gold_answer": "[['83.2']]",
            "pred_answer": "no such column: patients.weight",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column patients.weight, causing a column reference error. This is a localized mistake that does not affect the overall query structure or logic\u2014only the column name is wrong.",
            "error_type": "Local: table/column reference error"
        },
        "75072ceb723a2148aae97311": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How much does quinidine gluconate e.r. cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'prescriptions' AND cost.event_id IN ( SELECT prescriptions.row_id FROM prescriptions WHERE prescriptions.drug = 'quinidine gluconate e.r.' )",
            "pred_sql": "SELECT cost.cost FROM prescriptions INNER JOIN cost ON prescriptions.row_id = cost.event_id WHERE prescriptions.drug = 'quinidine gluconate e.r.'",
            "gold_answer": "[['11.42']]",
            "pred_answer": "[['11.42'], ['8.95']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins prescriptions and cost correctly but omits the filter cost.event_type = 'prescriptions' and also fails to use DISTINCT, causing extra rows to appear. This is a localized mistake in the WHERE clause rather than a structural or multi-clause logical issue.",
            "error_type": "Local: missing event_type filter"
        },
        "baeb70e8d9a9783d1d063250": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the first time that patient 10021312 had the minimum value of heart rate?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.valuenum ASC, chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT min(charttime) FROM chartevents WHERE subject_id = 10021312 AND itemid IN (112, 402, 111, 128)",
            "gold_answer": "[['2100-09-17 22:11:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic: it simply takes the earliest charttime for a loosely defined set of itemids and ignores the need to filter by ICU stays, admissions, or to identify the record with the minimum heart rate value. It also misuses MIN on charttime instead of ordering by valuenum then charttime, and it does not reference d_items to select the correct heart rate item. This is a structural/logical flaw spanning multiple clauses, not a localized syntax or reference error.",
            "error_type": "Global: incorrect logic"
        },
        "598a0007438cebbb5e0202c7": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Do you know the minimum transitional epithelial cells value of patient 10018081 this month?",
            "gold_sql": "SELECT MIN(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'transitional epithelial cells' ) AND datetime(labevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month')",
            "pred_sql": "SELECT DISTINCT CASE WHEN label = 'transitional epithelial cells' THEN valuenum ELSE NULL END FROM chartevents WHERE strftime('%Y-%m', charttime) = '2100-11' AND subject_id = 10018081",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: label",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong table (chartevents instead of labevents) and references a non-existent column (label) without joining the mapping table (d_labitems). These are isolated table/column reference errors that can be fixed by correcting the FROM clause and column references without restructuring the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "1afa09110562c5799d193e9a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Please list the monthly maximum volume of straight cath that was output by patient 10015931.",
            "gold_sql": "SELECT MAX(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015931 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'straight cath' AND d_items.linksto = 'outputevents' ) GROUP BY strftime('%Y-%m',outputevents.charttime)",
            "pred_sql": "SELECT sum(outputevents.volume) FROM outputevents INNER JOIN transfers ON outputevents.subject_id = transfers.subject_id WHERE transfers.subject_id = 10015931 AND outputevents.itemid IN (226560, 225664) AND transfers.intime = ( SELECT min(transfers.intime) FROM transfers INNER JOIN outputevents ON transfers.subject_id = outputevents.subject_id WHERE transfers.subject_id = 10015931 AND outputevents.itemid IN (226560, 225664) )",
            "gold_answer": "[['200.0']]",
            "pred_answer": "no such column: outputevents.volume",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column outputevents.volume, causing a column reference error. This is a localized mistake in the SELECT clause and does not affect the overall query structure or intended logic of grouping by month and taking maxima.",
            "error_type": "Local: column reference error"
        },
        "bc8d116f59e4fd75687ea181": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What new medications were introduced for patient 10004733 today compared to the ones yesterday?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') EXCEPT SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT sum(CASE WHEN strftime('%Y %m %d', inputevents.starttime) = strftime('%Y %m %d', 'now') THEN 1 ELSE 0 END) - sum(CASE WHEN strftime('%Y %m %d', inputevents.starttime) = strftime('%Y %m %d', 'now', '-1 day') THEN 1 ELSE 0 END) AS difference FROM inputevents INNER JOIN prescriptions ON inputevents.subject_id = prescriptions.subject_id WHERE inputevents.subject_id = 10004733",
            "gold_answer": "[['iso-osmotic dextrose'], ['pantoprazole'], ['quetiapine fumarate'], ['ranitidine (liquid)'], ['sodium chloride 0.9%'], ['vial']]",
            "pred_answer": "[['-1360.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely deviates from the intended logic: it uses inputevents instead of prescriptions, sums counts with CASE statements, and joins tables improperly on subject_id rather than focusing on prescriptions.starttime and EXCEPT logic. This is not a minor typo or local reference issue but a fundamental restructuring that fails to retrieve distinct new medications. The join strategy, choice of tables, and aggregation approach all break the overall query logic.",
            "error_type": "Global: flawed join strategy"
        },
        "4e6fc34a2642a459d3ea7032": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the first time that the respiratory rate readings of patient 10018423 were recorded?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018423 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT min(CHARTTIME) FROM chartevents WHERE subject_id = 10018423 AND itemid = 220052",
            "gold_answer": "[['2100-05-21 19:16:00']]",
            "pred_answer": "[['2100-05-21 19:10:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to follow the required multi\u2010step filtering: it does not filter chartevents by ICU stay (via admissions and icustays) nor does it map the 'respiratory rate' label through the d_items table. Instead it directly uses subject_id and a hardcoded itemid, which fundamentally changes the data scope and joins. This is not a simple typo or clause\u2010level fix but rather a flawed overall join and filtering strategy that alters the intended query logic.",
            "error_type": "Global: incorrect join strategy"
        },
        "1c43c26e60ec75a3a8e84c7e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last monocytes result for patient 10006053 last month?",
            "gold_sql": "SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006053 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'monocytes' ) AND datetime(labevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT itemid, value, unitname FROM chartevents WHERE subject_id = 10006053 AND itemid IN (1278, 1280, 1281, 1282, 1283, 1284, 1285) AND strftime('%Y-%m', charttime) = '2015-05' AND value IS NOT NULL ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['5.0']]",
            "pred_answer": "no such column: value",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly references non-existent columns (\u201cvalue\u201d and \u201cunitname\u201d) in chartevents instead of the correct columns (e.g., valuenum, valueuom) or using the labevents table. This is a localized mistake in column naming and table choice and does not reflect a deeper logical flaw across multiple clauses.",
            "error_type": "Local: table/column reference error"
        },
        "a05f7f0e4fab0a67cd2533c2": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the difference in the respiratory rate value for patient 10022281 second measured on the first icu visit compared to the first value measured on the first icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022281 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 OFFSET 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022281 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT julianday(icustays.outtime) - julianday(icustays.intime) AS days , outputevents.value - lead(outputevents.value) OVER (PARTITION BY icustays.subject_id ORDER BY icustays.intime) AS diff FROM outputevents INNER JOIN icustays ON outputevents.stay_id = icustays.stay_id WHERE icustays.subject_id = 10022281 AND outputevents.itemid IN (618, 220051)",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely diverges from the intended logic: it uses the outputevents table (meant for fluid/output data) instead of chartevents (where vital signs like respiratory rate are stored), employs a window function and unintended time difference calculation instead of selecting the first and second respiratory rate measurements, and fails to isolate the first ICU stay or properly filter by the 'respiratory rate' item. This error spans multiple clauses and requires restructuring the entire query logic, making it a global error.",
            "error_type": "Global: incorrect table usage and query structure"
        },
        "70a08b004ba90593af33dfab": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Could you provide me with patient 10035185's hospital admission time since 2100?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10035185 AND strftime('%Y',admissions.admittime) >= '2100'",
            "pred_sql": "SELECT admissions.admittime FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10035185 AND admissions.dischtime IS NULL",
            "gold_answer": "[['2100-05-17 12:53:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by introducing an unnecessary INNER JOIN to icustays and filtering on admissions.dischtime IS NULL (i.e., current admissions) instead of selecting all admissions since year 2100. It omits the required date filter and changes the query\u2019s intent from \u201cadmissions since 2100\u201d to \u201ccurrent admissions,\u201d which is a structural flaw affecting the overall logic rather than a localized syntax mistake.",
            "error_type": "Global: incorrect join strategy"
        },
        "d3b5eb094434de4c257a6007": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What does it cost to have percutaneous transluminal coronary angioplasty [ptca]?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'procedures_icd' AND cost.event_id IN ( SELECT procedures_icd.row_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'percutaneous transluminal coronary angioplasty [ptca]' ) )",
            "pred_sql": "SELECT DISTINCT cost.cost FROM procedures_icd INNER JOIN cost ON procedures_icd.row_id = cost.event_id INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Percutaneous transluminal coronary angioplasty [ptca]'",
            "gold_answer": "[['6.67']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails simply because it doesn\u2019t match the procedure title text (it uses a capital \u201cPercutaneous\u2026\u201d whereas the stored long_title is lowercase) and also omits the cost.event_type filter \u2013 both of which are local WHERE\u2010clause issues. The overall join structure is correct; only the predicate is wrong and fixable without reworking the joins or logic. Therefore this is a Local error (value parsing/mismatch in the WHERE clause).",
            "error_type": "Local: value parsing error"
        },
        "9e464232cfb26d809dde71e4": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "For the first time last month, when was patient 10026354 prescribed a drug?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10026354 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE patients.subject_id = 10026354 ORDER BY prescriptions.starttime DESC LIMIT 1",
            "gold_answer": "[['2100-11-01 12:00:00']]",
            "pred_answer": "[['2100-11-09 10:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the requirement to restrict prescriptions to last month and instead returns the most recent prescription overall. Fixing this would require adding the datetime filter on last month and reversing the sort order to ascending before limiting \u2014 changes that alter the core logic, not just a minor clause. Therefore, it\u2019s a global error in the query\u2019s overall filtering and ordering logic.",
            "error_type": "Global: incorrect filtering and ordering"
        },
        "94b916696ee9cfe48eca3e41": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When has patient 10020740 been given hemoglobin test for the first time in 03/2100?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'hemoglobin' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) AND strftime('%Y-%m',labevents.charttime) = '2100-03' ORDER BY labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT charttime FROM chartevents WHERE subject_id = 10020740 AND itemid IN (5122, 5123, 5124, 5125, 5126, 5127, 5128, 5129, 5130, 5131) AND strftime('%Y-%m', charttime) = '2100-03'",
            "gold_answer": "[['2100-03-23 16:50:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the chartevents table for hemoglobin tests using a hard\u2010coded list of itemids, whereas the ground truth correctly uses labevents joined to d_labitems to identify hemoglobin via its label. This is not a simple syntax or column name mistake but a fundamental misplacement of the data source (wrong table and lookup logic), altering the entire intention of retrieving the first lab hemoglobin event. Fixing this requires restructuring the FROM clause and correct table joins rather than a minor local adjustment.",
            "error_type": "Global: wrong-table error"
        },
        "1a914c34a8501fbeec1d63d3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the daily maximum value of heart rate for patient 10004733 on the current hospital visit?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN admissions ON chartevents.hadm_id = admissions.hadm_id WHERE admissions.subject_id = 10004733 AND chartevents.valuenum IS NOT NULL AND chartevents.itemid IN (211, 220045)",
            "gold_answer": "[['107.0'], ['111.0'], ['115.0'], ['117.0'], ['118.0'], ['130.0'], ['140.0'], ['91.0'], ['93.0']]",
            "pred_answer": "[['140.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the date-based GROUP BY, fails to restrict to the current ICU stay(s) of the current hospital visit, and combines tables incorrectly by joining chartevents directly to admissions instead of filtering by stay_id and dischtime IS NULL. This fundamentally alters the query\u2019s logic and structure (returning a single maximum rather than daily maxima), requiring significant restructuring beyond a localized fix.",
            "error_type": "Global: incorrect query structure"
        },
        "3a7c1bad5e1498d7555d626a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many current patients belong to the 60 or above age category?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime IS NULL AND admissions.age >= 60",
            "pred_sql": "SELECT count(subject_id) FROM admissions WHERE age >= 60",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['60.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the crucial WHERE condition admissions.dischtime IS NULL that restricts to current patients and also fails to use COUNT(DISTINCT subject_id), counting all admissions rather than unique patients. These omissions change the overall logic of the query, not just a single clause or reference, thus it is a structural/logical error requiring modification across multiple parts of the query.",
            "error_type": "Global: missing current patient filter"
        },
        "0cfc184df990beed1921b205": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Among patients in their 60 or above since 2100, what are the top three most frequent diagnoses?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND strftime('%Y',diagnoses_icd.charttime) >= '2100' GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT diagnoses_icd.icd_code FROM patients INNER JOIN diagnoses_icd ON patients.subject_id = diagnoses_icd.subject_id WHERE patients.dob < '2100-01-01' AND patients.gender = 'm' AND patients.dod IS NULL AND patients.age >= 60 GROUP BY diagnoses_icd.icd_code ORDER BY count(diagnoses_icd.icd_code) DESC LIMIT 3",
            "gold_answer": "[['atrial fibrillation'], ['other and unspecified hyperlipidemia'], ['unspecified essential hypertension']]",
            "pred_answer": "no such column: patients.age",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column patients.age, causing the error 'no such column: patients.age'. This is a localized issue with an incorrect column reference and can be fixed by using the correct column (e.g., admissions.age) or adjusting the join, without changing the overall query structure.",
            "error_type": "Local: table/column reference error"
        },
        "05b8b940351626bb60292719": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What medication was patient 10021487 prescribed two or more times on the current hospital visit?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, COUNT( prescriptions.starttime ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT drug FROM prescriptions WHERE hadm_id = ( SELECT max(hadm_id) FROM admissions WHERE subject_id = 10021487 ) GROUP BY drug HAVING count(drug) > 2",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['acetazolamide'], ['acetazolamide sodium'], ['bisacodyl'], ['calcium carbonate'], ['chlorhexidine gluconate 0.12% oral rinse'], ['diphenhydramine'], ['famotidine'], ['furosemide'], ['insulin'], ['iso-osmotic dextrose'], ['lactated ringers'], ['metronidazole (flagyl)'], ['midazolam'], ['neutra-phos'], ['ns'], ['plasmalyte'], ['potassium chloride (powder)'], ['sodium bicarbonate'], ['sodium chloride 0.9%  flush'], ['soln'], ['vancomycin']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['famotidine'], ['furosemide'], ['insulin'], ['iso-osmotic dextrose'], ['lactated ringers'], ['metronidazole (flagyl)'], ['midazolam'], ['ns'], ['plasmalyte'], ['sodium chloride 0.9%  flush'], ['soln'], ['vancomycin']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic of the query in two ways: (1) It identifies the \"current hospital visit\" by taking MAX(hadm_id) instead of using admissions.dischtime IS NULL, and (2) it uses HAVING COUNT(drug) > 2 rather than >= 2. These changes span multiple clauses (the admission\u2010selection subquery and the HAVING condition) and thus affect the overall intended semantics of the query, not just a localized clause.",
            "error_type": "Global: incorrect admission selection and threshold logic"
        },
        "2b0b822ec5196516bf095185": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the total dose of vancomycin that patient 10031757 was prescribed in 10/this year?",
            "gold_sql": "SELECT SUM(prescriptions.dose_val_rx) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10031757 ) AND prescriptions.drug = 'vancomycin' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',prescriptions.starttime) = '10'",
            "pred_sql": "SELECT sum(CASE WHEN prescriptions.starttime LIKE '2100-11%' THEN 1 ELSE 0 END) FROM prescriptions INNER JOIN patients ON prescriptions.subject_id = patients.subject_id WHERE patients.subject_id = 10031757 AND prescriptions.drug = 'vancomycin'",
            "gold_answer": "[['4000.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely alters the intended logic of the query. Instead of summing the dose_val_rx for vancomycin prescriptions in October of the current year, it counts prescriptions (using CASE WHEN ... THEN 1) and incorrectly filters for November ('2100-11%') rather than October. It also omits the necessary time-window logic (start-of-year and month filter) and sums the wrong value. These mistakes span both the SELECT aggregation and WHERE date-filtering clauses, fundamentally changing the query\u2019s meaning.",
            "error_type": "Global"
        },
        "e22d99ee8e54c59787862605": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the first time that patient 10004733 received a prescription for 0.83% sodium chloride on the current hospital visit?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND prescriptions.drug = '0.83% sodium chloride' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE patients.subject_id = 10004733 AND prescriptions.drug = '0.83% sodium chloride'",
            "gold_answer": "[['2100-12-22 17:00:00']]",
            "pred_answer": "[['2100-12-22 17:00:00'], ['2100-12-23 10:00:00'], ['2100-12-23 10:00:00'], ['2100-12-26 12:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict to the current hospital visit (missing `admissions.dischtime IS NULL` filter) and also omits the `ORDER BY prescriptions.starttime ASC LIMIT 1` needed to return only the first time. These are localized mistakes within the WHERE and result-limiting clauses and do not require restructuring the entire query logic.",
            "error_type": "Local: missing current visit filter and missing ORDER BY/LIMIT"
        },
        "a3569887096547747b83607e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many people were admitted to the hospital?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions",
            "pred_sql": "SELECT count(subject_id) FROM admissions",
            "gold_answer": "[['94.0']]",
            "pred_answer": "[['119.0']]",
            "is_correct": false,
            "reasoning": "The error is confined to the SELECT aggregation clause: the predicted SQL uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id). This is a localized mistake in the aggregate function and can be fixed by adding DISTINCT without changing the overall query structure or joins.",
            "error_type": "Local: aggregate-distinct error"
        },
        "27a2491ee7292c776985f138": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients were treated with fusion or refusion of 2-3 vertebrae?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'fusion or refusion of 2-3 vertebrae' ) )",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM procedures_icd WHERE icd_code IN ('99.0', '99.1', '99.2', '99.3')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth in the WHERE clause values: it hard-codes an incorrect list of ICD codes instead of selecting the single correct code via the d_icd_procedures lookup. No tables, joins, or overall structure have been changed; only the filtering condition is wrong. This is a localized error in the WHERE clause values.",
            "error_type": "Local: incorrect-value error"
        },
        "2b7dee89179d6e7647d06f5e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How much does patient 10039831 change in base excess second measured on the current hospital visit compared to the first value measured on the current hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'base excess' ) ORDER BY labevents.charttime ASC LIMIT 1 OFFSET 1 ) - ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'base excess' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT CASE WHEN count(chartevents.value) > 1 THEN chartevents.valuenum ELSE NULL END - CASE WHEN count(chartevents.value) > 1 THEN chartevents.valuenum ELSE NULL END FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE chartevents.itemid IN (68, 69, 70, 113, 188, 189, 190, 191, 224, 225, 226, 227) AND patients.subject_id = 10039831",
            "gold_answer": "[['2.0']]",
            "pred_answer": "no such column: chartevents.value",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong table (`chartevents` instead of `labevents`), wrong columns (`chartevents.value` which doesn\u2019t exist), incorrect item filters, and fails to restrict to the current hospital visit or properly pick the first and second measurements. This is not a small typo but a fundamentally flawed approach requiring major restructuring\u2014changing tables, joins, and logic\u2014so it\u2019s a global error.",
            "error_type": "Global: incorrect table and logic"
        },
        "8346d40a91793343d5e5393e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What are the charges for a procedure called excision of right internal mammary artery, percutaneous endoscopic approach?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'procedures_icd' AND cost.event_id IN ( SELECT procedures_icd.row_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of right internal mammary artery, percutaneous endoscopic approach' ) )",
            "pred_sql": "SELECT cost.cost FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code INNER JOIN cost ON cost.event_id = procedures_icd.row_id WHERE d_icd_procedures.long_title LIKE '%excision of right internal mammary artery, percutaneous endoscopic approach%'",
            "gold_answer": "[['6.96']]",
            "pred_answer": "[['13.43'], ['5.13'], ['6.96'], ['9.34']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the required filter cost.event_type = 'procedures_icd' and also omits DISTINCT, leading to extra cost rows that are unrelated to the intended procedure. This is a mistake confined to the WHERE clause and does not alter the overall join structure or logic beyond this missing condition.",
            "error_type": "Local: missing event_type filter"
        },
        "d395b7b77c91a485233becad": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last prescribed dose of aspirin (buffered) that patient 10024043 got in 04/2100?",
            "gold_sql": "SELECT prescriptions.dose_val_rx FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10024043 ) AND prescriptions.drug = 'aspirin (buffered)' AND strftime('%Y-%m',prescriptions.starttime) = '2100-04' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.subject_id = 10024043 AND prescriptions.starttime LIKE '2100-04-21%' AND prescriptions.drug = 'aspirin (buffered)' ORDER BY prescriptions.stoptime DESC LIMIT 1",
            "gold_answer": "[['325.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query makes several localized mistakes without altering the overall structure:\n1. SELECT clause is wrong: it returns prescriptions.drug instead of prescriptions.dose_val_rx.\n2. The WHERE clause filters starttime to a single day ('2100-04-21%') instead of the whole month ('2100-04').\n3. It orders by stoptime rather than starttime.  \nNone of these errors require changing the join logic or overall query structure\u2014they are confined to column selection, filtering literals, and ordering. Therefore it\u2019s a Local error.",
            "error_type": "Local: where-clause and select-column error"
        },
        "859c5d5288e0f9d7cdce7d9c": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the 6-month survival rate for patients diagnosed with nonrheumatic mitral (valve) insufficiency?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN (strftime('%J',patients.dod) - strftime('%J',T1.charttime)) < 365 / 2 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'nonrheumatic mitral (valve) insufficiency' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 2 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id )",
            "pred_sql": "SELECT cast(sum(CASE WHEN patients.dod IS NULL THEN 1 ELSE 0 END) AS REAL) * 100 / count(patients.subject_id) FROM patients INNER JOIN diagnoses_icd ON patients.subject_id = diagnoses_icd.subject_id WHERE diagnoses_icd.icd_code = '493.81' GROUP BY patients.subject_id HAVING count(patients.subject_id) > 0",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query completely ignores the 6-month time window, uses a hard-coded (and incorrect) ICD code rather than looking up the diagnosis by its long_title, and aggregates survival incorrectly by grouping per patient and computing a percentage at that level. These issues span the overall logic\u2014date filtering, code lookup, and aggregation strategy\u2014so this is not a small localized typo or syntax fix but a fundamentally flawed approach requiring a restructuring of the query.",
            "error_type": "Global: logical inconsistency error"
        },
        "4fd9504906ec9a8655390830": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours have passed from the time when patient 10018081 was admitted to the icu?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', icustays.intime) ) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL",
            "pred_sql": "SELECT cast(strftime('%J', icustays.intime) - strftime('%J', admissions.admittime) AS REAL) * 24 FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10018081",
            "gold_answer": "[['126.817']]",
            "pred_answer": "[['0.2']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally misinterprets the question\u2019s intent by computing the time between hospital admission and ICU admission rather than the hours since ICU admission until now. It also omits the filter for a current ICU stay (outtime IS NULL) and uses the wrong timestamp for \u201cnow.\u201d These flaws span multiple clauses (JOIN logic, WHERE conditions, time calculations) and change the core purpose of the query, indicating a global logical error.",
            "error_type": "Global: logical inconsistency"
        },
        "0f238fbe279f08c43dc8c8dc": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "In patients who had the continuous invasive mechanical ventilation for less than 96 consecutive hours this year, what are the three most common diagnoses that followed within 2 months?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'continuous invasive mechanical ventilation for less than 96 consecutive hours' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "SELECT diagnoses_icd.icd_code, count(diagnoses_icd.icd_code) FROM diagnoses_icd INNER JOIN ( SELECT subject_id, hadm_id FROM admissions WHERE dischtime IS NOT NULL AND dischtime - admittime < 1095 AND admission_type = 'ELECTIVE' GROUP BY subject_id, hadm_id ) AS t3 ON diagnoses_icd.hadm_id = t3.hadm_id WHERE t3.admission_type = 'ELECTIVE' AND t3.dischtime - t3.admittime < 1095 GROUP BY diagnoses_icd.icd_code ORDER BY count(diagnoses_icd.icd_code) DESC LIMIT 3",
            "gold_answer": "[['accidental cut, puncture, perforation or hemorrhage during heart catheterization'], ['accidental puncture or laceration during a procedure, not elsewhere classified'], ['acute on chronic diastolic heart failure'], ['acute pancreatitis'], ['alkalosis'], ['anemia, unspecified'], ['asthma, unspecified type, unspecified'], ['calculus of bile duct without mention of cholecystitis, without mention of obstruction'], ['congestive heart failure, unspecified'], ['cystic kidney disease, unspecified'], ['depressive disorder, not elsewhere classified'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['hypovolemia'], ['long-term (current) use of insulin'], ['mitral valve disorders'], ['obstructive sleep apnea (adult)(pediatric)'], ['other and unspecified hyperlipidemia'], ['other chronic pulmonary heart diseases'], ['other iatrogenic hypotension'], ['other postprocedural status'], ['peritoneal adhesions (postoperative) (postinfection)'], ['personal history of methicillin resistant staphylococcus aureus'], ['physical restraints status'], ['portal hypertension'], ['postinflammatory pulmonary fibrosis'], ['schizoaffective disorder, unspecified'], ['thrombocytopenia, unspecified'], ['umbilical hernia without mention of obstruction or gangrene']]",
            "pred_answer": "no such column: t3.admission_type",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s subquery aliased as t3 only selects subject_id and hadm_id, yet the outer query refers to t3.admission_type (and also t3.dischtime and t3.admittime) which do not exist in t3\u2019s projection. This is a column reference error in a specific part of the query and does not require restructuring the overall logic.",
            "error_type": "Local: table/column reference error"
        },
        "620dde6d062ca27f785e6250": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How old is patient 10038992 on the last hospital visit?",
            "gold_sql": "SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10038992 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT admissions.admittime, admissions.dischtime FROM admissions WHERE admissions.subject_id = 10038992 ORDER BY admissions.admittime DESC LIMIT 1",
            "gold_answer": "[['70.0']]",
            "pred_answer": "[['2100-11-23 18:26:00', '2100-11-29 16:22:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects the admission and discharge times instead of the patient's age and fails to filter for only completed (discharged) visits. It fundamentally alters the query\u2019s intent by returning different columns and missing the dischtime IS NOT NULL condition, thus changing the overall logic rather than fixing a localized issue.",
            "error_type": "Global: incorrect selection and filter"
        },
        "33423e8e52128ee0f0498b33": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the name of the last input that patient 10004422 had?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT inputevents.itemid FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 ) ) ORDER BY inputevents.starttime DESC LIMIT 1 )",
            "pred_sql": "SELECT inputevents.itemid FROM patients INNER JOIN inputevents ON patients.subject_id = inputevents.subject_id WHERE patients.subject_id = 10004422 ORDER BY inputevents.charttime DESC LIMIT 1",
            "gold_answer": "[['po intake']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column inputevents.charttime instead of inputevents.starttime. This is a simple column reference mistake and can be fixed by correcting the column name without altering the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "b46069965a45836dc176614f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the first 5% dextrose (excel bag) dose that patient 10018081 was prescribed on the current hospital visit?",
            "gold_sql": "SELECT prescriptions.dose_val_rx FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 AND admissions.dischtime IS NULL ) AND prescriptions.drug = '5% dextrose (excel bag)' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT dose_val_rx FROM prescriptions WHERE subject_id = 10018081 AND drug = 'dextrose 5%' ORDER BY dose_val_rx DESC LIMIT 0, 0.05",
            "gold_answer": "[['500.0']]",
            "pred_answer": "datatype mismatch",
            "is_correct": false,
            "reasoning": "The predicted SQL fails immediately with a datatype mismatch due to the LIMIT clause using a non\u2010integer (0.05). This is a localized issue in value parsing/syntax. The overall query structure (selecting from prescriptions) remains intact, so no global restructuring is required.",
            "error_type": "Local: value parsing error"
        },
        "50727c16951cf4ee1f147513": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What's the mean blood pressure change of patient 10018328 second measured on the first icu visit compared to the first value measured on the first icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018328 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure mean' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 OFFSET 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018328 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure mean' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT avg(chartevents.valuenum) FROM chartevents INNER JOIN icustays ON chartevents.stay_id = icustays.stay_id WHERE icustays.subject_id = 10018328 AND chartevents.itemid IN (211,220045)",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['68.278']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic: instead of calculating the difference between the second and first measurements on the first ICU visit, it averages all values for two itemids across all ICU stays for the patient. It ignores the first ICU visit restriction, the ordering of measurements, and the subtraction operation. This is not a localized typo or missing alias, but a completely different aggregation and filtering strategy, thus a global error.",
            "error_type": "Global: incorrect aggregation logic"
        }
    },
    "qwen3": {
        "716f26323d1b05f7e996515f": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Since 1 year ago, what are the top three most common drugs prescribed within the same hospital visit to male patients aged 60 or above after being diagnosed with fall resulting in striking against other object?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'fall resulting in striking against other object' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.subject_id IN ( SELECT patients.subject_id FROM patients WHERE patients.gender = 'm' ) AND admissions.age >= 60 AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT p.drug, COUNT(DISTINCT p.hadm_id) AS count FROM prescriptions p WHERE p.hadm_id IN ( SELECT a.hadm_id FROM admissions a JOIN patients pt ON a.subject_id = pt.subject_id JOIN diagnoses_icd d ON a.hadm_id = d.hadm_id JOIN d_icd_diagnoses dd ON d.icd_code = dd.icd_code WHERE pt.gender = 'm' AND a.age >= 60 AND datetime(a.admittime) >= datetime('2100-12-31 23:59:00', '-1 year') AND dd.long_title = 'fall resulting in striking against other object' ) GROUP BY p.drug ORDER BY count DESC LIMIT 3;",
            "gold_answer": "[['0.9% sodium chloride (mini bag plus)'], ['acetaminophen'], ['albuterol 0.083% neb soln'], ['aspirin'], ['atorvastatin'], ['docusate sodium'], ['heparin'], ['ibuprofen'], ['ipratropium bromide neb'], ['iso-osmotic dextrose'], ['ketorolac'], ['lamotrigine'], ['metoprolol tartrate'], ['prednisone'], ['senna'], ['sodium chloride 0.9%  flush'], ['tiotropium bromide'], ['tramadol (ultram)'], ['vancomycin']]",
            "pred_answer": "[['tiotropium bromide', '1.0'], ['tramadol (ultram)', '1.0'], ['vancomycin', '1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL does not enforce the required temporal relationship between diagnosis and prescription (i.e., prescriptions must occur after the fall diagnosis) and misplaces the one\u2013year\u2013ago filter on the admission time rather than on the diagnosis and prescription times. These omissions alter the overall logic of the query\u2014prescriptions before the diagnosis or outside the one\u2013year window might be included\u2014so it is not a local fix but a structural flaw requiring reworking of multiple clauses.",
            "error_type": "Global: incorrect temporal and logical structure"
        },
        "2b873eff97b30a727b9feead": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "When was the measurement of patient 10018081's sao2 taken first on the current icu visit?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(chartevents.charttime) AS first_sao2_time FROM chartevents JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.subject_id = 10018081 AND chartevents.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) AND d_items.label = 'o2 saturation pulseoxymetry' AND chartevents.charttime >= ( SELECT intime FROM icustays WHERE hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) AND outtime IS NULL ORDER BY intime DESC LIMIT 1 ) AND chartevents.charttime <= COALESCE( (SELECT outtime FROM icustays WHERE hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) AND outtime IS NULL ORDER BY intime DESC LIMIT 1), '2100-12-31 23:59:00' );",
            "gold_answer": "[['2100-12-26 16:59:00']]",
            "pred_answer": "[['2100-12-26 17:15:00']]",
            "is_correct": false,
            "reasoning": "The ground truth query restricts events by the ICU stay_id to capture any sao2 measurement tagged to the current ICU visit\u2014even if a measurement timestamp slightly precedes the recorded ICU intime. The predicted query instead filters chartevents by charttime >= icustays.intime (and by hadm_id/subject_id) rather than matching the stay_id directly. As a result, it omits the earliest measurement that occurred just before the recorded ICU intime (16:59:00) and returns the next one (17:15:00). This error is not a minor typo or a simple WHERE\u2010clause fix\u2014it stems from a flawed logical approach to associating chartevents with the ICU stay, requiring restructuring the join/filter strategy. Hence it is a global error.",
            "error_type": "Global: incorrect join/filter logic"
        },
        "6adc7de0c5d946d7b38ae9ad": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What is the total number of patients who underwent insertion of non-drug-eluting peripheral (non-coronary) vessel stent(s) since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'insertion of non-drug-eluting peripheral (non-coronary) vessel stent(s)' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title = 'insertion of four or more vascular stents' AND p.charttime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong procedure name in the WHERE clause ('insertion of four or more vascular stents' instead of the target 'insertion of non-drug-eluting peripheral (non-coronary) vessel stent(s)'). This is a localized filter value error; the overall query structure, joins, and aggregations are correct, but the literal used to filter the desired procedure is incorrect.",
            "error_type": "Local: value parsing error"
        },
        "f5f51233cf706b9edea9efe9": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many patients received unilateral simple mastectomy within the same hospital visit after being diagnosed with malignant neoplasm of central portion of female breast?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'malignant neoplasm of central portion of female breast' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'unilateral simple mastectomy' ) ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) FROM diagnoses_icd d JOIN d_icd_diagnoses dd ON d.icd_code = dd.icd_code JOIN procedures_icd p ON d.subject_id = p.subject_id AND d.hadm_id = p.hadm_id JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dd.long_title = 'Malignant neoplasm of central portion of female breast' AND dp.long_title = 'Unilateral simple mastectomy' AND d.charttime <= p.charttime;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the comparison operator: it uses d.charttime <= p.charttime instead of the strict < used in the ground truth. This is a minor change in the WHERE clause that doesn\u2019t alter the overall query structure or the join logic, and can be fixed by replacing <= with <. Therefore, it is a localized error in the condition, not a flaw in the global logic or joins.",
            "error_type": "Local: comparison operator error"
        },
        "4431d6ea6f7771a84c1d5374": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Which care unit did patient 10027602 last enter on the first hospital visit, as recorded in the transfer record?",
            "gold_sql": "SELECT transfers.careunit FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND transfers.careunit IS NOT NULL ORDER BY transfers.intime DESC LIMIT 1",
            "pred_sql": "SELECT t.careunit FROM transfers t JOIN admissions a ON t.hadm_id = a.hadm_id WHERE a.subject_id = 10027602 AND a.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10027602 ORDER BY admittime ASC LIMIT 1 ) ORDER BY t.intime DESC LIMIT 1;",
            "gold_answer": "[['neurology']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is almost correct in structure\u2014it joins transfers to admissions and picks the first admission by admittime, then orders transfers by intime to get the last careunit. However, it fails to filter out admissions without a discharge time (dischtime IS NOT NULL) and does not exclude NULL careunit values (transfers.careunit IS NOT NULL). These omissions are local mistakes in the WHERE clauses and do not require restructuring the whole query.",
            "error_type": "Local: missing filter condition"
        },
        "2a4f10719bce5c9db91062c4": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many instances of implantation or replacement of carotid sinus stimulation device, total system procedures have been completed in 2100?",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'implantation or replacement of carotid sinus stimulation device, total system' ) AND strftime('%Y',procedures_icd.charttime) = '2100'",
            "pred_sql": "SELECT COUNT(*) AS total_procedures FROM procedures_icd JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Implantation or replacement of carotid sinus stimulation device, total system procedures' AND strftime('%Y', procedures_icd.charttime) = '2100';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in the WHERE clause string literal for d_icd_procedures.long_title. It incorrectly adds \u201cprocedures\u201d at the end and capitalizes the first letter, so no rows match. This is a minor, localized error in the literal value, not in the overall query structure or joins.",
            "error_type": "Local: value parsing error"
        },
        "02c1ac678276183eba4aa39d": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Count the number of times that patient 10019172 had calcium gluconate on last month/20.",
            "gold_sql": "SELECT COUNT(*) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019172 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'calcium gluconate' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',inputevents.starttime) = '20'",
            "pred_sql": "SELECT COUNT(*) FROM prescriptions WHERE subject_id = 10019172 AND drug = 'calcium gluconate' AND datetime(starttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', starttime) = '20';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and the drug column to count occurrences of 'calcium gluconate', whereas the question and assumptions specify using the inputevents table (with d_items mapping) for input-related events. This misuse of the core table alters the entire logic of the query, not just a localized condition or syntax. Fixing it requires rewriting the query to use inputevents and join to d_items, which is a structural change affecting the overall intended meaning.",
            "error_type": "Global: incorrect table selection"
        },
        "170e423407c175442b5fc342": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "When did patient 10004733 have a last creatine kinase (ck) laboratory test in 12/this year?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatine kinase (ck)' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '12' ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT l.charttime FROM labevents l JOIN d_labitems d ON l.itemid = d.itemid WHERE l.subject_id = 10004733 AND d.label = 'creatine kinase' AND strftime('%Y-%m', l.charttime) = '2100-12' ORDER BY l.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-12-31 21:15:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters on d.label = 'creatine kinase' instead of the exact label 'creatine kinase (ck)', so it returns no rows. This is a localized mistake in the WHERE clause value, not a structural or multi-clause logic issue.",
            "error_type": "Local: value parsing error"
        },
        "5eef1271ba9f4ff35bc0efa0": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "For patients who are diagnosed with essential (primary) hypertension , what are the top three most common medications that followed afterward within the same hospital visit?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'essential (primary) hypertension' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drug FROM ( SELECT p.drug, COUNT(DISTINCT ha.hadm_id) AS cnt FROM ( SELECT DISTINCT d.hadm_id FROM diagnoses_icd d JOIN d_icd_diagnoses dicd ON d.icd_code = dicd.icd_code WHERE dicd.long_title = 'essential (primary) hypertension' ) AS ha JOIN prescriptions p ON ha.hadm_id = p.hadm_id GROUP BY p.drug ORDER BY cnt DESC LIMIT 3 );",
            "gold_answer": "[['bag'], ['potassium chloride replacement (critical care and oncology)'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['bag'], ['heparin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the crucial temporal filter (T1.charttime < T2.starttime) that ensures medications occurred after the hypertension diagnosis, and it restructures the query by only joining on hadm_id and counting ALL prescriptions in a visit rather than those following the diagnosis event. This is not a simple typo or localized fix but alters the overall logic and intended meaning of the query, requiring a significant restructuring to reintroduce the time-based join and ranking. Therefore, it is a global error.",
            "error_type": "Global: incorrect join/temporal logic"
        },
        "f832db59e9dda84d9dd14c15": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What are the four most frequent procedures for patients who are in their 60 or above this year?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM patients JOIN procedures_icd ON patients.subject_id = procedures_icd.subject_id JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE 2100 - CAST(strftime('%Y', patients.dob) AS INTEGER) >= 60 GROUP BY d_icd_procedures.long_title ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['arterial catheterization'], ['arteriography of cerebral arteries'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['coronary arteriography using two catheters'], ['enteral infusion of concentrated nutritional substances'], ['extirpation of matter from left lower lung lobe, via natural or artificial opening endoscopic'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['insertion of endotracheal tube'], ['left heart cardiac catheterization'], ['single internal mammary-coronary artery bypass']]",
            "pred_answer": "[['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['single internal mammary-coronary artery bypass']]",
            "is_correct": false,
            "reasoning": "The predicted SQL makes two localized mistakes in the WHERE clause: it calculates age manually from patients.dob instead of using the admissions.age column (violating the age calculation assumption) and it omits the filter to restrict procedures to the current year. These errors are confined to a specific part of the query (the WHERE conditions) and do not require restructuring the overall query logic or JOINs.",
            "error_type": "Local: value parsing error"
        },
        "07e6a4280f821838e9ccbeae": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What is the number of times that patient 10020740 had a or ebl event since 05/10/2100?",
            "gold_sql": "SELECT COUNT(*) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'or ebl' AND d_items.linksto = 'outputevents' ) AND strftime('%Y-%m-%d',outputevents.charttime) >= '2100-05-10'",
            "pred_sql": "SELECT COUNT(*) AS event_count FROM chartevents c JOIN d_items d ON c.itemid = d.itemid WHERE c.subject_id = 10020740 AND d.label IN ('a', 'ebl') AND c.charttime >= '2100-05-10';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses chartevents rather than outputevents and filters on c.subject_id directly, looking for labels 'a' or 'ebl', which fundamentally changes the data source and join logic. The correct query must aggregate from outputevents linked via stay_id and admissions for the given patient and filter for the specific 'or ebl' item. This is not a minor typo or syntax issue but a change in the core tables and relationships, requiring a restructuring of the query.",
            "error_type": "Global: join error"
        },
        "5a69bd52eccb00f7bc5a5dbd": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Give me the three most frequent procedures performed in 2100.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE strftime('%Y',procedures_icd.charttime) = '2100' GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d.long_title FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE strftime('%Y', p.charttime) = '2100' GROUP BY p.icd_code, d.long_title ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['central venous catheter placement with guidance'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery'], ['other electroshock therapy'], ['percutaneous abdominal drainage']]",
            "pred_answer": "[['central venous catheter placement with guidance'], ['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery']]",
            "is_correct": false,
            "reasoning": "The predicted query uses a simple GROUP BY with ORDER BY COUNT DESC and LIMIT 3 to pick the \u201ctop 3\u201d procedures, whereas the ground truth uses DENSE_RANK to include all procedures tied at the 3rd most frequent count. This is not simply a typo or missing column\u2014it is a fundamental logical difference in how rankings and ties are handled, requiring a restructure of the query (adding a window function and subquery) to match the intended behavior. Therefore, it is a Global error.",
            "error_type": "Global: incorrect ranking strategy"
        },
        "6f048eeb50b8e74a15d18d6d": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Since 09/2100, what was the yearly maximum dose of pre-admission/non-icu intake for patient 10021487?",
            "gold_sql": "SELECT MAX(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'pre-admission/non-icu intake' AND d_items.linksto = 'inputevents' ) AND strftime('%Y-%m',inputevents.starttime) >= '2100-09' GROUP BY strftime('%Y',inputevents.starttime)",
            "pred_sql": "SELECT strftime('%Y', p.starttime) AS year, MAX(p.dose_val_rx) AS max_dose FROM prescriptions p WHERE p.subject_id = 10021487 AND p.starttime >= '2100-09-01' AND p.starttime <= '2100-12-31 23:59:00' AND ( (p.hadm_id IS NULL AND EXISTS ( SELECT 1 FROM admissions a WHERE a.subject_id = 10021487 AND p.starttime < a.admittime )) OR (p.hadm_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM icustays i WHERE i.hadm_id = p.hadm_id AND p.starttime >= i.intime AND p.starttime <= COALESCE(i.outtime, '2100-12-31 23:59:00') )) ) GROUP BY year;",
            "gold_answer": "[['1050.0']]",
            "pred_answer": "[['2100.0', '900.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and dose_val_rx column with complex EXISTS conditions to approximate pre-admission/non-ICU intake, whereas the ground truth correctly uses inputevents.totalamount filtered by the d_items.label 'pre-admission/non-icu intake' and ICU stay logic. This is not a simple column or syntax mistake but a fundamental misuse of the table and logic, altering the intended meaning of the query. Therefore, it is a Global error.",
            "error_type": "Global: incorrect table usage"
        },
        "59d35743b9ec360ca5a5d2db": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What is the average daily number of patients who had secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified in 2100?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT COUNT( DISTINCT diagnoses_icd.hadm_id ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' GROUP BY strftime('%Y-%m-%d',diagnoses_icd.charttime) ) AS T1",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) / 365.0 AS average_daily_count FROM diagnoses_icd d JOIN d_icd_diagnoses i ON d.icd_code = i.icd_code WHERE i.long_title LIKE '%secondary diabetes mellitus%' AND i.long_title NOT LIKE '%complication%' AND i.long_title NOT LIKE '%uncontrolled%' AND i.long_title NOT LIKE '%unspecified%' AND strftime('%Y', d.charttime) = '2100';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of daily averaging by counting distinct patients over the whole year and dividing by 365, instead of grouping counts per day and then averaging those daily counts. It also uses subject_id instead of hadm_id and employs inaccurate LIKE filters rather than matching the exact long_title. These alterations span multiple clauses (JOIN, WHERE, aggregation) and alter the intended computation, requiring restructuring of the query rather than a simple fix in one part.",
            "error_type": "Global: aggregation logic error"
        },
        "e5859415e36580c3966e0e1a": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Get me the top four most frequent lab tests that patients had within the same hospital visit after receiving venous catheterization, not elsewhere classified since 1 year ago.",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime, admissions.hadm_id FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 4 )",
            "pred_sql": "SELECT d.label FROM labevents l JOIN procedures_icd p ON l.hadm_id = p.hadm_id JOIN d_icd_procedures dproc ON p.icd_code = dproc.icd_code JOIN d_labitems d ON l.itemid = d.itemid WHERE dproc.long_title = 'insertion of venous catheter, not elsewhere classified' AND p.charttime >= datetime('2100-12-31 23:59:00', '-1 year') AND l.charttime > p.charttime GROUP BY l.itemid, d.label ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['hematocrit'], ['hemoglobin'], ['mch'], ['mchc'], ['mcv'], ['platelet count'], ['rdw'], ['rdw-sd'], ['red blood cells'], ['white blood cells']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to match any procedure rows because it filters on d_icd_procedures.long_title = 'insertion of venous catheter, not elsewhere classified', whereas the correct title is 'venous catheterization, not elsewhere classified'. This is a mismatch in the literal value used in the WHERE clause, causing no rows to pass the filter. The rest of the query structure (joins, grouping, ordering) aligns with the intended logic, so only the literal in the WHERE clause is wrong.",
            "error_type": "Local: value parsing error"
        },
        "0c7b4ddbcc44458591fe84e6": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Among patients who are prescribed with albumin 5% (25g / 500ml), what are the top three most frequently prescribed drugs at the same time?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'albumin 5% (25g / 500ml)' ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT p2.drug FROM prescriptions p1 JOIN prescriptions p2 ON p1.hadm_id = p2.hadm_id WHERE p1.drug = 'albumin 5% (25g / 500ml)' AND p2.drug != 'albumin 5% (25g / 500ml)' GROUP BY p2.drug ORDER BY COUNT(DISTINCT p2.hadm_id) DESC LIMIT 3;",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['albumin 5% (25g / 500ml)'], ['aspirin'], ['aspirin ec'], ['chlorhexidine gluconate 0.12% oral rinse'], ['ciprofloxacin iv'], ['docusate sodium'], ['docusate sodium (liquid)'], ['ezetimibe'], ['fish oil (omega 3)'], ['ketorolac'], ['metronidazole (flagyl)'], ['milk of magnesia'], ['ns'], ['sodium bicarbonate'], ['soln']]",
            "pred_answer": "[['furosemide'], ['lactated ringers'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins prescriptions on hadm_id only, grouping co\u2010prescriptions by admission rather than by the exact same start time for patients prescribed albumin. This violates the requirement to match on both subject and datetime for simultaneous prescriptions and fundamentally changes the query\u2019s intended logic. Fixing it requires restructuring the join to include starttime equality and using a proper ranking mechanism rather than a simple GROUP BY on hadm_id, so this is a global error.",
            "error_type": "Global: incorrect join logic"
        },
        "0687b03f52fe3a7d878757dc": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Since the last diagnosis of patient 10004733 with chronic glomerulonephritis with unspecified pathological lesion in kidney on the current hospital visit, how many days have passed?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', diagnoses_icd.charttime) ) FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic glomerulonephritis with unspecified pathological lesion in kidney' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT CAST((julianday('2100-12-31 23:59:00') - julianday(di.charttime)) AS INTEGER) AS days_passed FROM admissions a JOIN diagnoses_icd di ON a.hadm_id = di.hadm_id JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND d.long_title = 'chronic glomerulonephritis with unspecified pathological lesion in kidney' ORDER BY di.charttime DESC LIMIT 1;",
            "gold_answer": "[['9.522']]",
            "pred_answer": "[['9.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters to the patient\u2019s current admission and the specified diagnosis, orders by the latest diagnosis time, and limits to one result\u2014so the overall query structure and joins are correct. The only mistake is in the SELECT clause: it uses CAST(julianday(...) - julianday(...)) AS INTEGER, which truncates the fractional day difference instead of preserving and rounding to three decimals using strftime('%J') as required. This is a localized error in the numeric calculation/formatting, not a structural or logical flaw.",
            "error_type": "Local: numerical-formatting error"
        },
        "5e2b6a161db39a1951d0acde": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "For patients diagnosed with chronic kidney disease, stage 3 (moderate), what are the top three most common microbiology tests that followed within the same month since 1 year ago?",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic kidney disease, stage 3 (moderate)' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT m.test_name FROM diagnoses_icd d JOIN d_icd_diagnoses dicd ON d.icd_code = dicd.icd_code JOIN microbiologyevents m ON d.subject_id = m.subject_id AND d.hadm_id = m.hadm_id WHERE dicd.long_title = 'Chronic kidney disease stage 3 (moderate)' AND d.charttime >= datetime('2100-12-31', '-1 year') AND d.charttime <= '2100-12-31 23:59:00' AND datetime(m.charttime, 'start of month') = datetime(d.charttime, 'start of month') AND m.charttime >= d.charttime GROUP BY m.test_name ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['anaerobic culture'], ['blood culture, routine'], ['c. difficile pcr'], ['epstein-barr virus ebna igg ab'], ['epstein-barr virus vca-igg ab'], ['epstein-barr virus vca-igm ab'], ['fluid culture'], ['gram stain'], ['rubella igg serology'], ['urine culture']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended query logic by joining microbiologyevents on both subject_id and hadm_id, forcing the test to occur in the same hospital admission as the diagnosis. The ground truth only requires the test to occur within the same calendar month (not necessarily the same admission) and within one year of diagnosis. Furthermore, the predicted SQL fails to restrict microbiology events to the one\u2010year window and mismatches the exact long_title string. These errors span multiple clauses (JOIN and WHERE) and alter the overall meaning of the query, so they cannot be fixed locally.",
            "error_type": "Global: incorrect join logic"
        },
        "8f72cf5cec372b0b9bd6f99a": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Among patients diagnosed with staphylococcus infection in conditions classified elsewhere and of unspecified site, staphylococcus, unspecified, what are the top three most common lab tests conducted within the same hospital visit since 1 year ago?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'staphylococcus infection in conditions classified elsewhere and of unspecified site, staphylococcus, unspecified' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime, admissions.hadm_id FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "SELECT d.label FROM diagnoses_icd JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id JOIN labevents ON admissions.hadm_id = labevents.hadm_id JOIN d_labitems d ON labevents.itemid = d.itemid WHERE d_icd_diagnoses.long_title = 'staphylococcus infection in conditions classified elsewhere and of unspecified site, staphylococcus, unspecified' AND datetime(admissions.admittime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY d.label ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['chloride'], ['hematocrit'], ['magnesium'], ['ph'], ['sodium']]",
            "pred_answer": "[['glucose'], ['hematocrit'], ['sodium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic of the query by joining diagnoses, admissions, and lab events directly and only filtering on admissions.admittime within the last year. It fails to:\n1. Restrict lab tests to those occurring after the diagnosis time within the same hospital visit.\n2. Filter lab events based on their own charttime relative to one year ago.\n3. Use a ranking function or subquery to ensure the correct top-N logic based on the count of lab tests following diagnosis.\nThese issues span multiple clauses (JOIN conditions, WHERE filters, grouping/ordering logic) and require restructuring the query\u2019s core approach, not just a local fix to a specific clause or reference.",
            "error_type": "Global: incorrect join and filtering logic"
        },
        "fc6b47d6d81425ba4da6b862": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many prescriptions were prescribed for oxycodone-acetaminophen (5mg-325mg)?",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.drug = 'oxycodone-acetaminophen (5mg-325mg)'",
            "pred_sql": "SELECT COUNT(*) FROM prescriptions WHERE drug = 'oxycodone-acetaminophen 5mg-325mg';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong string literal in the WHERE clause: 'oxycodone-acetaminophen 5mg-325mg' instead of the correct 'oxycodone-acetaminophen (5mg-325mg)'. This is a localized mistake in the value used for filtering and can be fixed by correcting the literal without altering the overall structure or logic of the query.",
            "error_type": "Local: value parsing error"
        },
        "8904dd5587873880719ae639": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "List the top five most frequent specimens that patients are tested within the same hospital visit after receiving dilation of left main bronchus with intraluminal device, via natural or artificial opening endoscopic.",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'dilation of left main bronchus with intraluminal device, via natural or artificial opening endoscopic' ) ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH target_procedure AS ( SELECT icd_code FROM d_icd_procedures WHERE long_title = 'Dilation of left main bronchus with intraluminal device, via natural or artificial opening endoscopic' ) SELECT m.spec_type_desc FROM procedures_icd p JOIN target_procedure tp ON p.icd_code = tp.icd_code JOIN microbiologyevents m ON p.hadm_id = m.hadm_id GROUP BY m.spec_type_desc ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['bronchial washings']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the critical WHERE clause enforcing that microbiology events occur after the dilation procedure (T1.charttime < T2.charttime) and fails to correlate on subject, thus joining all specimens in the same admission regardless of timing. This is not a simple typo or minor syntax fix but a fundamental join/logical error that changes the query\u2019s overall meaning and requires restructuring (adding the time filter and proper join). Therefore, it is a Global error.",
            "error_type": "Global: join logic error"
        },
        "a8d68ef0763c469039fcfab5": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What was the name of the procedure that was performed two or more times last month on patient 10027602?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, COUNT( procedures_icd.charttime ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND datetime(procedures_icd.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 >= 2 )",
            "pred_sql": "SELECT d.long_title FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE p.subject_id = 10027602 AND strftime('%Y-%m', p.charttime) = strftime('%Y-%m', datetime('2100-12-31', '-1 month')) GROUP BY p.icd_code HAVING COUNT(*) >= 2;",
            "gold_answer": "[['arteriography of cerebral arteries']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the filtering logic: instead of restricting procedures via admissions.hadm_id for patient 10027602 it directly filters on procedures_icd.subject_id. This alters the intended table relationship and overall query logic (it bypasses the admissions join/subquery), leading to incorrect results. Fixing it requires restructuring the query (adding the proper admissions subquery or join), not just tweaking a single clause. Therefore, this is a Global error.",
            "error_type": "Global: incorrect-join-condition"
        },
        "d5e534e863c69bb1b9c84c51": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "When was the first time patient 10029291 was measured with a heart rate greater than 76.0 on the first icu visit?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) AND chartevents.valuenum > 76.0 ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(chartevents.charttime) AS first_time FROM chartevents INNER JOIN d_items ON chartevents.itemid = d_items.itemid INNER JOIN ( SELECT stay_id, intime, outtime FROM icustays WHERE subject_id = 10029291 ORDER BY intime ASC LIMIT 1 ) AS first_icu ON chartevents.stay_id = first_icu.stay_id WHERE d_items.label = 'heart rate' AND chartevents.valuenum > 76.0 AND chartevents.charttime BETWEEN first_icu.intime AND first_icu.outtime;",
            "gold_answer": "[['2100-02-25 04:03:00']]",
            "pred_answer": "[['2100-02-25 05:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the icustays.outtime IS NOT NULL filter when selecting the \"first ICU visit.\" Because of this missing WHERE clause condition, it picks the wrong ICU stay and thus returns a later heart\u2010rate measurement (05:00 instead of 04:03). This is a localized mistake in the subquery\u2019s filter, not a restructuring of the overall query logic or multiple clauses.",
            "error_type": "Local: missing-filter error"
        },
        "5516832b69c00c4c165c31d1": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Tell me the top three most frequent procedures that patients are given within the same month after receiving insertion of endotracheal tube since 2100.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'insertion of endotracheal tube' ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH insertion_events AS ( SELECT p.subject_id, p.hadm_id, p.charttime, p.icd_code AS insertion_code FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title = 'insertion of endotracheal tube' AND datetime(p.charttime) >= datetime('2100-01-01') ) SELECT dp.long_title FROM insertion_events i JOIN procedures_icd p ON i.subject_id = p.subject_id AND i.hadm_id = p.hadm_id JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE datetime(p.charttime, 'start of month') = datetime(i.charttime, 'start of month') AND p.icd_code != i.insertion_code GROUP BY dp.long_title ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['arteriography of cerebral arteries'], ['closed [endoscopic] biopsy of bronchus'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['endovascular (total) embolization or occlusion of head and neck vessels'], ['insertion or replacement of external ventricular drain [evd]'], ['other electroshock therapy'], ['percutaneous [endoscopic] gastrostomy [peg]']]",
            "pred_answer": "[['central venous catheter placement with guidance'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['other electroshock therapy']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly identifies insertion events and joins to subsequent procedures in the same month, groups by procedure name, and takes the top three by count. However, it fails to enforce that the follow\u2010up procedures occur after the insertion event\u2014it only matches on the same month without a p.charttime > i.charttime condition. This mistake is confined to a single WHERE clause condition (a temporal filter) and does not require restructuring the overall query logic or joins.",
            "error_type": "Local: other-temporal error"
        },
        "333d052bb087db30ae9a9518": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Can you get me the top five diagnoses that have the highest 3-month mortality rate?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T4.icd_code FROM ( SELECT T3.icd_code, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C3 FROM ( SELECT T2.icd_code, AVG(C1) AS C2 FROM ( SELECT T1.icd_code, ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T1.charttime) < 365 / 4 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id, diagnoses_icd.icd_code HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id ) AS T2 GROUP BY T2.icd_code ) AS T3 ) AS T4 WHERE T4.C3 <= 5 )",
            "pred_sql": "WITH first_diagnoses AS ( SELECT subject_id, icd_code, MIN(charttime) AS first_charttime FROM diagnoses_icd GROUP BY subject_id, icd_code ) SELECT long_title FROM ( SELECT d.long_title, (SUM(CASE WHEN p.dod IS NOT NULL AND p.dod BETWEEN fd.first_charttime AND date(fd.first_charttime, '+91 days') THEN 1 ELSE 0 END) * 1.0 / COUNT(*)) AS mortality_rate FROM first_diagnoses fd JOIN patients p ON fd.subject_id = p.subject_id JOIN d_icd_diagnoses d ON fd.icd_code = d.icd_code GROUP BY fd.icd_code, d.long_title ) ORDER BY mortality_rate DESC LIMIT 5;",
            "gold_answer": "[['abnormal involuntary movements'], ['acidosis'], ['acquired hypertrophic pyloric stenosis'], ['acute and subacute hepatic failure with coma'], ['acute and subacute hepatic failure without coma'], ['acute kidney failure with lesion of tubular necrosis'], ['acute kidney failure, unspecified'], ['acute respiratory failure'], ['acute respiratory failure, unspecified whether with hypoxia or hypercapnia'], ['alcohol abuse, continuous'], ['alcohol abuse, in remission'], ['alcoholic cirrhosis of liver'], ['alcoholic polyneuropathy'], ['altered mental status'], ['amyotrophic lateral sclerosis'], ['anemia in other chronic diseases classified elsewhere'], ['arthrodesis status'], ['asymptomatic human immunodeficiency virus [hiv] infection status'], ['atherosclerotic heart disease of native coronary artery with other forms of angina pectoris'], ['automatic implantable cardiac defibrillator in situ'], ['benign neoplasm of cerebral meninges'], ['cachexia'], ['chronic airway obstruction, not elsewhere classified'], ['chronic hepatitis c with hepatic coma'], ['chronic hepatitis c without mention of hepatic coma'], ['chronic kidney disease, stage 3 (moderate)'], ['chronic kidney disease, stage iii (moderate)'], ['chronic kidney disease, unspecified'], ['chronic osteomyelitis, site unspecified'], ['chronic systolic heart failure'], ['cirrhosis of liver without mention of alcohol'], ['delirium due to conditions classified elsewhere'], ['dementia, unspecified, without behavioral disturbance'], ['dependence on respirator, status'], ['diabetes mellitus without mention of complication, type ii or unspecified type, uncontrolled'], ['diarrhea'], ['diplopia'], ['do not resuscitate status'], ['early satiety'], ['edema'], ['embolism and thrombosis of other specified artery'], ['encounter for palliative care'], ['epistaxis'], ['esophageal varices without mention of bleeding'], ['glucocorticoid deficiency'], ['hemoperitoneum'], ['hemorrhage of gastrointestinal 
tract, unspecified'], ['hip joint replacement'], ['hyperosmolality and/or hypernatremia'], ['hyperpotassemia'], ['hypertensive chronic kidney disease with stage 1 through stage 4 chronic kidney disease, or unspecified chronic kidney disease'], ['hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified'], ['hypertrophy (benign) of prostate without urinary obstruction and other lower urinary tract symptom (luts)'], ['hypo-osmolality and hyponatremia'], ['hyposmolality and/or hyponatremia'], ['hypospadias'], ['hypotension, unspecified'], ['hypovolemic shock'], ['ileus, unspecified'], ['infection with microorganisms resistant to penicillins'], ['jaundice, unspecified, not of newborn'], ['malignant neoplasm of body of pancreas'], ['malignant neoplasm of liver, secondary'], ['malignant neoplasm of rectosigmoid junction'], ['metabolic encephalopathy'], ['methicillin susceptible staphylococcus aureus infection as the cause of diseases classified elsewhere'], ['migraine, unspecified, without mention of intractable migraine without mention of status migrainosus'], ['mycosis fungoides, unspecified site, extranodal and solid organ sites'], ['natural blood and blood products causing adverse effects in therapeutic use'], ['neoplasm related pain (acute) (chronic)'], ['nephritis and nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney'], ['nonalcoholic steatohepatitis (nash)'], ['nonspecific elevation of levels of transaminase or lactic acid dehydrogenase [ldh]'], ['old myocardial infarction'], ['oliguria and anuria'], ['other and unspecified coagulation defects'], ['other chronic pulmonary heart diseases'], ['other cirrhosis of liver'], ['other closed fractures of distal end of radius (alone)'], ['other complications due to other vascular device, implant, and graft'], ['other constipation'], ['other convulsions'], ['other dependence on machines, supplemental oxygen'], 
['other diseases of lung, not elsewhere classified'], ['other disorders of muscle, ligament, and fascia'], ['other disorders of neurohypophysis'], ['other dysphagia'], ['other fluid overload'], ['other iatrogenic hypotension'], ['other postprocedural shock, initial encounter'], ['other secondary thrombocytopenia'], ['other septicemia due to gram-negative organisms'], ['other specified analgesics and antipyretics causing adverse effects in therapeutic use'], ['other specified bacterial agents as the cause of diseases classified elsewhere'], ['other specified disorders of peritoneum'], ['other specified disorders of stomach and duodenum'], ['other specified forms of hearing loss'], ['other specified pre-operative examination'], ['paralytic ileus'], ['paroxysmal ventricular tachycardia']]",
            "pred_answer": "[['anemia in other chronic diseases classified elsewhere'], ['methicillin susceptible staphylococcus aureus infection as the cause of diseases classified elsewhere'], ['other specified bacterial agents as the cause of diseases classified elsewhere'], ['sepsis due to escherichia coli [e. coli]'], ['unspecified severe protein-calorie malnutrition']]",
            "is_correct": false,
            "reasoning": "The predicted SQL computes a mortality_rate for each diagnosis by dividing the count of deaths within 91 days by the total count of first diagnoses, but it never filters out diagnoses that occurred less than 3 months before the reference date. As a result, recent diagnoses without the full 3-month follow-up window are included in the denominator (and typically have zero deaths), biasing mortality rates downward. This omission is not a minor syntax mistake in one clause but a fundamental flaw in the overall inclusion logic that changes the query\u2019s intended meaning and results.",
            "error_type": "Global: temporal filtering error"
        },
        "bb4987c184d509429ea2e482": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many patients received a plasma lab test since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'plasma' ) AND datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT le.subject_id) FROM labevents le JOIN d_labitems dli ON le.itemid = dli.itemid WHERE dli.label LIKE '%plasma%' AND le.charttime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The only difference is in the WHERE clause\u2019s label filter: the ground truth requires an exact match (d_labitems.label = 'plasma') while the prediction uses a broad LIKE '%plasma%', which pulls in extra rows and inflates the count. This is a localized mistake in the condition, not a structural or multi-clause logic error, so it\u2019s a Local error.",
            "error_type": "Local: where-clause value-matching error"
        },
        "227b5e0e2e01df82980de19b": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Since patient 10039831's admission in the icu, how many days have elapsed?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', icustays.intime) ) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND icustays.outtime IS NULL",
            "pred_sql": "SELECT CAST((julianday('2100-12-31 23:59:00') - julianday(intime)) AS INTEGER) AS days_elapsed FROM icustays WHERE subject_id = 10039831 ORDER BY intime DESC LIMIT 1;",
            "gold_answer": "[['0.182']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the logic for selecting the relevant ICU stay. Instead of restricting to the current ICU stay (outtime IS NULL) for the patient\u2019s hospital admission, it simply picks the most recent ICU stay by ORDER BY and ignores the outtime filter and hadm_id relationship. This is not a minor typo or localized mistake\u2014it alters the core query structure and the intended meaning, so it is a global error.",
            "error_type": "Global: flawed join/filter logic"
        },
        "877c95aa1614afdd964a50bb": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What was patient 10018081's first output time of foley yesterday?",
            "gold_sql": "SELECT outputevents.charttime FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'foley' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day') ORDER BY outputevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(outputevents.charttime) AS first_output_time FROM outputevents JOIN d_items ON outputevents.itemid = d_items.itemid WHERE outputevents.subject_id = 10018081 AND d_items.label = 'Foley catheter' AND datetime(outputevents.charttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-1 day');",
            "gold_answer": "[['2100-12-30 00:00:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query fails because it uses the wrong label ('Foley catheter' instead of the correct 'foley') in the WHERE clause against d_items.label. This is a localized mistake in the value literal\u2014not a structural issue with joins, subqueries, or overall query logic. Correcting the label string would allow the query to return the proper result without altering its structure or join logic.",
            "error_type": "Local: value parsing error"
        },
        "3572f327afb3fe1874b00603": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "For patients that are given heparin flush (100 units/ml) in 2100, what are the top four drugs that they are prescribed afterwards within the same month?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'heparin flush (100 units/ml)' AND strftime('%Y',prescriptions.starttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE strftime('%Y',prescriptions.starttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.starttime < T2.starttime AND datetime(T1.starttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH heparin_item AS ( SELECT itemid FROM d_items WHERE label = 'heparin flush (100 units/ml)' ) SELECT p.drug FROM prescriptions p WHERE EXISTS ( SELECT 1 FROM inputevents i JOIN heparin_item h ON i.itemid = h.itemid WHERE i.subject_id = p.subject_id AND strftime('%Y-%m', i.starttime) = strftime('%Y-%m', p.starttime) AND p.starttime > i.starttime AND i.starttime BETWEEN '2100-01-01' AND '2100-12-31' ) GROUP BY p.drug ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['0.83% sodium chloride'], ['acetaminophen iv'], ['aspirin'], ['clonidine patch 0.3 mg/24 hr'], ['iso-osmotic dextrose'], ['lactated ringers'], ['nicardipine iv'], ['olanzapine (disintegrating tablet)'], ['ondansetron'], ['pantoprazole'], ['phosphorus'], ['potassium chloride replacement (critical care and oncology)'], ['quetiapine fumarate'], ['ranitidine (liquid)'], ['sodium chloride 0.9%'], ['sodium chloride 0.9%  flush'], ['vancomycin'], ['vial']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets where to find the heparin flush events and which table drives the logic. It uses inputevents joined to d_items to identify 'heparin flush (100 units/ml)' instead of directly querying the prescriptions table (as the ground truth does) and then joining to subsequent prescriptions. This error spans multiple clauses and alters the entire structure and meaning of the query\u2014it uses the wrong source table for the key event and thus never matches any prescriptions afterward, leading to an empty result.",
            "error_type": "Global: incorrect-table-join error"
        },
        "4bf5d4ab59d0590189c5795f": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many days have elapsed since the last time on the current hospital visit that patient 10021487 was diagnosed with other chronic nonalcoholic liver disease?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', diagnoses_icd.charttime) ) FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other chronic nonalcoholic liver disease' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT CAST((julianday('2100-12-31 23:59:00') - julianday(MAX(d.charttime))) AS INTEGER) AS days_elapsed FROM diagnoses_icd d JOIN d_icd_diagnoses dicd ON d.icd_code = dicd.icd_code JOIN admissions a ON d.hadm_id = a.hadm_id WHERE d.subject_id = 10021487 AND a.dischtime IS NULL AND dicd.long_title = 'other chronic nonalcoholic liver disease';",
            "gold_answer": "[['24.983']]",
            "pred_answer": "[['24.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly identifies the most recent diagnosis and filters for the current admission but then casts the day difference to an integer, truncating the decimal portion. This is a localized error in the SELECT clause (value parsing/formatting) and does not affect the overall query structure or logic of joining tables and filtering. The mistake can be fixed by removing the CAST to integer or preserving the float, without altering the joins or filters.",
            "error_type": "Local: value parsing error"
        },
        "e6bb44f8f32c8755a047c619": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many hours has it been since patient 10004733's last stay in the careunit medicine on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', transfers.intime) ) FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND transfers.careunit = 'medicine' ORDER BY transfers.intime DESC LIMIT 1",
            "pred_sql": "SELECT (julianday(COALESCE(t.outtime, '2100-12-31 23:59:00')) - julianday(t.intime)) * 24 AS hours_since_last_stay FROM transfers t JOIN admissions a ON t.hadm_id = a.hadm_id WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND t.careunit = 'medicine' AND t.intime = ( SELECT MAX(intime) FROM transfers WHERE hadm_id = a.hadm_id AND careunit = 'medicine' );",
            "gold_answer": "[['27.933']]",
            "pred_answer": "[['21.738']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s JOIN and filtering logic correctly identifies the patient\u2019s current hospital admission and the most recent medicine careunit transfer. The error lies solely in the SELECT expression: it uses COALESCE(t.outtime, current_time) to compute the duration of the stay, instead of calculating the time elapsed since the transfer\u2019s start (intime) to now. This is a localized mistake in the temporal calculation, not a structural or multi-clause logic error.",
            "error_type": "Local: other-temporal error"
        },
        "fb35e5711bd598ff9c938892": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What was the name of the specimen test that patient 10038992 had for the last time last month?",
            "gold_sql": "SELECT microbiologyevents.spec_type_desc FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038992 ) AND datetime(microbiologyevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY microbiologyevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT test_name FROM microbiologyevents WHERE subject_id = 10038992 AND datetime(charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') ORDER BY charttime DESC LIMIT 1;",
            "gold_answer": "[['blood culture']]",
            "pred_answer": "[['blood culture, routine']]",
            "is_correct": false,
            "reasoning": "The predicted query errs only in the SELECT clause by choosing microbiologyevents.test_name instead of spec_type_desc. Everything else (filtering by month, ordering, LIMIT) maintains the same structure and logical flow. This is a localized column\u2010reference mistake, not a structural or multi\u2010clause logic error.",
            "error_type": "Local: column reference error"
        }
    },
    "llm2sql": {
        "5be413e7ffdc64923bbd30db": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "How many patients had other open umbilical herniorrhaphy two or more times?",
            "gold_sql": "SELECT COUNT(T1.subject_id) FROM ( SELECT admissions.subject_id, COUNT(*) AS C1 FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other open umbilical herniorrhaphy' ) GROUP BY admissions.subject_id ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT COUNT(DISTINCT pi.subject_id) FROM procedures_icd pi JOIN d_icd_procedures dp ON pi.icd_code = dp.icd_code WHERE dp.long_title = 'other open umbilical herniorrhaphy' GROUP BY pi.subject_id HAVING COUNT(pi.row_id) >= 2;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s overall structure (filtering by the procedure name, grouping by subject_id, and requiring at least two occurrences) matches the intent of the ground truth. However, it misuses COUNT(DISTINCT pi.subject_id) together with GROUP BY pi.subject_id and HAVING. This aggregation mistake is isolated to the SELECT/GROUP BY clause and can be fixed by removing the unnecessary DISTINCT (or by restructuring only the aggregation), without altering the join logic or the rest of the query. Hence, this is a localized aggregation error.",
            "error_type": "Local: grouping/aggregation error"
        },
        "9b48cb64daf9115952eca1ce": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Among patients diagnosed with pneumonitis due to inhalation of food and vomit, what are the top five most common lab tests that followed within the same month this year?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'pneumonitis due to inhalation of food and vomit' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT D.label FROM ( SELECT L.itemid, COUNT(*) as frequency FROM diagnoses_icd AS DI JOIN d_icd_diagnoses as DID ON DI.icd_code = DID.icd_code JOIN labevents AS L ON DI.subject_id = L.subject_id AND DI.hadm_id = L.hadm_id WHERE DID.long_title = 'pneumonitis due to inhalation of food and vomit' AND datetime(DI.charttime, 'start of month') = datetime(L.charttime, 'start of month') AND datetime(DI.charttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY L.itemid ORDER BY frequency DESC LIMIT 5 ) AS TopLabs JOIN d_labitems AS D ON TopLabs.itemid = D.itemid;",
            "gold_answer": "[['alanine aminotransferase (alt)'], ['alkaline phosphatase'], ['anion gap'], ['asparate aminotransferase (ast)'], ['base excess'], ['bicarbonate'], ['bilirubin, total'], ['calcium, total'], ['calculated total co2'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['magnesium'], ['mch'], ['mchc'], ['mcv'], ['pco2'], ['ph'], ['phosphate'], ['platelet count'], ['po2'], ['pt'], ['ptt'], ['rdw'], ['rdw-sd'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "[['glucose'], ['magnesium'], ['phosphate'], ['sodium'], ['urea nitrogen']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended logic by joining diagnoses_icd and labevents on both subject_id and hadm_id, thereby restricting lab results to the same hospital admission rather than simply the same calendar month. Furthermore, it omits the requirement that the lab test occur after the diagnosis (DI.charttime < L.charttime). These errors span multiple clauses (JOIN and WHERE) and alter the overall meaning and result set, not just a localized syntax mistake.",
            "error_type": "Global: incorrect join and missing temporal condition"
        },
        "8556595488ba5fe19d017b8a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Tell me the sum of the input volume for patient 10022017 since 293 days ago.",
            "gold_sql": "SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022017 ) ) AND datetime(inputevents.starttime) >= datetime('2100-12-31 23:59:00','-293 day')",
            "pred_sql": "SELECT SUM(amount) AS total_input_volume FROM inputevents WHERE subject_id = 10022017 AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-293 days')",
            "gold_answer": "[['24007.0']]",
            "pred_answer": "no such column: amount",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column `amount` instead of the correct `totalamount`. This is a localized mistake in the SELECT clause and does not alter the overall logic or structure of the query. It can be fixed by using the correct column name without restructuring the query.",
            "error_type": "Local: table/column reference error"
        },
        "58a8d29f9daf5d4d12948ba5": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When was the last time patient 10012853 was diagnosed with dizziness and giddiness in 2100?",
            "gold_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'dizziness and giddiness' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10012853 ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(diagnoses_icd.charttime) AS last_diagnosis_time FROM diagnoses_icd JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE diagnoses_icd.subject_id = 10012853 AND d_icd_diagnoses.long_title = 'Dizziness and giddiness' AND strftime('%Y', diagnoses_icd.charttime) = '2100'",
            "gold_answer": "[['2100-04-23 15:36:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses the literal 'Dizziness and giddiness' (capitalized) instead of the exact 'dizziness and giddiness' used in the diagnosis table, causing the filter to return no rows. This is a localized mismatch in the WHERE clause string literal and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "c1ade02d0de383345c62fe4b": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Count the number of times patient 10025612 received dextrose 5% since 186 days ago.",
            "gold_sql": "SELECT COUNT(*) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10025612 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'dextrose 5%' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime) >= datetime('2100-12-31 23:59:00','-186 day')",
            "pred_sql": "SELECT COUNT(*) FROM prescriptions WHERE subject_id = 10025612 AND drug = 'dextrose 5%' AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-186 days');",
            "gold_answer": "[['37.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and the drug column to count occurrences of 'dextrose 5%', whereas the correct logic requires counting entries in inputevents (joined through icustays and admissions) for input administrations. This is not a simple reference or syntax mistake but a fundamental change in the query\u2019s structure and data relationships, altering the intended meaning. Fixing it requires replacing the entire source table and join logic, so it is a global error.",
            "error_type": "Global: incorrect table selection"
        },
        "c114793bdbb1cbf102c8f53a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the top five drugs that are prescribed to patients within the same month after they are prescribed ondansetron odt this year?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'ondansetron odt' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.starttime < T2.starttime AND datetime(T1.starttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT P2.drug FROM prescriptions AS P1 INNER JOIN prescriptions AS P2 ON P1.subject_id = P2.subject_id AND strftime('%Y-%m', P1.starttime) = strftime('%Y-%m', P2.starttime) WHERE P1.drug = 'ondansetron odt' AND strftime('%Y', P1.starttime) = '2100' AND P2.drug != 'ondansetron odt' GROUP BY P2.drug ORDER BY COUNT(P2.drug) DESC LIMIT 5;",
            "gold_answer": "[['5% dextrose (excel bag)'], ['bag'], ['iso-osmotic dextrose'], ['lidocaine 1%'], ['midodrine'], ['sodium chloride 0.9%'], ['vancomycin']]",
            "pred_answer": "[['bag'], ['heparin'], ['iso-osmotic dextrose'], ['sodium chloride 0.9%'], ['vancomycin']]",
            "is_correct": false,
            "reasoning": "The predicted query correctly joins prescriptions on the same patient and same month/year, but it omits the crucial condition that the second drug prescription must occur after the ondansetron odt prescription (i.e., P1.starttime < P2.starttime). This missing temporal filter leads to including drugs prescribed before ondansetron odt. The error is localized to the WHERE/JOIN condition without changing the overall query structure or table relationships.",
            "error_type": "Local: missing temporal filter"
        },
        "330a5ea38d543d09f93a3e3c": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Can you tell me the first value of the mchc test for patient 10039708 on the last hospital visit?",
            "gold_sql": "SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039708 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mchc' ) ORDER BY labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT L.valuenum FROM labevents L + JOIN d_labitems D ON L.itemid = D.itemid + JOIN admissions A ON L.hadm_id = A.hadm_id + WHERE A.subject_id = 10039708 AND D.label = 'mchc' + AND A.admittime = (SELECT MAX(admittime) FROM admissions WHERE subject_id = 10039708 AND dischtime IS NOT NULL) + ORDER BY L.charttime ASC LIMIT 1;",
            "gold_answer": "[['32.3']]",
            "pred_answer": "near \"+\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces stray '+' characters between clauses (e.g., \"FROM labevents L + JOIN d_labitems...\") causing a syntax error near '+'. This is a localized syntax mistake in the JOIN/SELECT construction and does not involve any major logical restructuring or incorrect table relationships. It can be fixed by removing the '+' signs. Therefore, it is a Local error.",
            "error_type": "Local: syntax error"
        },
        "57e1eb9257a000ee96aa3a4e": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the four most frequently prescribed medications for patients in their 20s since 2100?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 20 AND 29 ) AND strftime('%Y',prescriptions.starttime) >= '2100' GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT P.drug FROM prescriptions AS P JOIN patients AS PT ON P.subject_id = PT.subject_id WHERE (2100 - CAST(SUBSTR(PT.dob,1,4) AS INTEGER)) BETWEEN 20 AND 29 AND datetime(P.starttime) >= datetime('2100-01-01 00:00:00') GROUP BY P.drug ORDER BY COUNT(P.drug) DESC LIMIT 4;",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['insulin'], ['metoprolol tartrate'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended query logic in two major ways. First, it computes patient age using patients.dob instead of using the admissions.age column as required by the assumptions, thus joining an unrelated table and misfiltering the cohort. Second, it replaces the DENSE_RANK window function\u2014which preserves ties for the top four most frequent drugs\u2014with a simple ORDER BY ... LIMIT 4, causing it to drop a drug that should be included when there is a tie. These errors span multiple clauses (JOIN, WHERE, SELECT, ranking) and require restructuring the query, so they are Global.",
            "error_type": "Global: incorrect age calculation and ranking strategy"
        },
        "c883954cdceee4237e2e39f4": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the top five most frequent procedures that patients received within the same month after being diagnosed with other nonspecific abnormal finding of lung field?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other nonspecific abnormal finding of lung field' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT P.long_title, COUNT(*) as frequency nFROM procedures_icd PI nJOIN diagnoses_icd DI ON PI.subject_id = DI.subject_id AND PI.hadm_id = DI.hadm_id nJOIN d_icd_procedures P ON PI.icd_code = P.icd_code nJOIN d_icd_diagnoses D ON DI.icd_code = D.icd_code nWHERE D.long_title = 'other nonspecific abnormal finding of lung field' n AND strftime('%Y-%m', PI.charttime) = strftime('%Y-%m', DI.charttime) nGROUP BY P.long_title nORDER BY frequency DESC nLIMIT 5;",
            "gold_answer": "[['hemodialysis']]",
            "pred_answer": "near \"nFROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains stray 'n' characters before FROM and JOIN clauses (e.g., 'nFROM'), causing a syntax error. This is a localized issue in the SELECT statement formatting and does not affect the overall query logic or table relationships.",
            "error_type": "Local: syntax error"
        },
        "fb4971631664b9b3ac5360bd": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When was patient 10019568's first minimum urea nitrogen in 02/this year?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019568 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'urea nitrogen' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '02' ORDER BY labevents.valuenum ASC, labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(T1.charttime) AS first_minimum_time FROM labevents AS T1 INNER JOIN d_labitems AS T2 ON T1.itemid = T2.itemid WHERE T1.subject_id = 10019568 AND T2.label = 'urea nitrogen' AND datetime(T1.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') AND strftime('%m', T1.charttime) = '02' ORDER BY T1.valuenum ASC, T1.charttime ASC LIMIT 1",
            "gold_answer": "[['2100-02-05 04:30:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic used to find the first minimum urea nitrogen measurement in February. It replaces the hadm_id subquery with a direct subject_id filter, misapplies the date filter by using 'start of month' instead of restricting to February of this year via 'start of year' plus month check, and aggregates MIN(charttime) instead of ordering by valuenum then charttime. These errors span multiple clauses and change the intended query structure and logic, making this a global error.",
            "error_type": "Global: incorrect overall query logic"
        },
        "1279690fd165c9155890598f": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was the total void output in patient 10022880 on 04/03/2100?",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022880 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'void' AND d_items.linksto = 'outputevents' ) AND strftime('%Y-%m-%d',outputevents.charttime) = '2100-04-03'",
            "pred_sql": "SELECT SUM(value) FROM outputevents WHERE subject_id = 10022880 AND datetime(charttime, 'start of day') = datetime('2100-04-03 23:59:00', 'start of day', '-0 day')",
            "gold_answer": "[['200.0']]",
            "pred_answer": "[['1260.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters by subject_id and date, but omits the condition restricting outputevents to the 'void' item label (and linksto). This is a missing WHERE clause filter, which is a localized issue that can be fixed by adding the appropriate condition. The overall query structure and joins remain correct.",
            "error_type": "Local: missing-where-filter"
        },
        "8aaee78b4bbe7adab81c6104": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the five most commonly taken specimens that patients are tested within 2 months after receiving a other endovascular procedures on other vessels procedure?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other endovascular procedures on other vessels' ) ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT me.spec_type_desc, COUNT(*) AS count FROM procedures_icd AS pi JOIN microbiologyevents AS me ON pi.subject_id = me.subject_id AND pi.hadm_id = me.hadm_id WHERE pi.icd_code = (SELECT icd_code FROM d_icd_procedures WHERE long_title = 'endovascular procedures on other vessels') AND datetime(me.charttime) <= datetime(pi.charttime, '+2 months') GROUP BY me.spec_type_desc ORDER BY count DESC LIMIT 5;",
            "gold_answer": "[['stool'], ['urine']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by joining microbiologyevents and procedures_icd on both subject_id and hadm_id (restricting to the same hospital stay) and only applying an upper time bound, whereas the ground truth joins on subject_id across stays with both lower and upper time bounds. It also uses the wrong procedure title filter. These are structural and semantic join errors that alter the intended meaning, not just a minor syntax fix.",
            "error_type": "Global: incorrect join logic"
        },
        "60582cd00e26838f77d59095": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "How much did patient 10031757 weigh the first time it was measured since 3 months ago?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10031757 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-3 month') ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT valuenum  n  FROM chartevents  n  WHERE subject_id = 10031757  n  AND itemid = (SELECT itemid FROM d_items WHERE label = 'daily weight')  n  AND datetime(charttime) >= datetime('2100-12-31 23:59:00','start of day','-365/4 day')  n  ORDER BY charttime ASC  n  LIMIT 1;",
            "gold_answer": "[['109.6']]",
            "pred_answer": "near \"n\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces invalid aliasing with stray 'n' tokens (e.g., \"SELECT valuenum  n  FROM chartevents  n\" and \"WHERE subject_id = 10031757  n\"), causing a syntax error (near \"n\"). This is a localized mistake in the query syntax rather than a flaw in the overall logic or table relationships.",
            "error_type": "Local: syntax error"
        },
        "36e6e4b77fb6bc869d511e26": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When did patient 10027602 enter the hospital the last time via emergency room?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.admission_location = 'emergency room' AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT admittime FROM admissions WHERE subject_id = 10027602 AND admission_type = 'emergency' ORDER BY admittime DESC LIMIT 1",
            "gold_answer": "[['2100-11-23 12:05:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only filters on admission_type = 'emergency' instead of admission_location = 'emergency room' and also omits the requirement dischtime IS NOT NULL. Both mistakes occur in the WHERE clause and do not alter the overall query structure or table relationships, making this a localized filter/column reference error.",
            "error_type": "Local: column reference error"
        },
        "25675cd3569afa3bcdb2d55a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Among the patients who are prescribed clonidine patch 0.3 mg/24 hr, what are the three most frequently prescribed drugs at the same time since 1 year ago?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'clonidine patch 0.3 mg/24 hr' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drug FROM ( SELECT drug, COUNT(*) as frequency FROM prescriptions WHERE hadm_id IN ( SELECT DISTINCT hadm_id FROM prescriptions WHERE drug = 'clonidine patch 0.3 mg/24 hr' AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-1 year') ) AND drug != 'clonidine patch 0.3 mg/24 hr' AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY drug ORDER BY frequency DESC LIMIT 3 )",
            "gold_answer": "[['clonidine patch 0.3 mg/24 hr']]",
            "pred_answer": "[['iso-osmotic dextrose'], ['olanzapine (disintegrating tablet)'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses hadm_id membership to find co-prescriptions within the same admission rather than matching prescriptions that occurred at the exact same starttime. This violates the core requirement of finding drugs prescribed \"at the same time\" and alters the overall logic of the query, not just a local clause. The error spans multiple parts of the query (subquery selection and grouping) and requires restructuring the join condition to match timestamps, making it a global logical error.",
            "error_type": "Global: incorrect join logic"
        },
        "d11cac5a8a03f36ce1590487": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the count of patients who stayed in neurology?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT transfers.hadm_id FROM transfers WHERE transfers.careunit = 'neurology' )",
            "pred_sql": "SELECT COUNT(DISTINCT icustays.subject_id) FROM icustays WHERE first_careunit = 'neurology' OR last_careunit = 'neurology';",
            "gold_answer": "[['19.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the icustays table and its first_careunit/last_careunit columns instead of using the transfers table with careunit to identify neurology stays. This is a fundamental change in data source and logic\u2014ICU stays are not the same as careunit transfers, and it fails to capture all neurology stays. The error spans the main FROM clause and entire filtering logic, so it requires restructuring the query rather than a minor fix.",
            "error_type": "Global: wrong table/structure error"
        },
        "10ba039ccd53a6d92815ad48": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Among those who are prescribed with fish oil (omega 3), what are the five most commonly prescribed drugs at the same time this year?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'fish oil (omega 3)' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH FishOilAdmissions AS ( SELECT DISTINCT hadm_id FROM prescriptions WHERE drug = 'fish oil (omega 3)' AND strftime('%Y', starttime) = '2100' ) SELECT drug FROM prescriptions WHERE hadm_id IN (SELECT hadm_id FROM FishOilAdmissions) AND strftime('%Y', starttime) = '2100' AND drug != 'fish oil (omega 3)' GROUP BY drug ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['albumin 5% (25g / 500ml)'], ['aspirin'], ['aspirin ec'], ['chlorhexidine gluconate 0.12% oral rinse'], ['clopidogrel'], ['docusate sodium'], ['docusate sodium (liquid)'], ['fenofibrate'], ['fish oil (omega 3)'], ['ibuprofen'], ['omeprazole'], ['pravastatin'], ['topiramate (topamax)'], ['vitamin d']]",
            "pred_answer": "[['0.9% sodium chloride'], ['0.9% sodium chloride (mini bag plus)'], ['albumin 25% (12.5g / 50ml)'], ['insulin'], ['metoprolol tartrate']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply selects all other drugs prescribed at any time during the same admission year for patients who ever got fish oil, rather than only those drugs given at the exact same timestamp as the fish oil prescription. It never joins or equates the prescription times, so it fundamentally changes the intended logic of \u201cat the same time.\u201d Fixing this requires restructuring the query to join the prescriptions table on the time of the fish oil event, not just filtering by admission or year. This is a global logic/join error.",
            "error_type": "Global: incorrect join logic"
        },
        "6db688d075e7d3fad38757ad": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When was the first time patient 10008454 came to the hospital through emergency room?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10008454 AND admissions.admission_location = 'emergency room' AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT admittime FROM admissions WHERE subject_id = 10008454 AND admission_type = 'emergency' ORDER BY admittime ASC LIMIT 1",
            "gold_answer": "[['2100-12-02 06:31:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column in the WHERE clause (admission_type = 'emergency' instead of admission_location = 'emergency room') and omits the dischtime IS NOT NULL filter. This is a localized mistake in the predicate without altering the overall query structure or table relationships.",
            "error_type": "Local: table/column reference error"
        },
        "a90dde825f1d13fd69a5b809": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was the last medication that patient 10019385 was prescribed?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019385 ) ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT drug FROM prescriptions WHERE subject_id = 10019385 ORDER BY stoptime DESC LIMIT 1",
            "gold_answer": "[['acetaminophen']]",
            "pred_answer": "[['aspirin']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the correct table but orders by the wrong timestamp column (stoptime instead of starttime). This is a mistake confined to the ORDER BY clause and does not alter the overall query structure or table relationships.",
            "error_type": "Local: ORDER BY column error"
        },
        "c29ee716c7d8f7b58212f716": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the three most usual lab tests for patients in their 50s since 2100?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 50 AND 59 ) AND strftime('%Y',labevents.charttime) >= '2100' GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d_labitems.label FROM labevents JOIN admissions ON labevents.subject_id = admissions.subject_id AND labevents.hadm_id = admissions.hadm_id JOIN d_labitems ON labevents.itemid = d_labitems.itemid WHERE admissions.age BETWEEN 50 AND 59 AND admissions.admittime >= '2100-01-01' GROUP BY d_labitems.label ORDER BY COUNT(labevents.itemid) DESC LIMIT 3;",
            "gold_answer": "[['chloride'], ['hematocrit'], ['sodium']]",
            "pred_answer": "[['glucose'], ['hematocrit'], ['hemoglobin']]",
            "is_correct": false,
            "reasoning": "The predicted query misplaces the time filter: it uses admissions.admittime >= '2100-01-01' instead of filtering labevents.charttime by year >= 2100. This mistake is confined to the WHERE clause and does not alter the joins or overall structure of the query, so it is a localized (clause-level) error involving a temporal condition.",
            "error_type": "Local: temporal filter error"
        },
        "de9eceabb0bb25042457a7fc": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Count the number of patients who underwent a microbiological blood culture ( myco/f lytic bottle) test.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT microbiologyevents.hadm_id FROM microbiologyevents WHERE microbiologyevents.spec_type_desc = 'blood culture ( myco/f lytic bottle)' )",
            "pred_sql": "SELECT COUNT(DISTINCT subject_id) FROM microbiologyevents WHERE test_name = 'myco/f lytic bottle';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on the wrong column: it uses test_name = 'myco/f lytic bottle' instead of spec_type_desc = 'blood culture ( myco/f lytic bottle)'. This is a localized column reference error in the WHERE clause and does not require restructuring the query\u2019s overall logic.",
            "error_type": "Local: table/column reference error"
        },
        "4c81362dc30f4548514eecc8": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "List the top five most frequent microbiology tests given to patients after being diagnosed with hypokalemia within the same hospital visit since 1 year ago.",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'hypokalemia' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH hypo_diagnoses AS ( SELECT DISTINCT di.subject_id, di.hadm_id, di.charttime FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code JOIN admissions a ON di.hadm_id = a.hadm_id WHERE dicd.long_title LIKE '%hypokalemia%' AND datetime(di.charttime) >= datetime('2100-12-31', '-1 year') ), tests_after_diagnoses AS ( SELECT me.test_name, me.charttime FROM microbiologyevents me JOIN hypo_diagnoses hd ON me.hadm_id = hd.hadm_id WHERE datetime(me.charttime) > datetime(hd.charttime) ) SELECT test_name FROM tests_after_diagnoses GROUP BY test_name ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['anaerobic bottle gram stain'], ['blood culture, routine'], ['c. difficile pcr'], ['gram stain'], ['mrsa screen'], ['respiratory culture'], ['urine culture']]",
            "pred_answer": "[['blood culture, routine'], ['gram stain'], ['mrsa screen'], ['respiratory culture'], ['urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict microbiology events to within the last year (missing the WHERE datetime(me.charttime) >= datetime('2100-12-31 23:59:00','-1 year') filter) and uses LIMIT 5 instead of applying a DENSE_RANK to include ties. Both mistakes are confined to specific clauses (the WHERE filter in the CTE and the ORDER BY/LIMIT vs. DENSE_RANK logic) and can be corrected without altering the overall join structure or logic of the query.",
            "error_type": "Local: filter and ranking clause error"
        },
        "5a15ddb42e39b4c1050740e5": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the label for hand cleansing prior to procedure (picc)?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.abbreviation = 'hand cleansing prior to procedure (picc)'",
            "pred_sql": "// No suitable SQL query can be formed for the given request based on the data available in the provided tables.",
            "gold_answer": "[['hand cleansing prior to procedure (picc)'], ['hand cleansing prior to procedure (picc)']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to provide any query at all, effectively omitting the SELECT and FROM clauses needed to retrieve the label. This is a localized mistake in not referencing the appropriate table or columns, rather than a structural or multi-clause logic error. Therefore, it is classified as a Local error.",
            "error_type": "Local: missing-query error"
        },
        "7351edf011015936356fbbda": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "How many days is it since patient 10039831 last stayed in the careunit surgical intensive care unit (sicu) on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', transfers.intime) ) FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND transfers.careunit = 'surgical intensive care unit (sicu)' ORDER BY transfers.intime DESC LIMIT 1",
            "pred_sql": "SELECT ROUND((JULIANDAY('2100-12-31 23:59:00') - JULIANDAY(T1.outtime))) AS days_since_last_sicu_stay FROM transfers T1 JOIN admissions T2 ON T1.hadm_id = T2.hadm_id WHERE T1.subject_id = 10039831 AND T1.careunit = 'sicu' AND T2.dischtime IS NULL ORDER BY T1.outtime DESC LIMIT 1;",
            "gold_answer": "[['0.182']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL misuses the careunit literal ('sicu' instead of 'surgical intensive care unit (sicu)') and references the wrong timestamp column (outtime instead of intime). These are errors confined to specific parts of the query (WHERE clause and SELECT expression) and do not alter the overall structure or logic of joining the current admission and transfers tables.",
            "error_type": "Local: column reference/value error"
        },
        "44c1c9cab30e5eb3386c026e": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the 3-month survival rate of patients who received pancrelipase 5000 treatment following a other alteration of consciousness diagnosis?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T3.charttime) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other alteration of consciousness' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'pancrelipase 5000' GROUP BY admissions.subject_id HAVING MAX(prescriptions.starttime) = prescriptions.starttime ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime ) AS T3 JOIN patients ON T3.subject_id = patients.subject_id )",
            "pred_sql": "WITH diagnosis_cte AS ( SELECT di.subject_id, di.hadm_id, MIN(di.charttime) AS diagnosis_date FROM diagnoses_icd di JOIN d_icd_diagnoses dd ON di.icd_code = dd.icd_code WHERE dd.long_title LIKE '%other alteration of consciousness%' GROUP BY di.subject_id, di.hadm_id ), pancrelipase_cte AS ( SELECT DISTINCT p.subject_id, p.hadm_id FROM prescriptions p WHERE p.drug = 'pancrelipase 5000' ), qualified_patients AS ( SELECT d.subject_id, d.hadm_id, d.diagnosis_date FROM diagnosis_cte d JOIN pancrelipase_cte p ON d.subject_id = p.subject_id AND d.hadm_id = p.hadm_id WHERE p.starttime > d.diagnosis_date ), death_check AS ( SELECT qp.subject_id, qp.diagnosis_date FROM qualified_patients qp LEFT JOIN patients pt ON qp.subject_id = pt.subject_id WHERE pt.dod IS NULL OR pt.dod > datetime(qp.diagnosis_date, '+3 months') ) SELECT CAST(COUNT(dc.subject_id) AS FLOAT) / COUNT(qp.subject_id) AS survival_rate FROM qualified_patients qp LEFT JOIN death_check dc ON qp.subject_id = dc.subject_id;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: p.starttime",
            "is_correct": false,
            "reasoning": "The error arises because the CTE \"pancrelipase_cte\" selects only subject_id and hadm_id, but later the query references p.starttime, which was not selected. This is a column reference error localized to the SELECT clause of the CTE and can be fixed by including starttime in the CTE. No overall query structure or logic needs to be changed beyond this clause.",
            "error_type": "Local: table/column reference error"
        },
        "0999752695013d993950a661": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Calculate the total volume of or urine for patient 10039831 today.",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'or urine' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day')",
            "pred_sql": "SELECT SUM(value) AS total_urine_volume FROM outputevents WHERE subject_id = 10039831 AND datetime(charttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day')",
            "gold_answer": "[['880.0']]",
            "pred_answer": "[['2360.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to filter by the specific output type (\u201cor urine\u201d) and does not restrict to the patient\u2019s ICU stays, but instead uses only subject_id and date. This is a mistake in the WHERE clause\u2014a missing condition\u2014rather than a fundamental restructuring of joins or subqueries. Hence, it is a localized error in filtering.",
            "error_type": "Local: missing-filter error"
        },
        "d1ec7624c207f188607287da": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When on the last hospital visit did patient 10019777 receive their last venous catheterization, not elsewhere classified procedure?",
            "gold_sql": "SELECT procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019777 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT T2.charttime FROM admissions AS T1 JOIN procedures_icd AS T2 ON T1.hadm_id = T2.hadm_id WHERE T1.subject_id = 10019777 AND T1.dischtime IS NOT NULL AND T2.icd_code = 'icd9|3961' ORDER BY T1.admittime DESC, T2.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-03-05 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters procedures_icd.icd_code = 'icd9|3961' instead of looking up the correct ICD code via d_icd_procedures.long_title and then matching that code. This is a localized mistake in the literal/value used for icd_code, not a flaw in the overall join structure or query logic. It can be fixed by correcting the icd_code filter to the proper code (or by joining to d_icd_procedures), without changing the global query flow. Therefore, it\u2019s a Local error.",
            "error_type": "Local: value parsing error"
        },
        "cd938096c303c8fb80cb2cba": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Which are the five most common lab tests?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT d_labitems.label, COUNT(*) AS test_count FROM labevents JOIN d_labitems ON labevents.itemid = d_labitems.itemid GROUP BY d_labitems.label ORDER BY test_count DESC LIMIT 5",
            "gold_answer": "[['chloride'], ['creatinine'], ['hematocrit'], ['sodium'], ['urea nitrogen']]",
            "pred_answer": "[['chloride', '1004.0'], ['glucose', '1113.0'], ['hematocrit', '982.0'], ['hemoglobin', '985.0'], ['sodium', '1001.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only uses ORDER BY \u2026 LIMIT 5 and returns both the label and the count, which (1) violates the requirement to use DENSE_RANK() for a top-N query when ties matter, and (2) returns the counts even though the question asks only for the test names. These are localized mistakes in the SELECT/ORDER BY/LIMIT portion rather than a fundamental restructuring of the query logic or table relationships.",
            "error_type": "Local: top-N ranking and projection error"
        },
        "9f32059ca482dff67930ffcd": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was patient 10015860's last value of anion gap in 10/this year?",
            "gold_sql": "SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015860 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'anion gap' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '10' ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT ce.valuenum FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10015860 AND di.label = 'anion gap' AND datetime(ce.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m', ce.charttime) = '10' ORDER BY ce.charttime DESC LIMIT 1;",
            "gold_answer": "[['11.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the wrong table and joins the wrong dimension table. It uses chartevents and d_items for anion gap instead of labevents and d_labitems, fundamentally changing the data source and filtering. This is not a simple column typo or filter mistake but a flawed table join that breaks the intended logic of retrieving lab results. Therefore, it is a global error involving an incorrect join and table selection.",
            "error_type": "Global: incorrect join/table selection"
        },
        "1a8e3475cdccab3b8d13051c": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was the length of stay in days for patient 10005817's first hospital visit?",
            "gold_sql": "SELECT strftime('%J',admissions.dischtime) - strftime('%J',admissions.admittime) FROM admissions WHERE admissions.subject_id = 10005817 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT CAST(JULIANDAY(dischtime) - JULIANDAY(admittime) AS INTEGER) AS length_of_stay_days FROM admissions WHERE subject_id = 10005817 ORDER BY admittime ASC LIMIT 1",
            "gold_answer": "[['8.556']]",
            "pred_answer": "[['8.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in two localized ways: it omits the filter `dischtime IS NOT NULL` and it casts the day difference to an integer, truncating the fractional part. These are errors confined to the WHERE clause and the SELECT expression, and do not alter the overall query structure or table relationships. The query\u2019s logic (computing length of stay) remains intact, but the missing filter and the integer cast lead to an incorrect, truncated value. Hence, this is a localized error in value parsing/post-processing rather than a fundamental structural mistake.",
            "error_type": "Local: value parsing error"
        },
        "43e58def8b692bf30cd5500a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the minimum total hospital cost that involved other incision of brain?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other incision of brain' ) ) GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(c.total_cost) AS min_total_hospital_cost FROM ( SELECT p.hadm_id, SUM(c.cost) AS total_cost FROM cost AS c JOIN procedures_icd AS p ON c.hadm_id = p.hadm_id JOIN d_icd_procedures AS d ON p.icd_code = d.icd_code WHERE c.event_type = 'procedures_icd' AND d.long_title = 'other incision of brain' GROUP BY p.hadm_id ) AS c;",
            "gold_answer": "[['1721.22']]",
            "pred_answer": "[['25.16']]",
            "is_correct": false,
            "reasoning": "The ground truth query sums all cost entries for each admission that involved the specified brain incision procedure and then finds the minimum of those total costs. The predicted query, however, filters cost.event_type = 'procedures_icd' and only sums procedure-specific costs for those admissions. This fundamentally changes the intended logic\u2014only a subset of costs are aggregated rather than the total hospital cost. Fixing this requires removing or altering the event_type filter and reworking the join/filter logic, not just a localized clause tweak.",
            "error_type": "Global: incorrect event_type filter"
        },
        "3ec41e32d5474dbca8ba9aa3": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the daily maximum systolic blood pressure for patient 10013049 since 03/07/2100?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10013049 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure systolic' AND d_items.linksto = 'chartevents' ) AND strftime('%Y-%m-%d',chartevents.charttime) >= '2100-03-07' GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT DATE(chartevents.charttime) AS day, MAX(chartevents.valuenum) AS max_systolic_bp FROM chartevents JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.subject_id = 10013049 AND d_items.label = 'arterial blood pressure systolic' AND DATE(chartevents.charttime) >= DATE('2100-07-03') GROUP BY day;",
            "gold_answer": "[['129.0'], ['133.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only differs in small parts of the WHERE clause and grouping\u2014but the overall structure (grouping by day, taking MAX, joining d_items) is unchanged. The major mistake is the date literal: it uses DATE('2100-07-03') (July 3rd) instead of '2100-03-07' (March 7th), causing no rows to match. This is a localized value/parsing error in the WHERE clause and does not alter the query\u2019s overall logic or joins.",
            "error_type": "Local: value parsing error"
        },
        "7a6abe3bd11c83f13ac6651a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When did patient 10038933 come to the hospital via emergency room for the last time?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10038933 AND admissions.admission_location = 'emergency room' AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT admittime FROM admissions WHERE subject_id = 10038933 AND admission_type = 'emergency' AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1",
            "gold_answer": "[['2100-09-22 12:09:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses admission_type = 'emergency' instead of the correct column admission_location = 'emergency room'. This is a column reference error that does not alter the overall query structure or logic and can be fixed by using the correct column name and value.",
            "error_type": "Local: table/column reference error"
        },
        "355dc4cb5c3c6b88cd291cd3": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Get me patient 10020740's total tpn without lipids input amount on the first icu visit.",
            "gold_sql": "SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'tpn without lipids' AND d_items.linksto = 'inputevents' )",
            "pred_sql": "SELECT SUM(I.amount) AS total_tpn_without_lipids nFROM inputevents I nJOIN icustays S ON I.stay_id = S.stay_id nWHERE I.subject_id = 10020740 nAND S.subject_id = 10020740 nAND I.itemid IN (/* list of itemIDs for TPN without lipids */) nAND S.intime = ( n SELECT MIN(intime)  n FROM icustays  n WHERE subject_id = 10020740 n) *Note:* Replace `/* list of itemIDs for TPN without lipids */` with actual item IDs corresponding to TPN without lipids if they are known. If the item IDs are unknown, further look-up in `d_items` might be necessary based on dataset documentation or manual inspection.",
            "gold_answer": "[['9000.0']]",
            "pred_answer": "near \"nFROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains a localized syntax mistake\u2014specifically the stray \u201cn\u201d prefix before FROM (\"AS total_tpn_without_lipids nFROM\") leading to a syntax error. This is a confined, clause-level issue and does not reflect a fundamental logic or structural flaw in the overall query intent. Therefore, it is a local error.",
            "error_type": "Local: syntax error"
        },
        "17d0633bd144df773e227973": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "How much is a urea nitrogen lab test?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'labevents' AND cost.event_id IN ( SELECT labevents.row_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'urea nitrogen' ) )",
            "pred_sql": "SELECT itemid FROM d_labitems WHERE label = 'urea nitrogen';",
            "gold_answer": "[['9.91']]",
            "pred_answer": "[['51006.0'], ['52647.0']]",
            "is_correct": false,
            "reasoning": "The predicted query completely ignores the cost table and returns only the itemid from d_labitems, rather than retrieving the cost of the specified lab test. This is not a minor typo or missing column; it fundamentally alters the intended logic (finding the cost) by querying the wrong table and omitting necessary joins. Thus it is a global error.",
            "error_type": "Global: incorrect table reference"
        }
    },
    "dinsql": {
        "5c223874f1f67b99707467b0": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Find out the daily minimum chest tube #1 output volume of patient 10004720 since 4 months ago.",
            "gold_sql": "SELECT MIN(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'chest tube #1' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime) >= datetime('2100-12-31 23:59:00','-4 month') GROUP BY strftime('%Y-%m-%d',outputevents.charttime)",
            "pred_sql": "SELECT DATE(outputevents.charttime) AS date, MIN(outputevents.value) AS min_output_volume FROM outputevents JOIN d_items ON outputevents.itemid = d_items.itemid WHERE outputevents.subject_id = 10004720 AND d_items.label = 'chest tube",
            "gold_answer": "[['10.0'], ['100.0'], ['130.0'], ['5.0'], ['90.0']]",
            "pred_answer": "unrecognized token: \"'chest tube\"",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error in the WHERE clause \u2013 the string literal for d_items.label is opened with a single quote but never closed (\u2018chest tube), leading to an unrecognized token. This is a localized value\u2010parsing/syntax mistake and does not alter the overall query structure or intended logic.",
            "error_type": "Local: value parsing error"
        },
        "b898ef45d110fe35ad724d35": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Tell me the percentile of anion gap 27.0 among all patients of the same age as patient 10004235 on the last hospital visit?",
            "gold_sql": "SELECT DISTINCT T1.C1 FROM ( SELECT labevents.valuenum, PERCENT_RANK() OVER ( ORDER BY labevents.valuenum ) AS C1 FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'anion gap' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age = ( SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10004235 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) ) AS T1 WHERE T1.valuenum = 27.0",
            "pred_sql": "WITH PatientAge AS ( SELECT age FROM admissions WHERE subject_id = 10004235 ORDER BY admittime DESC LIMIT 1 ), AnionGapValues AS ( SELECT DISTINCT valuenum FROM labevents WHERE itemid = (SELECT itemid FROM d_labitems WHERE label = 'anion gap') AND hadm_id IN ( SELECT hadm_id FROM admissions WHERE age = (SELECT age FROM PatientAge) AND dischtime IS NOT NULL ) ), RankedAnionGap AS ( SELECT valuenum, PERCENT_RANK() OVER (ORDER BY valuenum) AS percentile FROM AnionGapValues ) SELECT percentile FROM RankedAnionGap WHERE valuenum = 27.0",
            "gold_answer": "[['0.935']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s overall structure (CTEs, joins, filters) matches the intent of the ground truth, but it locally alters the distribution used for the percentile calculation by taking DISTINCT valuenum before applying PERCENT_RANK. This single\u2010clause change (in the AnionGapValues CTE) breaks the percentile lookup without requiring any restructuring of joins or query flow.",
            "error_type": "Local: incorrect use of DISTINCT before window function"
        },
        "a0a67e4692f54ef3b4118ab9": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "On the last hospital visit, when did patient 10027602 receive the first csf;spinal fluid microbiology test?",
            "gold_sql": "SELECT microbiologyevents.charttime FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND microbiologyevents.spec_type_desc = 'csf;spinal fluid' ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(charttime) FROM microbiologyevents WHERE subject_id = 10027602 AND spec_type_desc = 'csf",
            "gold_answer": "[['2100-11-30 04:46:00']]",
            "pred_answer": "unrecognized token: \"'csf\"",
            "is_correct": false,
            "reasoning": "The predicted SQL has a localized syntax error: the string literal for spec_type_desc is not closed (unrecognized token \"'csf\"). This does not change the overall query structure or logic, and can be fixed by properly closing the quote. Therefore, it is a Local error.",
            "error_type": "Local: value parsing error"
        },
        "2b72047f0f9ba7d0dd23455f": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "How many people were given procedure on single vessel two or more times?",
            "gold_sql": "SELECT COUNT(T1.subject_id) FROM ( SELECT admissions.subject_id, COUNT(*) AS C1 FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'procedure on single vessel' ) GROUP BY admissions.subject_id ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT COUNT(DISTINCT procedures_icd.subject_id) FROM procedures_icd JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'single vessel' GROUP BY procedures_icd.subject_id HAVING COUNT(*) >= 2",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong long_title filter: it checks d_icd_procedures.long_title = 'single vessel' instead of the correct 'procedure on single vessel'. This is a localized mistake in the WHERE clause value rather than a structural issue with joins, grouping, or overall logic. Therefore, it is a fixable error within a single clause without altering the overall query structure.",
            "error_type": "Local: value parsing error"
        },
        "d66bbe740798c836c157d171": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the four most frequently ordered lab tests this year for patients of age 20s?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 20 AND 29 ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT T2.label FROM labevents AS T1 JOIN d_labitems AS T2 ON T1.itemid = T2.itemid JOIN admissions AS T3 ON T1.hadm_id = T3.hadm_id WHERE strftime('%Y', T1.charttime) = '2100' AND T3.age BETWEEN 20 AND 29 GROUP BY T2.label ORDER BY COUNT(*) DESC LIMIT 4",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['mch'], ['mchc'], ['mcv'], ['platelet count'], ['rdw'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "[['glucose'], ['hematocrit'], ['hemoglobin'], ['white blood cells']]",
            "is_correct": false,
            "reasoning": "The only mistake in the predicted SQL is in the top-N logic: it uses ORDER BY \u2026 LIMIT 4, which returns exactly four tests, whereas the ground-truth uses DENSE_RANK() and includes all tied tests at the fourth rank. All other parts of the query (joins, filters on year and age) match the intent. This is a localized error in the ranking clause and does not require restructuring the overall query logic.",
            "error_type": "Local: incorrect top-N ranking"
        },
        "c30451c0093dfcdea0f2b0bb": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the average total hospital expense that includes unspecified psychosis in 2100?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified psychosis' ) ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT AVG(total_cost) FROM ( SELECT SUM(DISTINCT C.cost) AS total_cost FROM cost AS C INNER JOIN diagnoses_icd AS D ON C.event_id = D.row_id AND C.event_type = 'diagnoses_icd' INNER JOIN d_icd_diagnoses AS DI ON D.icd_code = DI.icd_code INNER JOIN admissions AS A ON C.hadm_id = A.hadm_id WHERE DI.long_title LIKE '%unspecified psychosis%' AND strftime('%Y', A.admittime) = '2100' GROUP BY C.hadm_id ) AS hospital_expenses",
            "gold_answer": "[['1055.09']]",
            "pred_answer": "[['8.23']]",
            "is_correct": false,
            "reasoning": "The predicted SQL errs in the WHERE clause by filtering admissions.admittime instead of cost.chargetime, and joins to admissions needlessly. It also uses SUM(DISTINCT C.cost) instead of SUM(cost.cost). These are confined to a specific clause (the WHERE and aggregate), not altering the overall join strategy or query structure. Hence, this is a localized mistake rather than a fundamental structural or logical flaw.",
            "error_type": "Local: incorrect column reference in WHERE and aggregate clause"
        },
        "4a52128ae4a63ad0dc8d6410": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the top three diagnoses associated with the highest 3-month mortality rate?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T4.icd_code FROM ( SELECT T3.icd_code, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C3 FROM ( SELECT T2.icd_code, AVG(C1) AS C2 FROM ( SELECT T1.icd_code, ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T1.charttime) < 365 / 4 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id, diagnoses_icd.icd_code HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id ) AS T2 GROUP BY T2.icd_code ) AS T3 ) AS T4 WHERE T4.C3 <= 3 )",
            "pred_sql": "WITH diagnosis_mortality AS ( SELECT d.icd_code, COUNT(DISTINCT d.subject_id) AS total_patients, SUM(CASE WHEN p.dod IS NOT NULL AND julianday(p.dod) <= julianday(d.charttime) + 91.25 THEN 1 ELSE 0 END) AS deaths_within_3_months FROM diagnoses_icd d JOIN patients p ON d.subject_id = p.subject_id GROUP BY d.icd_code ), mortality_rate AS ( SELECT icd_code, (CAST(deaths_within_3_months AS FLOAT) / total_patients) AS mortality_rate FROM diagnosis_mortality ) SELECT d_icd_diagnoses.long_title FROM mortality_rate JOIN d_icd_diagnoses ON mortality_rate.icd_code = d_icd_diagnoses.icd_code ORDER BY mortality_rate.mortality_rate DESC LIMIT 3",
            "gold_answer": "[['abnormal involuntary movements'], ['acquired hypertrophic pyloric stenosis'], ['acute and subacute hepatic failure without coma'], ['acute kidney failure, unspecified'], ['acute respiratory failure, unspecified whether with hypoxia or hypercapnia'], ['alcohol abuse, continuous'], ['alcoholic polyneuropathy'], ['amyotrophic lateral sclerosis'], ['anemia in other chronic diseases classified elsewhere'], ['arthrodesis status'], ['atherosclerotic heart disease of native coronary artery with other forms of angina pectoris'], ['automatic implantable cardiac defibrillator in situ'], ['cachexia'], ['chronic airway obstruction, not elsewhere classified'], ['chronic hepatitis c with hepatic coma'], ['chronic kidney disease, stage 3 (moderate)'], ['chronic osteomyelitis, site unspecified'], ['chronic systolic heart failure'], ['delirium due to conditions classified elsewhere'], ['dementia, unspecified, without behavioral disturbance'], ['dependence on respirator, status'], ['diarrhea'], ['diplopia'], ['do not resuscitate status'], ['early satiety'], ['edema'], ['embolism and thrombosis of other specified artery'], ['encounter for palliative care'], ['epistaxis'], ['esophageal varices without mention of bleeding'], ['glucocorticoid deficiency'], ['hemoperitoneum'], ['hemorrhage of gastrointestinal tract, unspecified'], ['hyperosmolality and/or hypernatremia'], ['hyperpotassemia'], ['hypertrophy (benign) of prostate without urinary obstruction and other lower urinary tract symptom (luts)'], ['hypospadias'], ['hypotension, unspecified'], ['hypovolemic shock'], ['ileus, unspecified'], ['infection with microorganisms resistant to penicillins'], ['jaundice, unspecified, not of newborn'], ['malignant neoplasm of body of pancreas'], ['malignant neoplasm of liver, secondary'], ['malignant neoplasm of rectosigmoid junction'], ['metabolic encephalopathy'], ['methicillin susceptible staphylococcus aureus infection as the cause of diseases classified 
elsewhere'], ['migraine, unspecified, without mention of intractable migraine without mention of status migrainosus'], ['mycosis fungoides, unspecified site, extranodal and solid organ sites'], ['natural blood and blood products causing adverse effects in therapeutic use'], ['neoplasm related pain (acute) (chronic)'], ['nonalcoholic steatohepatitis (nash)'], ['oliguria and anuria'], ['other cirrhosis of liver'], ['other closed fractures of distal end of radius (alone)'], ['other complications due to other vascular device, implant, and graft'], ['other dependence on machines, supplemental oxygen'], ['other diseases of lung, not elsewhere classified'], ['other disorders of muscle, ligament, and fascia'], ['other disorders of neurohypophysis'], ['other dysphagia'], ['other postprocedural shock, initial encounter'], ['other septicemia due to gram-negative organisms'], ['other specified analgesics and antipyretics causing adverse effects in therapeutic use'], ['other specified bacterial agents as the cause of diseases classified elsewhere'], ['other specified disorders of peritoneum'], ['other specified disorders of stomach and duodenum'], ['other specified forms of hearing loss'], ['other specified pre-operative examination'], ['paroxysmal ventricular tachycardia'], ['peritonitis, unspecified'], ['personal history of alcoholism'], ['personal history of malignant neoplasm of tongue'], ['personal history of peptic ulcer disease'], ['personal history of poliomyelitis'], ['personal history of pulmonary embolism'], ['pneumonia due to klebsiella pneumoniae'], ['postprocedural hemorrhage of a circulatory system organ or structure following a cardiac catheterization'], ['pulmonary congestion and hypostasis'], ['regional enteritis of unspecified site'], ['sepsis due to escherichia coli [e. 
coli]'], ['septic shock'], ['spinal stenosis, lumbar region, without neurogenic claudication'], ['spontaneous bacterial peritonitis'], ['stricture of artery'], ['subarachnoid hemorrhage following injury without mention of open intracranial wound, with no loss of consciousness'], ['tachypnea'], ['toxic encephalopathy'], ['transfusion related acute lung injury (trali)'], ['unspecified atrial flutter'], ['unspecified condition of brain'], ['unspecified deficiency anemia'], ['unspecified fall'], ['unspecified infection due to central venous catheter, initial encounter'], ['unspecified severe protein-calorie malnutrition'], ['unspecified vascular insufficiency of intestine'], ['unspecified viral hepatitis c with hepatic coma'], ['urethral stricture, unspecified'], ['varices of other sites'], ['ventilator associated pneumonia']]",
            "pred_answer": "[['asymptomatic human immunodeficiency virus [hiv] infection status'], ['cirrhosis of liver without mention of alcohol'], ['other dependence on machines, supplemental oxygen']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to apply the key cohort filter and logic used in the ground truth: it does not restrict diagnoses to those occurring at least 3 months before the reference date nor does it use only the first diagnosis per patient. Without excluding recent diagnoses (for which outcome cannot yet be observed) or properly anchoring on the first diagnosis event, the mortality rate calculation is fundamentally different and incorrect. This error spans multiple parts of the logic (WHERE filtering, cohort definition, and aggregation), so it is a global logic error.",
            "error_type": "Global: missing cohort time-filter"
        },
        "98412e460c98a20d61ef2b78": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Show me the difference between patient 10022041's total input and output on the last icu visit.",
            "gold_sql": "SELECT ( SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022041 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) ) - ( SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022041 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) )",
            "pred_sql": "WITH LastICUVisit AS ( SELECT stay_id FROM icustays WHERE subject_id = 10022041 ORDER BY intime DESC LIMIT 1 ), TotalInput AS ( SELECT SUM(totalamount) AS total_input FROM inputevents WHERE stay_id = (SELECT stay_id FROM LastICUVisit) ), TotalOutput AS ( SELECT SUM(value) AS total_output FROM outputevents WHERE stay_id = (SELECT stay_id FROM LastICUVisit) ) SELECT (TI.total_input - TO.total_output) AS input_output_difference FROM TotalInput TI, TotalOutput TO",
            "gold_answer": "[['6400.0']]",
            "pred_answer": "near \"TO\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with a syntax error near \"TO\" because it aliases the TotalOutput table as TO, which is a reserved keyword. This is a localized alias/syntax mistake that can be fixed by choosing a different alias. The overall query structure and logic remain intact, so this is a Local error.",
            "error_type": "Local: syntax error"
        },
        "f63abc2721bee6659fec40e2": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "How many patients received a arterial catheterization after the procedure of enteral infusion of concentrated nutritional substances within the same month?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'enteral infusion of concentrated nutritional substances' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'arterial catheterization' ) ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT T1.subject_id) FROM procedures_icd AS T1 JOIN procedures_icd AS T2 ON T1.subject_id = T2.subject_id AND T1.hadm_id = T2.hadm_id WHERE T1.icd_code = 'icd9|3961' -- arterial catheterization AND T2.icd_code = 'icd9|9604' -- enteral infusion of concentrated nutritional substances AND datetime(T1.charttime, 'start of month') = datetime(T2.charttime, 'start of month') AND T1.charttime > T2.charttime",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['10.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the core logic of the query by adding a hadm_id join and filtering both procedures within the same hospital admission, whereas the ground truth only requires that the two procedures occur for the same patient in the same calendar month. This incorrect join condition fundamentally alters which patients are counted, leading to a drastically different result. Fixing this would require restructuring the join logic, not just a minor tweak in a single clause.",
            "error_type": "Global: join error"
        },
        "20aaeb128f0419f83461f5c9": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the name of the specimen test that was last given to patient 10027602 since 12/2100?",
            "gold_sql": "SELECT microbiologyevents.spec_type_desc FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND strftime('%Y-%m',microbiologyevents.charttime) >= '2100-12' ORDER BY microbiologyevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT test_name FROM microbiologyevents WHERE subject_id = 10027602 AND datetime(charttime) >= datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['urine']]",
            "pred_answer": "[['urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the SELECT clause: it returns microbiologyevents.test_name instead of microbiologyevents.spec_type_desc. All other parts (time filter, patient filter, ordering, limit) match the intended logic. This is a localized mistake in selecting the wrong column, not a structural or logical flaw across multiple clauses.",
            "error_type": "Local: column reference error"
        },
        "a9b5787ee9cc74593541e313": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "For patients who underwent diagnostic ultrasound of digestive system , what are the five most common diagnoses within 2 months?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'diagnostic ultrasound of digestive system' ) ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT D.long_title FROM procedures_icd AS P JOIN d_icd_procedures AS DP ON P.icd_code = DP.icd_code JOIN diagnoses_icd AS DI ON P.hadm_id = DI.hadm_id JOIN d_icd_diagnoses AS D ON DI.icd_code = D.icd_code WHERE DP.long_title = 'diagnostic ultrasound of digestive system' AND julianday(DI.charttime) - julianday(P.charttime) <= 60 GROUP BY D.long_title ORDER BY COUNT(DI.icd_code) DESC LIMIT 5",
            "gold_answer": "[['acquired absence of breast and nipple'], ['acute pancreatitis'], ['alcohol abuse, unspecified'], ['anemia, unspecified'], ['cyst and pseudocyst of pancreas'], ['other and unspecified mycoses'], ['other specified diseases of pancreas'], ['personal history of antineoplastic chemotherapy'], ['personal history of malignant neoplasm of breast'], ['pure hypercholesterolemia'], ['secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified'], ['unspecified essential hypertension']]",
            "pred_answer": "[['accidents occurring in residential institution'], ['acute pancreatitis'], ['depressive disorder, not elsewhere classified'], ['long-term (current) use of insulin'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of the question by joining procedures and diagnoses on the same hospital admission (P.hadm_id = DI.hadm_id) instead of tying them by patient (subject_id) and looking at diagnoses up to two months after the procedure. It also fails to enforce that diagnoses occur after the ultrasound (it only checks a \u226460-day difference, allowing diagnoses before the procedure). These errors span multiple clauses (JOIN and WHERE) and alter the overall meaning of the query, so this is not a fixable typo or isolated syntax mistake but a structural/logical flaw.",
            "error_type": "Global: incorrect join and time-window logic"
        },
        "0a6429bfa48cdb76d66bf9a6": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "How many days has it been since patient 10021487's hospital admission?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', admissions.admittime) ) FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL",
            "pred_sql": "SELECT julianday('2100-12-31') - julianday(admittime) AS days_since_admission FROM admissions WHERE subject_id = 10021487 AND dischtime IS NULL ORDER BY admittime DESC LIMIT 1",
            "gold_answer": "[['24.983']]",
            "pred_answer": "[['23.984']]",
            "is_correct": false,
            "reasoning": "The predicted query localizes its mistake to the date literal and function used for computing the difference. Instead of using the full timestamp '2100-12-31 23:59:00' with strftime('%J',\u2026) as in the ground truth, it uses julianday('2100-12-31') and julianday(admittime), thereby dropping the time component and producing an off-by-one-day result. This is a contained error in the temporal calculation (date literal/format) and does not affect the overall query structure or logic of filtering the current admission. Hence it is a Local error.",
            "error_type": "Local: other-temporal error"
        },
        "c3d9bc95379f78cc11a90cb2": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the percentile of 2.0 in the rbc lab test with the same age as patient 10022041 on the last hospital visit?",
            "gold_sql": "SELECT DISTINCT T1.C1 FROM ( SELECT labevents.valuenum, PERCENT_RANK() OVER ( ORDER BY labevents.valuenum ) AS C1 FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'rbc' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age = ( SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10022041 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) ) AS T1 WHERE T1.valuenum = 2.0",
            "pred_sql": "WITH patient_age AS ( SELECT age FROM admissions WHERE subject_id = 10022041 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ), age_group AS ( SELECT labevents.valuenum FROM labevents INNER JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE admissions.age = (SELECT age FROM patient_age) AND labevents.itemid = (SELECT itemid FROM d_labitems WHERE label = 'rbc') AND admissions.dischtime IS NOT NULL AND admissions.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = admissions.subject_id ORDER BY admittime DESC LIMIT 1 ) ) SELECT ROUND((SUM(CASE WHEN valuenum <= 2.0 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 2) AS percentile FROM age_group",
            "gold_answer": "[['0.5']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic of the query. It restricts labevents to each patient\u2019s most recent admission rather than all admissions of patients of the same age, replaces the PERCENT_RANK() window function with a manual percentage calculation, and misuses a correlated subquery on admissions to filter hadm_id. These errors span multiple clauses (WITH clauses, JOIN, WHERE, and SELECT) and change the intended semantics, requiring major restructuring to correct.",
            "error_type": "Global"
        },
        "d5efaa4569dc08743e756b5c": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the name of the medication that patient 10021312 was prescribed within 2 days after having received a drainage of left main bronchus, via natural or artificial opening endoscopic, diagnostic procedure on the last hospital visit?",
            "gold_sql": "SELECT T2.drug FROM ( SELECT procedures_icd.subject_id, procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'drainage of left main bronchus, via natural or artificial opening endoscopic, diagnostic' ) ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 day')",
            "pred_sql": "SELECT DISTINCT P.drug FROM procedures_icd AS PI JOIN d_icd_procedures AS DIP ON PI.icd_code = DIP.icd_code JOIN admissions AS A ON PI.hadm_id = A.hadm_id JOIN prescriptions AS P ON A.hadm_id = P.hadm_id WHERE PI.subject_id = 10021312 AND DIP.long_title = 'drainage of left main bronchus, via natural or artificial opening endoscopic, diagnostic' AND A.dischtime IS NOT NULL AND A.admittime = ( SELECT MAX(admittime) FROM admissions WHERE subject_id = 10021312 AND dischtime IS NOT NULL ) AND datetime(P.starttime) BETWEEN datetime(PI.charttime, '+0 days') AND datetime(PI.charttime, '+2 days')",
            "gold_answer": "[['0.9% sodium chloride'], ['calcium carbonate'], ['famotidine'], ['iso-osmotic sodium chloride'], ['vancomycin']]",
            "pred_answer": "[['0.9% sodium chloride'], ['albuterol 0.083% neb soln'], ['calcium carbonate'], ['famotidine'], ['iso-osmotic sodium chloride'], ['pantoprazole'], ['vancomycin'], ['vial']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is structurally similar to the ground truth (same tables, joins, and overall flow). The only semantic mismatch is in the WHERE time condition: it uses\n  \n    datetime(P.starttime) BETWEEN datetime(PI.charttime, '+0 days') AND datetime(PI.charttime, '+2 days')\n  \ninstead of the ground\u2010truth\u2019s stricter `T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 day')`. This difference in the temporal predicate lets through extra drugs (e.g., albuterol, pantoprazole) and can be fixed by tightening the time condition without altering the rest of the query\u2019s logic or joins. Hence it\u2019s a localized (WHERE\u2010clause) error in the time filter.",
            "error_type": "Local: other-temporal error"
        },
        "a395c9eb659ffebdeff1c451": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the total cost for patient 10005817's hospital stay since 1 year ago?",
            "gold_sql": "SELECT SUM(cost.cost) FROM cost WHERE cost.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005817 ) AND datetime(cost.chargetime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT SUM(DISTINCT C.cost) AS total_cost FROM admissions AS A JOIN cost AS C ON A.hadm_id = C.hadm_id WHERE A.subject_id = 10005817 AND datetime(A.admittime) >= datetime('2100-12-31 23:59:00', '-1 year') AND C.event_type IN ('procedures_icd', 'labevents', 'prescriptions', 'diagnoses_icd')",
            "gold_answer": "[['4840.59']]",
            "pred_answer": "[['1180.45']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of the question in two ways: (1) it filters admissions by admittime in the last year instead of filtering cost records by their chargetime, and (2) it adds an unnecessary event_type filter that excludes some cost entries. These mistakes alter the overall structure and meaning of the query rather than fixing a small localized mistake, so this is a global error.",
            "error_type": "Global: incorrect date filtering and event_type constraint"
        },
        "2c652b6030efee163a6dfaf1": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Retrieve the top five most frequent diagnoses of patients with age in their 50s since 1 year ago.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 50 AND 59 ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT D.long_title FROM diagnoses_icd AS DI JOIN d_icd_diagnoses AS D ON DI.icd_code = D.icd_code JOIN admissions AS A ON DI.hadm_id = A.hadm_id WHERE A.age BETWEEN 50 AND 59 AND datetime(DI.charttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY D.long_title ORDER BY COUNT(DISTINCT DI.hadm_id) DESC LIMIT 5",
            "gold_answer": "[['accidents occurring in residential institution'], ['acute kidney failure, unspecified'], ['acute pancreatitis'], ['acute respiratory failure'], ['alcohol abuse, unspecified'], ['anemia of other chronic disease'], ['anemia, unspecified'], ['asthma, unspecified type, unspecified'], ['asymptomatic human immunodeficiency virus [hiv] infection status'], ['chronic airway obstruction, not elsewhere classified'], ['cirrhosis of liver without mention of alcohol'], ['colostomy status'], ['cyst and pseudocyst of pancreas'], ['dependence on respirator [ventilator] status'], ['depressive disorder, not elsewhere classified'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['hyposmolality and/or hyponatremia'], ['long term (current) use of insulin'], ['long-term (current) use of insulin'], ['nonspecific elevation of levels of transaminase or lactic acid dehydrogenase [ldh]'], ['obstructive sleep apnea (adult)(pediatric)'], ['other and unspecified hyperlipidemia'], ['other ascites'], ['other iatrogenic hypotension'], ['other specified procedures as the cause of abnormal reaction of patient, or of later complication, without mention of misadventure at time of procedure'], ['patient room in hospital as the place of occurrence of the external cause'], ['personal history of malignant neoplasm of breast'], ['pneumonitis due to inhalation of food or vomitus'], ['portal hypertension'], ['pure hypercholesterolemia'], ['schizoaffective disorder, unspecified'], ['secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified'], ['thrombocytopenia, unspecified'], ['tobacco use disorder'], ['type 2 diabetes mellitus without complications'], ['unspecified essential hypertension']]",
            "pred_answer": "[['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other ascites'], ['pure hypercholesterolemia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted SQL matches the ground truth in terms of tables joined, filter conditions (age between 50 and 59, charttime within one year), grouping by diagnosis title, and limiting to five results. The only difference is the aggregation used in ORDER BY: the ground truth ranks diagnoses by total occurrences (COUNT(*)), while the prediction ranks by distinct admissions count (COUNT(DISTINCT DI.hadm_id)). This is a localized mistake in the aggregate function, not a structural or multi-clause logical error.",
            "error_type": "Local: aggregate function error"
        },
        "8c9285958fb27bcd8453fdc9": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Tell me the average total hospital cost that includes other specified diseases of pancreas this year.",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other specified diseases of pancreas' ) ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT AVG(total_cost) FROM ( SELECT SUM(DISTINCT C.cost) AS total_cost FROM cost AS C INNER JOIN diagnoses_icd AS D ON C.event_id = D.row_id AND C.event_type = 'diagnoses_icd' INNER JOIN d_icd_diagnoses AS DI ON D.icd_code = DI.icd_code WHERE DI.long_title = 'other specified diseases of pancreas' AND strftime('%Y', C.chargetime) = '2100' GROUP BY C.hadm_id )",
            "gold_answer": "[['1276.33']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended logic by restricting cost to only diagnosis-related events (C.event_type = 'diagnoses_icd' and join on D.row_id) and by using SUM(DISTINCT C.cost), instead of summing all cost types for each admission as required. This isn\u2019t a simple typo or syntax fix; it alters multiple clauses (JOIN, WHERE, aggregation) and thus the overall meaning of the query. Therefore, this is a global error in the join/aggregation logic.",
            "error_type": "Global: incorrect join/aggregation logic"
        },
        "682f46170aeaf404bdf67198": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Tell me the difference between the total input and output of patient 10005909 on last month/11.",
            "gold_sql": "SELECT ( SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',inputevents.starttime) = '11' ) - ( SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '11' )",
            "pred_sql": "SELECT (SELECT COALESCE(SUM(totalamount), 0) FROM inputevents WHERE subject_id = 10005909 AND datetime(starttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-1 year') AND strftime('%m', starttime) = '11') - (SELECT COALESCE(SUM(value), 0) FROM outputevents WHERE subject_id = 10005909 AND datetime(charttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-1 year') AND strftime('%m', charttime) = '11') AS input_output_difference",
            "gold_answer": "[['3210.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic in multiple ways: it filters by subject_id instead of joining through admissions \u2192 icustays \u2192 stay_id, and it uses a year-based time window (start of year minus one year and month = '11') rather than the required last month on day 11. These changes span multiple clauses (FROM/WHERE logic and temporal filters) and change the meaning of the query, so the error is not a localized syntax issue but a global logical error.",
            "error_type": "Global: incorrect join and temporal filter"
        },
        "c61af8d575e4a0c46efaad99": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the duration in days of patient 10018845's last stay in the icu?",
            "gold_sql": "SELECT strftime('%J', icustays.outtime)-strftime('%J', icustays.intime) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018845 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1",
            "pred_sql": "SELECT ROUND((JULIANDAY(outtime) - JULIANDAY(intime)), 0) AS duration_days FROM icustays WHERE subject_id = 10018845 ORDER BY intime DESC LIMIT 1",
            "gold_answer": "[['1.491']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a direct subject_id filter on icustays and omits the required `outtime IS NOT NULL` predicate, and then rounds the day difference to zero decimal places. The overall query structure\u2014computing ICU stay duration, ordering by intime and limiting to the last stay\u2014remains the same as the ground truth. The mistakes are confined to the WHERE clause (missing outtime filter) and the SELECT expression (unnecessary rounding), which are localized issues and do not alter the fundamental logic or join strategy of the query.",
            "error_type": "Local: where-clause and rounding error"
        },
        "cb642fb7cc4c268dd4595162": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the minimum lymphs value for patient 10027602 since 11/2100?",
            "gold_sql": "SELECT MIN(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'lymphs' ) AND strftime('%Y-%m',labevents.charttime) >= '2100-11'",
            "pred_sql": "SELECT MIN(valuenum) AS min_lymphs_value FROM labevents WHERE subject_id = 10027602 AND itemid = (SELECT itemid FROM d_labitems WHERE label = 'lymphs') AND datetime(charttime,'start of month') = datetime('2100-12-31 23:59:00','start of year') AND strftime('%m',charttime) >= '11'",
            "gold_answer": "[['8.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL mis\u2010filters the charttime date. Instead of using the correct strftime('%Y-%m',\u2026) \u2265 '2100-11', it incorrectly compares datetime(charttime,'start of month') to datetime('2100-12-31 23:59:00','start of year') and then only checks the month part. This is a local mistake in the WHERE clause\u2019s date condition and does not alter the overall query structure or table relationships. It\u2019s a localized temporal filter error.",
            "error_type": "Local: temporal filter error"
        },
        "f359ba8d9b0dd17dff365533": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "When was patient 10027602 last discharged from the hospital?",
            "gold_sql": "SELECT admissions.dischtime FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.dischtime IS NOT NULL ORDER BY admissions.dischtime DESC LIMIT 1",
            "pred_sql": "dischtime FROM admissions WHERE subject_id = 10027602 AND dischtime IS NOT NULL ORDER BY dischtime DESC LIMIT 1",
            "gold_answer": "[['2100-12-14 14:45:00']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the SELECT keyword before the column list, causing a syntax error in the SELECT clause. This is a localized mistake that does not alter the overall intended query logic and can be fixed by adding the missing keyword.",
            "error_type": "Local: syntax error"
        },
        "c3f597f199ba2bb53d59195e": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Could you tell me the name of the drug that was prescribed two or more times since 10/2100 to patient 10009628?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, COUNT( prescriptions.starttime ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10009628 ) AND strftime('%Y-%m',prescriptions.starttime) >= '2100-10' GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT drug FROM prescriptions WHERE subject_id = 10009628 AND datetime(starttime) >= datetime('2100-10-01') GROUP BY drug HAVING COUNT(DISTINCT starttime) >= 2",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['5% dextrose (excel bag)'], ['acetaminophen'], ['aspirin ec'], ['atorvastatin'], ['docusate sodium'], ['furosemide'], ['glipizide'], ['glucagon'], ['insulin'], ['insulin human regular'], ['iso-osmotic dextrose'], ['isosorbide dinitrate'], ['ketorolac'], ['metformin (glucophage)'], ['metoclopramide'], ['metoprolol tartrate'], ['milk of magnesia'], ['mupirocin nasal ointment 2%'], ['ondansetron'], ['sodium chloride 0.9%  flush'], ['syringe'], ['vancomycin'], ['warfarin']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen'], ['aspirin ec'], ['atorvastatin'], ['docusate sodium'], ['furosemide'], ['glipizide'], ['glucagon'], ['insulin'], ['insulin human regular'], ['isosorbide dinitrate'], ['ketorolac'], ['metformin (glucophage)'], ['metoclopramide'], ['metoprolol tartrate'], ['milk of magnesia'], ['mupirocin nasal ointment 2%'], ['ondansetron'], ['sodium chloride 0.9%  flush'], ['warfarin']]",
            "is_correct": false,
            "reasoning": "The only structural difference is within the aggregation: the predicted SQL uses COUNT(DISTINCT starttime) instead of COUNT(starttime). This incorrect DISTINCT suppresses multiple prescriptions that happen at the same timestamp, causing drugs to be dropped from the results. No joins or overall query logic are altered\u2014only a localized misuse of DISTINCT in the HAVING clause\u2014so it\u2019s a local aggregation error.",
            "error_type": "Local: COUNT-distinct error"
        },
        "0f494beb8dd694f45a24771a": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the three most frequent drugs prescribed to female patients with an age of 40s within 2 months after they are diagnosed with alcohol abuse, unspecified?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'alcohol abuse, unspecified' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.subject_id IN ( SELECT patients.subject_id FROM patients WHERE patients.gender = 'f' ) AND admissions.age BETWEEN 40 AND 49 ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "WITH AlcoholAbuseDiagnoses AS ( SELECT DISTINCT subject_id, hadm_id, charttime FROM diagnoses_icd WHERE icd_code = 'icd9|30500' -- Assuming 'icd9|30500' is the code for alcohol abuse, unspecified ), FemalePatientsIn40s AS ( SELECT DISTINCT A.subject_id, A.hadm_id, A.admittime FROM admissions A JOIN patients P ON A.subject_id = P.subject_id WHERE P.gender = 'f' AND A.age BETWEEN 40 AND 49 ), PrescriptionsWithin2Months AS ( SELECT P.drug FROM prescriptions P JOIN AlcoholAbuseDiagnoses D ON P.subject_id = D.subject_id AND P.hadm_id = D.hadm_id JOIN FemalePatientsIn40s F ON P.subject_id = F.subject_id AND P.hadm_id = F.hadm_id WHERE datetime(P.starttime) BETWEEN datetime(D.charttime) AND datetime(D.charttime, '+2 months') ) SELECT drug FROM ( SELECT drug, COUNT(*) AS freq FROM PrescriptionsWithin2Months GROUP BY drug ORDER BY freq DESC LIMIT 3 ) AS TopDrugs",
            "gold_answer": "[['acetaminophen'], ['aluminum-magnesium hydrox.-simethicone'], ['amitriptyline'], ['darunavir'], ['emtricitabine-tenofovir (truvada)'], ['hydroxyzine'], ['ibuprofen'], ['milk of magnesia'], ['nicotine polacrilex'], ['ritonavir'], ['sertraline']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the intended logic by forcing prescriptions and diagnoses to share the same hadm_id (hospital admission) rather than simply matching by subject_id and then applying the 2\u2010month window. This fundamentally alters which prescriptions are considered (only those in the same admission as the diagnosis instead of any admission within 2 months after diagnosis) and thus breaks the overall query logic. This is not a small typo or localized syntax issue but a core join/key error affecting multiple clauses.",
            "error_type": "Global: incorrect join keys"
        },
        "b8eb4f72ed272217689345d4": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the percentile po2 value of 128.0 among patients of the same age as patient 10018081 on the current hospital visit?",
            "gold_sql": "SELECT DISTINCT T1.C1 FROM ( SELECT labevents.valuenum, PERCENT_RANK() OVER ( ORDER BY labevents.valuenum ) AS C1 FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'po2' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age = ( SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10018081 AND admissions.dischtime IS NULL ) ) ) AS T1 WHERE T1.valuenum = 128.0",
            "pred_sql": "WITH same_age_patients AS ( SELECT labevents.valuenum AS po2_value FROM labevents INNER JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE admissions.age = (SELECT age FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL) AND labevents.itemid = (SELECT itemid FROM d_labitems WHERE label = 'po2') AND admissions.dischtime IS NULL ) SELECT ROUND((SUM(CASE WHEN po2_value <= 128.0 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 2) AS percentile FROM same_age_patients",
            "gold_answer": "[['0.882']]",
            "pred_answer": "[['87.5']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes how the percentile is computed. Instead of using PERCENT_RANK() to return a value between 0 and 1 for the specific measurement (as required), it computes a cumulative distribution percentage (count \u2264 128 / total count * 100). This is a logical error affecting the entire query\u2019s approach to percentile calculation, not a minor syntax or column reference issue.",
            "error_type": "Global: incorrect percentile calculation"
        },
        "e3757f569282c07ba625db2f": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "After getting diagnosed with hyperlipidemia, unspecified, how many patients were found to have nicotine dependence, cigarettes, uncomplicated within 2 months in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'hyperlipidemia, unspecified' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'nicotine dependence, cigarettes, uncomplicated' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month')",
            "pred_sql": "WITH initial_diagnosis AS ( SELECT DISTINCT subject_id, MIN(charttime) AS hyperlipidemia_time FROM diagnoses_icd WHERE icd_code = 'icd9|2724' -- hyperlipidemia, unspecified GROUP BY subject_id ), subsequent_diagnosis AS ( SELECT DISTINCT d.subject_id FROM diagnoses_icd d JOIN initial_diagnosis i ON d.subject_id = i.subject_id WHERE d.icd_code = 'icd9|3051' -- nicotine dependence, cigarettes, uncomplicated AND datetime(d.charttime) BETWEEN datetime(i.hyperlipidemia_time) AND datetime(i.hyperlipidemia_time, '+2 months') AND strftime('%Y', d.charttime) = '2100' ) SELECT COUNT(DISTINCT subject_id) AS patient_count FROM subsequent_diagnosis",
            "gold_answer": "[['1.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query logic compared to the ground truth. It uses hard-coded ICD codes instead of looking them up via d_icd_diagnoses, omits the year-2100 filter on the initial hyperlipidemia diagnosis (only applying it to the nicotine diagnosis), and restructures the filtering into CTEs in a way that changes which events are counted. These issues span multiple clauses and change the intended meaning of the question, so the error is not a simple typo or localized syntax mistake but a structural logic error.",
            "error_type": "Global: logical inconsistency"
        },
        "13f87cb21fa6c8cb0f60fd50": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the minimum total hospital cost that includes a procedure named venous catheterization, not elsewhere classified?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) ) GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) AS min_total_cost FROM ( SELECT SUM(cost) AS total_cost FROM cost WHERE hadm_id IN ( SELECT hadm_id FROM procedures_icd WHERE icd_code IN ( SELECT icd_code FROM d_icd_procedures WHERE long_title = 'venous catheterization, not elsewhere classified' ) ) AND event_type = 'procedures_icd' GROUP BY hadm_id )",
            "gold_answer": "[['2891.23']]",
            "pred_answer": "[['23.12']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly adds AND event_type = 'procedures_icd' in the WHERE clause, restricting the sum to only procedure costs. The original intent was to sum all cost.event_type values for admissions that include the specified procedure. This is a localized mistake in the WHERE condition and can be fixed by removing that extra filter, without restructuring the rest of the query.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "db5e5f929e7bbb00f5d16d2b": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Count how many times since 2100 patient 10016150 had left heart cardiac catheterization.",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'left heart cardiac catheterization' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10016150 ) AND strftime('%Y',procedures_icd.charttime) >= '2100'",
            "pred_sql": "SELECT COUNT(*) FROM procedures_icd AS T1 INNER JOIN d_icd_procedures AS T2 ON T1.icd_code = T2.icd_code WHERE T1.subject_id = 10016150 AND T2.long_title = 'left heart cardiac catheterization' AND datetime(T1.charttime) >= datetime('2100-12-31 23:59:00','-0 year')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s filter on charttime is wrong: it uses datetime(T1.charttime) >= datetime('2100-12-31 23:59:00','-0 year'), which effectively only includes events at or after the very end of 2100. The correct interpretation \u201csince 2100\u201d means from the start of 2100 onward. This mistake is confined to the WHERE clause\u2019s temporal condition and can be fixed by adjusting the date comparison without restructuring the query. Therefore, it is a local error in the temporal condition.",
            "error_type": "Local: other-temporal error"
        },
        "a0d1a694272df31d43d58407": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the length of the first hospital stay in days for patient 10001217?",
            "gold_sql": "SELECT strftime('%J',admissions.dischtime) - strftime('%J',admissions.admittime) FROM admissions WHERE admissions.subject_id = 10001217 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT ROUND((JULIANDAY(dischtime) - JULIANDAY(admittime)), 0) AS length_of_stay FROM admissions WHERE subject_id = 10001217 AND dischtime IS NOT NULL ORDER BY admittime ASC LIMIT 1",
            "gold_answer": "[['6.794']]",
            "pred_answer": "[['7.0']]",
            "is_correct": false,
            "reasoning": "The only difference is the use of ROUND(...,0) in the predicted SQL, which forces the length of stay to be an integer (7.0) instead of preserving the fractional days (6.794) and rounding to three decimals as required. This is a localized formatting/rounding mistake in the SELECT clause and does not affect joins, table relationships, or overall logic. It can be fixed by removing or adjusting the ROUND precision, so it is a Local error.",
            "error_type": "Local: numeric-rounding error"
        },
        "334b1184502387df7abf77d5": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Is the value of mean blood pressure of patient 10039831 last measured on the current icu visit less than the value second to last measured on the current icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure mean' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) < ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure mean' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "WITH RankedBloodPressure AS ( SELECT valuenum AS mean_blood_pressure, DENSE_RANK() OVER (ORDER BY charttime DESC) AS rank FROM chartevents WHERE subject_id = 10039831 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure mean') AND stay_id = (SELECT stay_id FROM icustays WHERE subject_id = 10039831 AND outtime IS NULL) )",
            "gold_answer": "[['0.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the final SELECT to compare the two ranked blood pressure values and thus does not complete the intended logic. It also misuses DENSE_RANK (not needed per assumptions) and fails to implement the comparison between the latest and second-latest measurements. Fixing this would require adding an entire query block using the CTE and altering the overall structure, not just a small syntactic tweak.",
            "error_type": "Global: structural error"
        },
        "f9cf8541bcbfc43ebc43f833": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "How much is the average total cost of a hospital that involves a drug named 0.9% sodium chloride (mini bag plus) since 2100?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = '0.9% sodium chloride (mini bag plus)' ) AND strftime('%Y',cost.chargetime) >= '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT AVG(total_cost) FROM ( SELECT SUM(DISTINCT c.cost) AS total_cost FROM prescriptions AS p INNER JOIN cost AS c ON p.hadm_id = c.hadm_id AND c.event_id = p.row_id AND c.event_type = 'prescriptions' WHERE p.drug = '0.9% sodium chloride (mini bag plus)' AND datetime(p.starttime) >= datetime('2100-01-01 00:00:00') GROUP BY p.hadm_id ) AS subquery",
            "gold_answer": "[['4369.491']]",
            "pred_answer": "[['9.87']]",
            "is_correct": false,
            "reasoning": "The predicted SQL restricts costs to only prescription events by joining cost on event_type = 'prescriptions' and event_id = p.row_id, and it filters by p.starttime rather than cost.chargetime. This fundamentally changes the query\u2019s logic: instead of summing all costs for the relevant admissions since 2100, it only sums prescription costs and uses the wrong time filter. Fixing this would require restructuring the join, WHERE clauses, and aggregation, not just a small local correction.",
            "error_type": "Global: incorrect join and filtering logic"
        },
        "a029b2e1430d15462ca84d28": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the minimum monthly number of patient records marked with atrioventricular block, complete?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT COUNT( DISTINCT diagnoses_icd.hadm_id ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'atrioventricular block, complete' ) GROUP BY strftime('%Y-%m',diagnoses_icd.charttime) ) AS T1",
            "pred_sql": "SELECT MIN(monthly_count) FROM ( SELECT strftime('%Y-%m', charttime) AS month, COUNT(DISTINCT subject_id) AS monthly_count FROM diagnoses_icd WHERE icd_code = '4260' -- Assuming '4260' is the code for atrioventricular block, complete GROUP BY month )",
            "gold_answer": "[['1.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly counts distinct subject_id instead of distinct hadm_id and uses a hard\u2010coded icd_code rather than retrieving it via the d_icd_diagnoses lookup. These are errors confined to a specific clause (the SELECT/count expression and WHERE filter) and do not require restructuring the overall query. Hence, this is a local error.",
            "error_type": "Local: column reference error"
        },
        "d351c1981c8f0350905da618": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "When was the last time this month that patient 10004733 was prescribed with vancomycin and iso-osmotic dextrose at the same time?",
            "gold_sql": "SELECT T1.starttime FROM ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND prescriptions.drug = 'vancomycin' AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND prescriptions.drug = 'iso-osmotic dextrose' AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) ORDER BY T1.starttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(datetime(starttime)) FROM prescriptions WHERE subject_id = 10004733 AND drug IN ('vancomycin', 'iso-osmotic dextrose') AND datetime(starttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') GROUP BY datetime(starttime) HAVING COUNT(DISTINCT drug) = 2",
            "gold_answer": "[['2100-12-26 09:00:00']]",
            "pred_answer": "[['2100-12-22 15:00:00'], ['2100-12-22 20:00:00'], ['2100-12-24 11:00:00'], ['2100-12-26 08:00:00'], ['2100-12-26 09:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted query groups by each prescription start time and applies MAX() within that group, which simply returns each matching timestamp rather than the single latest timestamp overall. This is an error in the aggregation and GROUP BY logic within the SELECT clause and does not involve changing table relationships or the overall query structure. It can be corrected by removing the GROUP BY or wrapping the grouped results to take a global MAX. Hence it is a localized aggregation/grouping error.",
            "error_type": "Local: grouping error"
        },
        "d0de56f72573390c2bf52877": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the drug that was prescribed to patient 10019777 within the same day after the angioplasty of other non-coronary vessel(s) on the first hospital visit?",
            "gold_sql": "SELECT T2.drug FROM ( SELECT procedures_icd.subject_id, procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019777 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'angioplasty of other non-coronary vessel(s)' ) ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019777 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T1.charttime,'start of day') = datetime(T2.starttime,'start of day')",
            "pred_sql": "SELECT DISTINCT P.drug FROM procedures_icd AS PI JOIN prescriptions AS P ON PI.subject_id = P.subject_id AND PI.hadm_id = P.hadm_id WHERE PI.subject_id = 10019777 AND PI.icd_code = 'icd9|3961' -- Assuming 'icd9|3961' is the code for angioplasty of other non-coronary vessel(s) AND datetime(P.starttime, 'start of day') = datetime(PI.charttime, 'start of day') AND PI.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10019777 ORDER BY admittime ASC LIMIT 1 )",
            "gold_answer": "[['0.9% sodium chloride'], ['0.9% sodium chloride (mini bag plus)'], ['0.9% sodium chloride (mini bag plus)'], ['5% dextrose'], ['alteplase (catheter clearance)'], ['iso-osmotic dextrose'], ['iso-osmotic dextrose'], ['iso-osmotic dextrose'], ['lactated ringers'], ['metronidazole (flagyl)'], ['ns'], ['octreotide acetate'], ['sodium chloride 0.9%'], ['sodium chloride 0.9%'], ['vancomycin'], ['vancomycin']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails to restrict to the first completed hospital visit: it omits `admissions.dischtime IS NOT NULL` when picking the first hadm_id, altering the intended admission context. It also omits the requirement that the prescription start time follow the procedure charttime (`T1.charttime < T2.starttime`), only enforcing same-day equality. Note also that the predicted SQL hard-codes the icd_code literal instead of looking it up in d_icd_procedures, and that the inline `--` comment following that literal would comment out every condition after it (the same-day check and the first-admission subquery) when the query is executed as a single line. These omissions are not mere typos or minor syntax mistakes but undermine the core logic of selecting the correct visit and ensuring the drug was prescribed after the procedure. Fixing this requires adding filters in multiple clauses (the admission subquery and the WHERE clause), indicating a broader structural/logical flaw.",
            "error_type": "Global: incorrect admission selection and time\u2010ordering logic"
        },
        "98c9e3b4254aa471e55df9d2": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "In 2100, what are the top five most frequently given diagnoses that patients received within 2 months after being diagnosed with body mass index 35.0-35.9, adult?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'body mass index 35.0-35.9, adult' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH initial_diagnosis AS ( SELECT di.subject_id, di.hadm_id, di.charttime AS diagnosis_time FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code WHERE dicd.long_title = 'body mass index 35.0-35.9, adult' AND strftime('%Y', di.charttime) = '2100' ), subsequent_diagnoses AS ( SELECT di.subject_id, di.icd_code, dicd.long_title, di.charttime FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code JOIN initial_diagnosis id ON di.subject_id = id.subject_id AND di.hadm_id = id.hadm_id WHERE di.charttime > id.diagnosis_time AND di.charttime <= datetime(id.diagnosis_time, '+2 months') ) SELECT sd.long_title FROM subsequent_diagnoses sd GROUP BY sd.long_title ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['atrial fibrillation'], ['autistic disorder, current or active state'], ['long-term (current) use of anticoagulants'], ['personal history of sudden cardiac arrest'], ['postprocedural fever'], ['unspecified essential hypertension']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a join on both subject_id and hadm_id when linking subsequent diagnoses to the initial BMI diagnosis. The ground truth logic only ties on subject_id (tracking diagnoses within two months regardless of hospital admission), but the prediction erroneously restricts subsequent diagnoses to the same hadm_id. This fundamentally changes the intended meaning and yields no results. This is not a simple syntax or column reference issue but a flawed join strategy that alters the query\u2019s logic.",
            "error_type": "Global: incorrect join condition"
        },
        "0857ac0dceb68b3f5ee03095": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "When was the last lab test that patient 10000032 received?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10000032 ) ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "MAX(labevents.charttime) AS last_lab_test_time FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE admissions.subject_id = 10000032 AND admissions.dischtime IS NOT NULL",
            "gold_answer": "[['2100-08-27 06:15:00']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the leading SELECT keyword, so it is not a valid statement and does not execute (pred_answer is null). It also introduces an extra filter on admissions.dischtime IS NOT NULL, which limits the search to only discharged admissions. The question asks for the last lab test across all admissions for patient 10000032, regardless of discharge status. Adding the SELECT keyword and removing or correcting this WHERE-clause condition would fix the error without altering the overall query structure or requiring a different join strategy\u2014so it is a localized issue.",
            "error_type": "Local: incorrect WHERE-clause filter"
        },
        "5d688256a3f07899fb175ce8": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "When was patient 10010471's first hospital visit?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10010471 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "admissions.admittime FROM admissions WHERE admissions.subject_id = 10010471 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "gold_answer": "[['2100-05-21 17:05:00']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the SELECT keyword before the column list, which is a localized syntax error in the SELECT clause and does not affect the overall logic or table relationships.",
            "error_type": "Local: syntax error"
        },
        "3ca78723b4c115f0d4b675be": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Which are the top three most frequent output events in 2100?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT outputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM outputevents WHERE strftime('%Y',outputevents.charttime) = '2100' GROUP BY outputevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT itemid FROM outputevents WHERE strftime('%Y', charttime) = '2100' GROUP BY itemid ORDER BY COUNT(*) DESC LIMIT 3",
            "gold_answer": "[['chest tube #1'], ['foley'], ['void']]",
            "pred_answer": "[['226559.0'], ['226560.0'], ['226588.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL groups and limits directly on outputevents.itemid, returning raw item IDs instead of the required labels from d_items. The error is confined to the SELECT clause and the lack of a join to d_items; the overall query structure (filtering by year, grouping, ordering, and limiting) is correct. This is a fixable, isolated mistake (adding a JOIN and selecting d_items.label) without altering the query\u2019s logical flow or relationships.",
            "error_type": "Local: missing join/column reference error"
        },
        "82e01448e95df2146a69a821": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the name of the organism in the first bronchoalveolar lavage microbiology test of patient 10021487 this month?",
            "gold_sql": "SELECT microbiologyevents.org_name FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) AND microbiologyevents.spec_type_desc = 'bronchoalveolar lavage' AND microbiologyevents.org_name IS NOT NULL AND datetime(microbiologyevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT org_name FROM microbiologyevents WHERE subject_id = 10021487 AND spec_type_desc = 'bronchoalveolar lavage' AND datetime(charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') ORDER BY charttime ASC LIMIT 1",
            "gold_answer": "[['staph aureus coag +']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters by subject_id, spec_type_desc, and month, but it omits the condition AND org_name IS NOT NULL. As a result, the first record returned has a NULL org_name, whereas the ground truth explicitly excludes NULL organism names. This is an error confined to the WHERE clause and can be fixed by adding the missing filter without changing the overall query structure.",
            "error_type": "Local: missing non-null filter"
        },
        "6ede363c7d448d9e34bdc68d": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the top five most frequent diagnoses that patients are given within 2 months after being diagnosed with body mass index 35.0-35.9, adult in 2100?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'body mass index 35.0-35.9, adult' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH initial_diagnosis AS ( SELECT DISTINCT di.subject_id, di.hadm_id, di.charttime FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code WHERE dicd.long_title = 'body mass index 35.0-35.9, adult' AND strftime('%Y', di.charttime) = '2100' ), subsequent_diagnoses AS ( SELECT DISTINCT di.subject_id, di.icd_code, dicd.long_title FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code JOIN initial_diagnosis id ON di.subject_id = id.subject_id AND di.hadm_id = id.hadm_id WHERE di.charttime > id.charttime AND di.charttime <= datetime(id.charttime, '+2 months') ) SELECT long_title FROM subsequent_diagnoses GROUP BY long_title ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['atrial fibrillation'], ['autistic disorder, current or active state'], ['long-term (current) use of anticoagulants'], ['personal history of sudden cardiac arrest'], ['postprocedural fever'], ['unspecified essential hypertension']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL restricts subsequent diagnoses to the same hadm_id (hospital admission) as the initial BMI diagnosis, whereas the correct logic should consider any diagnosis within two months after the initial BMI diagnosis regardless of admission. This erroneous join condition fundamentally changes the query\u2019s intended meaning by excluding valid diagnoses in other admissions, requiring more than a localized fix.",
            "error_type": "Global: incorrect join condition"
        },
        "dca9777fd4184fbb7fc955d3": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What microbiology test did patient 10039831 undergo for the first time since 11/2100?",
            "gold_sql": "SELECT microbiologyevents.test_name FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND strftime('%Y-%m',microbiologyevents.charttime) >= '2100-11' ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT test_name FROM microbiologyevents WHERE subject_id = 10039831 AND datetime(charttime) >= datetime('2100-12-31 23:59:00', 'start of year') AND strftime('%m', charttime) = '11' ORDER BY charttime ASC LIMIT 1",
            "gold_answer": "[['mrsa screen']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only checks for month = '11' and charttime \u2265 start of the year, so it misses tests occurring in December 2100. The correct filter should include all dates from November 2100 onward (strftime('%Y-%m',charttime) \u2265 '2100-11'). This mistake is confined to the WHERE time\u2010filter logic and does not require restructuring joins or subqueries.",
            "error_type": "Local: other-temporal error"
        },
        "853d6c6aace2c37387e3042e": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Tell me about the most common five output events in 2100.",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT outputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM outputevents WHERE strftime('%Y',outputevents.charttime) = '2100' GROUP BY outputevents.itemid ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT itemid FROM outputevents WHERE strftime('%Y', charttime) = '2100' GROUP BY itemid ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['cerebral ventricular #1'], ['chest tube #1'], ['foley'], ['tf residual'], ['void']]",
            "pred_answer": "[['226559.0'], ['226560.0'], ['226588.0'], ['226606.0'], ['227510.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only selects the raw itemid from outputevents, failing to join to the d_items table to retrieve the event labels as required. This is a localized mistake in the SELECT clause/table reference, not a structural issue with the overall query logic or flow.",
            "error_type": "Local: table reference error"
        }
    },
    "deepseek": {
        "b66afc66d827b1620f9d2dd4": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What label is associated with stool?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.abbreviation = 'stool'",
            "pred_sql": "SELECT label FROM d_labitems WHERE label = 'stool';",
            "gold_answer": "[['stool']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL is querying the wrong table (d_labitems) instead of d_items. The error lies in a local table reference\u2014using the incorrect table for the label \u201cstool.\u201d This does not require a restructuring of the overall query logic, only fixing the table reference.",
            "error_type": "Local: table reference error"
        },
        "b758cf0b3355a13805e92899": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Among patients who received other laparoscopic umbilical herniorrhaphy , what are the top four most frequent medications that followed within the same month?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other laparoscopic umbilical herniorrhaphy' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T1.charttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH filtered_procedures AS ( SELECT p.subject_id, p.hadm_id, p.charttime AS procedure_time FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dp.long_title = 'other laparoscopic umbilical herniorrhaphy' ) SELECT drug, COUNT(*) AS frequency FROM prescriptions WHERE EXISTS ( SELECT 1 FROM filtered_procedures fp WHERE prescriptions.hadm_id = fp.hadm_id AND datetime(prescriptions.starttime, 'start of month') = datetime(fp.procedure_time, 'start of month') ) GROUP;",
            "gold_answer": "[['bisacodyl']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL ends with \"GROUP;\" which is syntactically incorrect (missing GROUP BY clause and aggregation context). This is a localized syntax mistake in the GROUP clause and does not affect the overall logical structure or intended joins of the query.",
            "error_type": "Local: syntax error"
        },
        "ca991e457c98ee7a9ad5a817": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients since 1 year ago were diagnosed with other ascites within the same hospital visit after the diagnosis of unspecified viral hepatitis c without hepatic coma?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified viral hepatitis c without hepatic coma' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other ascites' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT d1.subject_id) AS patient_count FROM diagnoses_icd d1 JOIN d_icd_diagnoses diag1 ON d1.icd_code = diag1.icd_code JOIN diagnoses_icd d2 ON d1.subject_id = d2.subject_id AND d1.hadm_id = d2.hadm_id JOIN d_icd_diagnoses diag2 ON d2.icd_code = diag2.icd_code WHERE diag1.long_title = 'unspecified viral hepatitis c without hepatic coma' AND diag2.long_title = 'other ascites' AND datetime(d2.charttime) > datetime(d1.charttime) AND datetime(d1.charttime) >= datetime('2100-12-31 23:59:00', '-1 year') AND d1.hadm_id IN ( SELECT hadm_id FROM admissions WHERE datetime;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL ends abruptly in the WHERE clause with an incomplete IN subquery (`AND d1.hadm_id IN ( SELECT hadm_id FROM admissions WHERE datetime;`), leading to a syntax error near \";\". This is a localized mistake in the WHERE clause rather than a fundamental misunderstanding of the query\u2019s overall structure or logic.",
            "error_type": "Local: syntax error"
        },
        "1d46dfdd771d6cc0e5fd9703": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the cost of a acute pulmonary edema diagnosis?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'diagnoses_icd' AND cost.event_id IN ( SELECT diagnoses_icd.row_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'acute pulmonary edema' ) )",
            "pred_sql": "SELECT DISTINCT c.cost;",
            "gold_answer": "[['3.23']]",
            "pred_answer": "no such column: c.cost",
            "is_correct": false,
            "reasoning": "The predicted SQL \u2018SELECT DISTINCT c.cost;\u2019 references alias \u2018c\u2019 and column \u2018cost\u2019 without a FROM clause or table alias definition. This is a localized column/table reference issue and can be fixed by adding the proper FROM cost AS c and appropriate WHERE conditions. It does not alter overall query logic, only the SELECT clause\u2019s table reference. Therefore, it is a Local error.",
            "error_type": "Local: table/column reference error"
        },
        "04a0aa7059bb30df4a066313": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What was the name of the lab test that patient 10004733 last had on the current hospital visit?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT labevents.itemid FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ORDER BY labevents.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT dl.label FROM labevents le JOIN admissions a ON le.hadm_id = a.hadm_id JOIN d_labitems dl ON le.itemid = dl.itemid WHERE le.subject_id = 10004733 AND a.dischtime IS NULL -- current hospital visit ORDER BY le.charttime DESC LIMIT 1;",
            "gold_answer": "[['creatine kinase (ck)']]",
            "pred_answer": "[['% hemoglobin a1c'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['albumin'], ['albumin'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['ammonia'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anticardiolipin antibody igg'], ['anticardiolipin antibody igm'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['base excess'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['c-reactive protein'], ['c3'], ['c4'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calculated total co2'], ['chloride'], 
['chloride'], ['chloride'], ['chloride'], ['chloride'], ['chloride'], ['chloride'], ['chloride']]",
            "is_correct": false,
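            "reasoning": "The predicted SQL joins labevents to admissions and d_labitems where the ground truth uses nested subqueries, but both apply the same patient and current-visit (dischtime IS NULL) filters. However, because the predicted SQL is a single line, the inline `-- current hospital visit` comment also comments out the trailing ORDER BY le.charttime DESC LIMIT 1, so the query returns the label of every lab event in the current visit rather than the single most recent test. This is a localized mistake in the ordering/limit logic, not a fundamental join or structural error, so it can be fixed by removing the inline comment so that the ORDER BY and LIMIT clauses take effect. Therefore it is a Local error.",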
            "error_type": "Local: other filter/order error"
        },
        "3a03752928a5b5d2dc708683": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you show me the top three most frequent lab tests in 2100?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE strftime('%Y',labevents.charttime) = '2100' GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT T1.label, COUNT(*) as frequency FROM d_labitems T1 INNER JOIN labevents T2 ON T1.itemid = T2.itemid;",
            "gold_answer": "[['chloride'], ['hematocrit'], ['sodium']]",
            "pred_answer": "[['phosphate', '26426.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the WHERE filter on year 2100, the GROUP BY clause, the DENSE_RANK/window function logic, and the ORDER/LIMIT to retrieve the top three tests. Instead, it returns an aggregate over the entire dataset without grouping or filtering, altering the query\u2019s fundamental logic and structure. Fixing this would require adding multiple clauses and restructuring the query, not just a minor tweak.",
            "error_type": "Global: missing filter, grouping, and ranking logic"
        },
        "a2cc63130b02eeb31226b139": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When did patient 10025612 receive the lr for the last time on 10/01/2100?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10025612 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'lr' AND d_items.linksto = 'inputevents' ) AND strftime('%Y-%m-%d',inputevents.starttime) = '2100-10-01' ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(starttime) AS last_lr_time FROM inputevents WHERE subject_id = 10025612 AND datetime(starttime,'start of day') = datetime('2100-10-01 00:00:00','start of day','-0 day');",
            "gold_answer": "[['2100-10-01 14:40:00']]",
            "pred_answer": "[['2100-10-01 23:05:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to filter inputevents for the \u2018lr\u2019 label (missing the JOIN/WHERE on d_items.itemid with label = 'lr') and also omits the hospital admission/ICU stay linkage; it simply uses subject_id and date. This is a localized mistake in the WHERE clause (missing condition), not a fundamental restructuring of the query\u2019s logic.",
            "error_type": "Local: missing filter on item"
        },
        "76f0161e2704b0fb1eaf5eae": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Count the number of patients for whom ibuprofen was prescribed within the same hospital visit post their diagnosis of panic disorder without agoraphobia.",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'panic disorder without agoraphobia' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'ibuprofen' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) AS patient_count FROM ( SELECT di.subject_id, MIN(di.charttime) AS first_diagnosis_time FROM diagnoses_icd di INNER JOIN d_icd_diagnoses dd ON di.icd_code = dd.icd_code WHERE dd.long_title = 'Panic disorder without agoraphobia' GROUP BY di.subject_id ) d INNER JOIN prescriptions p ON d.subject_id = p.subject_id AND d.first_diagnosis_time < p.start;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: p.start",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column p.start instead of the correct p.starttime in the prescriptions table. This is a simple column reference mistake and does not alter the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "3714936c91049926ca46bd85": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients died after having been diagnosed with delirium due to conditions classified elsewhere within the same month since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'delirium due to conditions classified elsewhere' ) ) AS T1 GROUP BY T1.subject_id HAVING MIN(T1.charttime) = T1.charttime AND datetime(T1.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 JOIN ( SELECT patients.subject_id, admissions.hadm_id, patients.dod FROM admissions join patients ON patients.subject_id = admissions.subject_id WHERE patients.dod IS NOT NULL AND datetime(patients.dod) >= datetime('2100-12-31 23:59:00','-1 year') ) as T3 ON T2.subject_id = T3.subject_id AND datetime(T2.charttime,'start of month') = datetime(T3.dod,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) AS num_patients FROM patients p JOIN diagnoses_icd d ON p.subject_id = d.subject_id JOIN d_icd_diagnoses icd ON d.icd_code = icd.icd_code WHERE icd.long_title = 'Delirium due to known physiological condition' AND datetime(p.dod) >= datetime('2100-12-31 23:59:00', '-1 year') AND STRFTIME('%Y-%;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "unrecognized token: \"'%Y-%;\"",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with an \u201cunrecognized token\u201d error at the partially written STRFTIME format string (\u2018'%Y-%;\u2019). This is a localized syntax/value\u2010parsing issue in the WHERE clause (an improperly terminated string literal), not a deeper misjoin or structural logic flaw. It can be fixed by correctly completing the STRFTIME expression without changing the overall query structure.",
            "error_type": "Local: syntax error"
        },
        "3d5ccd254ab413fb1f98cd8a": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you get me the top five most conducted lab tests?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT D.label, COUNT(L.itemid) AS test_count FROM labevents L JOIN d_labitems D ON L.itemid = D.itemid GROUP BY D.label ORDER BY test_count DESC LIMIT 5;",
            "gold_answer": "[['chloride'], ['creatinine'], ['hematocrit'], ['sodium'], ['urea nitrogen']]",
            "pred_answer": "[['chloride', '1004.0'], ['glucose', '1113.0'], ['hematocrit', '982.0'], ['hemoglobin', '985.0'], ['sodium', '1001.0']]",
            "is_correct": false,
            "reasoning": "The predicted query correctly joins labevents to d_labitems and groups by label, but it uses COUNT and LIMIT to pick the top five and also returns the test_count column. According to the instructions, when asking for the top N items we should use DENSE_RANK() and return only the item names (no counts). This is a mistake localized to the SELECT and ordering logic\u2014it does not require restructuring joins or filters. Therefore, it\u2019s a local error with the ranking/selection clause.",
            "error_type": "Local: top-N ranking error"
        },
        "b39ceb0832a83e0203b3da54": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can i know the 3-month survival rate of patients diagnosed with unspecified disorder of kidney and ureter who were then prescribed sodium bicarbonate?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T3.charttime) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified disorder of kidney and ureter' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'sodium bicarbonate' GROUP BY admissions.subject_id HAVING MAX(prescriptions.starttime) = prescriptions.starttime ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime ) AS T3 JOIN patients ON T3.subject_id = patients.subject_id )",
            "pred_sql": "WITH diagnosed_patients AS ( SELECT DISTINCT p.subject_id, p.dod, datetime(d_i.charttime, '3 months') AS three_month_cutoff FROM patients p JOIN diagnoses_icd d ON p.subject_id = d.subject_id JOIN d_icd_diagnoses d_i ON d.icd_code = d_i.icd_code WHERE d_i.long_title = 'Unspecified disorder of kidney and ureter' ), prescribed_patients AS ( SELECT DISTINCT dp.subject_id, dp.dod, dp.three_month_cutoff FROM diagnosed_patients dp JOIN prescriptions pr ON dp.subject_id = pr.subject_id WHERE pr.drug = 'sodium bicarbonate' ) SELECT ( COUNT(CASE WHEN (prescribed_patients.dod IS NULL OR prescribed_patients.dod > prescribed_pat;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with a syntax error near the end of the query (an incomplete CASE expression and missing closing parentheses), indicating a localized syntax issue. This does not reflect a fundamental problem with the overall query logic or structure but rather an isolated mistake in the SELECT clause that can be corrected without reworking the entire query.",
            "error_type": "Local: syntax error"
        },
        "df464401c02feccf6383ef33": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients were treated with reopening of recent thoracotomy site two or more times in 2100?",
            "gold_sql": "SELECT COUNT(T1.subject_id) FROM ( SELECT admissions.subject_id, COUNT(*) AS C1 FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'reopening of recent thoracotomy site' ) AND strftime('%Y',procedures_icd.charttime) = '2100' GROUP BY admissions.subject_id ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "found in the output",
            "gold_answer": "[['0.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted query failed to return any result (null) and did not execute correctly, which strongly indicates a localized issue such as a missing or incorrect column/table reference or alias rather than a fundamental restructuring of the query logic. The overall purpose\u2014counting patients with two or more specific procedures in 2100\u2014is intact, but a small reference or syntax mistake prevents execution. Such an error is local to one clause and can be fixed without altering the query\u2019s high-level structure.",
            "error_type": "Local: table/column reference error"
        },
        "60ccf86e473e44100cc743fb": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many days have gone by since the first instance patient 10018081 had a nacl 0.9% input on the current icu visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', inputevents.starttime) ) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'nacl 0.9%' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime ASC LIMIT 1",
            "pred_sql": "WITH current_icu_stay AS ( SELECT stay_id, intime, outtime FROM icustays WHERE subject_id = 10018081 AND outtime IS NULL ), first_nacl_input AS ( SELECT MIN(ie.starttime) AS first_input_time FROM inputevents ie JOIN d_items di ON ie.itemid = di.itemid WHERE ie.subject_id = 10018081 AND di.label = 'NaCl 0.9%' AND ie.stay_id = (SELECT stay_id FROM current_icu_stay) ) SELECT (strftime('%J', '2100-12-31 23:59:00');",
            "gold_answer": "[['5.286']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is syntactically incomplete: the SELECT clause ends abruptly with an unclosed strftime call and missing subtraction, parenthesis, and FROM clause. This is a localized syntax error that does not affect the overall query structure or logic and can be fixed by completing the SELECT expression.",
            "error_type": "Local: syntax error"
        },
        "9617359518e066b622aeda10": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me the length of stay in days for patient 10008454's first hospital stay.",
            "gold_sql": "SELECT strftime('%J',admissions.dischtime) - strftime('%J',admissions.admittime) FROM admissions WHERE admissions.subject_id = 10008454 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%j', dischtime) - strftime('%j', admittime)) + 1 AS length_of_stay FROM admissions WHERE subject_id = 10008454 ORDER BY admittime ASC LIMIT 1;",
            "gold_answer": "[['10.39']]",
            "pred_answer": "[['11.39']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistake is confined to the way it computes the date difference: it uses the '%j' specifier (day of year) instead of '%J' (Julian day) and arbitrarily adds +1, rather than correctly subtracting Julian days. This is a localized error in the temporal calculation and does not affect other parts of the query\u2019s structure or logic.",
            "error_type": "Local: other-temporal error"
        },
        "b509e60b483133af19e1b147": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "On the last hospital visit, when was the last prescription of fexofenadine issued for patient 10019385?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019385 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND prescriptions.drug = 'fexofenadine' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(p.starttime) AS last_prescription_time FROM prescriptions p WHERE p.subject;",
            "gold_answer": "[['2100-03-24 04:00:00']]",
            "pred_answer": "no such column: p.subject",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column p.subject, causing a column reference error. This is a localized mistake in the WHERE clause and can be fixed without changing the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "6742c3d30fd69b03c9e78123": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you provide the top three most frequent lab tests that within the same month took place after patients had other repair of vessel since 1 year ago?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other repair of vessel' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH vessel_repair AS ( SELECT p.subject_id, p.hadm_id, p.charttime AS procedure_date FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dp.long_title = 'other repair of vessel' AND p.charttime >= '2099-12-31' AND p.charttime <= '2100-12-31' ) SELECT DISTINCT dl.label AS lab_test FROM labevents le JOIN d_labitems dl ON le.itemid =;",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['mch'], ['mchc'], ['mcv'], ['platelet count'], ['rdw'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is incomplete and contains a syntax error in the JOIN clause: 'JOIN d_labitems dl ON le.itemid =;'. It is missing the right-hand side of the equality (dl.itemid) and thus fails to form a valid join. This is a localized mistake in the JOIN syntax and does not reflect a broader logical misunderstanding of the query\u2019s intent.",
            "error_type": "Local: syntax error"
        },
        "89925fb3592920a7a4050587": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me what was the total rectal tube output of patient 10029291 on the last icu visit.",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'rectal tube' AND d_items.linksto = 'outputevents' )",
            "pred_sql": "SELECT SUM(oe.value) AS total_rectal_tube_output FROM outputevents oe INNER JOIN d_items di ON oe.itemid = di.itemid INNER JOIN icustays icu ON oe.stay_id = icu.stay_id WHERE oe.subject_id;",
            "gold_answer": "[['775.0']]",
            "pred_answer": "[['693796.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to implement the core logic of the question: it does not filter for the specific patient (10029291), does not restrict to the last ICU stay, and does not filter outputevents to \u2018rectal tube\u2019 via d_items.label and linksto. Instead, it leaves an incomplete WHERE clause and broadly joins all stays and all outputevents. These omissions break the overall query structure and intended multi\u2010step filtering logic, so it is not a localized typo or minor JOIN mistake but a fundamental logical/structural error.",
            "error_type": "Global: missing filter conditions"
        },
        "19a8fffa7488ae211db659dc": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Please provide the count of patients who had a solution input in 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT icustays.hadm_id FROM icustays WHERE icustays.stay_id IN ( SELECT inputevents.stay_id FROM inputevents WHERE inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'solution' AND d_items.linksto = 'inputevents' ) AND strftime('%Y',inputevents.starttime) = '2100' ) )",
            "pred_sql": "SELECT COUNT(DISTINCT subject_id) AS count FROM inputevents WHERE STRFTIME('%Y', starttime) = '2100';",
            "gold_answer": "[['49.0']]",
            "pred_answer": "[['76.0']]",
            "is_correct": false,
            "reasoning": "The predicted query simply counts distinct subject_id from inputevents in 2100 without restricting to 'solution' inputs or joining through d_items and icustays/admissions. It omits the necessary join to d_items (to filter on label = 'solution' and linksto = 'inputevents') and the layers of mapping from inputevents.stay_id to admissions.subject_id via icustays/admissions. This is not a minor syntactic mistake but a fundamental logical omission affecting multiple parts of the query, altering the intended meaning and resulting dataset.",
            "error_type": "Global: incorrect join/filter logic"
        },
        "e2c04c788a4363be6217e18e": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What was the output of patient 10019172 for the last time on last month/20?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT outputevents.itemid FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019172 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '20' ORDER BY outputevents.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT outputevents.subject_id, outputevents.charttime, outputevents.value, outputevents.valueuom FROM outputevents WHERE outputevents.subject_id = 10019172 AND datetime(outputevents.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', outputevents.charttime) = '20' ORDER BY outputevents.charttime DESC;",
            "gold_answer": "[['foley']]",
            "pred_answer": "[['10019172.0', '2100-11-20 11:30:00', '120.0', 'ml'], ['10019172.0', '2100-11-20 11:30:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 11:30:00', '700.0', 'ml'], ['10019172.0', '2100-11-20 11:45:00', '10.0', 'ml'], ['10019172.0', '2100-11-20 12:06:00', '270.0', 'ml'], ['10019172.0', '2100-11-20 12:07:00', '15.0', 'ml'], ['10019172.0', '2100-11-20 12:35:00', '40.0', 'ml'], ['10019172.0', '2100-11-20 13:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 13:00:00', '340.0', 'ml'], ['10019172.0', '2100-11-20 14:00:00', '400.0', 'ml'], ['10019172.0', '2100-11-20 14:00:00', '70.0', 'ml'], ['10019172.0', '2100-11-20 15:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 15:00:00', '290.0', 'ml'], ['10019172.0', '2100-11-20 16:00:00', '250.0', 'ml'], ['10019172.0', '2100-11-20 16:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 17:01:00', '160.0', 'ml'], ['10019172.0', '2100-11-20 17:01:00', '50.0', 'ml'], ['10019172.0', '2100-11-20 18:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 18:00:00', '80.0', 'ml'], ['10019172.0', '2100-11-20 19:00:00', '100.0', 'ml'], ['10019172.0', '2100-11-20 19:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 20:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 20:00:00', '40.0', 'ml'], ['10019172.0', '2100-11-20 21:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 21:00:00', '40.0', 'ml'], ['10019172.0', '2100-11-20 22:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 22:00:00', '32.0', 'ml'], ['10019172.0', '2100-11-20 23:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 23:00:00', '25.0', 'ml']]",
            "is_correct": false,
            "reasoning": "The predicted SQL drops the critical joins through admissions \u2192 icustays \u2192 outputevents and instead filters only by subject_id, completely altering which outputevents are considered. It also selects the wrong columns (subject_id, charttime, value, valueuom) rather than the item label via d_items, and omits the LIMIT 1. These mistakes span multiple clauses (SELECT, FROM, WHERE, ORDER BY) and require substantial restructuring of the query\u2019s logic and joins to fix. Hence this is a Global error.",
            "error_type": "Global: incorrect join and selection logic"
        },
        "e084964b489539a157781a7c": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the minimum total cost of the hospital visits that involve a procedure called excision of right saphenous vein, percutaneous endoscopic approach since 2100?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of right saphenous vein, percutaneous endoscopic approach' ) ) AND strftime('%Y',cost.chargetime) >= '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) AS minimum_total_cost FROM ( SELECT c.hadm_id, SUM(c.cost) AS total_cost FROM cost c JOIN procedures_icd p ON c.event_id = p.row_id AND c.event_type = 'procedures_icd' JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title = 'excision of right saphenous vein, percutaneous endoscopic approach' AND p.charttime >= '2100-01-01' GROUP BY c.hadm_id ) ;",
            "gold_answer": "[['3838.48']]",
            "pred_answer": "[['8.6']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only sums costs directly tied to the specific procedure (c.event_type = 'procedures_icd' joined to procedures_icd) and filters by the procedure\u2019s charttime, rather than summing all costs (diagnoses, procedures, labs, prescriptions) for the entire hospital admission and filtering by cost.chargetime\u2019s year as required. This error alters the overall logic and intended aggregation scope of the query, requiring a structural rewrite to include all cost event types and apply the correct date filter. Hence, it is not a localized mistake but a fundamental logical/structural flaw.",
            "error_type": "Global: join logic error"
        },
        "bd6c49415465061eab77c735": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Count the number of patients diagnosed with catatonic type schizophrenia, unspecified who did not come to the hospital within 2 months this year.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'catatonic type schizophrenia, unspecified' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'catatonic type schizophrenia, unspecified' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 JOIN admissions ON T2.subject_id = admissions.subject_id WHERE T2.charttime < admissions.admittime AND datetime(admissions.admittime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND datetime(admissions.admittime) BETWEEN datetime(T2.charttime) AND datetime(T2.charttime,'+2 month') )",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) AS patient_count FROM patients p JOIN diagnoses_icd d_icd ON p.subject_id = d_icd.subject_id JOIN d_icd_diagnoses diag ON d_icd.icd_code = diag.icd_code LEFT JOIN admissions a ON p.subject_id = a.subject_id WHERE diag.long_title = 'catatonic type schizophrenia, unspecified' AND a.admittime > '2100-10-01 00:00:00' OR a.admittime IS NULL;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to capture the intended logic in several fundamental ways. It does not restrict diagnoses to this year, uses a hard\u2010coded date literal ('2100-10-01') instead of dynamically computing the two\u2010month window from each diagnosis date, misplaces the OR condition (causing incorrect inclusion of patients with no admissions), and generally restructures the joins to patients and admissions incorrectly. These issues span multiple clauses (JOINs, WHERE filters, date computations) and require a wholesale rewrite of the query logic rather than a simple fix in one clause.",
            "error_type": "Global: logical inconsistency across query components"
        },
        "64e7a22499d22f3fb08f2d4b": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many days have passed since patient 10018081's last fentanyl (concentrate) input on the current icu visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', inputevents.starttime) ) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'fentanyl (concentrate)' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%j', '2100-12-31 23:59:00') - strftime('%j', inputevents.charttime)) + 1 AS days_since_last_fentanyl FROM inputevents INNER JOIN d_items ON inputevents.itemid = d_items.itemid INNER JOIN icustays ON inputevents.stay_id = icustays.stay_id WHERE inputevents.subject_id = 10018081 AND d_items.label = 'fentanyl (concentrate)' AND icustays.outtime IS NULL ORDER BY inputevents.charttime DESC LIMIT 1;",
            "gold_answer": "[['0.65']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references inputevents.charttime, but the inputevents table uses starttime rather than charttime. This is a simple column reference mistake that does not alter the overall query logic or require restructuring of joins or clauses.",
            "error_type": "Local: table/column reference error"
        },
        "0db05ea2dbcb66080ccf5a6a": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Could you tell me the minimum total cost in a hospital where clopidogrel is used in 2100?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'clopidogrel' ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "WITH clopidogrel_cost AS ( SELECT p.hadm_id, SUM(c.cost) AS total_cost FROM prescriptions p JOIN cost c ON p.hadm_id = c.hadm_id AND p.row_id = c.event_id AND c.event_type = 'prescriptions' WHERE p.drug = 'clopidogrel' AND datetime(p.starttime) >= datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY p.hadm_id ) SELECT MIN(total_cost) AS minimum_total_cost FROM clopidogrel_cost;",
            "gold_answer": "[['318.21']]",
            "pred_answer": "[['12.3']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended logic by joining only prescription-related cost entries (using c.event_type = 'prescriptions' and linking p.row_id to c.event_id) rather than summing all costs for each admission as in the ground truth. Additionally, it filters on prescription start times instead of cost charge times and fails to group all event types. These alterations span multiple clauses and require restructuring the query\u2019s overall approach, not just a localized fix.",
            "error_type": "Global: incorrect join and filter logic"
        },
        "337b3c0f7c09a85625de5909": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most commonly received procedures for patients in their 40s since 1 year ago?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 40 AND 49 ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT p.long_title AS procedure_name FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id JOIN d_icd_procedures p ON procedures_icd.icd_code = p.icd_code WHERE admissions.age BETWEEN 40 AND 49 AND procedures_icd.charttime >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY p.long_title ORDER BY COUNT(p.long_title) DESC LIMIT 4;",
            "gold_answer": "[['anastomosis of hepatic duct to gastrointestinal tract'], ['arterial catheterization'], ['bypass coronary artery, one artery from aorta with autologous venous tissue, open approach'], ['bypass coronary artery, one artery from left internal mammary with autologous arterial tissue, open approach'], ['bypass coronary artery, one artery from left internal mammary, open approach'], ['bypass coronary artery, one artery from right internal mammary with autologous arterial tissue, open approach'], ['central venous catheter placement with guidance'], ['closed [endoscopic] biopsy of bronchus'], ['colonoscopy'], ['combined right and left heart angiocardiography'], ['combined right and left heart cardiac catheterization'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['diagnostic ultrasound of abdomen and retroperitoneum'], ['diagnostic ultrasound of heart'], ['dilation of left common iliac vein with intraluminal device, percutaneous approach'], ['dilation of left external iliac vein with intraluminal device, percutaneous approach'], ['drainage of left pleural cavity, percutaneous approach'], ['drainage of left pleural cavity, percutaneous approach, diagnostic'], ['drainage of left upper lobe bronchus, via natural or artificial opening endoscopic, diagnostic'], ['drainage of pericardial cavity with drainage device, percutaneous approach'], ['endoscopic insertion of stent (tube) into bile duct'], ['enteral infusion of concentrated nutritional substances'], ['esophagogastroduodenoscopy [egd] with closed biopsy'], ['excision of cerebral ventricle, open approach'], ['excision of left internal mammary artery, percutaneous endoscopic approach'], ['excision of left upper lobe bronchus, via natural or artificial opening endoscopic, diagnostic'], ['excision of other bile duct'], ['excision of right internal mammary artery, percutaneous endoscopic approach'], ['excision of right saphenous vein, percutaneous endoscopic 
approach'], ['extirpation of matter from coronary artery, one artery, percutaneous approach'], ['extirpation of matter from left common iliac vein, percutaneous approach'], ['extirpation of matter from left femoral vein, percutaneous approach'], ['fluoroscopy of multiple coronary arteries using low osmolar contrast'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['hemodialysis'], ['insertion of biological graft'], ['insertion of endotracheal airway into trachea, via natural or artificial opening'], ['insertion of endotracheal tube'], ['insertion of infusion device into superior vena cava, percutaneous approach'], ['insertion of intraluminal device into inferior vena cava, percutaneous approach'], ['insertion or replacement of other neurostimulator pulse generator'], ['introduction of nutritional substance into lower gi, via natural or artificial opening'], ['introduction of other therapeutic substance into heart, open approach'], ['introduction of other thrombolytic into peripheral vein, percutaneous approach'], ['laparoscopic cholecystectomy'], ['laparoscopic robotic assisted procedure'], ['measurement of cardiac sampling and pressure, left heart, percutaneous approach'], ['partial hepatectomy'], ['percutaneous abdominal drainage'], ['percutaneous balloon valvuloplasty'], ['performance of cardiac output, continuous'], ['repair of rectocele with graft or prosthesis'], ['respiratory ventilation, greater than 96 consecutive hours'], ['right heart cardiac catheterization'], ['simple excision of other lymphatic structure'], ['thoracentesis']]",
            "pred_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['enteral infusion of concentrated nutritional substances'], ['extirpation of matter from left common iliac vein, percutaneous approach'], ['performance of cardiac output, continuous']]",
            "is_correct": false,
            "reasoning": "The predicted query filters by age and time correctly and joins the right tables, so the table relationships and WHERE clauses are fine. The only mistake is that it uses GROUP BY + ORDER BY \u2026 LIMIT 4 instead of applying DENSE_RANK() to include all procedures tied for the top four counts. This is a localized error in the ranking/selection logic and does not require restructuring joins or filters.",
            "error_type": "Local: ranking error"
        },
        "a695186175740a8315577534": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me the total number of patients who were diagnosed with resistance to other specified antimicrobial drug since 1 year ago.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'resistance to other specified antimicrobial drug' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT di.subject_id) FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE d.long_title = 'Resistance to other specified antimicrobial drug' AND di.charttime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fails because it filters on d.long_title = 'Resistance to other specified antimicrobial drug' (capital \u201cR\u201d), whereas the actual title in d_icd_diagnoses is all lowercase (\u2018resistance to other specified antimicrobial drug\u2019). This literal mismatch causes zero rows. This is a localized error in the WHERE clause\u2019s value comparison and can be fixed by correcting the string case, without restructuring the query.",
            "error_type": "Local: value parsing error"
        },
        "0fd90a367f11fe444686ab3e": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How long has it been in hours since the last time iso-osmotic dextrose was prescribed to patient 10021487 on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', prescriptions.starttime) ) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'iso-osmotic dextrose' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', (SELECT MAX(p.starttime) FROM prescriptions p JOIN admissions a ON p.hadm_id = a.hadm_id WHERE p.subject_id = '10021487' AND a.dischtime IS NULL AND p.drug LIKE '%iso-osmotic dextrose%'))) / 3600 AS hours_since_last_dose ;",
            "gold_answer": "[['463.983']]",
            "pred_answer": "[['463.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs only in how it computes the time difference: it uses strftime('%s') and integer division by 3600, which truncates the fractional hours, instead of the ground truth\u2019s use of Julian days (strftime('%J') difference \u00d724) that preserves the fractional part. All joins, filters, and the overall query structure (focusing on current admission for patient 10021487 and the last \u2018iso\u2010osmotic dextrose\u2019 prescription) remain correct. This miscalculation is confined to the SELECT expression and does not require altering join logic or query flow.",
            "error_type": "Local: other-temporal error"
        },
        "d705db837f97e8a0a6b440f1": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the top four most frequently tested specimens for patients within 2 months after receiving colonoscopy?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'colonoscopy' ) ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH colonoscopy_patients AS ( SELECT p.subject_id, p.hadm_id;",
            "gold_answer": "[['serology/blood']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is incomplete and contains a syntax error near the semicolon after the WITH clause. This is a localized syntax mistake and does not reflect a misunderstanding of the overall query logic or structure.",
            "error_type": "Local: syntax error"
        },
        "3baa8855ca9c0ddb5058fd05": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "List the top four most common lab tests for patients with age 60 or above, since 2100.",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND strftime('%Y',labevents.charttime) >= '2100' GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT T3.label AS lab_test, COUNT(T1.row_id) AS test_count FROM labevents T1 INNER JOIN admissions T2 ON T1.hadm_id = T2.hadm_id INNER JOIN d_labitems T3 ON T1.itemid = T3.itemid WHERE T2.age >= 60 AND T1.charttime >= '2100-01-01' GROUP BY T3.label ORDER BY test_count DESC LIMIT 4;",
            "gold_answer": "[['bicarbonate'], ['chloride'], ['creatinine'], ['hematocrit'], ['sodium'], ['urea nitrogen']]",
            "pred_answer": "[['chloride', '497.0'], ['glucose', '538.0'], ['hemoglobin', '478.0'], ['sodium', '497.0']]",
            "is_correct": false,
            "reasoning": "The error lies in how the top\u2010four ranking is performed. The ground truth uses DENSE_RANK() over the counts to include all items tied in the top four, per the SQL assumptions. The predicted query instead simply orders by count and applies LIMIT 4, which is a localized mistake in the ranking method\u2014it doesn\u2019t require restructuring joins, filters, or aggregation logic. This is a local error in the ranking approach.",
            "error_type": "Local: ranking error"
        },
        "14963d54e851b1a8321a7a99": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "On the current hospital visit, how many hours have passed since the first time patient 10018081 was prescribed iso-osmotic sodium chloride?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', prescriptions.starttime) ) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'iso-osmotic sodium chloride' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "WITH current_admission AS ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) SELECT ( (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', ( SELECT MIN(starttime) FROM prescriptions WHERE subject_id = 10018081 AND hadm_id = (SELECT hadm_id FROM current_admission) AND drug = 'iso-osmotic sodium chloride' ))) / 3600 ) AS hours_since_first_prescription;",
            "gold_answer": "[['125.983']]",
            "pred_answer": "[['125.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses Unix timestamps divided by 3600 to calculate hours, which in SQLite results in truncation to one decimal place (125.0) instead of the more precise 125.983 derived from Julian day differences. This is a localized arithmetic/precision issue in the SELECT clause and does not affect the overall query structure or table relationships.",
            "error_type": "Local: arithmetic precision error"
        },
        "c9f901a6e53a46bf81c90cce": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most commonly prescribed drugs for patients diagnosed with alcohol abuse, unspecified previously within the same hospital visit, since 1 year ago?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'alcohol abuse, unspecified' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT drug, COUNT(*) as count FROM prescriptions JOIN diagnoses_icd ON prescriptions.hadm_id = diagnoses_icd.hadm_id WHERE diagnoses_icd.icd_code = 'icd10|F10.9' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY drug ORDER BY count DESC LIMIT 4;",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['furosemide'], ['lactated ringers']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the step of linking each prescription to the specific diagnosis event (it only joins on hadm_id, not subject_id, and never enforces that the prescription occurs after the alcohol\u2010abuse diagnosis). It also fails to restrict the diagnosis itself to within the past year. These missing filters and the flawed join strategy fundamentally change the query\u2019s logic (it will count any prescriptions in the past year for any patient admissions with an alcohol\u2010abuse code, even if the diagnosis happened long before or the prescription preceded the diagnosis). Fixing this would require restructuring multiple clauses, not just tweaking a single condition. Therefore it\u2019s a Global error.",
            "error_type": "Global: join/logical error"
        },
        "151e424b9f47e06ee148af99": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How much does the diastolic blood pressure change in patient 10018081 last measured on the current icu visit compared to the second to last value measured on the current icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure diastolic' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure diastolic' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "SELECT (SELECT valueuom FROM chartevents WHERE subject_id = 10018081 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure diastolic')) AS unit, (SELECT valuenum FROM ( SELECT charttime, valuenum FROM chartevents WHERE subject_id = 10018081 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure diastolic') AND stay_id = (SELECT stay_id FROM icustays WHERE subject_id = 10018081 AND outtime IS NULL) ORDER BY charttime DESC LIMIT 1 )) AS last_value, (SELECT valuenum FROM ( SELECT charttime, valuenum FROM chartevents WHERE subject_id = 10018081 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure diastolic') AND stay_id = (SELECT stay_id FROM icustays WHERE subject_id = 10018081 AND outtime IS NULL) ORDER BY charttime DESC;",
            "gold_answer": "[['11.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is truncated and has a syntax error near the final ORDER BY clause (missing closing parentheses, missing OFFSET and LIMIT termination). This is a localized syntax issue that does not alter the overall intended query logic structure. Therefore, it is a local error.",
            "error_type": "Local: syntax error"
        },
        "31688f4b90a3c63cc2398fdb": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me patient 10018081's maximum systolic blood pressure yesterday.",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure systolic' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT MAX(ce.valuenum) AS max_sys_bp FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10018081 AND di.label = 'arterial blood pressure systolic' AND date(ce.charttime) = date('2100-12-31') - 1;",
            "gold_answer": "[['163.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails only in its WHERE clause date filtering: it uses `date(ce.charttime) = date('2100-12-31') - 1`, which is not a valid SQLite expression for \u201cyesterday,\u201d causing no rows to match. The rest of the query structure (joins, column references) is correct. This is a localized error in the temporal comparison logic that can be fixed by using the proper SQLite date/datetime functions.",
            "error_type": "Local: other-temporal error"
        },
        "00d61436285ea67a4cc11692": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you list patient 10026354's monthly maximum mch on the first hospital visit?",
            "gold_sql": "SELECT MAX(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10026354 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mch' ) GROUP BY strftime('%Y-%m',labevents.charttime)",
            "pred_sql": "SELECT STRFTIME('%Y-%m', T1.charttime) AS month, MAX(T1.valuenum) AS max_mch FROM labevents T1 INNER JOIN d_labitems T2 ON T1.itemid = T2.itemid INNER JOIN admissions T3 ON T1.hadm_id = T3.hadm_id WHERE T1.subject_id = 10026354 AND T2.label = 'MetHemoglobin' AND T1.charttime >= T3.admittime AND T1.charttime <= T3.dischtime AND T3.ad;",
            "gold_answer": "[['32.2'], ['32.3']]",
            "pred_answer": "no such column: T3.ad",
            "is_correct": false,
            "reasoning": "The predicted SQL has an incorrect column reference in the WHERE clause (AND T3.ad;) which leads to the error 'no such column: T3.ad'. This is a localized syntax/column reference mistake and does not require restructuring the overall query logic.",
            "error_type": "Local: column reference error"
        },
        "9e6090c8b2d81c03a6dec400": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients had a diagnosis of accidents occurring in other specified places in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'accidents occurring in other specified places' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT di.subject_id) AS patient_count FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE d.long_title = 'Accidents occurring in other specified places' AND strftime('%Y', di.charttime) = '2100';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters on d.long_title = 'Accidents occurring in other specified places' (with a capital 'A') while the actual title in the database is 'accidents occurring in other specified places' (all lowercase). This is a simple string literal mismatch and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "2514305815b6187c8662dbe7": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What drugs are being added to patient 10018081's prescription today, as compared to yesterday?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') EXCEPT SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT DISTINCT T1.drug AS today_drugs FROM prescriptions T1 LEFT JOIN prescriptions T2 ON T1.drug = T2.drug WHERE T1.subject_id = '10018081' AND datetime(T1.starttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-0 day') AND T2.subject_id = '10018081' AND datetime(T2.starttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-1 day') AND T2.drug IS NULL;",
            "gold_answer": "[['furosemide'], ['soln']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query attempts to use a LEFT JOIN to exclude drugs present yesterday, but it incorrectly places the T2 (yesterday) filters in the WHERE clause instead of the JOIN condition. Because the WHERE predicates on T2 force non-null matches, the LEFT JOIN devolves into no rows. This is a localized mistake in the JOIN logic rather than a fundamental change to the overall structure or table relationships. Therefore, it is a Local error in the JOIN clause.",
            "error_type": "Local: join condition error"
        },
        "d72bd1edb7343d7eb4fffd28": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "List the top three most common diagnoses for patients in their 40s this year.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 40 AND 49 ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d.long_title, COUNT(*) as diagnosis_count FROM admissions a JOIN diagnoses_icd di ON a.hadm_id = di.hadm_id JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE a.age BETWEEN 40 AND 49 AND strftime('%Y', a.admittime) = '2100' GROUP BY d.long_title ORDER BY diagnosis_count DESC LIMIT 3;",
            "gold_answer": "[['acidosis'], ['acute and subacute necrosis of liver'], ['acute kidney failure with lesion of tubular necrosis'], ['acute on chronic diastolic heart failure'], ['acute respiratory failure'], ['alcohol abuse, unspecified'], ['alkalosis'], ['anemia, unspecified'], ['asthma, unspecified type, unspecified'], ['asymptomatic human immunodeficiency virus [hiv] infection status'], ['atherosclerotic heart disease of native coronary artery with unstable angina pectoris'], ['atrial fibrillation'], ['autistic disorder, current or active state'], ['cocaine abuse, unspecified'], ['congestive heart failure, unspecified'], ['depressive disorder, not elsewhere classified'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['epilepsy, unspecified, not intractable, without status epilepticus'], ['esophageal reflux'], ['gastro-esophageal reflux disease without esophagitis'], ['gout, unspecified'], ['hyperlipidemia, unspecified'], ['long term (current) use of antithrombotics/antiplatelets'], ['long-term (current) use of anticoagulants'], ['mitral valve disorders'], ['personal history of sudden cardiac arrest'], ['postinflammatory pulmonary fibrosis'], ['presence of coronary angioplasty implant and graft'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "pred_answer": "[['depressive disorder, not elsewhere classified', '4.0'], ['gout, unspecified', '3.0'], ['unspecified essential hypertension', '4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on admissions.admittime\u2019s year rather than diagnoses_icd.charttime as required by the specification for \u201cthis year.\u201d This mistake is confined to the WHERE clause\u2019s time condition and does not alter the overall join structure or logic of the query beyond that localized part. Hence, it is a Local error.",
            "error_type": "Local: wrong time filter"
        },
        "d855b598f5dd0b2f67356e05": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you provide me the daily minimum amount of jevity 1.2 (full) that patient 10027602 received last month?",
            "gold_sql": "SELECT MIN(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'jevity 1.2 (full)' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') GROUP BY strftime('%Y-%m-%d',inputevents.starttime)",
            "pred_sql": "SELECT DATE(charttime) AS date, MIN(totalamount) AS daily_min_amount, totalamountuom AS unit FROM inputevents INNER JOIN d_items ON inputevents.itemid = d_items.itemid WHERE subject_id = 10027602 AND label = 'jevity 1.2 (full)';",
            "gold_answer": "[['1000.0'], ['1000.0'], ['1000.0'], ['1000.0'], ['1000.0'], ['1000.0'], ['1000.0']]",
            "pred_answer": "no such column: charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references inputevents.charttime, but the inputevents table has no charttime column (it uses starttime). This is a simple table/column reference error that can be fixed locally without altering the overall query structure.",
            "error_type": "Local: table/column reference error"
        },
        "db61cc4531aaaed9ef749ac0": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients died after having been diagnosed with other specified places as the place of occurrence of the external cause within the same hospital visit?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT T1.subject_id, T1.charttime, T1.hadm_id FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other specified places as the place of occurrence of the external cause' ) ) AS T1 GROUP BY T1.subject_id HAVING MIN(T1.charttime) = T1.charttime ) AS T2 JOIN ( SELECT patients.subject_id, admissions.hadm_id, patients.dod FROM admissions join patients ON patients.subject_id = admissions.subject_id WHERE patients.dod IS NOT NULL ) as T3 ON T2.subject_id = T3.subject_id AND T2.hadm_id = T3.hadm_id",
            "pred_sql": "SELECT COUNT(D;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is just \u201cSELECT COUNT(D;\u201d which is an incomplete statement causing a \u201cnear \";\": syntax error\u201d. This error is confined to the SELECT clause and is purely a syntax mistake, not affecting the overall logic or table relationships. It can be fixed by completing the SELECT expression without altering the query\u2019s structure.",
            "error_type": "Local: syntax error"
        },
        "ddd17c278574bd46086445c4": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How much is the total hospital cost of patient 10001217 during the stay on the last hospital visit?",
            "gold_sql": "SELECT SUM(cost.cost) FROM cost WHERE cost.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10001217 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 )",
            "pred_sql": "SELECT SUM(c.cost) AS total_cost FROM cost c WHERE c.subject_id = 10001217 AND c.hadm_id = ( SELECT a.hadm_id FROM admissions a WHERE a.subject_id = 10001217 AND a.dischtime IS NOT NULL ORDER BY a.admittime DESC LIMIT 1;",
            "gold_answer": "[['1721.22']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error: it is missing a closing parenthesis for the subquery and the semicolon is placed incorrectly. This is a localized mistake in the WHERE clause/subquery and does not alter the overall query structure or logic.",
            "error_type": "Local: syntax error"
        },
        "14430504237cf96f2e13d98f": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Find the length in days of the last icu stay for patient 10012552.",
            "gold_sql": "SELECT strftime('%J', icustays.outtime)-strftime('%J', icustays.intime) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10012552 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1",
            "pred_sql": "SELECT ROUND( (strftime('%j', datetime(outtime)) - strftime('%j', datetime(intime))) + ((strftime('%H:%M:%S', outtime) - strftime('%H:%M:%S', intime)) > '00:00:00'), 0 ) AS length_days FROM icustays WHERE subject_id = 10012552 AND outtime IS NOT NULL ORDER BY intime DESC LIMIT 1;",
            "gold_answer": "[['3.159']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main mistake lies in the SELECT expression: it uses strftime('%j') (day of year) and a custom boolean time\u2010offset trick, rather than strftime('%J') (Julian day) to compute precise day differences. This error is confined to the date arithmetic in the SELECT clause and does not alter the overall query structure (joins, filters, ordering remain intact). Therefore, it is a local error in the temporal calculation.",
            "error_type": "Local: other-temporal error"
        },
        "362a8ded48a53e28746e95a6": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was the first time that patient 10004422 was prescribed quetiapine fumarate and olanzapine on the last hospital visit at the same time?",
            "gold_sql": "SELECT T1.starttime FROM ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND prescriptions.drug = 'olanzapine' ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND prescriptions.drug = 'quetiapine fumarate' ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) ORDER BY T1.starttime ASC LIMIT 1",
            "pred_sql": "WITH last_hospital_visit AS ( SELECT hadm_id, admittime, dischtime FROM admissions WHERE subject_id = 10004422 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ), prescriptions_quetiapine AS ( SELECT starttime, stoptime FROM prescriptions WHERE drug = 'quetiapine fumarate' AND hadm_id = (SELECT hadm_id FROM last_hospital_visit) ), prescriptions_olanzapine AS ( SELECT starttime, stoptime FROM prescriptions WHERE drug = 'olanzapine' AND hadm_id = (;",
            "gold_answer": "[['2100-01-23 10:00:00']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is incomplete and contains a syntax error near the semicolon: the CTE for prescriptions_olanzapine ends with \"AND hadm_id = (;\". This is a localized syntax issue that can be fixed by completing the parentheses and CTE definition without altering the overall query logic.",
            "error_type": "Local: syntax error"
        },
        "7f7a2b9981a742290a99b704": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the top five most frequent specimens ordered for patients after being diagnosed with type 2 diabetes mellitus with foot ulcer within 2 months since 1 year ago?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'type 2 diabetes mellitus with foot ulcer' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH diabetes_foot_ulcer_patients AS ( SELECT DISTINCT d1.subject_id FROM diagnoses_icd d1 JOIN d_icd_diagnoses dd1 ON d1.icd_code = dd1.icd_code WHERE dd1.long_title LIKE '%type 2 diabetes mellitus%' AND d1.charttime >= datetime('2100-12-31 23:59:00', '-1 year') AND EXISTS ( SELECT 1 FROM diagnoses_icd d2 JOIN d_icd_diagnoses dd2 ON d2.icd_code = dd2.icd_code WHERE d2.subject_id = d1.subject_id AND dd2.long_title LIKE '%foot ulcer;",
            "gold_answer": "[['blood culture'], ['mrsa screen'], ['sputum'], ['stool'], ['urine']]",
            "pred_answer": "unrecognized token: \"'%foot ulcer;\"",
            "is_correct": false,
            "reasoning": "The predicted SQL is syntactically invalid: it\u2019s truncated after LIKE '%foot ulcer; without closing quotes or parentheses, leading to an 'unrecognized token' error. This is a localized syntax/parse error in the WHERE clause and does not involve the overall structure or logic of the query.",
            "error_type": "Local: syntax error"
        },
        "b1f566e193cdc79472613929": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was the last time that patient 10019917 was diagnosed with acute posthemorrhagic anemia since 1 year ago?",
            "gold_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'acute posthemorrhagic anemia' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019917 ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(d.charttime) AS last_diagnosis_time FROM diagnoses_icd d JOIN d_icd_diagnoses dd ON d.icd_code = dd.icd_code WHERE d.subject_id = 10019917 AND dd.long_title = 'Acute posthemorrhagic anemia' AND d.charttime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['2100-01-27 23:25:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters on dd.long_title = 'Acute posthemorrhagic anemia' (capitalized 'A') while the actual title in the database is lowercase 'acute posthemorrhagic anemia'. This mismatch in the string literal prevents any rows from matching. All other aspects of the query structure remain correct, and no global restructuring of joins or logic is needed\u2014only the literal in the WHERE clause must be corrected.",
            "error_type": "Local: value parsing error"
        },
        "8f66cff426954aa52eb928cc": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What drug was prescribed to patient 10004720 within the same day subsequent to their age-related osteoporosis without current pathological fracture diagnosis this month?",
            "gold_sql": "SELECT T2.drug FROM ( SELECT diagnoses_icd.subject_id, diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'age-related osteoporosis without current pathological fracture' ) AND datetime(diagnoses_icd.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T1.charttime,'start of day') = datetime(T2.starttime,'start of day')",
            "pred_sql": "SELECT DISTINCT p.drug FROM prescriptions p JOIN diagnoses_icd d_icd ON p.hadm_id = d_icd.hadm_id AND p.subject_id = d_icd.subject_id JOIN d_icd_diagnoses diag ON d_icd.icd_code = diag.icd_code WHERE diag.long_title LIKE '%age-related osteoporosis%' AND p.starttime >= datetime(d_icd.charttime, 'start of day') AND p.starttime < datetime(d_icd.charttime, 'start of day', '+1 day') AND datetime(d_icd.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND NOT EXISTS ( SELECT;",
            "gold_answer": "[['0.9% sodium chloride'], ['bag'], ['bag'], ['bag'], ['famotidine'], ['midazolam'], ['soln']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is syntactically incomplete\u2014 it ends abruptly with \"AND NOT EXISTS ( SELECT;\" and has no closing parentheses or subquery, resulting in a SQLite syntax error. This is a localized mistake in the query structure (missing subquery and parentheses) rather than a logical misjoin or overall structural flaw.",
            "error_type": "Local: syntax error"
        },
        "ceb9d85c5fba69ed153192b8": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most commonly prescribed drugs for patients that received percutaneous abdominal drainage within the same hospital visit?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'percutaneous abdominal drainage' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH drainage_patients AS ( SELECT DISTINCT p.subject_id, p.hadm_id FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dp.long_title LIKE '%percutaneous abdominal drainage%' ) SELECT drug, COUNT(DISTINCT dp.subject_id) as count FROM prescriptions JOIN drainage_patients dp ON prescriptions.subject_id = dp.subject_id AND prescriptions.hadm_id = dp.hadm_id GROUP BY drug ORDER BY count DESC LIMIT 4;",
            "gold_answer": "[['0.9% sodium chloride (mini bag plus)'], ['5% dextrose'], ['furosemide'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['5% dextrose', '4.0'], ['furosemide', '4.0'], ['insulin', '4.0'], ['sodium chloride 0.9%  flush', '5.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls all prescriptions for patients who had the drainage procedure in the same hospital visit but does not enforce that the prescriptions occurred after the drainage (i.e., it omits the temporal condition T1.charttime < T2.starttime). This omission changes the overall query logic and results, so it is not a simple local fix but a fundamental logical error.",
            "error_type": "Global: missing temporal condition"
        },
        "fe11b6943e0d6122dbe19246": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the patient count for nicardipine iv prescription since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'nicardipine iv' AND strftime('%Y',prescriptions.starttime) >= '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT subject_id) AS patient_count FROM prescriptions WHERE drug = 'nicardipine' AND route = 'iv' AND starttime >= '2100-01-01 00:00:00';",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only looks at the prescriptions table, splits the drug and route into two predicates (drug = 'nicardipine' AND route = 'iv') instead of using the combined drug value 'nicardipine iv', and uses starttime >= '2100-01-01' rather than filtering by year via strftime. These mistakes are all confined to the WHERE clause predicates (incorrect literal and column usage) and do not change the overall query structure or table relationships. Thus, it is a local error in the WHERE clause value parsing.",
            "error_type": "Local: value parsing error"
        },
        "75e305729a5e29fab9e6d3b4": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When did patient 10029484 receive a stool microbiology test for the last time last month?",
            "gold_sql": "SELECT microbiologyevents.charttime FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029484 ) AND microbiologyevents.spec_type_desc = 'stool' AND datetime(microbiologyevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY microbiologyevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT m.charttime AS last_stool_test_time FROM microbiologyevents m INNER JOIN admissions a ON m.hadm_id = a.hadm_id INNER JOIN patients p ON a.subject_id = p.subject_id WHERE p.subject_id = 10029484 AND m.test_name LIKE '%stool microbiology%' AND m.spec_type_desc = 'stool' AND datetime(m.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') ORDER BY m.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-11-24 18:43:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds an extra filter on m.test_name (m.test_name LIKE '%stool microbiology%') that is not present in the ground truth. This erroneous condition in the WHERE clause overly restricts results and leads to no output. The overall query structure (joins, date filter, ordering, limit) remains correct, so the mistake is confined to a specific part of the WHERE clause.",
            "error_type": "Local: incorrect filter condition"
        },
        "6f2bbd3e1afa5cfe430892e3": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "This year, what are the top three most frequently ordered laboratory tests for patients within the same hospital visit after diagnosed with scoliosis [and kyphoscoliosis], idiopathic?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'scoliosis [and kyphoscoliosis], idiopathic' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime, admissions.hadm_id FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH scoliosis_patients AS ( SELECT DISTINCT d_icd.subject_id FROM diagnoses_icd JOIN d_icd_diagnoses d_icd ON diagnoses_icd.icd_code = d_icd.icd_code WHERE d_icd.long_title LIKE '%scoliosis [and kyphoscoliosis], idiopathic%' AND STRFTIME('%Y', diagnoses_icd.charttime) = '2100' ) SELECT T.label, COUNT(*) as order_count FROM labevents JOIN d_labitems T ON labevents.itemid = T.itemid WHERE labevents.subject_id IN (SELECT subject_id FROM scoliosis_patients) AND STRFTIME('%Y', labevents.charttime) = '2100' GROUP BY T.label ORDER BY order_count DESC LIMIT 3;",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['calcium, total'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['magnesium'], ['mch'], ['mchc'], ['mcv'], ['phosphate'], ['platelet count'], ['pt'], ['rdw'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "no such column: d_icd.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL's CTE aliases d_icd to the d_icd_diagnoses table, then attempts to select d_icd.subject_id. However, the d_icd_diagnoses table does not have a subject_id column (subject_id exists in diagnoses_icd). This is a simple column reference mistake and can be fixed by selecting diagnoses_icd.subject_id instead of d_icd.subject_id. It does not affect the overall query logic or require restructuring of joins or clauses.",
            "error_type": "Local: table/column reference error"
        },
        "632b38e37273b744559d30cf": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "This year, what are the top five most frequent drugs prescribed to patients within the same month after the prescription of emtricitabine-tenofovir (truvada)?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'emtricitabine-tenofovir (truvada)' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.starttime < T2.starttime AND datetime(T1.starttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH truvada_prescriptions AS ( SELECT subject_id, strftime('%Y-%m', starttime) AS prescription_month FROM prescriptions WHERE drug = 'emtricitabine-tenofovir (truvada)' AND strftime('%Y', starttime) = '2100' ) SELECT p.drug, COUNT(*) AS frequency FROM prescriptions p JOIN truvada_prescriptions tp ON p.subject_id = tp.subject_id AND strftime('%Y-%m', p.starttime) = tp.prescription_month WHERE p.drug != 'emtricitabine-tenofovir (truvada)' GROUP BY p.drug ORDER BY frequency DESC LIMIT 5;",
            "gold_answer": "[['acetaminophen'], ['albumin 25% (12.5g / 50ml)'], ['albuterol inhaler'], ['aluminum-magnesium hydrox.-simethicone'], ['amitriptyline'], ['bisacodyl'], ['calcium carbonate'], ['darunavir'], ['docusate sodium (liquid)'], ['emtricitabine-tenofovir (truvada)'], ['fluticasone propionate 110mcg'], ['furosemide'], ['heparin'], ['hydroxyzine'], ['ibuprofen'], ['insulin (regular) for hyperkalemia'], ['ipratropium bromide neb'], ['lactulose'], ['milk of magnesia'], ['nicotine polacrilex'], ['raltegravir'], ['rifaximin'], ['sertraline'], ['sodium chloride 0.9%  flush'], ['sodium polystyrene sulfonate'], ['spironolactone'], ['tiotropium bromide'], ['tramadol (ultram)'], ['trazodone']]",
            "pred_answer": "[['bisacodyl', '8.0'], ['furosemide', '6.0'], ['heparin', '6.0'], ['raltegravir', '6.0'], ['sodium chloride 0.9%  flush', '6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to enforce the \u201cafter\u201d requirement: it matches any prescriptions in the same month without ensuring p.starttime > truvada prescription starttime. This is a localized mistake in the WHERE/join condition and can be fixed by adding that time comparison, without restructuring the overall query logic.",
            "error_type": "Local: missing time filter"
        },
        "ed7541fc091169b8d4b3e386": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was patient 10021312's metronidazole input the last time on the last icu visit?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'metronidazole' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT inputevents.charttime AS last_metronidazole_input FROM inputevents INNER JOIN icustays ON inputevents.subject_id = icustays.subject_id AND inputevents.hadm_id = icustays.hadm_id WHERE inputevents.subject_id = 10021312 AND inputevents.drug = 'metronidazole' AND icustays.outtime IS NOT NULL ORDER BY inputevents.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-09-18 07:35:00']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column `inputevents.charttime` (the correct column is `inputevents.starttime`). This is a localized column reference error and does not involve a fundamental flaw in the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "29a295d23e2237b2efe347a6": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients since 2100 underwent a stool test?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT microbiologyevents.hadm_id FROM microbiologyevents WHERE microbiologyevents.spec_type_desc = 'stool' AND strftime('%Y',microbiologyevents.charttime) >= '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT le.subject_id) AS num_patients FROM labevents le JOIN d_labitems dli ON le.itemid = dli.itemid WHERE dli.label LIKE '%stool%' AND le.charttime >= '2100-01-01';",
            "gold_answer": "[['16.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the labevents and d_labitems tables to look for a 'stool' label in lab tests, whereas the ground truth correctly queries the microbiologyevents table filtering on spec_type_desc = 'stool'. This is not a minor syntax or column naming mistake; it fundamentally alters the query\u2019s logic by targeting the wrong table and event type. Fixing this would require restructuring the query to use microbiologyevents instead of labevents, which is beyond a localized change.",
            "error_type": "Global: incorrect table usage"
        },
        "ed2471d0dc2d2a5bbd67ea07": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most frequently given microbiology tests for the patients who had previously received introduction of other thrombolytic into peripheral vein, percutaneous approach within the same month, this year?",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'introduction of other thrombolytic into peripheral vein, percutaneous approach' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH ranked_tests AS ( SELECT test_name, COUNT(*) as test_count, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) as rank FROM microbiologyevents WHERE EXISTS ( SELECT 1 FROM procedures_icd p_icd JOIN d_icd_procedures d_icd_p ON p_icd.icd_code = d_icd_p.icd_code WHERE d_icd_p.long_title = 'int;",
            "gold_answer": "[['urine culture']]",
            "pred_answer": "unrecognized token: \"'int;\"",
            "is_correct": false,
            "reasoning": "The predicted SQL is truncated and contains an unrecognized token \"'int;\" which is a syntax error in that specific part of the query. This does not reflect a misunderstanding of table relationships or the overall intended logic, but rather a localized parsing issue.",
            "error_type": "Local: syntax error"
        },
        "649c3bfef97979fa17d56b18": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was the last time the sao2 of patient 10014078 registered less than 94.0 since 269 days ago?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10014078 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND chartevents.valuenum < 94.0 AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-269 day') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT T1.charttime FROM chartevents T1 JOIN admissions T2 ON T1.hadm_id = T2.hadm_id JOIN patients T3 ON T2.subject_id = T3.subject_id WHERE T3.subject_id = 10014078 AND T1.label = 'o2 saturation pulseoxymetry' AND T1.valueuom = '%' AND T1.valuenum < 94.0 AND T1.charttime >= datetime('2100-12-31 23:59:00', '-269 day') ORDER BY T1.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-09-09 04:02:00']]",
            "pred_answer": "no such column: T1.label",
            "is_correct": false,
            "reasoning": "The predicted SQL attempts to reference a column T1.label, but the chartevents table does not have a column named label. This is a column reference error in the WHERE clause and can be fixed by joining the d_items table to filter by label. The overall query structure and logic remain unchanged.",
            "error_type": "Local: column reference error"
        },
        "b5bc2ad81764fb6d8d587b9a": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the minimum total cost for the hospital when a immunoglobulin m lab test is involved this year?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'immunoglobulin m' ) ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) AS min_total_cost FROM ( SELECT hadm_id, SUM(cost) AS total_cost FROM cost JOIN labevents ON cost.event_id = labevents.row_id JOIN d_labitems ON labevents.itemid = d_labitems.itemid WHERE d_labitems.label LIKE '%immunoglobulin m%' AND STRFTIME('%Y', labevents.charttime) = '2100' GROUP BY hadm_id ) ;",
            "gold_answer": "[['8671.46']]",
            "pred_answer": "ambiguous column name: hadm_id",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to an ambiguous column reference on hadm_id: both cost and labevents tables have a hadm_id, and the unqualified hadm_id in the SELECT and GROUP BY is ambiguous. This is a localized naming error that can be fixed by prefixing the column with its table alias (e.g., cost.hadm_id) without changing the overall query structure.",
            "error_type": "Local: ambiguous-column reference error"
        },
        "38b6a4148e29acfdd741bed1": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What was the name of the output which patient 10027602 had for the first time since 03/08/2100?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT outputevents.itemid FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) ) AND strftime('%Y-%m-%d',outputevents.charttime) >= '2100-03-08' ORDER BY outputevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT d_items.label FROM outputevents JOIN d_items ON outputevents.itemid = d_items.itemid WHERE outputevents.subject_id = 10027602;",
            "gold_answer": "[['foley']]",
            "pred_answer": "[['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular 
#1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the date filter (since 03/08/2100), the logic to restrict to the patient's ICU stays via icustays and admissions, and the ordering/limit to pick the first event. Instead, it simply joins outputevents by subject_id, returning all distinct outputs for the patient. Fixing this requires restructuring the entire query\u2014adding the date condition, joining through admissions and icustays, and applying ORDER BY and LIMIT\u2014so this is not a local fix but a global rewrite of the query logic.",
            "error_type": "Global: incorrect query logic"
        },
        "e9ed36089be4408a078a0d60": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many times in 2100 did patient 10022281 receive a rdw test?",
            "gold_sql": "SELECT COUNT(*) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'rdw' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022281 ) AND strftime('%Y',labevents.charttime) = '2100'",
            "pred_sql": "SELECT COUNT(*) FROM labevents JOIN d_labitems ON labevents.itemid = d_labitems.itemid WHERE labevents.subject_id = 10022281 AND d_labitems.label = 'red cell distribution width (rdw)' AND strftime('%Y', labevents.charttime) = '2100';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fails only in the WHERE clause literal for the lab test label: it uses 'red cell distribution width (rdw)' instead of the correct 'rdw'. All other joins and filters (including filtering by patient and year) follow the intended structure. This mistake is localized to a single condition and does not require changing the overall query logic or structure.",
            "error_type": "Local: value parsing error"
        },
        "e7780f295aaf7bacd3b27961": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Please tell me the total number of patients who had a famotidine (pepcid) input since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT icustays.hadm_id FROM icustays WHERE icustays.stay_id IN ( SELECT inputevents.stay_id FROM inputevents WHERE inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'famotidine (pepcid)' AND d_items.linksto = 'inputevents' ) AND strftime('%Y',inputevents.starttime) >= '2100' ) )",
            "pred_sql": "SELECT COUNT(DISTINCT ie.subject_id) AS total_patients FROM inputevents ie JOIN d_items di ON ie.itemid = di.itemid WHERE di.label IN ('famotidine', 'pepcid') AND ie.starttime >= '2100-01-01 00:00:00';",
            "gold_answer": "[['17.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s error lies in the WHERE clause: it filters on di.label IN ('famotidine', 'pepcid') rather than matching the full label 'famotidine (pepcid)' and does not include the linksto filter. This is a localized mistake in the value condition and does not require restructuring the joins or overall logic of the query.",
            "error_type": "Local: incorrect filter value"
        },
        "f21ac0d3fff988fd6278434b": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients received excision of right saphenous vein, percutaneous endoscopic approach treatment within the same hospital visit after being diagnosed with essential (primary) hypertension this year?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'essential (primary) hypertension' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of right saphenous vein, percutaneous endoscopic approach' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) AS num_patients FROM patients p JOIN admissions a ON p.subject_id = a.subject_id JOIN diagnoses_icd d ON a.hadm_id = d.hadm_id JOIN d_icd_diagnoses dd ON d.icd_code = dd.icd_code JOIN procedures_icd proc ON a.hadm_id = proc.hadm_id JOIN d_icd_procedures dproc ON proc.icd_code = dproc.icd_code WHERE dd.long_title = 'Essential (primary) hypertension' AND dproc.long_title = 'Excision of right saphenous vein, percutaneous endoscopic approach' AND STRFTIME('%Y', a.admitt;",
            "gold_answer": "[['2.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is syntactically incomplete, ending abruptly with \"AND STRFTIME('%Y', a.admitt;\" and causing a parse error. This is a localized syntax mistake that can be fixed without changing the overall query logic or joins.",
            "error_type": "Local: syntax error"
        }
    },
    "macsql": {
        "9178ec6bc500e893fbbc867a": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Retrieve the top three most common input events this year.",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT inputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM inputevents WHERE datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY inputevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT `itemid` FROM ( SELECT `itemid`, COUNT(*) AS `event_count` FROM inputevents WHERE strftime('%Y', `starttime`) = '2100' GROUP BY `itemid` ORDER BY `event_count` DESC LIMIT 3 )",
            "gold_answer": "[['dextrose 5%'], ['nacl 0.9%'], ['solution']]",
            "pred_answer": "[['220949.0'], ['225158.0'], ['225943.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters and ranks inputevents by count for the year but only selects the itemid codes rather than the required item labels. It also fails to join d_items to map itemid to its label. This is a localized mistake in the SELECT clause (and missing join), not a fundamental change to the overall query structure or logic.",
            "error_type": "Local: missing join/incorrect column selection"
        },
        "03873cbe3210703b0f9698fa": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What does a pt lab test typically cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'labevents' AND cost.event_id IN ( SELECT labevents.row_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'pt' ) )",
            "pred_sql": "SELECT AVG(`cost`) AS typical_lab_test_cost FROM cost WHERE `event_type` = 'labevents'",
            "gold_answer": "[['8.95']]",
            "pred_answer": "[['9.651']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to filter costs for the specific 'pt' lab test and instead averages costs over all lab events. The mistake is isolated to the WHERE clause (missing the condition linking to d_labitems.label = 'pt' and the corresponding event_id filter) and does not require restructuring of the query\u2019s joins or overall logic.",
            "error_type": "Local: missing filter in WHERE clause"
        },
        "8cf537f8c0490a2c2e20f1e4": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Among patients who received percutaneous abdominal drainage , what are the top three most frequent medications that followed within 2 months since 1 year ago?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'percutaneous abdominal drainage' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drug FROM ( SELECT T3.drug, COUNT(*) AS freq FROM prescriptions AS T3 JOIN ( SELECT DISTINCT T1.subject_id, T1.hadm_id FROM procedures_icd AS T1 JOIN d_icd_procedures AS T2 ON T1.icd_code = T2.icd_code WHERE T2.long_title LIKE '%percutaneous abdominal drainage%' ) AS T4 ON T3.subject_id = T4.subject_id AND T3.hadm_id = T4.hadm_id WHERE datetime(T3.starttime) >= datetime('2100-12-31 23:59:00', '-1 year') AND datetime(T3.starttime) < datetime('2100-12-31 23:59:00', '-1 year', '+2 months') GROUP BY T3.drug ORDER BY freq DESC LIMIT 3 )",
            "gold_answer": "[['furosemide'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic by applying a fixed two-month window from exactly one year ago across all patients, rather than finding medications that occurred within two months after each patient\u2019s individual procedure date. It also drops the critical condition tying prescriptions\u2019 start times to the procedure charttime. Fixing this requires restructuring the query (using a correlated join or subquery) to compare each procedure\u2019s charttime to prescription times, not simply filtering on a static date range. This is not a simple typo or local syntax fix but a global change in query structure and logic.",
            "error_type": "Global: incorrect time filter and join logic"
        },
        "f3e413b353d189c95c0a9693": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Show me the top three most common input events.",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT inputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM inputevents GROUP BY inputevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "WITH RankedEvents AS ( SELECT `itemid`, COUNT(*) AS `event_count`, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS `rank` FROM `inputevents` GROUP BY `itemid` ) SELECT `itemid` FROM RankedEvents WHERE `rank` <= 3",
            "gold_answer": "[['dextrose 5%'], ['nacl 0.9%'], ['solution']]",
            "pred_answer": "[['220949.0'], ['225158.0'], ['225943.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly ranks the top three inputevents by count but only selects the itemid values from the inputevents table and never joins to d_items to retrieve the human-readable labels. The missing JOIN to d_items and the incorrect SELECT clause are confined to a specific part of the query and can be fixed by adding the join and selecting d_items.label. This does not require restructuring the overall logic of the ranking query.",
            "error_type": "Local: table reference error"
        },
        "aa55ddc8a4466fa3f1890200": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Count the number of medicines that patient 10011398 was prescribed this month.",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.hadm_id IN( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10011398 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month')",
            "pred_sql": "SELECT COUNT(DISTINCT `drug`) AS num_medicines FROM prescriptions WHERE `subject_id` = 10011398 AND strftime('%Y-%m', `starttime`) = '2100-12'",
            "gold_answer": "[['53.0']]",
            "pred_answer": "[['28.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in its aggregation and minor filtering details within the SELECT/WHERE clauses: it uses COUNT(DISTINCT drug) instead of COUNT(*) and filters on subject_id directly rather than via hadm_id subquery. These are local mistakes in how the count and filter are applied\u2014not a fundamentally different join strategy or overall query structure. Hence this is a localized aggregation/filter error.",
            "error_type": "Local: aggregation error"
        },
        "dcb2ec58e559aa7ae7427d09": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What is patient 10004422's difference in weight from the last value measured on the last hospital visit compared to the second to last value measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "WITH LastTwoWeights AS ( SELECT valuenum FROM chartevents WHERE subject_id = 10004422 AND hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10004422 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ) AND itemid = ( SELECT itemid FROM d_items WHERE label = 'daily weight' ) ORDER BY charttime DESC LIMIT 2 ) SELECT MAX(valuenum) - MIN(valuenum) AS weight_difference FROM LastTwoWeights",
            "gold_answer": "[['-2.3']]",
            "pred_answer": "[['2.3']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s mistake is confined to how it computes the difference between the two retrieved weight values \u2013 it uses MAX(valuenum) - MIN(valuenum) rather than subtracting the second-most-recent measurement from the most recent one. This localizes the error to the aggregation in the SELECT clause (it also loses the sign of the change). The overall query structure (CTE, filters) remains intact, so this is a Local error.",
            "error_type": "Local: incorrect aggregation"
        },
        "6ec3d822a1210a2412249fd7": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What were the number of patients who received a creatine kinase (ck) test?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatine kinase (ck)' ) )",
            "pred_sql": "SELECT COUNT(DISTINCT `subject_id`) AS num_patients FROM labevents WHERE `itemid` IN ( SELECT DISTINCT `itemid` FROM d_labitems WHERE `label` LIKE '%creatine kinase%' OR `label` LIKE '%ck%' )",
            "gold_answer": "[['40.0']]",
            "pred_answer": "[['46.0']]",
            "is_correct": false,
            "reasoning": "The predicted query errs only in the WHERE clause\u2019s filter: it uses a broad LIKE '%creatine kinase%' OR '%ck%' match on d_labitems.label, which pulls in additional tests containing the substring \"ck\". This is a localized mistake in the filtering logic and does not alter the overall query structure or relationships between tables. Hence, it\u2019s a local error.",
            "error_type": "Local: incorrect filter condition"
        },
        "591138bd0043708115c23cd9": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What proportion of patients with a diagnosis of cirrhosis of liver without mention of alcohol survive over a 6-month period?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN (strftime('%J',patients.dod) - strftime('%J',T1.charttime)) < 365 / 2 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'cirrhosis of liver without mention of alcohol' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 2 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id )",
            "pred_sql": "SELECT AVG(survived) AS survival_proportion FROM ( SELECT T1.subject_id, CASE WHEN T3.dod IS NULL OR T3.dod > DATE(T2.first_diagnosis_date, '+6 months') THEN 1 ELSE 0 END AS survived FROM ( SELECT T1.subject_id, MIN(T1.charttime) AS first_diagnosis_date FROM diagnoses_icd AS T1 JOIN d_icd_diagnoses AS T2 ON T1.icd_code = T2.icd_code WHERE T2.long_title = 'cirrhosis of liver without mention of alcohol' GROUP BY T1.subject_id ) AS T2 LEFT JOIN patients AS T3 ON T2.subject_id = T3.subject_id ) AS survival_data",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: T1.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL references an undefined alias T1.subject_id in the outer SELECT, but the subquery is aliased as T2. This is a simple column reference/alias mistake and does not alter the overall query logic or structure. It can be fixed by using the correct alias, so it is a localized error.",
            "error_type": "Local: table/column reference error"
        },
        "3759f2f609764584996dba4e": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Show me patient 10037861's monthly average wbc this year.",
            "gold_sql": "SELECT AVG(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10037861 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'wbc' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY strftime('%Y-%m',labevents.charttime)",
            "pred_sql": "SELECT strftime('%Y-%m', `charttime`) AS `month`, AVG(`valuenum`) AS `average_wbc` FROM `labevents` WHERE `subject_id` = 10037861 AND `itemid` = ( SELECT `itemid` FROM `d_labitems` WHERE `label` = 'wbc' ) AND datetime(`charttime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY strftime('%Y-%m', `charttime`) ORDER BY `month`",
            "gold_answer": "[['23.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on labevents.subject_id directly, rather than restricting labevents to this patient\u2019s hospital admissions via hadm_id as in the ground truth. This omission of the join through admissions fundamentally changes the data scope and yields no results. Fixing it requires altering the join logic between tables, not just tweaking a single clause, so it\u2019s a global error.",
            "error_type": "Global: join-key error"
        },
        "91406b50e1e11128c97d1761": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What diagnosis did patient 10004457 receive the last time since 1 year ago?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT diagnoses_icd.icd_code FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004457 ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ORDER BY diagnoses_icd.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT D.long_title FROM diagnoses_icd AS DI JOIN d_icd_diagnoses AS D ON DI.icd_code = D.icd_code WHERE DI.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10004457 AND datetime(admittime) >= datetime('2100-12-31 23:59:00', '-1 year') ORDER BY admittime DESC LIMIT 1 )",
            "gold_answer": "[['personal history of antineoplastic chemotherapy']]",
            "pred_answer": "[['asthma, unspecified type, unspecified'], ['coronary atherosclerosis of native coronary artery'], ['other and unspecified hyperlipidemia'], ['personal history of antineoplastic chemotherapy'], [\"personal history of hodgkin's disease\"], ['personal history of irradiation, presenting hazards to health'], ['personal history of malignant neoplasm of prostate'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by filtering on the admission date rather than the diagnosis time and by selecting the latest admission ID rather than the latest diagnosis event. This error spans multiple clauses (subquery filtering on admittime, join logic, lack of ordering by diagnosis charttime) and alters the intended meaning, resulting in multiple diagnoses rather than the single last diagnosis. Such an error requires restructuring the query logic beyond a single clause fix.",
            "error_type": "Global: flawed filter/join logic"
        },
        "db1bbc0ebe53801e7899373b": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Calculate the number of patients who were given torsemide prescription within the same hospital visit following their hemodialysis this year.",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'hemodialysis' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'torsemide' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT T1.subject_id) AS num_patients FROM ( SELECT DISTINCT T1.subject_id, T1.hadm_id FROM procedures_icd AS T1 WHERE T1.icd_code = 'icd9|3995' AND datetime(T1.charttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') ) AS Hemodialysis INNER JOIN ( SELECT DISTINCT T2.subject_id, T2.hadm_id FROM prescriptions AS T2 WHERE T2.drug LIKE '%torsemide%' AND datetime(T2.starttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') ) AS Torsemide ON Hemodialysis.subject_id = Torsemide.subject_id AND Hemodialysis.hadm_id = Torsemide.hadm_id",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: T1.subject_id",
            "is_correct": false,
            "reasoning": "The execution error \u201cno such column: T1.subject_id\u201d indicates that the outer query is referencing an alias T1 which is not defined at that scope. The subquery is aliased as Hemodialysis, so the outer SELECT should reference Hemodialysis.subject_id (or the subquery should be aliased T1). This is a localized column reference mistake and does not affect the overall query logic or structure.",
            "error_type": "Local: table/column reference error"
        },
        "d5c558ebb05afce593fd36f1": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Count the number of times that patient 10001725 received a laboratory test for specific gravity this year.",
            "gold_sql": "SELECT COUNT(*) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'specific gravity' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10001725 ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "pred_sql": "SELECT COUNT(*) FROM `labevents` WHERE `subject_id` = 10001725 AND `itemid` = ( SELECT `itemid` FROM `d_labitems` WHERE `label` = 'specific gravity' ) AND datetime(`charttime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict lab events by the patient\u2019s admission (hadm_id) and instead filters directly on labevents.subject_id. The ground truth joins through admissions to get the correct hadm_id(s) for subject 10001725. This mistake lies entirely in the WHERE clause (wrong table/column filter) and does not require restructuring joins or the overall logic beyond that clause. Therefore it is a Local error.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "6a1efc65db66092dc4f820c8": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What was the total amount of nicardipine iv that they prescribed to patient 10004733 on the current hospital visit?",
            "gold_sql": "SELECT SUM(prescriptions.dose_val_rx) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'nicardipine iv'",
            "pred_sql": "SELECT SUM(CAST(`dose_val_rx` AS REAL)) AS total_nicardipine_iv FROM prescriptions WHERE `subject_id` = 10004733 AND `hadm_id` = ( SELECT `hadm_id` FROM admissions WHERE `subject_id` = 10004733 AND `dischtime` IS NULL ) AND `drug` = 'nicardipine' AND `route` = 'iv'",
            "gold_answer": "[['160.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates only in the WHERE clause\u2019s drug filter: it splits the drug name into drug = 'nicardipine' AND route = 'iv', whereas the ground truth and schema assume the drug column stores 'nicardipine iv' as a single value. All other parts of the query structure (SUM aggregation, current visit subquery, hadm_id linkage) are correct. This is a localized mistake in the value filtering rather than a structural or multi-clause logic error.",
            "error_type": "Local: value parsing error"
        },
        "fa8a33756924ed200b38817c": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Can you tell me about patient 10019917's age on the last hospital visit?",
            "gold_sql": "SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10019917 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%Y', A.`admittime`) - strftime('%Y', P.`dob`)) - (strftime('%m-%d', A.`admittime`) < strftime('%m-%d', P.`dob`)) AS age_at_last_visit FROM admissions AS A JOIN patients AS P ON A.`subject_id` = P.`subject_id` WHERE A.`subject_id` = 10019917 AND A.`dischtime` IS NOT NULL ORDER BY A.`dischtime` DESC LIMIT 1",
            "gold_answer": "[['44.0']]",
            "pred_answer": "[['43.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL computes age manually using date arithmetic on admittime and dob, violating the assumption that age must be retrieved directly from the admissions.age column. This is a mistake localized to the SELECT expression and does not affect the overall query structure or table relationships.",
            "error_type": "Local: other-temporal error"
        },
        "2170941b28cb5fced478fa35": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Give me the four most common output events this year.",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT outputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM outputevents WHERE datetime(outputevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY outputevents.itemid ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "WITH RankedEvents AS ( SELECT `itemid`, COUNT(*) AS event_count FROM outputevents WHERE strftime('%Y', `charttime`) = '2100' GROUP BY `itemid` ORDER BY event_count DESC LIMIT 4 ) SELECT `itemid` FROM RankedEvents",
            "gold_answer": "[['cerebral ventricular #1'], ['chest tube #1'], ['foley'], ['void']]",
            "pred_answer": "[['226559.0'], ['226560.0'], ['226588.0'], ['226606.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly identifies the top 4 item IDs by count for this year, but it never joins to the d_items table to retrieve the human-readable labels. As a result, it returns raw itemid values instead of event names. This is a localized mistake in the SELECT clause/join, and does not require restructuring the entire query logic.",
            "error_type": "Local: missing join to lookup table"
        },
        "4a482741fa67f8148276f6d1": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Is the respiratory rate level of patient 10039997 last measured on the last icu visit greater than the second to last measurement on the last icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039997 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) > ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039997 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "WITH LastTwoMeasurements AS ( SELECT `valuenum` FROM chartevents WHERE `subject_id` = 10039997 AND `stay_id` = ( SELECT `stay_id` FROM icustays WHERE `subject_id` = 10039997 ORDER BY `intime` DESC LIMIT 1 ) AND `itemid` = ( SELECT `itemid` FROM d_items WHERE `label` = 'respiratory rate' ) ORDER BY `charttime` DESC LIMIT 2 ) SELECT CASE WHEN COUNT(*) < 2 THEN 'Insufficient data' WHEN MAX(`valuenum`) = MIN(`valuenum`) THEN 'False' ELSE 'True' END AS result FROM LastTwoMeasurements",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['True']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic in multiple ways. First, it picks the most recent icustay for the patient simply by ORDER BY intime without requiring that it be a completed (outtime IS NOT NULL) ICU visit or even linking through admissions.hadm_id. Second, it ignores the d_items.linksto filter when finding the respiratory rate itemid. Third\u2014and most critically\u2014it aggregates the two measurements with MAX and MIN, which can tell only if the two values differ, not whether the latest measurement is greater than the second\u2010latest. Fixing these issues requires rewriting major parts of the query (subquery criteria and comparison logic), not just tweaking a single clause. Therefore this is a global logic/structure error.",
            "error_type": "Global: flawed logical comparison"
        },
        "f85c9633e450cf4188a98ce3": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Get me the top three most common diagnoses.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT `icd_code` FROM ( SELECT `icd_code`, DENSE_RANK() OVER (ORDER BY `count` DESC) AS `rank` FROM ( SELECT `icd_code`, COUNT(*) AS `count` FROM `diagnoses_icd` GROUP BY `icd_code` ) AS `subquery` ) AS `ranked_diagnoses` WHERE `rank` <= 3",
            "gold_answer": "[['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['unspecified essential hypertension']]",
            "pred_answer": "[['icd9|25000'], ['icd9|2724'], ['icd9|4019']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly ranks the top three ICD codes by frequency but then returns the raw `icd_code` values rather than the corresponding diagnosis names (`long_title`). It never joins to `d_icd_diagnoses` to map codes to descriptions. This is a localized mistake in the SELECT clause/table reference and doesn\u2019t require restructuring the query logic or joins beyond adding the missing join and selecting the correct column.",
            "error_type": "Local: table/column reference error"
        },
        "85fb28226e6564139064b07b": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What are the five most frequently ordered medications that male patients aged 60 or above are prescribed within the same month after they are diagnosed with chronic kidney disease, stage iii (moderate)?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic kidney disease, stage iii (moderate)' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.subject_id IN ( SELECT patients.subject_id FROM patients WHERE patients.gender = 'm' ) AND admissions.age >= 60 ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T1.charttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT T4.`drug` FROM `prescriptions` AS T4 JOIN ( SELECT DISTINCT T1.`subject_id`, T1.`hadm_id`, T1.`charttime` FROM `diagnoses_icd` AS T1 JOIN `patients` AS T2 ON T1.`subject_id` = T2.`subject_id` JOIN `admissions` AS T3 ON T1.`hadm_id` = T3.`hadm_id` WHERE T1.`icd_code` = (SELECT `icd_code` FROM `d_icd_diagnoses` WHERE `long_title` = 'chronic kidney disease, stage iii (moderate)') AND T2.`gender` = 'm' AND (strftime('%Y', T1.`charttime`) - strftime('%Y', T2.`dob`)) - (strftime('%m-%d', T1.`charttime`) < strftime('%m-%d', T2.`dob`)) >= 60 ) AS T5 ON T4.`subject_id` = T5.`subject_id` AND T4.`hadm_id` = T5.`hadm_id` WHERE datetime(T4.`starttime`, 'start of month') = datetime(T5.`charttime`, 'start of month') GROUP BY T4.`drug` ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen'], ['albuterol inhaler'], ['allopurinol'], ['aspirin'], ['furosemide'], ['insulin'], ['ipratropium-albuterol neb'], ['isosorbide dinitrate'], ['losartan potassium'], ['multivitamin iv'], ['pantoprazole'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['0.9% sodium chloride'], ['allopurinol'], ['furosemide'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins prescriptions back to the diagnosis using both subject_id AND hadm_id, restricting medications to the same hospital admission rather than any prescription within the same month post\u2010diagnosis. It also fails to enforce the prescription time being after the diagnosis time and even computes age manually instead of using the stored age column. These mistakes alter the fundamental logic of which prescriptions get counted and require restructuring the JOIN/WHERE logic, so this is a global logic error.",
            "error_type": "Global: join relationship error"
        },
        "9816a74cbdf3f4145eb7ec03": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Tell me the top four most frequent procedures that patients received within the same hospital visit after being diagnosed with spontaneous bacterial peritonitis since 1 year ago.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'spontaneous bacterial peritonitis' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 4 )",
            "pred_sql": "SELECT `icd_code` FROM ( SELECT `icd_code`, COUNT(*) AS `frequency` FROM `procedures_icd` WHERE `hadm_id` IN ( SELECT DISTINCT `hadm_id` FROM `diagnoses_icd` WHERE `icd_code` = (SELECT `icd_code` FROM `d_icd_diagnoses` WHERE `long_title` LIKE '%spontaneous bacterial peritonitis%') AND datetime(`charttime`) >= datetime('2100-12-31 23:59:00', '-1 year') ) GROUP BY `icd_code` ORDER BY `frequency` DESC LIMIT 4 )",
            "gold_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['control of epistaxis by anterior nasal packing'], ['insertion of endotracheal tube'], ['laryngoscopy and other tracheoscopy'], ['other endoscopy of small intestine'], ['percutaneous abdominal drainage'], ['rhinoscopy'], ['venous catheterization, not elsewhere classified']]",
            "pred_answer": "[['icd9|4513'], ['icd9|5491'], ['icd9|9604'], ['icd9|9671']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic. It selects procedures for any admission that had a spontaneous bacterial peritonitis diagnosis in the last year, but it does not enforce that each procedure occurred after the diagnosis time within the same visit. It also fails to join on patient and admission to compare charttimes and does not retrieve the procedure names. These errors span multiple clauses (WHERE, subquery logic, missing temporal join) and alter the intended meaning of the query, requiring a restructuring of the overall query logic.",
            "error_type": "Global: incorrect temporal join and filtering logic"
        },
        "ffcc6f49ecc0514e980b88a6": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Among patients in their 20s , what are the top four prescribed drugs since 1 year ago?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 20 AND 29 ) AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT drug FROM ( SELECT PR.drug, COUNT(*) AS count, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS rank FROM prescriptions PR JOIN ( SELECT DISTINCT A.subject_id FROM admissions A JOIN patients P ON A.subject_id = P.subject_id WHERE A.age BETWEEN 20 AND 29 ) AS SubQuery ON PR.subject_id = SubQuery.subject_id WHERE datetime(PR.starttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY PR.drug ) AS RankedDrugs WHERE rank <= 4",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['insulin'], ['metoprolol tartrate'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters patients by subject_id instead of hadm_id, thereby aggregating prescriptions across all admissions for those patients rather than only those admissions where age is between 20 and 29. This incorrect join on subject_id versus hadm_id fundamentally changes the query logic and results, requiring a structural correction beyond a local fix.",
            "error_type": "Global: incorrect join relationship"
        },
        "55a31e2e8ac1f0d6f0026827": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Since 1 year ago, how many patients received 0.9% sodium chloride within 2 months after being diagnosed with candidal stomatitis?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'candidal stomatitis' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = '0.9% sodium chloride' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month')",
            "pred_sql": "SELECT COUNT(DISTINCT T1.subject_id) AS num_patients FROM ( SELECT DISTINCT T1.subject_id, T1.hadm_id, T1.charttime AS diagnosis_time FROM diagnoses_icd AS T1 JOIN d_icd_diagnoses AS T2 ON T1.icd_code = T2.icd_code WHERE T2.long_title = 'candidal stomatitis' AND datetime(T1.charttime) >= datetime('2100-12-31 23:59:00', '-1 year') ) AS D JOIN prescriptions AS P ON D.subject_id = P.subject_id AND D.hadm_id = P.hadm_id WHERE P.drug = '0.9% sodium chloride' AND datetime(P.starttime) <= datetime(D.diagnosis_time, '+2 months') AND datetime(P.starttime) >= datetime(D.diagnosis_time)",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: T1.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL references T1.subject_id in the outer SELECT, but the subquery is aliased as D, not T1. This is a simple aliasing/column reference mistake\u2014T1 is undefined in that scope. It can be fixed locally by changing T1.subject_id to D.subject_id without altering the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "4de248a91a8c0a75290ba3d8": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "How many days have elapsed since the first time patient 10039831 got a sodium lab test on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', labevents.charttime) ) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'sodium' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) ORDER BY labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT JULIANDAY('2100-12-31') - JULIANDAY( (SELECT MIN(`charttime`) FROM labevents WHERE `subject_id` = 10039831 AND `hadm_id` = ( SELECT `hadm_id` FROM admissions WHERE `subject_id` = 10039831 AND `dischtime` IS NULL ) AND `itemid` = ( SELECT `itemid` FROM d_labitems WHERE `label` = 'sodium' )) ) AS days_elapsed",
            "gold_answer": "[['0.319']]",
            "pred_answer": "[['-0.681']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in the SELECT clause\u2019s date handling: it uses JULIANDAY('2100-12-31') instead of JULIANDAY('2100-12-31 23:59:00') (and uses JULIANDAY rather than strftime('%J')) which leads to the wrong offset and a negative result. The rest of the query structure\u2014filtering by hadm_id, subject_id, and sodium itemid\u2014is correct. Fixing the literal format in the SELECT date expression corrects the result without altering joins, subqueries, or overall logic.",
            "error_type": "Local: value parsing error"
        },
        "1e56241efb64541676d9aab0": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What was the name of the organism found in patient 10016742's first swab microbiology test since 04/2100?",
            "gold_sql": "SELECT microbiologyevents.org_name FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10016742 ) AND microbiologyevents.spec_type_desc = 'swab' AND microbiologyevents.org_name IS NOT NULL AND strftime('%Y-%m',microbiologyevents.charttime) >= '2100-04' ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT T1.`org_name` FROM microbiologyevents AS T1 WHERE T1.`subject_id` = 10016742 AND datetime(T1.`charttime`) >= datetime('2100-04-01 00:00:00') ORDER BY T1.`charttime` ASC LIMIT 1",
            "gold_answer": "[['yeast']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict results to swab tests (missing the condition spec_type_desc = 'swab') and does not ensure org_name is not null. These are omissions in the WHERE clause and can be fixed by adding the appropriate filters without restructuring the query logic or joins.",
            "error_type": "Local: missing WHERE filter"
        },
        "8c31e6bff8fb0a1e0a92eaae": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Count the number of patients who were diagnosed with diaphragmatic hernia without mention of obstruction or gangrene but didn't come back to the hospital within 2 months since 1 year ago.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'diaphragmatic hernia without mention of obstruction or gangrene' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'diaphragmatic hernia without mention of obstruction or gangrene' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 JOIN admissions ON T2.subject_id = admissions.subject_id WHERE T2.charttime < admissions.admittime AND datetime(admissions.admittime) >= datetime('2100-12-31 23:59:00','-1 year') AND datetime(admissions.admittime) BETWEEN datetime(T2.charttime) AND datetime(T2.charttime,'+2 month') )",
            "pred_sql": "SELECT COUNT(DISTINCT `subject_id`) AS `patient_count` FROM `admissions` WHERE `subject_id` IN ( SELECT DISTINCT `subject_id` FROM `diagnoses_icd` WHERE `icd_code` = ( SELECT `icd_code` FROM `d_icd_diagnoses` WHERE `long_title` = 'diaphragmatic hernia without mention of obstruction or gangrene' ) ) AND `admittime` < datetime('2100-12-31 23:59:00', '-1 year') AND `subject_id` NOT IN ( SELECT DISTINCT `subject_id` FROM `admissions` WHERE `admittime` >= datetime('2100-12-31 23:59:00', '-1 year') AND `admittime` < datetime('2100-12-31 23:59:00', '-1 year', '+2 months') )",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query applies the date filters to the admissions table rather than to the diagnosis timestamp, and it omits any link between the diagnosis date and a subsequent readmission window. It filters for admissions before one year ago instead of diagnoses since one year ago, and then checks for readmissions within two months of that wrong timestamp. This misplacement of time conditions and misassociation of tables fundamentally changes the query\u2019s logic and results, requiring a full rewrite of the date filtering and subquery structure rather than a simple fix in a single clause.",
            "error_type": "Global: incorrect overall filtering and join logic"
        },
        "18168ef8762ec6a8dec80897": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What number of patients stayed in medical intensive care unit (micu) since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT transfers.hadm_id FROM transfers WHERE transfers.careunit = 'medical intensive care unit (micu)' AND datetime(transfers.intime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT `subject_id`) AS num_patients FROM icustays WHERE `first_careunit` = 'medical intensive care unit (micu)' AND datetime(`intime`) >= datetime('2100-12-31 23:59:00', '-1 year')",
            "gold_answer": "[['20.0']]",
            "pred_answer": "[['18.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of the question by using the icustays table and its first_careunit field instead of the transfers table to identify all MICU stays. The ground truth correctly captures any transfer into the MICU within the past year, whereas the prediction only counts stays where the patient\u2019s first ICU unit was MICU and occurred in the last year. This is not a simple column name or syntax mistake but an overall structural error in table choice and join logic, altering the intended meaning of the query. Therefore, this is a global error.",
            "error_type": "Global: incorrect join/structure error"
        },
        "c91416da12da067358414797": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "How many patients were given gt flush in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT icustays.hadm_id FROM icustays WHERE icustays.stay_id IN ( SELECT inputevents.stay_id FROM inputevents WHERE inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'gt flush' AND d_items.linksto = 'inputevents' ) AND strftime('%Y',inputevents.starttime) = '2100' ) )",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT DISTINCT `subject_id` FROM `prescriptions` WHERE `drug` = 'gt flush' AND strftime('%Y', `starttime`) = '2100' ) AS unique_patients",
            "gold_answer": "[['16.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and the drug column to find 'gt flush', whereas the correct query must use inputevents joined through d_items (inputevents.itemid filtered by d_items.label = 'gt flush'). This is not a simple column or syntax mistake but a fundamental misplacement of the data source, altering the entire logic and relationships of the query.",
            "error_type": "Global: wrong table for event"
        },
        "34dca7a12c49d40177dee03e": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Which care unit did patient 10038999 last enter since 1 year ago, based on the transfer record?",
            "gold_sql": "SELECT transfers.careunit FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038999 ) AND transfers.careunit IS NOT NULL AND datetime(transfers.intime) >= datetime('2100-12-31 23:59:00','-1 year') ORDER BY transfers.intime DESC LIMIT 1",
            "pred_sql": "SELECT `careunit` FROM transfers WHERE `subject_id` = 10038999 AND datetime(`intime`) >= datetime('2100-12-31 23:59:00', '-1 year') ORDER BY `intime` DESC LIMIT 1",
            "gold_answer": "[['med/surg']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the condition AND transfers.careunit IS NOT NULL, so it returns a NULL careunit rather than filtering it out and returning the last non-null careunit. This is a small, localized mistake in the WHERE clause and does not alter the overall query structure or logic of joining via hadm_id and ordering by intime.",
            "error_type": "Local: missing IS NOT NULL filter"
        },
        "7ecca1580c68db5def75b1c9": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "How many patients received a introduction of other therapeutic substance into peripheral artery, percutaneous approach procedure within the same month after having a replacement of aortic valve with zooplastic tissue, percutaneous approach procedure since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'replacement of aortic valve with zooplastic tissue, percutaneous approach' ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'introduction of other therapeutic substance into peripheral artery, percutaneous approach' ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`subject_id`) AS `patient_count` FROM ( SELECT `hadm_id`, `subject_id`, datetime(`charttime`, 'start of month') AS `procedure_month` FROM `procedures_icd` WHERE `icd_code` = ( SELECT `icd_code` FROM `d_icd_procedures` WHERE `long_title` = 'Replacement of aortic valve with zooplastic tissue, percutaneous approach' ) AND datetime(`charttime`) >= datetime('2100-01-01 00:00:00') ) AS T1 JOIN ( SELECT `hadm_id`, `subject_id`, datetime(`charttime`, 'start of month') AS `procedure_month` FROM `procedures_icd` WHERE `icd_code` = ( SELECT `icd_code` FROM `d_icd_procedures` WHERE `long_title` = 'Introduction of other therapeutic substance into peripheral artery, percutaneous approach' ) ) AS T2 ON T1.`subject_id` = T2.`subject_id` AND T1.`procedure_month` = T2.`procedure_month`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to apply the year filter on the second subquery ('introduction' procedure) and omits the condition ensuring the introduction occurs after the replacement within the same month (T1.charttime < T2.charttime). These mistakes alter the fundamental logic of the query rather than being localized syntax or naming issues.",
            "error_type": "Global: join-logic error"
        },
        "4a2a1c26c8244fba33d6203f": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What is the daily minimum level of patient 10003400's hematocrit?",
            "gold_sql": "SELECT MIN(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10003400 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'hematocrit' ) GROUP BY strftime('%Y-%m-%d',labevents.charttime)",
            "pred_sql": "SELECT DATE(`charttime`) AS `date`, MIN(`valuenum`) AS `min_hematocrit` FROM `labevents` WHERE `subject_id` = 10003400 AND `itemid` = ( SELECT `itemid` FROM `d_labitems` WHERE `label` = 'hematocrit' ) GROUP BY DATE(`charttime`) ORDER BY `date`",
            "gold_answer": "[['31.3'], ['31.5'], ['34.8']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only filters labevents by subject_id and a single itemid, rather than using the hadm_id subquery to restrict events to that patient\u2019s admissions as in the ground truth. This mistake is confined to the WHERE clause (an incorrect column reference/condition) and doesn\u2019t change the overall query structure or logic beyond that clause. Hence it\u2019s a localized error.",
            "error_type": "Local: column reference error"
        },
        "c5bef388e557ec34b8d4bad5": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "When was the last time when patient 10021938 had the maximum value of sao2 on 11/02/this year?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021938 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',chartevents.charttime) = '11-02' ORDER BY chartevents.valuenum DESC, chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(`charttime`) AS last_time_max_sao2 FROM `chartevents` WHERE `subject_id` = 10021938 AND `itemid` = ( SELECT `itemid` FROM `d_items` WHERE `label` = 'o2 saturation pulseoxymetry' ) AND `valuenum` = ( SELECT MAX(`valuenum`) FROM `chartevents` WHERE `subject_id` = 10021938 AND `itemid` = ( SELECT `itemid` FROM `d_items` WHERE `label` = 'o2 saturation pulseoxymetry' ) AND datetime(`charttime`, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND strftime('%d', `charttime`) = '02' ) AND datetime(`charttime`, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND strftime('%d', `charttime`) = '02'",
            "gold_answer": "[['2100-11-02 04:01:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the time filter logic by using the start of the month and checking for day '02' in the current month (December) instead of filtering for November 2nd of the current year. This is not a minor syntax mistake but a core logical error that alters the intended date window. Additionally, the ground truth limits to ICU stays via stay_id while the prediction uses subject_id directly, further changing the scope of events. These mistakes span multiple clauses and require restructuring the query logic to correct.",
            "error_type": "Global: incorrect date logic"
        },
        "eed40d16275abbb9b36c549b": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Show me the top four most common microbiology tests this year.",
            "gold_sql": "SELECT T1.test_name FROM ( SELECT microbiologyevents.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM microbiologyevents WHERE datetime(microbiologyevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY microbiologyevents.test_name ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT `spec_type_desc`, `test_name` FROM ( SELECT `spec_type_desc`, `test_name`, COUNT(*) AS `test_count`, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS `rank` FROM microbiologyevents WHERE datetime(`charttime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY `spec_type_desc`, `test_name` ) AS ranked_tests WHERE `rank` <= 4",
            "gold_answer": "[['blood culture, routine'], ['gram stain'], ['respiratory culture'], ['urine culture']]",
            "pred_answer": "[['blood culture', 'blood culture, routine'], ['mrsa screen', 'mrsa screen'], ['sputum', 'respiratory culture'], ['urine', 'urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply adds spec_type_desc to both the SELECT and GROUP BY clauses, which changes the granularity of the aggregation and returns the wrong grouping. This is a mistake confined to the GROUP BY/SELECT clause and can be fixed by removing spec_type_desc\u2014no overall query restructuring is needed. Therefore, this is a Local error of the GROUP BY type.",
            "error_type": "Local: group-by error"
        },
        "6db0ca7ff25c436c79d07413": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What was the diagnosis received by patient 10038081 for the last time?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT diagnoses_icd.icd_code FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038081 ) ORDER BY diagnoses_icd.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT T2.`long_title` FROM diagnoses_icd AS T1 INNER JOIN d_icd_diagnoses AS T2 ON T1.`icd_code` = T2.`icd_code` WHERE T1.`hadm_id` = ( SELECT `hadm_id` FROM admissions WHERE `subject_id` = 10038081 ORDER BY `dischtime` DESC LIMIT 1 )",
            "gold_answer": "[['transfusion related acute lung injury (trali)']]",
            "pred_answer": "[['acquired hypertrophic pyloric stenosis'], ['acute kidney failure, unspecified'], ['acute respiratory failure'], ['altered mental status'], ['anemia, unspecified'], ['asthma, unspecified type, unspecified'], ['cachexia'], ['chronic kidney disease, unspecified'], ['cirrhosis of liver without mention of alcohol'], ['diabetes mellitus without mention of complication, type ii or unspecified type, uncontrolled'], ['do not resuscitate status'], ['encounter for palliative care'], ['epistaxis'], ['esophageal varices without mention of bleeding'], ['glucocorticoid deficiency'], ['hyperpotassemia'], ['hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified'], ['hyposmolality and/or hyponatremia'], ['hypotension, unspecified'], ['jaundice, unspecified, not of newborn'], ['natural blood and blood products causing adverse effects in therapeutic use'], ['oliguria and anuria'], ['other and unspecified coagulation defects'], ['other and unspecified hyperlipidemia'], ['other ascites'], ['other disorders of muscle, ligament, and fascia'], ['other fluid overload'], ['other specified disorders of stomach and duodenum'], ['other specified pre-operative examination'], ['paralytic ileus'], ['paroxysmal ventricular tachycardia'], ['personal history of malignant neoplasm of tongue'], ['portal hypertension'], ['pulmonary congestion and hypostasis'], ['spontaneous bacterial peritonitis'], ['tachypnea'], ['transfusion related acute lung injury (trali)'], ['unspecified vascular insufficiency of intestine']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by selecting diagnoses based on the patient\u2019s last hospital admission (using discharge time) rather than the single most recent diagnosis record (using diagnoses_icd.charttime). It retrieves all diagnoses for that admission instead of the one latest diagnosis, thereby altering the overall intended result. This error spans multiple clauses and requires restructuring the core query logic, so it is a global issue.",
            "error_type": "Global: incorrect logic"
        },
        "a426a5f7267fbe34c05da68d": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What's the white blood cells change of patient 10008287 last measured on the last hospital visit compared to the first value measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10008287 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'white blood cells' ) ORDER BY labevents.charttime DESC LIMIT 1 ) - ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10008287 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'white blood cells' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT (last_value - first_value) AS wbc_change FROM ( SELECT (SELECT valuenum FROM labevents WHERE subject_id = 10008287 AND hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10008287 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ) AND itemid = 51006 AND charttime = ( SELECT MIN(charttime) FROM labevents WHERE subject_id = 10008287 AND hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10008287 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ) AND itemid = 51006 ) ) AS first_value, (SELECT valuenum FROM labevents WHERE subject_id = 10008287 AND hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10008287 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ) AND itemid = 51006 AND charttime = ( SELECT MAX(charttime) FROM labevents WHERE subject_id = 10008287 AND hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10008287 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ) AND itemid = 51006 ) ) AS last_value ) AS wbc_values",
            "gold_answer": "[['-1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in how it identifies the \u201cwhite blood cells\u201d lab test\u2014it hard-codes itemid = 51006 instead of looking up d_labitems.label = 'white blood cells'. This causes the wrong measurements to be pulled and hence the wrong change value. All other structure (subqueries, date ordering, hadm_id filtering) matches the intended logic. The error is confined to the WHERE clause\u2019s itemid filter (a simple misreference), without impacting joins or overall query flow. Therefore it is a localized column/value reference error.",
            "error_type": "Local: incorrect itemid reference error"
        },
        "bd635bf7a1d3589befdf1632": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Tell me the top five diagnoses with the highest 6-month mortality rate.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T4.icd_code FROM ( SELECT T3.icd_code, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C3 FROM ( SELECT T2.icd_code, AVG(C1) AS C2 FROM ( SELECT T1.icd_code, ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T1.charttime) < 365 / 2 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 2 GROUP BY admissions.subject_id, diagnoses_icd.icd_code HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id ) AS T2 GROUP BY T2.icd_code ) AS T3 ) AS T4 WHERE T4.C3 <= 5 )",
            "pred_sql": "WITH DiagnosisMortality AS ( SELECT D.icd_code, COUNT(DISTINCT P.subject_id) AS total_patients, SUM(CASE WHEN P.dod IS NOT NULL AND P.dod <= datetime(F.first_diagnosis_time, '+6 months') THEN 1 ELSE 0 END) AS deaths_within_6_months FROM diagnoses_icd D JOIN (SELECT subject_id, MIN(charttime) AS first_diagnosis_time FROM diagnoses_icd GROUP BY subject_id) F ON D.subject_id = F.subject_id JOIN patients P ON D.subject_id = P.subject_id GROUP BY D.icd_code ) SELECT icd_code FROM DiagnosisMortality ORDER BY CAST(deaths_within_6_months AS REAL) / total_patients DESC LIMIT 5",
            "gold_answer": "[['abnormal involuntary movements'], ['acidosis'], ['acute and subacute hepatic failure with coma'], ['acute and subacute hepatic failure without coma'], ['acute kidney failure with lesion of tubular necrosis'], ['acute kidney failure, unspecified'], ['acute posthemorrhagic anemia'], ['acute respiratory failure'], ['acute respiratory failure, unspecified whether with hypoxia or hypercapnia'], ['alcohol abuse, continuous'], ['alcohol abuse, in remission'], ['alcoholic cirrhosis of liver'], ['alcoholic polyneuropathy'], ['amyotrophic lateral sclerosis'], ['anemia in other chronic diseases classified elsewhere'], ['arthrodesis status'], ['atelectasis'], ['atherosclerotic heart disease of native coronary artery with other forms of angina pectoris'], ['automatic implantable cardiac defibrillator in situ'], ['benign neoplasm of cerebral meninges'], ['bipolar disorder, unspecified'], ['chronic airway obstruction, not elsewhere classified'], ['chronic kidney disease, stage 3 (moderate)'], ['chronic kidney disease, stage iii (moderate)'], ['chronic kidney disease, unspecified'], ['chronic osteomyelitis, site unspecified'], ['chronic systolic heart failure'], ['cirrhosis of liver without mention of alcohol'], ['constipation, unspecified'], ['delirium due to conditions classified elsewhere'], ['dementia, unspecified, without behavioral disturbance'], ['dependence on respirator, status'], ['diarrhea'], ['diplopia'], ['do not resuscitate status'], ['early satiety'], ['edema'], ['embolism and thrombosis of other specified artery'], ['encounter for palliative care'], ['hemoperitoneum'], ['hemorrhage of gastrointestinal tract, unspecified'], ['hip joint replacement'], ['hyperosmolality and/or hypernatremia'], ['hyperpotassemia'], ['hypertensive chronic kidney disease with stage 1 through stage 4 chronic kidney disease, or unspecified chronic kidney disease'], ['hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through 
stage iv, or unspecified'], ['hypertrophy (benign) of prostate without urinary obstruction and other lower urinary tract symptom (luts)'], ['hypo-osmolality and hyponatremia'], ['hypospadias'], ['hypotension, unspecified'], ['hypovolemic shock'], ['ileus, unspecified'], ['infection with microorganisms resistant to penicillins'], ['jaundice, unspecified, not of newborn'], ['leukocytosis, unspecified'], ['long term (current) use of insulin'], ['malignant neoplasm of body of pancreas'], ['malignant neoplasm of liver, secondary'], ['malignant neoplasm of rectosigmoid junction'], ['metabolic encephalopathy'], ['methicillin susceptible staphylococcus aureus infection as the cause of diseases classified elsewhere'], ['migraine, unspecified, without mention of intractable migraine without mention of status migrainosus'], ['mycosis fungoides, unspecified site, extranodal and solid organ sites'], ['neoplasm related pain (acute) (chronic)'], ['nephritis and nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney'], ['nonalcoholic steatohepatitis (nash)'], ['nonspecific elevation of levels of transaminase or lactic acid dehydrogenase [ldh]'], ['old myocardial infarction'], ['other ascites'], ['other chronic pulmonary heart diseases'], ['other cirrhosis of liver'], ['other closed fractures of distal end of radius (alone)'], ['other complications due to other vascular device, implant, and graft'], ['other constipation'], ['other convulsions'], ['other diseases of lung, not elsewhere classified'], ['other disorders of neurohypophysis'], ['other dysphagia'], ['other postprocedural shock, initial encounter'], ['other respiratory abnormalities'], ['other secondary thrombocytopenia'], ['other septicemia due to gram-negative organisms'], ['other specified analgesics and antipyretics causing adverse effects in therapeutic use'], ['other specified bacterial agents as the cause of diseases classified elsewhere'], ['other specified disorders of 
peritoneum'], ['other specified forms of hearing loss'], ['palpitations'], ['peritonitis, unspecified'], ['personal history of alcoholism'], ['personal history of colonic polyps'], ['personal history of other malignant neoplasm of skin'], ['personal history of peptic ulcer disease'], ['personal history of poliomyelitis'], ['personal history of pulmonary embolism'], ['personal history of tobacco use'], ['personal history of transient ischemic attack (tia), and cerebral infarction without residual deficits'], ['pneumonia due to klebsiella pneumoniae'], ['postprocedural hemorrhage of a circulatory system organ or structure following a cardiac catheterization'], ['posttraumatic stress disorder'], ['presence of coronary angioplasty implant and graft']]",
            "pred_answer": "[['icd9|29680'], ['icd9|496'], ['icd9|5715'], ['icd9|v08'], ['icd9|v462']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally miscalculates the 6-month mortality rate by using each patient\u2019s overall first diagnosis time (across all codes) rather than the first occurrence of each specific diagnosis. It also fails to filter out diagnoses that occurred less than 6 months before the fixed \u2018today\u2019 and misuses the '+6 months' offset instead of the prescribed 365/2-day window, thereby altering the intended grouping, timing logic, and mortality calculation. This is not a simple column or syntax mistake but a flaw that spans multiple clauses (CTE definition, joins, date arithmetic, grouping), requiring a major rewrite of the query\u2019s logic rather than a localized fix.",
            "error_type": "Global: flawed mortality calculation logic"
        }
    },
    "o4-mini": {
        "b6cc190e8d03c0d1b4cb1b11": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What label corresponds to chloride (serum) (soft)?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.abbreviation = 'chloride (serum) (soft)'",
            "pred_sql": "SELECT label FROM d_labitems WHERE label = 'chloride (serum) (soft)';",
            "gold_answer": "[['chloride (serum) (soft)']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong table (d_labitems) instead of d_items and applies a WHERE on label rather than abbreviation. This is a specific table reference/column mismatch that can be fixed locally without altering the overall structure of the query.",
            "error_type": "Local: table reference error"
        },
        "26815672560ddf25199789f4": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Is patient 10039831's chloride last measured on the current hospital visit less than the first value measured on the current hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'chloride' ) ORDER BY labevents.charttime DESC LIMIT 1 ) < ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'chloride' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "WITH current_hadm AS ( SELECT hadm_id FROM admissions WHERE subject_id = 10039831 AND dischtime IS NULL ), chloride_vals AS ( SELECT le.valuenum AS val, le.charttime FROM labevents le JOIN d_labitems di ON le.itemid = di.itemid WHERE le.subject_id = 10039831 AND le.hadm_id = (SELECT hadm_id FROM current_hadm) AND di.label = 'chloride' ) SELECT CASE WHEN (SELECT val FROM chloride_vals ORDER BY charttime DESC LIMIT 1) < (SELECT val FROM chloride_vals ORDER BY charttime ASC LIMIT 1) THEN 'true' ELSE 'false' END AS is_last_less_than_first;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['false']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s overall logic\u2014filtering to the current hospital admission and comparing the last versus first chloride values\u2014is correct. The only discrepancy is in the SELECT clause\u2019s output: the ground truth returns a numeric boolean (0.0 for false), while the prediction uses a CASE to return the string 'false'. This is a minor formatting/output mismatch that does not affect the query\u2019s structure or data relationships.",
            "error_type": "Local: output-format error"
        },
        "673fa4c98a81600659a804ce": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What was the last time that patient 10029291 had the maximum value of po2?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'po2' ) ORDER BY labevents.valuenum DESC, labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(c.charttime) AS last_time FROM chartevents c JOIN d_items d ON c.itemid = d.itemid WHERE c.subject_id = 10029291 AND d.label = 'po2' AND c.valuenum = ( SELECT MAX(c2.valuenum) FROM chartevents c2 JOIN d_items d2 ON c2.itemid = d2.itemid WHERE c2.subject_id = 10029291 AND d2.label = 'po2' );",
            "gold_answer": "[['2100-02-25 04:52:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls data from chartevents joined with d_items, treating 'po2' as a vital sign, whereas 'po2' is a lab test stored in labevents joined with d_labitems. This is a fundamental mis\u2010use of tables and joins, violating the schema assumptions and requiring restructuring the entire query rather than a simple fix in a single clause.",
            "error_type": "Global: incorrect table usage"
        },
        "896e06dbfb4e1c4a502cc43b": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "When did patient 10018845 have the maximum value of heart rate for the first time?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018845 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.valuenum DESC, chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT ce.charttime FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10018845 AND di.label = 'heart rate' AND ce.valuenum = ( SELECT MAX(ce2.valuenum) FROM chartevents ce2 JOIN d_items di2 ON ce2.itemid = di2.itemid WHERE ce2.subject_id = 10018845 AND di2.label = 'heart rate' ) ORDER BY ce.charttime ASC LIMIT 1;\"",
            "gold_answer": "[['2100-10-30 13:00:00']]",
            "pred_answer": "You can only execute one statement at a time.",
            "is_correct": false,
            "reasoning": "The predicted SQL uses ce.subject_id directly to filter heart rate events, but it fails to restrict to the patient\u2019s ICU stays via admissions and icustays (as required by the schema relationships). This omission changes the query\u2019s logic by including irrelevant chartevents outside the intended ICU context. Fixing this requires restructuring the JOINs and subqueries to enforce the proper hadm_id and stay_id filtering, not just a minor tweak in one clause.",
            "error_type": "Global: incorrect join relationship"
        },
        "b5e2d13489c812114277fe37": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Can you list the top five most frequent procedures that patients received within 2 months after having venous catheterization, not elsewhere classified this year?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH venous AS ( SELECT pi.subject_id, pi.hadm_id, pi.charttime AS ven_time FROM procedures_icd pi JOIN d_icd_procedures dp ON pi.icd_code = dp.icd_code WHERE dp.long_title = 'venous catheterization, not elsewhere classified' AND strftime('%Y', pi.charttime) = '2100' ) SELECT dp.long_title FROM procedures_icd pi JOIN d_icd_procedures dp ON pi.icd_code = dp.icd_code JOIN venous v ON pi.subject_id = v.subject_id AND pi.hadm_id = v.hadm_id AND pi.charttime > v.ven_time AND pi.charttime <= datetime(v.ven_time, '+2 months') WHERE dp.long_title <> 'venous catheterization, not elsewhere classified' GROUP BY dp.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['arteriography of femoral and other lower extremity arteries'], ['arteriography of other intra-abdominal arteries'], ['arteriography of other specified sites'], ['central venous catheter placement with guidance'], ['closed [aspiration] [needle] [percutaneous] biopsy of pancreas'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['control of epistaxis by anterior nasal packing'], ['diagnostic ultrasound of digestive system'], ['insertion of endotracheal tube'], ['laryngoscopy and other tracheoscopy'], ['other endoscopy of small intestine'], ['other endovascular procedures on other vessels'], ['rhinoscopy'], ['transcatheter embolization for gastric or duodenal bleeding']]",
            "pred_answer": "[['arteriography of other intra-abdominal arteries'], ['insertion of endotracheal tube'], ['other endoscopy of small intestine'], ['rhinoscopy'], ['transcatheter embolization for gastric or duodenal bleeding']]",
            "is_correct": false,
            "reasoning": "The predicted SQL restricts the follow-up procedures to the same hadm_id (hospital admission) as the venous catheterization, whereas the question asks for any procedures received by the same patient within two months after venous catheterization (regardless of admission). This incorrect join on hadm_id fundamentally changes which procedures are counted and excludes valid procedures in other admissions, altering the overall logic of the query rather than just a small clause.",
            "error_type": "Global: incorrect join relationship"
        },
        "2c017af839adaa90d6aaa933": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "List the top three most frequent diagnoses that patients are diagnosed with within 2 months after receiving extraction of other tooth.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'extraction of other tooth' ) ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "SELECT dd.long_title AS diagnosis, COUNT(*) AS freq FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code AND dp.long_title = 'extraction of other tooth' JOIN diagnoses_icd di ON di.subject_id = p.subject_id AND di.charttime >= p.charttime AND di.charttime <= datetime(p.charttime, '+2 months') JOIN d_icd_diagnoses dd ON di.icd_code = dd.icd_code GROUP BY dd.long_title ORDER BY freq DESC LIMIT 3;",
            "gold_answer": "[['aortic valve disorders'], ['atrial fibrillation'], ['osteoarthrosis, unspecified whether generalized or localized, site unspecified'], ['paroxysmal ventricular tachycardia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "pred_answer": "[['paroxysmal ventricular tachycardia', '1.0'], ['tobacco use disorder', '1.0'], ['unspecified essential hypertension', '1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL replaces the ground truth\u2019s DENSE_RANK() approach (which handles ties by returning all diagnoses with rank \u2264 3) with a simple ORDER BY ... LIMIT 3. As a result, when there are ties at the cutoff, the prediction arbitrarily truncates the tied diagnoses, returning exactly three rows instead of all diagnoses sharing the top three frequencies. This is not a mere typo or missing table reference but a fundamental logical change in how the top\u2010N is defined, affecting the overall query semantics.",
            "error_type": "Global: ranking logic error"
        },
        "778ad95705de0b45ec4884a7": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Is the value of heart rate in patient 10021666 from last measurement on the first icu visit greater than its first measurement on the first icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021666 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) > ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021666 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT CASE WHEN (SELECT ce_last.valuenum FROM chartevents ce_last JOIN d_items di_last ON ce_last.itemid = di_last.itemid JOIN icustays ic_last ON ce_last.stay_id = ic_last.stay_id WHERE ic_last.subject_id = 10021666 AND di_last.label = 'heart rate' AND ic_last.intime = ( SELECT MIN(intime) FROM icustays WHERE subject_id = 10021666 ) ORDER BY ce_last.charttime DESC LIMIT 1 ) > (SELECT ce_first.valuenum FROM chartevents ce_first JOIN d_items di_first ON ce_first.itemid = di_first.itemid JOIN icustays ic_first ON ce_first.stay_id = ic_first.stay_id WHERE ic_first.subject_id = 10021666 AND di_first.label = 'heart rate' AND ic_first.intime = ( SELECT MIN(intime) FROM icustays WHERE subject_id = 10021666 ) ORDER BY ce_first.charttime ASC LIMIT 1 ) THEN 'true' ELSE 'false' END AS is_greater;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['false']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic for selecting the patient\u2019s first ICU stay by (1) filtering only on icustays.subject_id instead of following the admission\u2192icustays relationship via hadm_id, and (2) omitting the outtime IS NOT NULL check used to identify completed ICU visits. These errors span multiple clauses (JOINs and WHERE) and alter which ICU stay is considered \u201cfirst,\u201d thus requiring restructuring across the query rather than a local fix.",
            "error_type": "Global"
        },
        "b54149ae47fd749a6b52da4e": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What are the five most frequent lab tests that patients had within the same month after receiving replacement of aortic valve with zooplastic tissue, percutaneous approach?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'replacement of aortic valve with zooplastic tissue, percutaneous approach' ) ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT dl.label FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code JOIN labevents le ON le.hadm_id = p.hadm_id AND datetime(le.charttime, 'start of month') = datetime(p.charttime, 'start of month') AND le.charttime >= p.charttime JOIN d_labitems dl ON le.itemid = dl.itemid WHERE dp.long_title = 'replacement of aortic valve with zooplastic tissue, percutaneous approach' GROUP BY dl.label ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['chloride'], ['creatinine'], ['hematocrit'], ['hemoglobin'], ['magnesium'], ['mch'], ['mchc'], ['mcv'], ['ph'], ['platelet count'], ['pt'], ['rdw'], ['rdw-sd'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "[['creatinine'], ['hematocrit'], ['platelet count'], ['sodium'], ['urea nitrogen']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the core logic of how lab events are associated with the procedure. The ground truth joins on patient (subject_id) and then filters by same month and post\u2010procedure time, allowing labs in any hospitalization that month. The predicted SQL instead joins on hadm_id (restricting to the same admission) and uses a >= condition, altering which lab events are counted. This is not a minor syntax fix but a flawed join strategy that changes the query\u2019s intended meaning.",
            "error_type": "Global: incorrect join relationship"
        },
        "f40498776baac42e6f79ba96": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What is the total hospital cost for patient 10007058's stay?",
            "gold_sql": "SELECT SUM(cost.cost) FROM cost WHERE cost.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 )",
            "pred_sql": "SELECT SUM(c.cost) AS total_cost FROM cost c JOIN admissions a ON c.hadm_id = a.hadm_id WHERE a.subject_id = 10007058  AND a.dischtime IS NULL;",
            "gold_answer": "[['1689.76']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds a filter on admissions.dischtime IS NULL, thus summing only ongoing admissions rather than all admissions for the patient as required. This changes the overall logic and scope of the query, not just a minor syntax or column issue, and requires restructuring the filter logic across clauses. Therefore, it is a global error affecting the intended result.",
            "error_type": "Global: incorrect filter logic"
        },
        "1c7f17a31b72c45986809025": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What is the number of patients who died after diagnosis of hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified within the same month this year?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified' ) ) AS T1 GROUP BY T1.subject_id HAVING MIN(T1.charttime) = T1.charttime AND datetime(T1.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 JOIN ( SELECT patients.subject_id, admissions.hadm_id, patients.dod FROM admissions join patients ON patients.subject_id = admissions.subject_id WHERE patients.dod IS NOT NULL AND datetime(patients.dod,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) as T3 ON T2.subject_id = T3.subject_id AND datetime(T2.charttime,'start of month') = datetime(T3.dod,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) AS num_patients FROM diagnoses_icd d JOIN d_icd_diagnoses diag ON d.icd_code = diag.icd_code JOIN patients p ON d.subject_id = p.subject_id WHERE diag.long_title = 'hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified' AND p.dod IS NOT NULL AND datetime(p.dod) > datetime(d.charttime) -- same month and same year (this year = 2100) AND strftime('%Y-%m', d.charttime) = strftime('%Y-%m', p.dod) AND strftime('%Y', d.charttime) = '2100';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply joins diagnoses_icd to patients and counts rows where the death date is after the diagnosis date in the same YYYY-MM period of 2100. It never enforces that the diagnosis is the patient\u2019s first diagnosis of that code in 2100 (no GROUP BY/HAVING or subquery to take MIN(charttime)). This fundamentally changes the query logic and leads to overcounting. Fixing it would require restructuring the query (adding a subquery or aggregation) rather than a simple tweak in one clause. Therefore, this is a global logic error.",
            "error_type": "Global: incorrect first\u2010diagnosis filtering"
        },
        "d3c9c0d918fe61b341b3952e": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What are the five most frequently administered procedures for patients aged 60 or above in 2100?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND strftime('%Y',procedures_icd.charttime) = '2100' GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT d.long_title FROM admissions a JOIN procedures_icd p ON a.hadm_id = p.hadm_id JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE a.age >= 60 AND strftime('%Y', p.charttime) = '2100' GROUP BY d.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['(aorto)coronary bypass of three coronary arteries'], ['(aorto)coronary bypass of two coronary arteries'], ['arterial catheterization'], ['arteriography of cerebral arteries'], ['arteriography of other intra-abdominal arteries'], ['central venous catheter placement with guidance'], ['closed [endoscopic] biopsy of bronchus'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['coronary arteriography using two catheters'], ['drainage of peritoneal cavity, percutaneous approach'], ['drainage of peritoneal cavity, percutaneous approach, diagnostic'], ['endovascular (total) embolization or occlusion of head and neck vessels'], ['enteral infusion of concentrated nutritional substances'], ['extirpation of matter from left lower lung lobe, via natural or artificial opening endoscopic'], ['extirpation of matter from left main bronchus, via natural or artificial opening endoscopic'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['hemodialysis'], ['insertion of endotracheal tube'], ['insertion of infusion device into right atrium, percutaneous approach'], ['insertion or replacement of external ventricular drain [evd]'], ['introduction of nutritional substance into upper gi, via natural or artificial opening'], ['left heart cardiac catheterization'], ['measurement of cardiac sampling and pressure, right heart, percutaneous approach'], ['percutaneous abdominal drainage'], ['performance of urinary filtration, multiple'], ['respiratory ventilation, greater than 96 consecutive hours'], ['restriction of intracranial artery with intraluminal device, percutaneous approach'], ['single internal mammary-coronary artery bypass'], ['spinal tap'], ['venous catheterization, not elsewhere classified']]",
            "pred_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['single internal mammary-coronary artery bypass']]",
            "is_correct": false,
            "reasoning": "1. The predicted SQL correctly joins admissions, procedures_icd, and d_icd_procedures, and filters on age >= 60 and year = '2100', matching the ground truth's overall structure and filters.\n2. It groups by the procedure name and orders by COUNT(*) DESC, then limits to 5 rows, which superficially returns the five most frequent procedures.\n3. However, the question explicitly asks for the \u201cfive most frequently administered procedures,\u201d and the SQL assumptions state that when ranking is requested, DENSE_RANK() must be used to include ties at the cutoff.\n4. The ground truth uses a DENSE_RANK() subquery to include all procedures tied for the top five counts, potentially returning more than five rows.\n5. By using LIMIT 5 instead of DENSE_RANK(), the predicted SQL fails to include tied procedures at rank 5 and violates the ranking assumption.\n6. This mistake is localized to the ranking clause and can be corrected by replacing the LIMIT clause with the appropriate DENSE_RANK() logic. Therefore, it is a local error.",
            "error_type": "Local: ranking function error"
        },
        "6573046adda4f59cefa283c3": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "On last month/28, when did patient 10027602 the first time have a cerebral ventricular #1 event?",
            "gold_sql": "SELECT outputevents.charttime FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'cerebral ventricular #1' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '28' ORDER BY outputevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT ce.charttime FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10027602 AND di.label = 'cerebral ventricular #1' AND datetime(ce.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', ce.charttime) = '28' ORDER BY ce.charttime LIMIT 1;",
            "gold_answer": "[['2100-11-28 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the chartevents table instead of the outputevents table, fundamentally altering the data source and join logic. The ground truth query correctly restricts to outputevents linked via icustays and admissions, whereas the prediction treats the event as a chart event. This is not a simple column or syntax mistake but changes the overall structure and intended meaning of the query.",
            "error_type": "Global: wrong-table join error"
        },
        "853d5d23a05c7c89f691689c": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Fetch the top three most frequent procedures that are administered to patients within 2 months after they had introduction of nutritional substance into lower gi, via natural or artificial opening since 1 year ago.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'introduction of nutritional substance into lower gi, via natural or artificial opening' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH intro AS ( SELECT p.subject_id, p.hadm_id, p.charttime AS intro_time, p.row_id AS intro_row FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title LIKE '%nutritional substance%' AND d.long_title LIKE '%lower gastrointestinal%' AND datetime(p.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) SELECT d2.long_title FROM intro i JOIN procedures_icd p2 ON p2.subject_id = i.subject_id AND p2.hadm_id = i.hadm_id JOIN d_icd_procedures d2 ON p2.icd_code = d2.icd_code WHERE datetime(p2.charttime) > datetime(i.intro_time) AND datetime(p2.charttime) <= datetime(i.intro_time, '+2 months') AND p2.row_id <> i.intro_row GROUP BY d2.long_title ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['drainage of left pleural cavity, percutaneous approach'], ['drainage of left pleural cavity, percutaneous approach, diagnostic'], ['drainage of pericardial cavity with drainage device, percutaneous approach'], ['insertion of endotracheal airway into trachea, via natural or artificial opening'], ['respiratory ventilation, greater than 96 consecutive hours']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a CTE (intro) for the initial procedure but then restricts subsequent procedures to the same hadm_id (hospital admission). The ground truth logic only matches on subject_id (patient-level) within the 2-month window, allowing procedures across different admissions. By joining on both subject_id and hadm_id, the predicted query erroneously excludes valid procedures that occur in a different admission within the two-month period, fundamentally altering the intended result set. This is not a simple typo or localized mistake\u2014it changes the core logic of how procedures are matched to the introduction event.",
            "error_type": "Global: incorrect join logic"
        },
        "eb5300e70154b4714ca808c6": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Is the urea nitrogen level of patient 10004733 last measured on the current hospital visit less than the second to last measurement on the current hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'urea nitrogen' ) ORDER BY labevents.charttime DESC LIMIT 1 ) < ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'urea nitrogen' ) ORDER BY labevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "SELECT CASE WHEN (SELECT le1.valuenum FROM admissions a JOIN labevents le1 ON a.hadm_id = le1.hadm_id JOIN d_labitems li1 ON le1.itemid = li1.itemid WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND li1.label = 'urea nitrogen' ORDER BY le1.charttime DESC LIMIT 1 ) < (SELECT le2.valuenum FROM admissions a JOIN labevents le2 ON a.hadm_id = le2.hadm_id JOIN d_labitems li2 ON le2.itemid = li2.itemid WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND li2.label = 'urea nitrogen' ORDER BY le2.charttime DESC LIMIT 1 OFFSET 1 ) THEN 'true' ELSE 'false' END AS is_urea_decreasing;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['true']]",
            "is_correct": false,
            "reasoning": "Both the ground truth and predicted SQL fetch the latest and second\u2010latest urea nitrogen measurements for the current hospital visit and compare them. The predicted SQL merely wraps the boolean comparison in a CASE expression to return the literals 'true'/'false' rather than relying on SQLite\u2019s numeric Boolean result (1.0/0.0). This is a localized difference in result formatting within the SELECT clause and does not change the overall query logic or table relationships.",
            "error_type": "Local: other-syntax error"
        },
        "adfdb251aaa88b786719e2e8": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "For patients who had bypass coronary artery, one artery from aorta with autologous venous tissue, open approach, what are the most frequent four microbiology tests carried out within the same hospital visit this year?",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'bypass coronary artery, one artery from aorta with autologous venous tissue, open approach' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT m.test_name FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code JOIN microbiologyevents m ON p.subject_id = m.subject_id AND p.hadm_id = m.hadm_id WHERE d.long_title = 'bypass coronary artery, one artery from aorta with autologous venous tissue, open approach' AND strftime('%Y', m.charttime) = '2100' GROUP BY m.test_name ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['mrsa screen']]",
            "pred_answer": "[['mrsa screen'], ['staph aureus screen'], ['urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins procedures_icd to microbiologyevents only on subject_id and hadm_id and filters tests by year, but it omits (1) restricting the procedure events to this year, and (2) requiring the microbiology tests to occur after the bypass procedure (the T1.charttime < T2.charttime condition). This missing temporal join condition fundamentally changes the logic and returns tests before the procedure and across any time in the year, rather than only those after the procedure. Fixing it would require restructuring the query\u2019s join logic and adding additional filters, not just a minor local tweak.",
            "error_type": "Global: join logic error"
        },
        "0cd28c037e9d62e1fe5d608a": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "List the top five most common diagnoses in 2100.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT d.long_title FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE strftime('%Y', di.charttime) = '2100' GROUP BY d.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['atrial fibrillation'], ['coronary atherosclerosis of native coronary artery'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "pred_answer": "[['atrial fibrillation'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a simple GROUP BY with ORDER BY and LIMIT 5 instead of the DENSE_RANK() approach mandated for a \u201ctop N\u201d query. As a result, it drops entries that tie on the fifth rank (e.g., \u201ccoronary atherosclerosis of native coronary artery\u201d). Fixing this requires restructuring the query to use a ranking window function and then filtering by rank, not just tweaking a WHERE clause or column name. This is therefore a global error affecting the overall logic of retrieving the correct top-N results with ties.",
            "error_type": "Global: incorrect ranking logic"
        },
        "e180639ff26c46761fd5b8f7": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Count the number of times that patient 10008454 took labetalol since 148 days ago.",
            "gold_sql": "SELECT COUNT(*) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10008454 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'labetalol' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime) >= datetime('2100-12-31 23:59:00','-148 day')",
            "pred_sql": "SELECT COUNT(*) AS labetalol_count FROM prescriptions WHERE subject_id = 10008454 AND drug LIKE 'labetalol%' AND datetime(starttime) >= datetime('2100-12-31 23:59:00','-148 days');",
            "gold_answer": "[['6.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the data source and structure: it queries prescriptions rather than inputevents and omits the necessary joins/subqueries through admissions and icustays. This is not a minor syntax fix but a fundamental logical error that alters the intended meaning of the query.",
            "error_type": "Global: wrong table reference"
        },
        "0fb0be9a1f13d846f08f07aa": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Among patients who are diagnosed with presence of coronary angioplasty implant and graft in 2100, what are the top five most common diagnoses that followed within 2 months?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'presence of coronary angioplasty implant and graft' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH index_patients AS ( SELECT di.subject_id, MIN(di.charttime) AS idx_time FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE d.long_title = 'presence of coronary angioplasty implant and graft' AND strftime('%Y', di.charttime) = '2100' GROUP BY di.subject_id ) SELECT d2.long_title FROM index_patients ip JOIN diagnoses_icd di2 ON di2.subject_id = ip.subject_id JOIN d_icd_diagnoses d2 ON di2.icd_code = d2.icd_code WHERE di2.charttime > ip.idx_time AND di2.charttime <= datetime(ip.idx_time, '+2 months') AND d2.long_title != 'presence of coronary angioplasty implant and graft' GROUP BY d2.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['atherosclerotic heart disease of native coronary artery with unspecified angina pectoris'], ['gastro-esophageal reflux disease without esophagitis'], ['hyperlipidemia, unspecified'], ['nicotine dependence, cigarettes, uncomplicated'], ['presence of aortocoronary bypass graft'], ['presence of coronary angioplasty implant and graft']]",
            "pred_answer": "[['atherosclerotic heart disease of native coronary artery with unspecified angina pectoris'], ['gastro-esophageal reflux disease without esophagitis'], ['hyperlipidemia, unspecified'], ['nicotine dependence, cigarettes, uncomplicated'], ['presence of aortocoronary bypass graft']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a WHERE clause filtering out the index diagnosis ('presence of coronary angioplasty implant and graft'), whereas the ground truth includes it if it reoccurs within the 2-month window. This mistake is confined to a specific filter condition and does not alter the overall structure of joins, grouping, or ranking logic. Hence, it is a localized error in the WHERE clause.",
            "error_type": "Local: filter condition error"
        }
    }
}