{
    "codes": {
        "9960a0660d0754d95bd4252a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the number of times patient 10017492 had a lab test for sodium since 03/2100.",
            "gold_sql": "SELECT COUNT(*) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'sodium' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10017492 ) AND strftime('%Y-%m',labevents.charttime) >= '2100-03'",
            "pred_sql": "SELECT count(row_id) FROM labevents WHERE subject_id = 10017492 AND itemid = 50992 AND strftime('%Y-%m-%d', charttime) >= '2021-03-01'",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistakes are confined to the WHERE clause: it hard-codes an incorrect itemid instead of looking up d_labitems to find sodium, uses the wrong date literal (\u20182021-03-01\u2019 instead of filtering from \u20182100-03\u2019), and doesn\u2019t follow the assumption to filter by hadm_id via admissions. These are localized errors in value references and literals, not a fundamental join or structural logic issue.",
            "error_type": "Local: value parsing error"
        },
        "dfd3a721ed94f94f630146ab": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the top four most commonly prescribed medications within 2 months to patients in their 20s who are diagnosed with other postprocedural status this year.",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other postprocedural status' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.age BETWEEN 20 AND 29 AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT prescriptions.drug_name FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE strftime('%Y', admissions.admittime) = '2100' AND strftime('%Y', prescriptions.starttime) = '2100' AND strftime('%Y', patients.dob) = '2100' AND strftime('%Y', admissions.admittime) - strftime('%Y', patients.dob) >= 20 AND prescriptions.drug_name IS NOT NULL GROUP BY prescriptions.drug_name ORDER BY count(prescriptions.drug_name) DESC LIMIT 4",
            "gold_answer": "[['5% dextrose'], ['acetaminophen'], ['acetaminophen iv'], ['albumin 5% (25g / 500ml)'], ['aspirin ec'], ['bisacodyl'], ['docusate'], ['furosemide'], ['iso-osmotic dextrose'], ['ketorolac'], ['metoclopramide'], ['metoprolol tartrate'], ['milk of magnesia'], ['polyethylene glycol'], ['senna'], ['sodium chloride 0.9%  flush'], ['syringe']]",
            "pred_answer": "no such column: prescriptions.drug_name",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column `prescriptions.drug_name`, causing a column reference error. This is a localized issue that can be fixed by using the correct column name (`prescriptions.drug`), without needing to alter the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "e0f2d92c8357f7bb2d8b8b14": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the lab test that was last given to patient 10002495 since 06/2100?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT labevents.itemid FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10002495 ) AND strftime('%Y-%m',labevents.charttime) >= '2100-06' ORDER BY labevents.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT microbiologyevents.test_name FROM labevents INNER JOIN microbiologyevents ON labevents.row_id = microbiologyevents.row_id WHERE labevents.subject_id = 10002495 AND labevents.charttime >= '2100-06-01'",
            "gold_answer": "[['ptt']]",
            "pred_answer": "[['mrsa screen'], ['mrsa screen']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally alters the intended logic by joining labevents with microbiologyevents and retrieving microbiology test names instead of lab test labels from d_labitems. It also uses subject_id directly on labevents rather than filtering by hadm_id from admissions, and never orders or limits to get the last event. These mistakes span multiple clauses and change the overall structure and meaning of the query.",
            "error_type": "Global: incorrect join/structure error"
        },
        "5bf8a233011a0df867e45191": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was patient 10018501's last alcohol detoxification treatment on the last hospital visit?",
            "gold_sql": "SELECT procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'alcohol detoxification' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018501 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.charttime FROM chartevents INNER JOIN admissions ON chartevents.hadm_id = admissions.hadm_id INNER JOIN transfers ON admissions.hadm_id = transfers.hadm_id WHERE transfers.subject_id = 10018501 ORDER BY chartevents.charttime DESC LIMIT 1",
            "gold_answer": "[['2100-08-11 00:00:00']]",
            "pred_answer": "[['2100-08-11 22:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses chartevents and transfers tables instead of procedures_icd and admissions to filter for the 'alcohol detoxification' procedure on the last hospital visit. It omits the necessary filter on d_icd_procedures.long_title and uses the wrong table for charttime, fundamentally altering the query logic and the intended result. This error spans multiple clauses (FROM, JOIN, WHERE) and changes the overall meaning of the query, not just a minor syntax issue.",
            "error_type": "Global: incorrect table and join logic"
        },
        "4d7de56400e4472e829c0dc3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the name of the output patient 10015931 had first on this month/09?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT outputevents.itemid FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015931 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') AND strftime('%d',outputevents.charttime) = '09' ORDER BY outputevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT outputevents.value FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN outputevents ON admissions.hadm_id = outputevents.hadm_id WHERE patients.subject_id = 10015931 AND strftime('%Y-%m',outputevents.charttime) = '2100-09' ORDER BY outputevents.charttime ASC LIMIT 1",
            "gold_answer": "[['foley']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails to retrieve the item name (d_items.label), instead selecting outputevents.value without joining d_items. It also uses the wrong date filter (strftime('%Y-%m',\u2026) = '2100-09' instead of filtering to day '09' of the current month) and joins on admissions.hadm_id rather than restricting to the ICU stay_id. These mistakes alter the overall logic and structure, not just a minor clause fix. Therefore, this is a global error in the query\u2019s join and filtering strategy.",
            "error_type": "Global: incorrect join and filter logic"
        },
        "65fa1808fd08f18ce3e040db": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since 11 months ago, what was patient 10038081's last weight measurement?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038081 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-11 month') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.valuenum FROM chartevents INNER JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.subject_id = 10038081 AND d_items.label = 'daily weight' AND strftime('%Y-%m', chartevents.charttime) > strftime('%Y-%m', 'now', '-11 month') AND strftime('%Y-%m', chartevents.charttime) < strftime('%Y-%m', 'now', '-10 month') ORDER BY chartevents.charttime DESC LIMIT 1",
            "gold_answer": "[['67.3']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main issue is in the WHERE clause time filtering: it uses strftime('%Y-%m', ..., 'now') instead of the fixed '2100-12-31 23:59:00' reference and restricts results to the narrow window between 11 and 10 months ago, rather than all measurements since 11 months ago. This is a localized error in the date condition and does not require restructuring the entire query.",
            "error_type": "Local: other-temporal error"
        },
        "c8d18210963921f83f907052": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What are the top three most commonly diagnosed conditions since 1 year ago?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d_icd_diagnoses.long_title FROM diagnoses_icd INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE diagnoses_icd.charttime >= datetime('now','1 year') GROUP BY d_icd_diagnoses.long_title ORDER BY count(d_icd_diagnoses.long_title) DESC LIMIT 3",
            "gold_answer": "[['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['unspecified essential hypertension']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main fault is in the WHERE clause\u2019s date filter: it uses datetime('now','1 year') instead of the required fixed timestamp '2100-12-31 23:59:00' and a '-1 year' offset. This violates the temporal assumptions but does not change the overall query structure or join logic. Hence, it\u2019s a localized temporal value error.",
            "error_type": "Local: other-temporal error"
        },
        "ba881a6d357833bd3eadf250": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the total volume of input for patient 10035185 on 05/19/this year.",
            "gold_sql": "SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10035185 ) ) AND datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',inputevents.starttime) = '05-19'",
            "pred_sql": "SELECT sum(amount) FROM inputevents WHERE subject_id = 10035185 AND amount IS NOT NULL AND itemid IN (211, 220739, 225312) AND strftime('%Y-%m-%d', starttime) = '2100-05-19'",
            "gold_answer": "[['2300.0']]",
            "pred_answer": "no such column: amount",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'amount' instead of the correct column 'totalamount'. This is a simple column reference mistake that does not alter the overall query logic or require restructuring of joins or subqueries. It can be fixed locally by using the correct column name.",
            "error_type": "Local: table/column reference error"
        },
        "3739c9896a4cfc33d62e978b": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the last occasion that patient 10021118 underwent a excision of right saphenous vein, percutaneous endoscopic approach procedure on the first hospital visit?",
            "gold_sql": "SELECT procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of right saphenous vein, percutaneous endoscopic approach' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021118 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT procedures_icd.charttime FROM procedures_icd INNER JOIN icustays ON procedures_icd.hadm_id = icustays.hadm_id WHERE procedures_icd.subject_id = 10021118 AND procedures_icd.icd_code = '41614100' ORDER BY icustays.intime LIMIT 1",
            "gold_answer": "[['2100-12-04 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally deviates from the intended logic. It joins procedures_icd to icustays instead of filtering procedures by the patient\u2019s first hospital admission via the admissions table. It also hardcodes the ICD code rather than looking it up and never restricts to the first visit. Moreover, it orders by icustays.intime instead of the procedure charttime and returns the earliest ICU stay rather than the last procedure on the first hospital visit. These issues span multiple clauses and disrupt the overall query structure, not just a single clause.",
            "error_type": "Global: incorrect join and missing first visit filter"
        },
        "13bd5a624734c3047cfd61ff": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the first sao2 value that patient 10039831 had today?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT chartevents.value FROM chartevents INNER JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.hadm_id = 10039831 AND d_items.label = 'SaO2' AND chartevents.charttime LIKE '2100-03-01%' ORDER BY chartevents.charttime LIMIT 1",
            "gold_answer": "[['97.0']]",
            "pred_answer": "no such column: chartevents.value",
            "is_correct": false,
            "reasoning": "The predicted query fails due to a column reference error: it selects chartevents.value, but the correct column name is valuenum. This is a localized mistake in the SELECT clause and does not require reworking the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        }
    },
    "qwen3": {
        "716f26323d1b05f7e996515f": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Since 1 year ago, what are the top three most common drugs prescribed within the same hospital visit to male patients aged 60 or above after being diagnosed with fall resulting in striking against other object?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'fall resulting in striking against other object' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.subject_id IN ( SELECT patients.subject_id FROM patients WHERE patients.gender = 'm' ) AND admissions.age >= 60 AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT p.drug, COUNT(DISTINCT p.hadm_id) AS count FROM prescriptions p WHERE p.hadm_id IN ( SELECT a.hadm_id FROM admissions a JOIN patients pt ON a.subject_id = pt.subject_id JOIN diagnoses_icd d ON a.hadm_id = d.hadm_id JOIN d_icd_diagnoses dd ON d.icd_code = dd.icd_code WHERE pt.gender = 'm' AND a.age >= 60 AND datetime(a.admittime) >= datetime('2100-12-31 23:59:00', '-1 year') AND dd.long_title = 'fall resulting in striking against other object' ) GROUP BY p.drug ORDER BY count DESC LIMIT 3;",
            "gold_answer": "[['0.9% sodium chloride (mini bag plus)'], ['acetaminophen'], ['albuterol 0.083% neb soln'], ['aspirin'], ['atorvastatin'], ['docusate sodium'], ['heparin'], ['ibuprofen'], ['ipratropium bromide neb'], ['iso-osmotic dextrose'], ['ketorolac'], ['lamotrigine'], ['metoprolol tartrate'], ['prednisone'], ['senna'], ['sodium chloride 0.9%  flush'], ['tiotropium bromide'], ['tramadol (ultram)'], ['vancomycin']]",
            "pred_answer": "[['tiotropium bromide', '1.0'], ['tramadol (ultram)', '1.0'], ['vancomycin', '1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL does not enforce the required temporal relationship between diagnosis and prescription (i.e., prescriptions must occur after the fall diagnosis) and misplaces the one\u2013year\u2013ago filter on the admission time rather than on the diagnosis and prescription times. These omissions alter the overall logic of the query\u2014prescriptions before the diagnosis or outside the one\u2013year window might be included\u2014so it is not a local fix but a structural flaw requiring reworking of multiple clauses.",
            "error_type": "Global: incorrect temporal and logical structure"
        },
        "2b873eff97b30a727b9feead": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "When was the measurement of patient 10018081's sao2 taken first on the current icu visit?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(chartevents.charttime) AS first_sao2_time FROM chartevents JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.subject_id = 10018081 AND chartevents.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) AND d_items.label = 'o2 saturation pulseoxymetry' AND chartevents.charttime >= ( SELECT intime FROM icustays WHERE hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) AND outtime IS NULL ORDER BY intime DESC LIMIT 1 ) AND chartevents.charttime <= COALESCE( (SELECT outtime FROM icustays WHERE hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) AND outtime IS NULL ORDER BY intime DESC LIMIT 1), '2100-12-31 23:59:00' );",
            "gold_answer": "[['2100-12-26 16:59:00']]",
            "pred_answer": "[['2100-12-26 17:15:00']]",
            "is_correct": false,
            "reasoning": "The ground truth query restricts events by the ICU stay_id to capture any sao2 measurement tagged to the current ICU visit\u2014even if a measurement timestamp slightly precedes the recorded ICU intime. The predicted query instead filters chartevents by charttime >= icustays.intime (and by hadm_id/subject_id) rather than matching the stay_id directly. As a result, it omits the earliest measurement that occurred just before the recorded ICU intime (16:59:00) and returns the next one (17:15:00). This error is not a minor typo or a simple WHERE\u2010clause fix\u2014it stems from a flawed logical approach to associating chartevents with the ICU stay, requiring restructuring the join/filter strategy. Hence it is a global error.",
            "error_type": "Global: incorrect join/filter logic"
        }
    },
    "llm2sql": {
        "5be413e7ffdc64923bbd30db": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "How many patients had other open umbilical herniorrhaphy two or more times?",
            "gold_sql": "SELECT COUNT(T1.subject_id) FROM ( SELECT admissions.subject_id, COUNT(*) AS C1 FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other open umbilical herniorrhaphy' ) GROUP BY admissions.subject_id ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT COUNT(DISTINCT pi.subject_id) FROM procedures_icd pi JOIN d_icd_procedures dp ON pi.icd_code = dp.icd_code WHERE dp.long_title = 'other open umbilical herniorrhaphy' GROUP BY pi.subject_id HAVING COUNT(pi.row_id) >= 2;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s overall structure (filtering by the procedure name, grouping by subject_id, and requiring at least two occurrences) matches the intent of the ground truth. However, it misuses COUNT(DISTINCT pi.subject_id) together with GROUP BY pi.subject_id and HAVING. This aggregation mistake is isolated to the SELECT/GROUP BY clause and can be fixed by removing the unnecessary DISTINCT (or by restructuring only the aggregation), without altering the join logic or the rest of the query. Hence, this is a localized aggregation error.",
            "error_type": "Local: grouping/aggregation error"
        }
    },
    "dinsql": {
        "5c223874f1f67b99707467b0": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Find out the daily minimum chest tube #1 output volume of patient 10004720 since 4 months ago.",
            "gold_sql": "SELECT MIN(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'chest tube #1' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime) >= datetime('2100-12-31 23:59:00','-4 month') GROUP BY strftime('%Y-%m-%d',outputevents.charttime)",
            "pred_sql": "SELECT DATE(outputevents.charttime) AS date, MIN(outputevents.value) AS min_output_volume FROM outputevents JOIN d_items ON outputevents.itemid = d_items.itemid WHERE outputevents.subject_id = 10004720 AND d_items.label = 'chest tube",
            "gold_answer": "[['10.0'], ['100.0'], ['130.0'], ['5.0'], ['90.0']]",
            "pred_answer": "unrecognized token: \"'chest tube\"",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error in the WHERE clause \u2013 the string literal for d_items.label is opened with a single quote but never closed (\u2018chest tube), leading to an unrecognized token. This is a localized value\u2010parsing/syntax mistake and does not alter the overall query structure or intended logic.",
            "error_type": "Local: value parsing error"
        },
        "b898ef45d110fe35ad724d35": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Tell me the percentile of anion gap 27.0 among all patients of the same age as patient 10004235 on the last hospital visit?",
            "gold_sql": "SELECT DISTINCT T1.C1 FROM ( SELECT labevents.valuenum, PERCENT_RANK() OVER ( ORDER BY labevents.valuenum ) AS C1 FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'anion gap' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age = ( SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10004235 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) ) AS T1 WHERE T1.valuenum = 27.0",
            "pred_sql": "WITH PatientAge AS ( SELECT age FROM admissions WHERE subject_id = 10004235 ORDER BY admittime DESC LIMIT 1 ), AnionGapValues AS ( SELECT DISTINCT valuenum FROM labevents WHERE itemid = (SELECT itemid FROM d_labitems WHERE label = 'anion gap') AND hadm_id IN ( SELECT hadm_id FROM admissions WHERE age = (SELECT age FROM PatientAge) AND dischtime IS NOT NULL ) ), RankedAnionGap AS ( SELECT valuenum, PERCENT_RANK() OVER (ORDER BY valuenum) AS percentile FROM AnionGapValues ) SELECT percentile FROM RankedAnionGap WHERE valuenum = 27.0",
            "gold_answer": "[['0.935']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s overall structure (CTEs, joins, filters) matches the intent of the ground truth, but it locally alters the distribution used for the percentile calculation by taking DISTINCT valuenum before applying PERCENT_RANK. This single\u2010clause change (in the AnionGapValues CTE) breaks the percentile lookup without requiring any restructuring of joins or query flow.",
            "error_type": "Local: incorrect use of DISTINCT before window function"
        }
    },
    "deepseek": {
        "b66afc66d827b1620f9d2dd4": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What label is associated with stool?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.abbreviation = 'stool'",
            "pred_sql": "SELECT label FROM d_labitems WHERE label = 'stool';",
            "gold_answer": "[['stool']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL is querying the wrong table (d_labitems) instead of d_items. The error lies in a local table reference\u2014using the incorrect table for the label \u201cstool.\u201d This does not require a restructuring of the overall query logic, only fixing the table reference.",
            "error_type": "Local: table reference error"
        },
        "b758cf0b3355a13805e92899": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Among patients who received other laparoscopic umbilical herniorrhaphy , what are the top four most frequent medications that followed within the same month?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other laparoscopic umbilical herniorrhaphy' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T1.charttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH filtered_procedures AS ( SELECT p.subject_id, p.hadm_id, p.charttime AS procedure_time FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dp.long_title = 'other laparoscopic umbilical herniorrhaphy' ) SELECT drug, COUNT(*) AS frequency FROM prescriptions WHERE EXISTS ( SELECT 1 FROM filtered_procedures fp WHERE prescriptions.hadm_id = fp.hadm_id AND datetime(prescriptions.starttime, 'start of month') = datetime(fp.procedure_time, 'start of month') ) GROUP;",
            "gold_answer": "[['bisacodyl']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL ends with \"GROUP;\" which is syntactically incorrect (missing GROUP BY clause and aggregation context). This is a localized syntax mistake in the GROUP clause and does not affect the overall logical structure or intended joins of the query.",
            "error_type": "Local: syntax error"
        }
    },
    "macsql": {
        "9178ec6bc500e893fbbc867a": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Retrieve the top three most common input events this year.",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT inputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM inputevents WHERE datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY inputevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT `itemid` FROM ( SELECT `itemid`, COUNT(*) AS `event_count` FROM inputevents WHERE strftime('%Y', `starttime`) = '2100' GROUP BY `itemid` ORDER BY `event_count` DESC LIMIT 3 )",
            "gold_answer": "[['dextrose 5%'], ['nacl 0.9%'], ['solution']]",
            "pred_answer": "[['220949.0'], ['225158.0'], ['225943.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters and ranks inputevents by count for the year but only selects the itemid codes rather than the required item labels. It also fails to join d_items to map itemid to its label. This is a localized mistake in the SELECT clause (and missing join), not a fundamental change to the overall query structure or logic.",
            "error_type": "Local: missing join/incorrect column selection"
        },
        "03873cbe3210703b0f9698fa": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What does a pt lab test typically cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'labevents' AND cost.event_id IN ( SELECT labevents.row_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'pt' ) )",
            "pred_sql": "SELECT AVG(`cost`) AS typical_lab_test_cost FROM cost WHERE `event_type` = 'labevents'",
            "gold_answer": "[['8.95']]",
            "pred_answer": "[['9.651']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to filter costs for the specific 'pt' lab test and instead averages costs over all lab events. The mistake is isolated to the WHERE clause (missing the condition linking to d_labitems.label = 'pt' and the corresponding event_id filter) and does not require restructuring of the query\u2019s joins or overall logic.",
            "error_type": "Local: missing filter in WHERE clause"
        }
    },
    "o4-mini": {
        "b6cc190e8d03c0d1b4cb1b11": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What label corresponds to chloride (serum) (soft)?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.abbreviation = 'chloride (serum) (soft)'",
            "pred_sql": "SELECT label FROM d_labitems WHERE label = 'chloride (serum) (soft)';",
            "gold_answer": "[['chloride (serum) (soft)']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong table (d_labitems) instead of d_items and applies a WHERE on label rather than abbreviation. This is a specific table reference/column mismatch that can be fixed locally without altering the overall structure of the query.",
            "error_type": "Local: table reference error"
        }
    }
}