{
    "codes": {
        "307575c4925d593d6397e60f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What time was patient 10005817 prescribed a drug for the first time this month?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005817 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM patients INNER JOIN prescriptions ON patients.subject_id = prescriptions.subject_id WHERE patients.subject_id = 10005817 AND strftime('%Y-%m', prescriptions.starttime) = '2100-12'",
            "gold_answer": "[['2100-12-20 02:00:00']]",
            "pred_answer": "[['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 02:00:00'], ['2100-12-20 03:00:00'], ['2100-12-20 05:00:00'], ['2100-12-20 10:00:00'], ['2100-12-20 11:00:00'], ['2100-12-20 11:00:00'], ['2100-12-20 12:00:00'], ['2100-12-20 17:00:00'], ['2100-12-20 18:00:00'], ['2100-12-20 20:00:00'], ['2100-12-21 08:00:00'], ['2100-12-21 08:00:00'], ['2100-12-21 12:00:00'], ['2100-12-21 13:00:00'], ['2100-12-21 14:00:00'], ['2100-12-21 22:00:00'], ['2100-12-22 08:00:00'], ['2100-12-22 12:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 08:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 09:00:00'], ['2100-12-23 15:00:00'], ['2100-12-23 15:00:00'], ['2100-12-23 15:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 19:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 20:00:00'], ['2100-12-23 22:00:00'], ['2100-12-24 03:00:00'], ['2100-12-24 08:00:00'], ['2100-12-24 08:00:00'], ['2100-12-24 09:00:00'], ['2100-12-24 09:00:00'], ['2100-12-24 10:00:00'], ['2100-12-24 11:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 13:00:00'], ['2100-12-24 14:00:00'], ['2100-12-24 17:00:00'], ['2100-12-24 17:00:00'], ['2100-12-24 22:00:00'], ['2100-12-25 08:00:00'], ['2100-12-25 08:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-25 19:00:00'], ['2100-12-26 03:00:00'], ['2100-12-26 03:00:00'], ['2100-12-26 04:00:00'], ['2100-12-26 05:00:00'], ['2100-12-26 07:00:00'], ['2100-12-26 08:00:00'], ['2100-12-26 10:00:00'], ['2100-12-26 13:00:00'], ['2100-12-26 14:00:00'], ['2100-12-26 16:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters prescriptions for patient 10005817 in the current month but fails to order the results and apply a LIMIT 1 to return only the first prescription time. This is a mistake localized to the SELECT/ORDER BY/LIMIT clauses and does not alter the overall query structure or relationships.",
            "error_type": "Local: missing-order-limit error"
        },
        "0f28b6cf83f9be5d009b09d5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the name of the medication that patient 10031757 received two or more times in 10/this year?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, COUNT( prescriptions.starttime ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10031757 ) AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',prescriptions.starttime) = '10' GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT drug FROM prescriptions WHERE subject_id = 10031757 AND strftime('%Y', starttime) = '2100' GROUP BY drug HAVING count(drug) > 2",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['bisacodyl'], ['ciprofloxacin iv'], ['docusate sodium'], ['iso-osmotic dextrose'], ['lactated ringers'], ['lidocaine viscous 2%'], ['methylnaltrexone'], ['metronidazole (flagyl)'], ['ns'], ['ondansetron'], ['pantoprazole'], ['phosphorus'], ['phytonadione'], ['potassium chloride replacement (critical care and oncology)'], ['sodium chloride 0.9%'], ['sodium chloride 0.9%  flush'], ['trazodone'], ['vancomycin'], ['vial']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['ciprofloxacin iv'], ['docusate sodium'], ['iso-osmotic dextrose'], ['lactated ringers'], ['methylnaltrexone'], ['metronidazole (flagyl)'], ['ns'], ['ondansetron'], ['pantoprazole'], ['potassium chloride (powder)'], ['potassium chloride replacement (critical care and oncology)'], ['sodium chloride 0.9%'], ['sodium chloride 0.9%  flush'], ['trazodone'], ['vancomycin'], ['vial']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to restrict to October (\u201810\u2019) of the current year and misuses the HAVING clause (count > 2 instead of >= 2). These mistakes are confined to the WHERE and HAVING clauses (temporal and threshold filters) and do not require restructuring the JOINs or overall logic.",
            "error_type": "Local: other-temporal error"
        },
        "e9e653553f144c1fef6b60f3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate the number of patients diagnosed with anoxic brain damage, not elsewhere classified since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'anoxic brain damage, not elsewhere classified' ) AND strftime('%Y',diagnoses_icd.charttime) >= '2100' )",
            "pred_sql": "SELECT count(DISTINCT patients.subject_id) FROM patients INNER JOIN diagnoses_icd ON patients.subject_id = diagnoses_icd.subject_id INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE d_icd_diagnoses.long_title = 'anoxic brain damage'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the time filter (`strftime('%Y', diagnoses_icd.charttime) >= '2100'`) and also uses the wrong diagnosis title ('anoxic brain damage' instead of 'anoxic brain damage, not elsewhere classified'). These mistakes are confined to the WHERE clause\u2019s filter conditions and do not require restructuring the joins or overall query logic.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "8caac1d5d3cc5f0858bd498e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Today, what was the first value of sao2 for patient 10039831?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT DISTINCT sao2 FROM chartevents WHERE subject_id = 10039831 ORDER BY charttime ASC LIMIT 1",
            "gold_answer": "[['97.0']]",
            "pred_answer": "no such column: sao2",
            "is_correct": false,
            "reasoning": "The predicted query references a non-existent column \u201csao2\u201d instead of using the correct schema (e.g., chartevents.valuenum after filtering by d_items.label). This is a simple column reference error that does not alter the overall structure or logic of the query\u2014it\u2019s a misspelled/incorrect column name. Therefore it is a local error.",
            "error_type": "Local: column reference error"
        },
        "74ba8675c1d922c076631bf5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many times was sertraline medication prescribed since 1 year ago?",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.drug = 'sertraline' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(row_id) FROM prescriptions WHERE drug = 'sertraline' AND starttime > datetime('2100 12 31 23:59:00','start of day',' -1 year')",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the assumptions in the WHERE clause by misformatting the datetime literal ('2100 12 31 23:59:00' instead of '2100-12-31 23:59:00') and by injecting an unnecessary 'start of day' modifier, as well as using '>' instead of '>='. These are localized mistakes in the value parsing and comparison condition that do not alter the overall query structure or table relationships.",
            "error_type": "Local: value parsing error"
        },
        "99494ba824b48fe796158434": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Pull up the ids of patients who were diagnosed with other specified forms of hearing loss in 2100.",
            "gold_sql": "SELECT DISTINCT admissions.subject_id FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other specified forms of hearing loss' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' )",
            "pred_sql": "SELECT patients.subject_id FROM patients INNER JOIN diagnoses_icd ON patients.subject_id = diagnoses_icd.subject_id WHERE diagnoses_icd.icd_code IN ( '428.9', '429.3', '430.1', '430.3', '431.0', '431.1', '431.9', '432.0', '432.1', '432.2', '432.3', '432.9', '433.0', '433.1', '433.9', '434.0', '434.1', '434.9', '435.0', '435.1', '435.9', '436.0', '436.1', '436.9', '437.2', '437.3', '437.4', '437.5', '437.6', '437.7",
            "gold_answer": "[['10021666.0']]",
            "pred_answer": "unrecognized token: \"'437.7\"",
            "is_correct": false,
            "reasoning": "The failure occurs at parse time due to a malformed IN\u2010list literal (\u2018437.7 without a closing quote or parenthesis). This is a localized syntax/value parsing mistake in the WHERE clause and does not require restructuring the overall query logic or its joins.",
            "error_type": "Local: value parsing error"
        },
        "3d479874efc2d645f83e1a5e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Give me the details of the time patient 10005348 was admitted to the hospital.",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10005348",
            "pred_sql": "SELECT admissions.admittime, admissions.dischtime FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE icustays.subject_id = 10005348",
            "gold_answer": "[['2100-09-12 08:30:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query introduces an unnecessary INNER JOIN with the icustays table, altering the intended logic. By joining on admissions.hadm_id = icustays.hadm_id and then filtering on icustays.subject_id, the query only returns admissions associated with ICU stays. Since the patient may have no ICU stay, this join filters out the desired record. The error affects the overall structure and logic of the query, not just a single clause, and thus is a global issue.",
            "error_type": "Global: incorrect-join error"
        },
        "d27c81e4091ff4f9207c8f58": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10020187 have an input for the last time on 02/01/this year?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020187 ) ) AND datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',inputevents.starttime) = '02-01' ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT inputevents.starttime FROM inputevents INNER JOIN patients ON inputevents.subject_id = patients.subject_id WHERE patients.subject_id = 10020187 AND strftime('%Y %m %d', inputevents.starttime) = '2100 02 01'",
            "gold_answer": "[['2100-02-01 20:39:00']]",
            "pred_answer": "[['2100-02-01 05:15:00'], ['2100-02-01 14:16:00'], ['2100-02-01 16:26:00'], ['2100-02-01 17:09:00'], ['2100-02-01 19:45:00'], ['2100-02-01 19:45:00'], ['2100-02-01 19:45:00'], ['2100-02-01 20:39:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict inputevents to the patient\u2019s ICU stay (it joins directly to patients instead of filtering by icustays via hadm_id and stay_id) and omits the ORDER BY ... DESC LIMIT 1 needed to return only the last event. This is not a simple typo or isolated condition error but a fundamental flaw in table relationships and overall query structure, altering the intended logic of the query.",
            "error_type": "Global: join and structural logic error"
        },
        "582d2a38c554375225e177ea": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What are the ways to consume sodium chloride?",
            "gold_sql": "SELECT DISTINCT prescriptions.route FROM prescriptions WHERE prescriptions.drug = 'sodium chloride'",
            "pred_sql": "SELECT d_items.label FROM d_items INNER JOIN inputevents ON d_items.itemid = inputevents.itemid WHERE inputevents.value = 'sodium chloride'",
            "gold_answer": "[['po']]",
            "pred_answer": "no such column: inputevents.value",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column inputevents.value. The inputevents table does not have a 'value' column, so this is a simple column reference error. This mistake is localized and does not alter the overall query logic or require restructuring across multiple clauses.",
            "error_type": "Local: column reference error"
        },
        "cb3541829199c9ac184de2b0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Find out how many patients were admitted to the hospital.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions",
            "pred_sql": "SELECT count(DISTINCT hadm_id) FROM admissions WHERE dischtime IS NULL",
            "gold_answer": "[['94.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the query\u2019s intent: it counts distinct admissions (hadm_id) rather than distinct patients (subject_id) and additionally filters only current admissions (dischtime IS NULL). This alters the overall logic and structure, producing a result that does not answer the original question. Fixing this requires changing multiple clauses (SELECT and WHERE), not just a minor typo or syntax issue.",
            "error_type": "Global: wrong aggregation and filtering"
        },
        "99050f13d8de8d68cab8de5a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was patient 10004733's last sao2 on 12/24/this year?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',chartevents.charttime) = '12-24' ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN admissions ON chartevents.hadm_id = admissions.hadm_id WHERE admissions.subject_id = 10004733 AND strftime('%Y-%m-%d', chartevents.charttime) = '2012-12-24' AND chartevents.itemid = 5080",
            "gold_answer": "[['96.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s core mistake is a bad date filter: it hard-codes strftime('%Y-%m-%d',chartevents.charttime) = '2012-12-24' rather than matching month-day against \u2018\u201812-24\u2019\u2019 in the current \u2018\u20182100\u2019\u2019 year as per the assumptions. This is a localized value\u2010parsing/temporal error within the WHERE clause; the rest of the query structure remains intact. Therefore this is a Local error.",
            "error_type": "Local: other-temporal error"
        }
    },
    "deepseek": {
        "64e7a22499d22f3fb08f2d4b": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many days have passed since patient 10018081's last fentanyl (concentrate) input on the current icu visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', inputevents.starttime) ) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'fentanyl (concentrate)' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%j', '2100-12-31 23:59:00') - strftime('%j', inputevents.charttime)) + 1 AS days_since_last_fentanyl FROM inputevents INNER JOIN d_items ON inputevents.itemid = d_items.itemid INNER JOIN icustays ON inputevents.stay_id = icustays.stay_id WHERE inputevents.subject_id = 10018081 AND d_items.label = 'fentanyl (concentrate)' AND icustays.outtime IS NULL ORDER BY inputevents.charttime DESC LIMIT 1;",
            "gold_answer": "[['0.65']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references inputevents.charttime, but the inputevents table uses starttime rather than charttime. This is a simple column reference mistake that does not alter the overall query logic or require restructuring of joins or clauses.",
            "error_type": "Local: table/column reference error"
        },
        "0db05ea2dbcb66080ccf5a6a": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Could you tell me the minimum total cost in a hospital where clopidogrel is used in 2100?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'clopidogrel' ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "WITH clopidogrel_cost AS ( SELECT p.hadm_id, SUM(c.cost) AS total_cost FROM prescriptions p JOIN cost c ON p.hadm_id = c.hadm_id AND p.row_id = c.event_id AND c.event_type = 'prescriptions' WHERE p.drug = 'clopidogrel' AND datetime(p.starttime) >= datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY p.hadm_id ) SELECT MIN(total_cost) AS minimum_total_cost FROM clopidogrel_cost;",
            "gold_answer": "[['318.21']]",
            "pred_answer": "[['12.3']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended logic by joining only prescription-related cost entries (using c.event_type = 'prescriptions' and linking p.row_id to c.event_id) rather than summing all costs for each admission as in the ground truth. Additionally, it filters on prescription start times instead of cost charge times and fails to group all event types. These alterations span multiple clauses and require restructuring the query\u2019s overall approach, not just a localized fix.",
            "error_type": "Global: incorrect join and filter logic"
        }
    },
    "macsql": {
        "3759f2f609764584996dba4e": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Show me patient 10037861's monthly average wbc this year.",
            "gold_sql": "SELECT AVG(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10037861 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'wbc' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY strftime('%Y-%m',labevents.charttime)",
            "pred_sql": "SELECT strftime('%Y-%m', `charttime`) AS `month`, AVG(`valuenum`) AS `average_wbc` FROM `labevents` WHERE `subject_id` = 10037861 AND `itemid` = ( SELECT `itemid` FROM `d_labitems` WHERE `label` = 'wbc' ) AND datetime(`charttime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY strftime('%Y-%m', `charttime`) ORDER BY `month`",
            "gold_answer": "[['23.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on labevents.subject_id directly, rather than restricting labevents to this patient\u2019s hospital admissions via hadm_id as in the ground truth. This omission of the join through admissions fundamentally changes the data scope and yields no results. Fixing it requires altering the join logic between tables, not just tweaking a single clause, so it\u2019s a global error.",
            "error_type": "Global: join-key error"
        }
    },
    "llm2sql": {
        "8aaee78b4bbe7adab81c6104": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the five most commonly taken specimens that patients are tested within 2 months after receiving a other endovascular procedures on other vessels procedure?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other endovascular procedures on other vessels' ) ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT me.spec_type_desc, COUNT(*) AS count FROM procedures_icd AS pi JOIN microbiologyevents AS me ON pi.subject_id = me.subject_id AND pi.hadm_id = me.hadm_id WHERE pi.icd_code = (SELECT icd_code FROM d_icd_procedures WHERE long_title = 'endovascular procedures on other vessels') AND datetime(me.charttime) <= datetime(pi.charttime, '+2 months') GROUP BY me.spec_type_desc ORDER BY count DESC LIMIT 5;",
            "gold_answer": "[['stool'], ['urine']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by joining microbiologyevents and procedures_icd on both subject_id and hadm_id (restricting to the same hospital stay) and only applying an upper time bound, whereas the ground truth joins on subject_id across stays with both lower and upper time bounds. It also uses the wrong procedure title filter. These are structural and semantic join errors that alter the intended meaning, not just a minor syntax fix.",
            "error_type": "Global: incorrect join logic"
        },
        "60582cd00e26838f77d59095": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "How much did patient 10031757 weigh the first time it was measured since 3 months ago?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10031757 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-3 month') ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT valuenum  n  FROM chartevents  n  WHERE subject_id = 10031757  n  AND itemid = (SELECT itemid FROM d_items WHERE label = 'daily weight')  n  AND datetime(charttime) >= datetime('2100-12-31 23:59:00','start of day','-365/4 day')  n  ORDER BY charttime ASC  n  LIMIT 1;",
            "gold_answer": "[['109.6']]",
            "pred_answer": "near \"n\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces invalid aliasing with stray 'n' tokens (e.g., \"SELECT valuenum  n  FROM chartevents  n\" and \"WHERE subject_id = 10031757  n\"), causing a syntax error (near \"n\"). This is a localized mistake in the query syntax rather than a flaw in the overall logic or table relationships.",
            "error_type": "Local: syntax error"
        },
        "36e6e4b77fb6bc869d511e26": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When did patient 10027602 enter the hospital the last time via emergency room?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.admission_location = 'emergency room' AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT admittime FROM admissions WHERE subject_id = 10027602 AND admission_type = 'emergency' ORDER BY admittime DESC LIMIT 1",
            "gold_answer": "[['2100-11-23 12:05:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only filters on admission_type = 'emergency' instead of admission_location = 'emergency room' and also omits the requirement dischtime IS NOT NULL. Both mistakes occur in the WHERE clause and do not alter the overall query structure or table relationships, making this a localized filter/column reference error.",
            "error_type": "Local: column reference error"
        }
    },
    "o4-mini": {
        "b54149ae47fd749a6b52da4e": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What are the five most frequent lab tests that patients had within the same month after receiving replacement of aortic valve with zooplastic tissue, percutaneous approach?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'replacement of aortic valve with zooplastic tissue, percutaneous approach' ) ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT dl.label FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code JOIN labevents le ON le.hadm_id = p.hadm_id AND datetime(le.charttime, 'start of month') = datetime(p.charttime, 'start of month') AND le.charttime >= p.charttime JOIN d_labitems dl ON le.itemid = dl.itemid WHERE dp.long_title = 'replacement of aortic valve with zooplastic tissue, percutaneous approach' GROUP BY dl.label ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['chloride'], ['creatinine'], ['hematocrit'], ['hemoglobin'], ['magnesium'], ['mch'], ['mchc'], ['mcv'], ['ph'], ['platelet count'], ['pt'], ['rdw'], ['rdw-sd'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "[['creatinine'], ['hematocrit'], ['platelet count'], ['sodium'], ['urea nitrogen']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the core logic of how lab events are associated with the procedure. The ground truth joins on patient (subject_id) and then filters by same month and post\u2010procedure time, allowing labs in any hospitalization that month. The predicted SQL instead joins on hadm_id (restricting to the same admission) and uses a >= condition, altering which lab events are counted. This is not a minor syntax fix but a flawed join strategy that changes the query\u2019s intended meaning.",
            "error_type": "Global: incorrect join relationship"
        }
    },
    "dinsql": {
        "c3d9bc95379f78cc11a90cb2": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the percentile of 2.0 in the rbc lab test with the same age as patient 10022041 on the last hospital visit?",
            "gold_sql": "SELECT DISTINCT T1.C1 FROM ( SELECT labevents.valuenum, PERCENT_RANK() OVER ( ORDER BY labevents.valuenum ) AS C1 FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'rbc' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age = ( SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10022041 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) ) AS T1 WHERE T1.valuenum = 2.0",
            "pred_sql": "WITH patient_age AS ( SELECT age FROM admissions WHERE subject_id = 10022041 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ), age_group AS ( SELECT labevents.valuenum FROM labevents INNER JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE admissions.age = (SELECT age FROM patient_age) AND labevents.itemid = (SELECT itemid FROM d_labitems WHERE label = 'rbc') AND admissions.dischtime IS NOT NULL AND admissions.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = admissions.subject_id ORDER BY admittime DESC LIMIT 1 ) ) SELECT ROUND((SUM(CASE WHEN valuenum <= 2.0 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 2) AS percentile FROM age_group",
            "gold_answer": "[['0.5']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic of the query. It restricts labevents to each patient\u2019s most recent admission rather than all admissions of patients of the same age, replaces the PERCENT_RANK() window function with a manual percentage calculation, and misuses a correlated subquery on admissions to filter hadm_id. These errors span multiple clauses (WITH clauses, JOIN, WHERE, and SELECT) and change the intended semantics, requiring major restructuring to correct.",
            "error_type": "Global"
        },
        "d5efaa4569dc08743e756b5c": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the name of the medication that patient 10021312 was prescribed within 2 days after having received a drainage of left main bronchus, via natural or artificial opening endoscopic, diagnostic procedure on the last hospital visit?",
            "gold_sql": "SELECT T2.drug FROM ( SELECT procedures_icd.subject_id, procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'drainage of left main bronchus, via natural or artificial opening endoscopic, diagnostic' ) ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 day')",
            "pred_sql": "SELECT DISTINCT P.drug FROM procedures_icd AS PI JOIN d_icd_procedures AS DIP ON PI.icd_code = DIP.icd_code JOIN admissions AS A ON PI.hadm_id = A.hadm_id JOIN prescriptions AS P ON A.hadm_id = P.hadm_id WHERE PI.subject_id = 10021312 AND DIP.long_title = 'drainage of left main bronchus, via natural or artificial opening endoscopic, diagnostic' AND A.dischtime IS NOT NULL AND A.admittime = ( SELECT MAX(admittime) FROM admissions WHERE subject_id = 10021312 AND dischtime IS NOT NULL ) AND datetime(P.starttime) BETWEEN datetime(PI.charttime, '+0 days') AND datetime(PI.charttime, '+2 days')",
            "gold_answer": "[['0.9% sodium chloride'], ['calcium carbonate'], ['famotidine'], ['iso-osmotic sodium chloride'], ['vancomycin']]",
            "pred_answer": "[['0.9% sodium chloride'], ['albuterol 0.083% neb soln'], ['calcium carbonate'], ['famotidine'], ['iso-osmotic sodium chloride'], ['pantoprazole'], ['vancomycin'], ['vial']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is structurally similar to the ground truth (same tables, joins, and overall flow). The only semantic mismatch is in the WHERE time condition: it uses\n  \n    datetime(P.starttime) BETWEEN datetime(PI.charttime, '+0 days') AND datetime(PI.charttime, '+2 days')\n  \ninstead of the ground\u2010truth\u2019s stricter `T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 day')`. This difference in the temporal predicate lets through extra drugs (e.g., albuterol, pantoprazole) and can be fixed by tightening the time condition without altering the rest of the query\u2019s logic or joins. Hence it\u2019s a localized (WHERE\u2010clause) error in the time filter.",
            "error_type": "Local: other-temporal error"
        }
    }
}