{
    "deepseek": {
        "3714936c91049926ca46bd85": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients died after having been diagnosed with delirium due to conditions classified elsewhere within the same month since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'delirium due to conditions classified elsewhere' ) ) AS T1 GROUP BY T1.subject_id HAVING MIN(T1.charttime) = T1.charttime AND datetime(T1.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 JOIN ( SELECT patients.subject_id, admissions.hadm_id, patients.dod FROM admissions join patients ON patients.subject_id = admissions.subject_id WHERE patients.dod IS NOT NULL AND datetime(patients.dod) >= datetime('2100-12-31 23:59:00','-1 year') ) as T3 ON T2.subject_id = T3.subject_id AND datetime(T2.charttime,'start of month') = datetime(T3.dod,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) AS num_patients FROM patients p JOIN diagnoses_icd d ON p.subject_id = d.subject_id JOIN d_icd_diagnoses icd ON d.icd_code = icd.icd_code WHERE icd.long_title = 'Delirium due to known physiological condition' AND datetime(p.dod) >= datetime('2100-12-31 23:59:00', '-1 year') AND STRFTIME('%Y-%;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "unrecognized token: \"'%Y-%;\"",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with an \u201cunrecognized token\u201d error at the partially written STRFTIME format string (\u2018'%Y-%;\u2019). This is a localized syntax/value\u2010parsing issue in the WHERE clause (an improperly terminated string literal), not a deeper misjoin or structural logic flaw. It can be fixed by correctly completing the STRFTIME expression without changing the overall query structure.",
            "error_type": "Local: syntax error"
        },
        "3d5ccd254ab413fb1f98cd8a": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you get me the top five most conducted lab tests?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT D.label, COUNT(L.itemid) AS test_count FROM labevents L JOIN d_labitems D ON L.itemid = D.itemid GROUP BY D.label ORDER BY test_count DESC LIMIT 5;",
            "gold_answer": "[['chloride'], ['creatinine'], ['hematocrit'], ['sodium'], ['urea nitrogen']]",
            "pred_answer": "[['chloride', '1004.0'], ['glucose', '1113.0'], ['hematocrit', '982.0'], ['hemoglobin', '985.0'], ['sodium', '1001.0']]",
            "is_correct": false,
            "reasoning": "The predicted query correctly joins labevents to d_labitems and groups by label, but it uses COUNT and LIMIT to pick the top five and also returns the test_count column. According to the instructions, when asking for the top N items we should use DENSE_RANK() and return only the item names (no counts). This is a mistake localized to the SELECT and ordering logic\u2014it does not require restructuring joins or filters. Therefore, it\u2019s a local error with the ranking/selection clause.",
            "error_type": "Local: top-N ranking error"
        },
        "b39ceb0832a83e0203b3da54": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can i know the 3-month survival rate of patients diagnosed with unspecified disorder of kidney and ureter who were then prescribed sodium bicarbonate?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T3.charttime) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified disorder of kidney and ureter' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'sodium bicarbonate' GROUP BY admissions.subject_id HAVING MAX(prescriptions.starttime) = prescriptions.starttime ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime ) AS T3 JOIN patients ON T3.subject_id = patients.subject_id )",
            "pred_sql": "WITH diagnosed_patients AS ( SELECT DISTINCT p.subject_id, p.dod, datetime(d_i.charttime, '3 months') AS three_month_cutoff FROM patients p JOIN diagnoses_icd d ON p.subject_id = d.subject_id JOIN d_icd_diagnoses d_i ON d.icd_code = d_i.icd_code WHERE d_i.long_title = 'Unspecified disorder of kidney and ureter' ), prescribed_patients AS ( SELECT DISTINCT dp.subject_id, dp.dod, dp.three_month_cutoff FROM diagnosed_patients dp JOIN prescriptions pr ON dp.subject_id = pr.subject_id WHERE pr.drug = 'sodium bicarbonate' ) SELECT ( COUNT(CASE WHEN (prescribed_patients.dod IS NULL OR prescribed_patients.dod > prescribed_pat;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with a syntax error near the end of the query (an incomplete CASE expression and missing closing parentheses), indicating a localized syntax issue. This does not reflect a fundamental problem with the overall query logic or structure but rather an isolated mistake in the SELECT clause that can be corrected without reworking the entire query.",
            "error_type": "Local: syntax error"
        },
        "df464401c02feccf6383ef33": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients were treated with reopening of recent thoracotomy site two or more times in 2100?",
            "gold_sql": "SELECT COUNT(T1.subject_id) FROM ( SELECT admissions.subject_id, COUNT(*) AS C1 FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'reopening of recent thoracotomy site' ) AND strftime('%Y',procedures_icd.charttime) = '2100' GROUP BY admissions.subject_id ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "found in the output",
            "gold_answer": "[['0.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted query failed to return any result (null) and did not execute correctly, which strongly indicates a localized issue such as a missing or incorrect column/table reference or alias rather than a fundamental restructuring of the query logic. The overall purpose\u2014counting patients with two or more specific procedures in 2100\u2014is intact, but a small reference or syntax mistake prevents execution. Such an error is local to one clause and can be fixed without altering the query\u2019s high-level structure.",
            "error_type": "Local: table/column reference error"
        },
        "60ccf86e473e44100cc743fb": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many days have gone by since the first instance patient 10018081 had a nacl 0.9% input on the current icu visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', inputevents.starttime) ) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'nacl 0.9%' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime ASC LIMIT 1",
            "pred_sql": "WITH current_icu_stay AS ( SELECT stay_id, intime, outtime FROM icustays WHERE subject_id = 10018081 AND outtime IS NULL ), first_nacl_input AS ( SELECT MIN(ie.starttime) AS first_input_time FROM inputevents ie JOIN d_items di ON ie.itemid = di.itemid WHERE ie.subject_id = 10018081 AND di.label = 'NaCl 0.9%' AND ie.stay_id = (SELECT stay_id FROM current_icu_stay) ) SELECT (strftime('%J', '2100-12-31 23:59:00');",
            "gold_answer": "[['5.286']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is syntactically incomplete: the SELECT clause ends abruptly with an unclosed strftime call and missing subtraction, parenthesis, and FROM clause. This is a localized syntax error that does not affect the overall query structure or logic and can be fixed by completing the SELECT expression.",
            "error_type": "Local: syntax error"
        }
    },
    "codes": {
        "425379e15087e80b17ca3cdd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the first time that patient 10002495 had dextrose 5% input on the first icu visit?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10002495 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'dextrose 5%' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime ASC LIMIT 1",
            "pred_sql": "SELECT inputevents.starttime FROM inputevents INNER JOIN icustays ON inputevents.stay_id = icustays.stay_id WHERE inputevents.subject_id = 10002495 AND inputevents.itemid IN (221906, 226559) LIMIT 1",
            "gold_answer": "[['2100-06-01 20:18:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the key requirement of restricting to the patient\u2019s first ICU visit. Instead of identifying the first icustay via ordering and then filtering inputevents for that specific stay, it simply joins icustays without filtering by the earliest intime or ensuring outtime IS NOT NULL. It also fails to map the 'dextrose 5%' label to itemid via d_items. These omissions fundamentally alter the query\u2019s logic and intended result, not just a minor syntax or reference fix.",
            "error_type": "Global: incorrect join/filter logic"
        },
        "27a8bcc8f3251601e02a710f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Can you give me patient 10029291's daily maximum respiratory rate on the first icu visit?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN icustays ON chartevents.stay_id = icustays.stay_id WHERE chartevents.itemid IN (618, 220179) AND icustays.subject_id = 10029291",
            "gold_answer": "[['24.0'], ['25.0'], ['27.0'], ['28.0'], ['28.0'], ['36.0'], ['36.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the subquery logic that isolates the first ICU stay for patient 10029291 (filtering on icustays.outtime IS NOT NULL, ordering by intime, and LIMIT 1), and it also fails to group results by day. Instead, it simply joins chartevents and icustays for all ICU stays of the patient and returns a single maximum value (which even yields None). This error affects the overall structure and intent of the query\u2014the core logic of isolating the first ICU visit and grouping by date is lost\u2014requiring a top\u2010to\u2010bottom rewrite rather than a minor local fix.",
            "error_type": "Global: incorrect join and filtering logic"
        },
        "a367fc08b430041367378cb8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours have there been since patient 10021487's admission to the hospital?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', admissions.admittime) ) FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL",
            "pred_sql": "SELECT strftime('%J', cost.chargetime) - strftime('%J', admissions.admittime) AS days_since_admission FROM admissions INNER JOIN cost ON admissions.hadm_id = cost.hadm_id WHERE admissions.subject_id = 10021487",
            "gold_answer": "[['599.6']]",
            "pred_answer": "[['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.037'], ['0.046'], ['0.046'], ['0.046'], ['0.046'], ['0.046'], ['0.067'], ['0.067'], ['0.067'], ['0.067'], ['0.116'], ['0.151'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.199'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274'], ['0.274']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic: instead of using the fixed current time ('2100-12-31 23:59:00') and multiplying the day difference by 24, it joins the cost table on hadm_id, uses cost.chargetime as the reference time, and fails to filter for the current admission (dischtime IS NULL). This is not a minor syntax or column reference error but a flaw in the overall query structure and data relationships, causing multiple rows of incorrect day differences rather than a single correct hour count.",
            "error_type": "Global: join logic error"
        },
        "cecffd7d3bfbe9c0797c0931": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since patient 10018081's admission to the icu, how many hours have passed?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', icustays.intime) ) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL",
            "pred_sql": "SELECT strftime('%J', icustays.outtime) - strftime('%J', icustays.intime) AS days FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10018081",
            "gold_answer": "[['126.817']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly uses icustays.outtime instead of the fixed current timestamp ('2100-12-31 23:59:00') and fails to filter for ongoing ICU stays (outtime IS NULL). This is a mistake in a specific part of the SELECT/WHERE logic (wrong column reference/time calculation) and can be fixed locally without restructuring the entire query.",
            "error_type": "Local: value parsing/column reference error"
        },
        "5e679fe0b5ad3749c2fca7de": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "In 2100, when was the last time that patient 10022017 was diagnosed with ulcerative colitis, unspecified, without complications?",
            "gold_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'ulcerative colitis, unspecified, without complications' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022017 ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd INNER JOIN patients ON diagnoses_icd.subject_id = patients.subject_id WHERE patients.subject_id = 10022017 AND diagnoses_icd.icd_code = 'icd9|416' GROUP BY diagnoses_icd.charttime ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "gold_answer": "[['2100-10-02 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth in its WHERE clause: it hardcodes an incorrect ICD code ('icd9|416') instead of looking up the code by long_title, omits the year filter (strftime('%Y') = '2100'), and unnecessarily groups by charttime. These mistakes do not require reworking the overall JOIN and ordering structure of the query\u2014only corrections within the WHERE clause are needed. Thus, this is a localized filter error.",
            "error_type": "Local: where-clause filter error"
        },
        "8b816a044e4c20979a4ae614": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients stayed on vascular this year?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT transfers.hadm_id FROM transfers WHERE transfers.careunit = 'vascular' AND datetime(transfers.intime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') )",
            "pred_sql": "SELECT count(icustays.subject_id) FROM chartevents INNER JOIN icustays ON chartevents.stay_id = icustays.stay_id WHERE strftime('%Y', chartevents.charttime) = '2100' AND d_items.label = 'vascular'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "no such column: d_items.label",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it references d_items.label without including the d_items table in the FROM/JOIN clause. This is a column reference error\u2014 a localized mistake that does not affect the overall query logic but simply a missing table/column reference.",
            "error_type": "Local: table/column reference error"
        },
        "43f5aab1de5e20213e7c6581": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Show me the total amount of patient 10020187's output on 02/04/2100.",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020187 ) ) AND strftime('%Y-%m-%d',outputevents.charttime) = '2100-02-04'",
            "pred_sql": "SELECT sum(outputevents.value) FROM outputevents INNER JOIN patients ON outputevents.subject_id = patients.subject_id WHERE patients.subject_id = 10020187 AND strftime('%Y-%m-%d', outputevents.charttime) = '2010-02-04'",
            "gold_answer": "[['2275.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main mistake is using the wrong date literal: it filters on '2010-02-04' instead of '2100-02-04'. This is a localized error in the WHERE clause value, not a structural or multi-clause logical issue.",
            "error_type": "Local: other-temporal error"
        }
    },
    "o4-mini": {
        "26815672560ddf25199789f4": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Is patient 10039831's chloride last measured on the current hospital visit less than the first value measured on the current hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'chloride' ) ORDER BY labevents.charttime DESC LIMIT 1 ) < ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'chloride' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "WITH current_hadm AS ( SELECT hadm_id FROM admissions WHERE subject_id = 10039831 AND dischtime IS NULL ), chloride_vals AS ( SELECT le.valuenum AS val, le.charttime FROM labevents le JOIN d_labitems di ON le.itemid = di.itemid WHERE le.subject_id = 10039831 AND le.hadm_id = (SELECT hadm_id FROM current_hadm) AND di.label = 'chloride' ) SELECT CASE WHEN (SELECT val FROM chloride_vals ORDER BY charttime DESC LIMIT 1) < (SELECT val FROM chloride_vals ORDER BY charttime ASC LIMIT 1) THEN 'true' ELSE 'false' END AS is_last_less_than_first;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['false']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s overall logic\u2014filtering to the current hospital admission and comparing the last versus first chloride values\u2014is correct. The only discrepancy is in the SELECT clause\u2019s output: the ground truth returns a numeric boolean (0.0 for false), while the prediction uses a CASE to return the string 'false'. This is a minor formatting/output mismatch that does not affect the query\u2019s structure or data relationships.",
            "error_type": "Local: output-format error"
        },
        "673fa4c98a81600659a804ce": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What was the last time that patient 10029291 had the maximum value of po2?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'po2' ) ORDER BY labevents.valuenum DESC, labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(c.charttime) AS last_time FROM chartevents c JOIN d_items d ON c.itemid = d.itemid WHERE c.subject_id = 10029291 AND d.label = 'po2' AND c.valuenum = ( SELECT MAX(c2.valuenum) FROM chartevents c2 JOIN d_items d2 ON c2.itemid = d2.itemid WHERE c2.subject_id = 10029291 AND d2.label = 'po2' );",
            "gold_answer": "[['2100-02-25 04:52:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls data from chartevents joined with d_items, treating 'po2' as a vital sign, whereas 'po2' is a lab test stored in labevents joined with d_labitems. This is a fundamental mis\u2010use of tables and joins, violating the schema assumptions and requiring restructuring the entire query rather than a simple fix in a single clause.",
            "error_type": "Global: incorrect table usage"
        },
        "896e06dbfb4e1c4a502cc43b": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "When did patient 10018845 have the maximum value of heart rate for the first time?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018845 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.valuenum DESC, chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT ce.charttime FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10018845 AND di.label = 'heart rate' AND ce.valuenum = ( SELECT MAX(ce2.valuenum) FROM chartevents ce2 JOIN d_items di2 ON ce2.itemid = di2.itemid WHERE ce2.subject_id = 10018845 AND di2.label = 'heart rate' ) ORDER BY ce.charttime ASC LIMIT 1;\"",
            "gold_answer": "[['2100-10-30 13:00:00']]",
            "pred_answer": "You can only execute one statement at a time.",
            "is_correct": false,
            "reasoning": "The predicted SQL uses ce.subject_id directly to filter heart rate events, but it fails to restrict to the patient\u2019s ICU stays via admissions and icustays (as required by the schema relationships). This omission changes the query\u2019s logic by including irrelevant chartevents outside the intended ICU context. Fixing this requires restructuring the JOINs and subqueries to enforce the proper hadm_id and stay_id filtering, not just a minor tweak in one clause.",
            "error_type": "Global: incorrect join relationship"
        }
    },
    "qwen3": {
        "2a4f10719bce5c9db91062c4": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many instances of implantation or replacement of carotid sinus stimulation device, total system procedures have been completed in 2100?",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'implantation or replacement of carotid sinus stimulation device, total system' ) AND strftime('%Y',procedures_icd.charttime) = '2100'",
            "pred_sql": "SELECT COUNT(*) AS total_procedures FROM procedures_icd JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Implantation or replacement of carotid sinus stimulation device, total system procedures' AND strftime('%Y', procedures_icd.charttime) = '2100';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in the WHERE clause string literal for d_icd_procedures.long_title. It incorrectly adds \u201cprocedures\u201d at the end and capitalizes the first letter, so no rows match. This is a minor, localized error in the literal value, not in the overall query structure or joins.",
            "error_type": "Local: value parsing error"
        },
        "02c1ac678276183eba4aa39d": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Count the number of times that patient 10019172 had calcium gluconate on last month/20.",
            "gold_sql": "SELECT COUNT(*) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019172 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'calcium gluconate' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',inputevents.starttime) = '20'",
            "pred_sql": "SELECT COUNT(*) FROM prescriptions WHERE subject_id = 10019172 AND drug = 'calcium gluconate' AND datetime(starttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', starttime) = '20';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and the drug column to count occurrences of 'calcium gluconate', whereas the question and assumptions specify using the inputevents table (with d_items mapping) for input-related events. This misuse of the core table alters the entire logic of the query, not just a localized condition or syntax. Fixing it requires rewriting the query to use inputevents and join to d_items, which is a structural change affecting the overall intended meaning.",
            "error_type": "Global: incorrect table selection"
        }
    },
    "macsql": {
        "dcb2ec58e559aa7ae7427d09": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What is patient 10004422's difference in weight from the last value measured on the last hospital visit compared to the second to last value measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "WITH LastTwoWeights AS ( SELECT valuenum FROM chartevents WHERE subject_id = 10004422 AND hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10004422 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ) AND itemid = ( SELECT itemid FROM d_items WHERE label = 'daily weight' ) ORDER BY charttime DESC LIMIT 2 ) SELECT MAX(valuenum) - MIN(valuenum) AS weight_difference FROM LastTwoWeights",
            "gold_answer": "[['-2.3']]",
            "pred_answer": "[['2.3']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s mistake is confined to how it computes the difference between the two retrieved weight values \u2013 it uses MAX(valuenum)\u2013MIN(valuenum) rather than subtracting the most recent measurement minus the second-most-recent measurement. This aggregation-localizes the error to the SELECT logic (it also loses the sign of the change). The overall query structure (CTE, filters) remains intact, so this is a Local error.",
            "error_type": "Local: incorrect aggregation"
        }
    },
    "llm2sql": {
        "8556595488ba5fe19d017b8a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Tell me the sum of the input volume for patient 10022017 since 293 days ago.",
            "gold_sql": "SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022017 ) ) AND datetime(inputevents.starttime) >= datetime('2100-12-31 23:59:00','-293 day')",
            "pred_sql": "SELECT SUM(amount) AS total_input_volume FROM inputevents WHERE subject_id = 10022017 AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-293 days')",
            "gold_answer": "[['24007.0']]",
            "pred_answer": "no such column: amount",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column `amount` instead of the correct `totalamount`. This is a localized mistake in the SELECT clause and does not alter the overall logic or structure of the query. It can be fixed by using the correct column name without restructuring the query.",
            "error_type": "Local: table/column reference error"
        },
        "58a8d29f9daf5d4d12948ba5": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When was the last time patient 10012853 was diagnosed with dizziness and giddiness in 2100?",
            "gold_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'dizziness and giddiness' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10012853 ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(diagnoses_icd.charttime) AS last_diagnosis_time FROM diagnoses_icd JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE diagnoses_icd.subject_id = 10012853 AND d_icd_diagnoses.long_title = 'Dizziness and giddiness' AND strftime('%Y', diagnoses_icd.charttime) = '2100'",
            "gold_answer": "[['2100-04-23 15:36:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses the literal 'Dizziness and giddiness' (capitalized) instead of the exact 'dizziness and giddiness' used in the diagnosis table, causing the filter to return no rows. This is a localized mismatch in the WHERE clause string literal and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        }
    }
}