{
    "codes": {
        "95ce217cd523e13855314bb7": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many prescriptions for miconazole 2% cream were made since 1 year ago?",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.drug = 'miconazole 2% cream' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM prescriptions INNER JOIN ( SELECT prescriptions.subject_id FROM prescriptions WHERE drug = 'miconazole 2% cream' AND strftime('%Y', prescriptions.starttime) < strftime('%Y', 'now', '-1 year') GROUP BY prescriptions.subject_id ) AS t2 ON prescriptions.subject_id = t2.subject_id",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL restructures the query entirely by using an INNER JOIN over a subquery of subject_ids instead of directly filtering prescriptions by starttime. It also misuses strftime('%Y') comparisons with 'now' rather than the fixed current time and the \u2018-1 year\u2019 offset. This is not a simple typo or missing table reference but a fundamental change in how filtering is applied (using a join with an incorrect subquery) and will require rewriting the query logic, so it is a global error.",
            "error_type": "Global: incorrect join logic"
        },
        "2ed3308d7d44c7cbc0bd7cdd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of patients that were admitted to the hospital since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE strftime('%Y',admissions.admittime) >= '2100'",
            "pred_sql": "SELECT count(subject_id) FROM admissions WHERE admittime >= '2100-01-01 00:00:00'",
            "gold_answer": "[['94.0']]",
            "pred_answer": "[['119.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters by admittime >= '2100-01-01' (equivalent to since 2100), but it uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id). This only overcounts multiple admissions per patient rather than the number of unique patients. This is a localized error in the SELECT clause (missing DISTINCT) and does not require restructuring the query or altering table relationships.",
            "error_type": "Local: missing DISTINCT"
        },
        "13192d30de747f4a5bf8cd8a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the name of the input that patient 10022017 had first had?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT inputevents.itemid FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022017 ) ) ORDER BY inputevents.starttime ASC LIMIT 1 )",
            "pred_sql": "SELECT d_items.label FROM d_items INNER JOIN inputevents ON d_items.itemid = inputevents.itemid WHERE inputevents.subject_id = 10022017 ORDER BY inputevents.charttime ASC LIMIT 1",
            "gold_answer": "[['or crystalloid intake']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted query fails because it orders by inputevents.charttime, but the inputevents table has no charttime column (it uses starttime). This is a simple column reference mistake within the ORDER BY clause and does not alter the overall query structure or logic. Thus, it is a local error.",
            "error_type": "Local: table/column reference error"
        },
        "df8e1a5f567fb53013c261d0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the first time on 04/03/this year that patient 10020740 had his/her sao2 checked?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',chartevents.charttime) = '04-03' ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT charttime FROM chartevents WHERE subject_id = '10020740' AND itemid = 220277 AND strftime('%Y-%m-%d', charttime) = '2100-04-03'",
            "gold_answer": "[['2100-04-03 00:00:00']]",
            "pred_answer": "[['2100-04-03 00:00:00'], ['2100-04-03 01:00:00'], ['2100-04-03 02:00:00'], ['2100-04-03 03:00:00'], ['2100-04-03 04:00:00'], ['2100-04-03 05:00:00'], ['2100-04-03 06:00:00'], ['2100-04-03 07:00:00'], ['2100-04-03 08:00:00'], ['2100-04-03 09:00:00'], ['2100-04-03 10:00:00'], ['2100-04-03 11:00:00'], ['2100-04-03 12:00:00'], ['2100-04-03 13:00:00'], ['2100-04-03 14:00:00'], ['2100-04-03 15:00:00'], ['2100-04-03 16:00:00'], ['2100-04-03 17:00:00'], ['2100-04-03 18:00:00'], ['2100-04-03 19:00:00'], ['2100-04-03 20:00:00'], ['2100-04-03 21:00:00'], ['2100-04-03 22:00:00'], ['2100-04-03 23:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL drops the required joins through admissions and ICU stays and omits the lookup of the correct itemid via d_items. Instead it filters only on subject_id and a hard-coded itemid, returning all that patient's sao2 measurements on that date rather than the first ICU measurement. This mistake alters the overall logic and table relationships, not just a localized clause.",
            "error_type": "Global: join error"
        },
        "c6babda1b9f713c49ac00e19": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What drug was patient 10004720 prescribed with after the diagnosis of palmar fascial fibromatosis [dupuytren] within 2 days in 12/2100?",
            "gold_sql": "SELECT T2.drug FROM ( SELECT diagnoses_icd.subject_id, diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'palmar fascial fibromatosis [dupuytren]' ) AND strftime('%Y-%m',diagnoses_icd.charttime) = '2100-12' ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND strftime('%Y-%m',prescriptions.starttime) = '2100-12' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 day')",
            "pred_sql": "SELECT prescriptions.drug FROM diagnoses_icd INNER JOIN patients ON diagnoses_icd.subject_id = patients.subject_id INNER JOIN prescriptions ON prescriptions.subject_id = patients.subject_id WHERE patients.subject_id = 10004720 AND diagnoses_icd.icd_code = 'icd9|41401' AND diagnoses_icd.charttime BETWEEN datetime('2100-12-20') AND datetime('2100-12-22')",
            "gold_answer": "[['0.9% sodium chloride'], ['0.9% sodium chloride'], ['0.9% sodium chloride (mini bag plus)'], ['0.9% sodium chloride (mini bag plus)'], ['0.9% sodium chloride (mini bag plus)'], ['5% dextrose'], ['5% dextrose'], ['artificial tear ointment'], ['bag'], ['bag'], ['bag'], ['chlorhexidine gluconate 0.12% oral rinse'], ['citalopram'], ['divalproex (delayed release)'], ['famotidine'], ['glucagon'], ['glucose gel'], ['insulin'], ['iso-osmotic dextrose'], ['iso-osmotic dextrose'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['lactated ringers'], ['midazolam'], ['sodium chloride 0.9%  flush'], ['soln'], ['vancomycin'], ['vancomycin']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic by: 1) Using a hard-coded incorrect ICD code ('icd9|41401') instead of looking up the code for 'palmar fascial fibromatosis [dupuytren]' in d_icd_diagnoses; 2) Failing to join prescriptions via hadm_id or restrict prescriptions to December 2100; 3) Omitting the 2-day window clause entirely and only filtering diagnoses dates; 4) Joining tables solely on subject_id, ignoring admissions context. These changes affect the overall structure and semantics of the query, not just a single clause.",
            "error_type": "Global: join-logic mismatch error"
        },
        "94d58c7def5ec9cdcb2b0467": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the icu visits of patient 10007058 since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT icustays.stay_id ) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 ) AND strftime('%Y',icustays.intime) >= '2100'",
            "pred_sql": "SELECT count(icustays.hadm_id) FROM icustays INNER JOIN transfers ON icustays.hadm_id = transfers.hadm_id WHERE transfers.subject_id = 10007058 AND transfers.intime BETWEEN '2100-01-01 00:00:00' AND '2100-12-31 23:59:59'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by joining icustays to transfers rather than filtering icustays by patient admissions. It also counts hadm_id instead of distinct stay_id and applies the time filter to transfers.intime rather than icustays.intime. These changes span multiple clauses (FROM, JOIN, WHERE) and require a structural rewrite to use the correct tables and filters, so this is not a localized syntax mistake but a flaw in the overall query construction.",
            "error_type": "Global: join error"
        },
        "a60f116185911329f2b6d8b8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the change in creatinine in patient 10007058 last measured on the last hospital visit compared to the first value measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatinine' ) ORDER BY labevents.charttime DESC LIMIT 1 ) - ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatinine' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT CASE WHEN max(chartevents.valuenum) - min(chartevents.valuenum) IS NULL THEN 0 ELSE max(chartevents.valuenum) - min(chartevents.valuenum) END FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE patients.subject_id = 10007058 AND chartevents.itemid = 50961 AND patients.dod IS NOT NULL AND chartevents.valuenum IS NOT NULL",
            "gold_answer": "[['0.1']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic by using the chartevents table instead of labevents for lab values, failing to restrict to the patient\u2019s last hospital admission, and adding an irrelevant death filter (patients.dod IS NOT NULL). These issues span multiple clauses (FROM, JOIN, WHERE, aggregation) and alter the intended meaning of comparing first and last creatinine lab measurements within the last admission, making the error structural and global.",
            "error_type": "Global: join error"
        }
    },
    "dinsql": {
        "682f46170aeaf404bdf67198": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Tell me the difference between the total input and output of patient 10005909 on last month/11.",
            "gold_sql": "SELECT ( SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',inputevents.starttime) = '11' ) - ( SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '11' )",
            "pred_sql": "SELECT (SELECT COALESCE(SUM(totalamount), 0) FROM inputevents WHERE subject_id = 10005909 AND datetime(starttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-1 year') AND strftime('%m', starttime) = '11') - (SELECT COALESCE(SUM(value), 0) FROM outputevents WHERE subject_id = 10005909 AND datetime(charttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-1 year') AND strftime('%m', charttime) = '11') AS input_output_difference",
            "gold_answer": "[['3210.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic in multiple ways: it filters by subject_id instead of joining through admissions \u2192 icustays \u2192 stay_id, and it uses a year-based time window (start of year minus one year and month = '11') rather than the required last month on day 11. These changes span multiple clauses (FROM/WHERE logic and temporal filters) and change the meaning of the query, so the error is not a localized syntax issue but a global logical error.",
            "error_type": "Global: incorrect join and temporal filter"
        },
        "c61af8d575e4a0c46efaad99": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the duration in days of patient 10018845's last stay in the icu?",
            "gold_sql": "SELECT strftime('%J', icustays.outtime)-strftime('%J', icustays.intime) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018845 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1",
            "pred_sql": "SELECT ROUND((JULIANDAY(outtime) - JULIANDAY(intime)), 0) AS duration_days FROM icustays WHERE subject_id = 10018845 ORDER BY intime DESC LIMIT 1",
            "gold_answer": "[['1.491']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a direct subject_id filter on icustays and omits the required `outtime IS NOT NULL` predicate, and then rounds the day difference to zero decimal places. The overall query structure\u2014computing ICU stay duration, ordering by intime and limiting to the last stay\u2014remains the same as the ground truth. The mistakes are confined to the WHERE clause (missing outtime filter) and the SELECT expression (unnecessary rounding), which are localized issues and do not alter the fundamental logic or join strategy of the query.",
            "error_type": "Local: where-clause and rounding error"
        }
    },
    "deepseek": {
        "00d61436285ea67a4cc11692": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you list patient 10026354's monthly maximum mch on the first hospital visit?",
            "gold_sql": "SELECT MAX(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10026354 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mch' ) GROUP BY strftime('%Y-%m',labevents.charttime)",
            "pred_sql": "SELECT STRFTIME('%Y-%m', T1.charttime) AS month, MAX(T1.valuenum) AS max_mch FROM labevents T1 INNER JOIN d_labitems T2 ON T1.itemid = T2.itemid INNER JOIN admissions T3 ON T1.hadm_id = T3.hadm_id WHERE T1.subject_id = 10026354 AND T2.label = 'MetHemoglobin' AND T1.charttime >= T3.admittime AND T1.charttime <= T3.dischtime AND T3.ad;",
            "gold_answer": "[['32.2'], ['32.3']]",
            "pred_answer": "no such column: T3.ad",
            "is_correct": false,
            "reasoning": "The predicted SQL has an incorrect column reference in the WHERE clause (AND T3.ad;) which leads to the error 'no such column: T3.ad'. This is a localized syntax/column reference mistake and does not require restructuring the overall query logic.",
            "error_type": "Local: column reference error"
        },
        "9e6090c8b2d81c03a6dec400": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients had a diagnosis of accidents occurring in other specified places in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'accidents occurring in other specified places' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT di.subject_id) AS patient_count FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE d.long_title = 'Accidents occurring in other specified places' AND strftime('%Y', di.charttime) = '2100';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters on d.long_title = 'Accidents occurring in other specified places' (with a capital 'A') while the actual title in the database is 'accidents occurring in other specified places' (all lowercase). This is a simple string literal mismatch and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "2514305815b6187c8662dbe7": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What drugs are being added to patient 10018081's prescription today, as compared to yesterday?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') EXCEPT SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT DISTINCT T1.drug AS today_drugs FROM prescriptions T1 LEFT JOIN prescriptions T2 ON T1.drug = T2.drug WHERE T1.subject_id = '10018081' AND datetime(T1.starttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-0 day') AND T2.subject_id = '10018081' AND datetime(T2.starttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-1 day') AND T2.drug IS NULL;",
            "gold_answer": "[['furosemide'], ['soln']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query attempts to use a LEFT JOIN to exclude drugs present yesterday, but it incorrectly places the T2 (yesterday) filters in the WHERE clause instead of the JOIN condition. Because the WHERE predicates on T2 force non-null matches, the LEFT JOIN devolves into no rows. This is a localized mistake in the JOIN logic rather than a fundamental change to the overall structure or table relationships. Therefore, it is a Local error in the JOIN clause.",
            "error_type": "Local: join condition error"
        },
        "d72bd1edb7343d7eb4fffd28": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "List the top three most common diagnoses for patients in their 40s this year.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 40 AND 49 ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d.long_title, COUNT(*) as diagnosis_count FROM admissions a JOIN diagnoses_icd di ON a.hadm_id = di.hadm_id JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE a.age BETWEEN 40 AND 49 AND strftime('%Y', a.admittime) = '2100' GROUP BY d.long_title ORDER BY diagnosis_count DESC LIMIT 3;",
            "gold_answer": "[['acidosis'], ['acute and subacute necrosis of liver'], ['acute kidney failure with lesion of tubular necrosis'], ['acute on chronic diastolic heart failure'], ['acute respiratory failure'], ['alcohol abuse, unspecified'], ['alkalosis'], ['anemia, unspecified'], ['asthma, unspecified type, unspecified'], ['asymptomatic human immunodeficiency virus [hiv] infection status'], ['atherosclerotic heart disease of native coronary artery with unstable angina pectoris'], ['atrial fibrillation'], ['autistic disorder, current or active state'], ['cocaine abuse, unspecified'], ['congestive heart failure, unspecified'], ['depressive disorder, not elsewhere classified'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['epilepsy, unspecified, not intractable, without status epilepticus'], ['esophageal reflux'], ['gastro-esophageal reflux disease without esophagitis'], ['gout, unspecified'], ['hyperlipidemia, unspecified'], ['long term (current) use of antithrombotics/antiplatelets'], ['long-term (current) use of anticoagulants'], ['mitral valve disorders'], ['personal history of sudden cardiac arrest'], ['postinflammatory pulmonary fibrosis'], ['presence of coronary angioplasty implant and graft'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "pred_answer": "[['depressive disorder, not elsewhere classified', '4.0'], ['gout, unspecified', '3.0'], ['unspecified essential hypertension', '4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on admissions.admittime\u2019s year rather than diagnoses_icd.charttime as required by the specification for \u201cthis year.\u201d This mistake is confined to the WHERE clause\u2019s time condition and does not alter the overall join structure or logic of the query beyond that localized part. Hence, it is a Local error.",
            "error_type": "Local: wrong time filter"
        },
        "d855b598f5dd0b2f67356e05": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you provide me the daily minimum amount of jevity 1.2 (full) that patient 10027602 received last month?",
            "gold_sql": "SELECT MIN(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'jevity 1.2 (full)' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') GROUP BY strftime('%Y-%m-%d',inputevents.starttime)",
            "pred_sql": "SELECT DATE(charttime) AS date, MIN(totalamount) AS daily_min_amount, totalamountuom AS unit FROM inputevents INNER JOIN d_items ON inputevents.itemid = d_items.itemid WHERE subject_id = 10027602 AND label = 'jevity 1.2 (full)';",
            "gold_answer": "[['1000.0'], ['1000.0'], ['1000.0'], ['1000.0'], ['1000.0'], ['1000.0'], ['1000.0']]",
            "pred_answer": "no such column: charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references inputevents.charttime, but the inputevents table has no charttime column (it uses starttime). This is a simple table/column reference error that can be fixed locally without altering the overall query structure.",
            "error_type": "Local: table/column reference error"
        }
    },
    "qwen3": {
        "0687b03f52fe3a7d878757dc": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Since the last diagnosis of patient 10004733 with chronic glomerulonephritis with unspecified pathological lesion in kidney on the current hospital visit, how many days have passed?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', diagnoses_icd.charttime) ) FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic glomerulonephritis with unspecified pathological lesion in kidney' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT CAST((julianday('2100-12-31 23:59:00') - julianday(di.charttime)) AS INTEGER) AS days_passed FROM admissions a JOIN diagnoses_icd di ON a.hadm_id = di.hadm_id JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND d.long_title = 'chronic glomerulonephritis with unspecified pathological lesion in kidney' ORDER BY di.charttime DESC LIMIT 1;",
            "gold_answer": "[['9.522']]",
            "pred_answer": "[['9.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters to the patient\u2019s current admission and the specified diagnosis, orders by the latest diagnosis time, and limits to one result\u2014so the overall query structure and joins are correct. The only mistake is in the SELECT clause: it uses CAST(julianday(...) - julianday(...)) AS INTEGER, which truncates the fractional day difference instead of preserving and rounding to three decimals using strftime('%J') as required. This is a localized error in the numeric calculation/formatting, not a structural or logical flaw.",
            "error_type": "Local: numerical-formatting error"
        },
        "5e2b6a161db39a1951d0acde": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "For patients diagnosed with chronic kidney disease, stage 3 (moderate), what are the top three most common microbiology tests that followed within the same month since 1 year ago?",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic kidney disease, stage 3 (moderate)' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT m.test_name FROM diagnoses_icd d JOIN d_icd_diagnoses dicd ON d.icd_code = dicd.icd_code JOIN microbiologyevents m ON d.subject_id = m.subject_id AND d.hadm_id = m.hadm_id WHERE dicd.long_title = 'Chronic kidney disease stage 3 (moderate)' AND d.charttime >= datetime('2100-12-31', '-1 year') AND d.charttime <= '2100-12-31 23:59:00' AND datetime(m.charttime, 'start of month') = datetime(d.charttime, 'start of month') AND m.charttime >= d.charttime GROUP BY m.test_name ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['anaerobic culture'], ['blood culture, routine'], ['c. difficile pcr'], ['epstein-barr virus ebna igg ab'], ['epstein-barr virus vca-igg ab'], ['epstein-barr virus vca-igm ab'], ['fluid culture'], ['gram stain'], ['rubella igg serology'], ['urine culture']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended query logic by joining microbiologyevents on both subject_id and hadm_id, forcing the test to occur in the same hospital admission as the diagnosis. The ground truth only requires the test to occur within the same calendar month (not necessarily the same admission) and within one year of diagnosis. Furthermore, the predicted SQL fails to restrict microbiology events to the one\u2010year window and mismatches the exact long_title string. These errors span multiple clauses (JOIN and WHERE) and alter the overall meaning of the query, so they cannot be fixed locally.",
            "error_type": "Global: incorrect join logic"
        }
    },
    "macsql": {
        "fa8a33756924ed200b38817c": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Can you tell me about patient 10019917's age on the last hospital visit?",
            "gold_sql": "SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10019917 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%Y', A.`admittime`) - strftime('%Y', P.`dob`)) - (strftime('%m-%d', A.`admittime`) < strftime('%m-%d', P.`dob`)) AS age_at_last_visit FROM admissions AS A JOIN patients AS P ON A.`subject_id` = P.`subject_id` WHERE A.`subject_id` = 10019917 AND A.`dischtime` IS NOT NULL ORDER BY A.`dischtime` DESC LIMIT 1",
            "gold_answer": "[['44.0']]",
            "pred_answer": "[['43.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL computes age manually using date arithmetic on admittime and dob, violating the assumption that age must be retrieved directly from the admissions.age column. This is a mistake localized to the SELECT expression and does not affect the overall query structure or table relationships.",
            "error_type": "Local: other-temporal error"
        },
        "2170941b28cb5fced478fa35": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Give me the four most common output events this year.",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT outputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM outputevents WHERE datetime(outputevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY outputevents.itemid ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "WITH RankedEvents AS ( SELECT `itemid`, COUNT(*) AS event_count FROM outputevents WHERE strftime('%Y', `charttime`) = '2100' GROUP BY `itemid` ORDER BY event_count DESC LIMIT 4 ) SELECT `itemid` FROM RankedEvents",
            "gold_answer": "[['cerebral ventricular #1'], ['chest tube #1'], ['foley'], ['void']]",
            "pred_answer": "[['226559.0'], ['226560.0'], ['226588.0'], ['226606.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly identifies the top 4 item IDs by count for this year, but it never joins to the d_items table to retrieve the human-readable labels. As a result, it returns raw itemid values instead of event names. This is a localized mistake in the SELECT clause/join, and does not require restructuring the entire query logic.",
            "error_type": "Local: missing join to lookup table"
        }
    },
    "o4-mini": {
        "d3c9c0d918fe61b341b3952e": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What are the five most frequently administered procedures for patients aged 60 or above in 2100?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND strftime('%Y',procedures_icd.charttime) = '2100' GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT d.long_title FROM admissions a JOIN procedures_icd p ON a.hadm_id = p.hadm_id JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE a.age >= 60 AND strftime('%Y', p.charttime) = '2100' GROUP BY d.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['(aorto)coronary bypass of three coronary arteries'], ['(aorto)coronary bypass of two coronary arteries'], ['arterial catheterization'], ['arteriography of cerebral arteries'], ['arteriography of other intra-abdominal arteries'], ['central venous catheter placement with guidance'], ['closed [endoscopic] biopsy of bronchus'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['coronary arteriography using two catheters'], ['drainage of peritoneal cavity, percutaneous approach'], ['drainage of peritoneal cavity, percutaneous approach, diagnostic'], ['endovascular (total) embolization or occlusion of head and neck vessels'], ['enteral infusion of concentrated nutritional substances'], ['extirpation of matter from left lower lung lobe, via natural or artificial opening endoscopic'], ['extirpation of matter from left main bronchus, via natural or artificial opening endoscopic'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['hemodialysis'], ['insertion of endotracheal tube'], ['insertion of infusion device into right atrium, percutaneous approach'], ['insertion or replacement of external ventricular drain [evd]'], ['introduction of nutritional substance into upper gi, via natural or artificial opening'], ['left heart cardiac catheterization'], ['measurement of cardiac sampling and pressure, right heart, percutaneous approach'], ['percutaneous abdominal drainage'], ['performance of urinary filtration, multiple'], ['respiratory ventilation, greater than 96 consecutive hours'], ['restriction of intracranial artery with intraluminal device, percutaneous approach'], ['single internal mammary-coronary artery bypass'], ['spinal tap'], ['venous catheterization, not elsewhere classified']]",
            "pred_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['single internal mammary-coronary artery bypass']]",
            "is_correct": false,
            "reasoning": "1. The predicted SQL correctly joins admissions, procedures_icd, and d_icd_procedures, and filters on age >= 60 and year = '2100', matching the ground truth's overall structure and filters.\n2. It groups by the procedure name and orders by COUNT(*) DESC, then limits to 5 rows, which superficially returns the five most frequent procedures.\n3. However, the question explicitly asks for the \u201cfive most frequently administered procedures,\u201d and the SQL assumptions state that when ranking is requested, DENSE_RANK() must be used to include ties at the cutoff.\n4. The ground truth uses a DENSE_RANK() subquery to include all procedures tied for the top five counts, potentially returning more than five rows.\n5. By using LIMIT 5 instead of DENSE_RANK(), the predicted SQL fails to include tied procedures at rank 5 and violates the ranking assumption.\n6. This mistake is localized to the ranking clause and can be corrected by replacing the LIMIT clause with the appropriate DENSE_RANK() logic. Therefore, it is a local error.",
            "error_type": "Local: ranking function error"
        }
    },
    "llm2sql": {
        "c29ee716c7d8f7b58212f716": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the three most usual lab tests for patients in their 50s since 2100?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 50 AND 59 ) AND strftime('%Y',labevents.charttime) >= '2100' GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d_labitems.label FROM labevents JOIN admissions ON labevents.subject_id = admissions.subject_id AND labevents.hadm_id = admissions.hadm_id JOIN d_labitems ON labevents.itemid = d_labitems.itemid WHERE admissions.age BETWEEN 50 AND 59 AND admissions.admittime >= '2100-01-01' GROUP BY d_labitems.label ORDER BY COUNT(labevents.itemid) DESC LIMIT 3;",
            "gold_answer": "[['chloride'], ['hematocrit'], ['sodium']]",
            "pred_answer": "[['glucose'], ['hematocrit'], ['hemoglobin']]",
            "is_correct": false,
            "reasoning": "The predicted query misplaces the time filter: it uses admissions.admittime >= '2100-01-01' instead of filtering labevents.charttime by year >= 2100. This mistake is confined to the WHERE clause and does not alter the joins or overall structure of the query, so it is a localized (clause-level) error involving a temporal condition.",
            "error_type": "Local: temporal filter error"
        }
    }
}