{
    "qwen3": {
        "4bf5d4ab59d0590189c5795f": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many days have elapsed since the last time on the current hospital visit that patient 10021487 was diagnosed with other chronic nonalcoholic liver disease?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', diagnoses_icd.charttime) ) FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other chronic nonalcoholic liver disease' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT CAST((julianday('2100-12-31 23:59:00') - julianday(MAX(d.charttime))) AS INTEGER) AS days_elapsed FROM diagnoses_icd d JOIN d_icd_diagnoses dicd ON d.icd_code = dicd.icd_code JOIN admissions a ON d.hadm_id = a.hadm_id WHERE d.subject_id = 10021487 AND a.dischtime IS NULL AND dicd.long_title = 'other chronic nonalcoholic liver disease';",
            "gold_answer": "[['24.983']]",
            "pred_answer": "[['24.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly identifies the most recent diagnosis and filters for the current admission but then casts the day difference to an integer, truncating the decimal portion. This is a localized error in the SELECT clause (value parsing/formatting) and does not affect the overall query structure or logic of joining tables and filtering. The mistake can be fixed by removing the CAST to integer or preserving the float, without altering the joins or filters.",
            "error_type": "Local: value parsing error"
        },
        "e6bb44f8f32c8755a047c619": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many hours has it been since patient 10004733's last stay in the careunit medicine on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', transfers.intime) ) FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND transfers.careunit = 'medicine' ORDER BY transfers.intime DESC LIMIT 1",
            "pred_sql": "SELECT (julianday(COALESCE(t.outtime, '2100-12-31 23:59:00')) - julianday(t.intime)) * 24 AS hours_since_last_stay FROM transfers t JOIN admissions a ON t.hadm_id = a.hadm_id WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND t.careunit = 'medicine' AND t.intime = ( SELECT MAX(intime) FROM transfers WHERE hadm_id = a.hadm_id AND careunit = 'medicine' );",
            "gold_answer": "[['27.933']]",
            "pred_answer": "[['21.738']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s JOIN and filtering logic correctly identifies the patient\u2019s current hospital admission and the most recent medicine careunit transfer. The error lies solely in the SELECT expression: it uses COALESCE(t.outtime, current_time) to compute the duration of the stay, instead of calculating the time elapsed since the transfer\u2019s start (intime) to now. This is a localized mistake in the temporal calculation, not a structural or multi-clause logic error.",
            "error_type": "Local: other-temporal error"
        }
    },
    "codes": {
        "2b0b822ec5196516bf095185": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the total dose of vancomycin that patient 10031757 was prescribed in 10/this year?",
            "gold_sql": "SELECT SUM(prescriptions.dose_val_rx) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10031757 ) AND prescriptions.drug = 'vancomycin' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',prescriptions.starttime) = '10'",
            "pred_sql": "SELECT sum(CASE WHEN prescriptions.starttime LIKE '2100-11%' THEN 1 ELSE 0 END) FROM prescriptions INNER JOIN patients ON prescriptions.subject_id = patients.subject_id WHERE patients.subject_id = 10031757 AND prescriptions.drug = 'vancomycin'",
            "gold_answer": "[['4000.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely alters the intended logic of the query. Instead of summing the dose_val_rx for vancomycin prescriptions in October of the current year, it counts prescriptions (using CASE WHEN ... THEN 1) and incorrectly filters for November ('2100-11%') rather than October. It also omits the necessary time-window logic (start-of-year and month filter) and sums the wrong value. These mistakes span both the SELECT aggregation and WHERE date-filtering clauses, fundamentally changing the query\u2019s meaning.",
            "error_type": "Global"
        },
        "e22d99ee8e54c59787862605": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the first time that patient 10004733 received a prescription for 0.83% sodium chloride on the current hospital visit?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND prescriptions.drug = '0.83% sodium chloride' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE patients.subject_id = 10004733 AND prescriptions.drug = '0.83% sodium chloride'",
            "gold_answer": "[['2100-12-22 17:00:00']]",
            "pred_answer": "[['2100-12-22 17:00:00'], ['2100-12-23 10:00:00'], ['2100-12-23 10:00:00'], ['2100-12-26 12:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict to the current hospital visit (missing `admissions.dischtime IS NULL` filter) and also omits the `ORDER BY prescriptions.starttime ASC LIMIT 1` needed to return only the first time. These are localized mistakes within the WHERE and result-limiting clauses and do not require restructuring the entire query logic.",
            "error_type": "Local: missing current visit filter and missing ORDER BY/LIMIT"
        },
        "a3569887096547747b83607e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many people were admitted to the hospital?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions",
            "pred_sql": "SELECT count(subject_id) FROM admissions",
            "gold_answer": "[['94.0']]",
            "pred_answer": "[['119.0']]",
            "is_correct": false,
            "reasoning": "The error is confined to the SELECT aggregation clause: the predicted SQL uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id). This is a localized mistake in the aggregate function and can be fixed by adding DISTINCT without changing the overall query structure or joins.",
            "error_type": "Local: aggregate-distinct error"
        },
        "27a2491ee7292c776985f138": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients were treated with fusion or refusion of 2-3 vertebrae?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'fusion or refusion of 2-3 vertebrae' ) )",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM procedures_icd WHERE icd_code IN ('99.0', '99.1', '99.2', '99.3')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs from the ground truth in the WHERE clause values: it hard-codes an incorrect list of ICD codes instead of selecting the single correct code via the d_icd_procedures lookup. No tables, joins, or overall structure have been changed; only the filtering condition is wrong. This is a localized error in the WHERE clause values.",
            "error_type": "Local: incorrect-value error"
        },
        "2b7dee89179d6e7647d06f5e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How much does patient 10039831 change in base excess second measured on the current hospital visit compared to the first value measured on the current hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'base excess' ) ORDER BY labevents.charttime ASC LIMIT 1 OFFSET 1 ) - ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'base excess' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT CASE WHEN count(chartevents.value) > 1 THEN chartevents.valuenum ELSE NULL END - CASE WHEN count(chartevents.value) > 1 THEN chartevents.valuenum ELSE NULL END FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE chartevents.itemid IN (68, 69, 70, 113, 188, 189, 190, 191, 224, 225, 226, 227) AND patients.subject_id = 10039831",
            "gold_answer": "[['2.0']]",
            "pred_answer": "no such column: chartevents.value",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong table (`chartevents` instead of `labevents`), wrong columns (`chartevents.value` which doesn\u2019t exist), incorrect item filters, and fails to restrict to the current hospital visit or properly pick the first and second measurements. This is not a small typo but a fundamentally flawed approach requiring major restructuring\u2014changing tables, joins, and logic\u2014so it\u2019s a global error.",
            "error_type": "Global: incorrect table and logic"
        }
    },
    "o4-mini": {
        "0fb0be9a1f13d846f08f07aa": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Among patients who are diagnosed with presence of coronary angioplasty implant and graft in 2100, what are the top five most common diagnoses that followed within 2 months?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'presence of coronary angioplasty implant and graft' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH index_patients AS ( SELECT di.subject_id, MIN(di.charttime) AS idx_time FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE d.long_title = 'presence of coronary angioplasty implant and graft' AND strftime('%Y', di.charttime) = '2100' GROUP BY di.subject_id ) SELECT d2.long_title FROM index_patients ip JOIN diagnoses_icd di2 ON di2.subject_id = ip.subject_id JOIN d_icd_diagnoses d2 ON di2.icd_code = d2.icd_code WHERE di2.charttime > ip.idx_time AND di2.charttime <= datetime(ip.idx_time, '+2 months') AND d2.long_title != 'presence of coronary angioplasty implant and graft' GROUP BY d2.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['atherosclerotic heart disease of native coronary artery with unspecified angina pectoris'], ['gastro-esophageal reflux disease without esophagitis'], ['hyperlipidemia, unspecified'], ['nicotine dependence, cigarettes, uncomplicated'], ['presence of aortocoronary bypass graft'], ['presence of coronary angioplasty implant and graft']]",
            "pred_answer": "[['atherosclerotic heart disease of native coronary artery with unspecified angina pectoris'], ['gastro-esophageal reflux disease without esophagitis'], ['hyperlipidemia, unspecified'], ['nicotine dependence, cigarettes, uncomplicated'], ['presence of aortocoronary bypass graft']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a WHERE clause filtering out the index diagnosis ('presence of coronary angioplasty implant and graft'), whereas the ground truth includes it if it reoccurs within the 2-month window. This mistake is confined to a specific filter condition and does not alter the overall structure of joins, grouping, or ranking logic. Hence, it is a localized error in the WHERE clause.",
            "error_type": "Local: filter condition error"
        }
    },
    "macsql": {
        "eed40d16275abbb9b36c549b": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Show me the top four most common microbiology tests this year.",
            "gold_sql": "SELECT T1.test_name FROM ( SELECT microbiologyevents.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM microbiologyevents WHERE datetime(microbiologyevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY microbiologyevents.test_name ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT `spec_type_desc`, `test_name` FROM ( SELECT `spec_type_desc`, `test_name`, COUNT(*) AS `test_count`, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS `rank` FROM microbiologyevents WHERE datetime(`charttime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY `spec_type_desc`, `test_name` ) AS ranked_tests WHERE `rank` <= 4",
            "gold_answer": "[['blood culture, routine'], ['gram stain'], ['respiratory culture'], ['urine culture']]",
            "pred_answer": "[['blood culture', 'blood culture, routine'], ['mrsa screen', 'mrsa screen'], ['sputum', 'respiratory culture'], ['urine', 'urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply adds spec_type_desc to both the SELECT and GROUP BY clauses, which changes the granularity of the aggregation and returns the wrong grouping. This is a mistake confined to the GROUP BY/SELECT clause and can be fixed by removing spec_type_desc\u2014no overall query restructuring is needed. Therefore, this is a Local error of the GROUP BY type.",
            "error_type": "Local: group-by error"
        }
    },
    "dinsql": {
        "dca9777fd4184fbb7fc955d3": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What microbiology test did patient 10039831 undergo for the first time since 11/2100?",
            "gold_sql": "SELECT microbiologyevents.test_name FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND strftime('%Y-%m',microbiologyevents.charttime) >= '2100-11' ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT test_name FROM microbiologyevents WHERE subject_id = 10039831 AND datetime(charttime) >= datetime('2100-12-31 23:59:00', 'start of year') AND strftime('%m', charttime) = '11' ORDER BY charttime ASC LIMIT 1",
            "gold_answer": "[['mrsa screen']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only checks for month = '11' and charttime \u2265 start of the year, so it misses tests occurring in December 2100. The correct filter should include all dates from November 2100 onward (strftime('%Y-%m',charttime) \u2265 '2100-11'). This mistake is confined to the WHERE time\u2010filter logic and does not require restructuring joins or subqueries.",
            "error_type": "Local: other-temporal error"
        }
    }
}