{
    "macsql": {
        "7ecca1580c68db5def75b1c9": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "How many patients received a introduction of other therapeutic substance into peripheral artery, percutaneous approach procedure within the same month after having a replacement of aortic valve with zooplastic tissue, percutaneous approach procedure since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'replacement of aortic valve with zooplastic tissue, percutaneous approach' ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'introduction of other therapeutic substance into peripheral artery, percutaneous approach' ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`subject_id`) AS `patient_count` FROM ( SELECT `hadm_id`, `subject_id`, datetime(`charttime`, 'start of month') AS `procedure_month` FROM `procedures_icd` WHERE `icd_code` = ( SELECT `icd_code` FROM `d_icd_procedures` WHERE `long_title` = 'Replacement of aortic valve with zooplastic tissue, percutaneous approach' ) AND datetime(`charttime`) >= datetime('2100-01-01 00:00:00') ) AS T1 JOIN ( SELECT `hadm_id`, `subject_id`, datetime(`charttime`, 'start of month') AS `procedure_month` FROM `procedures_icd` WHERE `icd_code` = ( SELECT `icd_code` FROM `d_icd_procedures` WHERE `long_title` = 'Introduction of other therapeutic substance into peripheral artery, percutaneous approach' ) ) AS T2 ON T1.`subject_id` = T2.`subject_id` AND T1.`procedure_month` = T2.`procedure_month`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to apply the year filter on the second subquery ('introduction' procedure) and omits the condition ensuring the introduction occurs after the replacement within the same month (T1.charttime < T2.charttime). These mistakes alter the fundamental logic of the query rather than being localized syntax or naming issues.",
            "error_type": "Global: join-logic error"
        },
        "4a2a1c26c8244fba33d6203f": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What is the daily minimum level of patient 10003400's hematocrit?",
            "gold_sql": "SELECT MIN(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10003400 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'hematocrit' ) GROUP BY strftime('%Y-%m-%d',labevents.charttime)",
            "pred_sql": "SELECT DATE(`charttime`) AS `date`, MIN(`valuenum`) AS `min_hematocrit` FROM `labevents` WHERE `subject_id` = 10003400 AND `itemid` = ( SELECT `itemid` FROM `d_labitems` WHERE `label` = 'hematocrit' ) GROUP BY DATE(`charttime`) ORDER BY `date`",
            "gold_answer": "[['31.3'], ['31.5'], ['34.8']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only filters labevents by subject_id and a single itemid, rather than using the hadm_id subquery to restrict events to that patient\u2019s admissions as in the ground truth. This mistake is confined to the WHERE clause (an incorrect column reference/condition) and doesn\u2019t change the overall query structure or logic beyond that clause. Hence it\u2019s a localized error.",
            "error_type": "Local: column reference error"
        },
        "c5bef388e557ec34b8d4bad5": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "When was the last time when patient 10021938 had the maximum value of sao2 on 11/02/this year?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021938 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',chartevents.charttime) = '11-02' ORDER BY chartevents.valuenum DESC, chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(`charttime`) AS last_time_max_sao2 FROM `chartevents` WHERE `subject_id` = 10021938 AND `itemid` = ( SELECT `itemid` FROM `d_items` WHERE `label` = 'o2 saturation pulseoxymetry' ) AND `valuenum` = ( SELECT MAX(`valuenum`) FROM `chartevents` WHERE `subject_id` = 10021938 AND `itemid` = ( SELECT `itemid` FROM `d_items` WHERE `label` = 'o2 saturation pulseoxymetry' ) AND datetime(`charttime`, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND strftime('%d', `charttime`) = '02' ) AND datetime(`charttime`, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND strftime('%d', `charttime`) = '02'",
            "gold_answer": "[['2100-11-02 04:01:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the time filter logic by using the start of the month and checking for day '02' in the current month (December) instead of filtering for November 2nd of the current year. This is not a minor syntax mistake but a core logical error that alters the intended date window. Additionally, the ground truth limits to ICU stays via stay_id while the prediction uses subject_id directly, further changing the scope of events. These mistakes span multiple clauses and require restructuring the query logic to correct.",
            "error_type": "Global: incorrect date logic"
        }
    },
    "codes": {
        "9e464232cfb26d809dde71e4": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "For the first time last month, when was patient 10026354 prescribed a drug?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10026354 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE patients.subject_id = 10026354 ORDER BY prescriptions.starttime DESC LIMIT 1",
            "gold_answer": "[['2100-11-01 12:00:00']]",
            "pred_answer": "[['2100-11-09 10:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the requirement to restrict prescriptions to last month and instead returns the most recent prescription overall. Fixing this would require adding the datetime filter on last month and reversing the sort order to ascending before limiting \u2014 changes that alter the core logic, not just a minor clause. Therefore, it\u2019s a global error in the query\u2019s overall filtering and ordering logic.",
            "error_type": "Global: incorrect filtering and ordering"
        },
        "94b916696ee9cfe48eca3e41": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When has patient 10020740 been given hemoglobin test for the first time in 03/2100?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'hemoglobin' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) AND strftime('%Y-%m',labevents.charttime) = '2100-03' ORDER BY labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT charttime FROM chartevents WHERE subject_id = 10020740 AND itemid IN (5122, 5123, 5124, 5125, 5126, 5127, 5128, 5129, 5130, 5131) AND strftime('%Y-%m', charttime) = '2100-03'",
            "gold_answer": "[['2100-03-23 16:50:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the chartevents table for hemoglobin tests using a hard\u2010coded list of itemids, whereas the ground truth correctly uses labevents joined to d_labitems to identify hemoglobin via its label. This is not a simple syntax or column name mistake but a fundamental misplacement of the data source (wrong table and lookup logic), altering the entire intention of retrieving the first lab hemoglobin event. Fixing this requires restructuring the FROM clause and correct table joins rather than a minor local adjustment.",
            "error_type": "Global: wrong-table error"
        },
        "1a914c34a8501fbeec1d63d3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the daily maximum value of heart rate for patient 10004733 on the current hospital visit?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN admissions ON chartevents.hadm_id = admissions.hadm_id WHERE admissions.subject_id = 10004733 AND chartevents.valuenum IS NOT NULL AND chartevents.itemid IN (211, 220045)",
            "gold_answer": "[['107.0'], ['111.0'], ['115.0'], ['117.0'], ['118.0'], ['130.0'], ['140.0'], ['91.0'], ['93.0']]",
            "pred_answer": "[['140.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the date-based GROUP BY, fails to restrict to the current ICU stay(s) of the current hospital visit, and combines tables incorrectly by joining chartevents directly to admissions instead of filtering by stay_id and dischtime IS NULL. This fundamentally alters the query\u2019s logic and structure (returning a single maximum rather than daily maxima), requiring significant restructuring beyond a localized fix.",
            "error_type": "Global: incorrect query structure"
        },
        "3a7c1bad5e1498d7555d626a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many current patients belong to the 60 or above age category?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime IS NULL AND admissions.age >= 60",
            "pred_sql": "SELECT count(subject_id) FROM admissions WHERE age >= 60",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['60.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the crucial WHERE condition admissions.dischtime IS NULL that restricts to current patients and also fails to use COUNT(DISTINCT subject_id), counting all admissions rather than unique patients. These omissions change the overall logic of the query, not just a single clause or reference, thus it is a structural/logical error requiring modification across multiple parts of the query.",
            "error_type": "Global: missing current patient filter"
        },
        "0cfc184df990beed1921b205": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Among patients in their 60 or above since 2100, what are the top three most frequent diagnoses?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND strftime('%Y',diagnoses_icd.charttime) >= '2100' GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT diagnoses_icd.icd_code FROM patients INNER JOIN diagnoses_icd ON patients.subject_id = diagnoses_icd.subject_id WHERE patients.dob < '2100-01-01' AND patients.gender = 'm' AND patients.dod IS NULL AND patients.age >= 60 GROUP BY diagnoses_icd.icd_code ORDER BY count(diagnoses_icd.icd_code) DESC LIMIT 3",
            "gold_answer": "[['atrial fibrillation'], ['other and unspecified hyperlipidemia'], ['unspecified essential hypertension']]",
            "pred_answer": "no such column: patients.age",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column patients.age, causing the error 'no such column: patients.age'. This is a localized issue with an incorrect column reference and can be fixed by using the correct column (e.g., admissions.age) or adjusting the join, without changing the overall query structure.",
            "error_type": "Local: table/column reference error"
        },
        "05b8b940351626bb60292719": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What medication was patient 10021487 prescribed two or more times on the current hospital visit?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, COUNT( prescriptions.starttime ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT drug FROM prescriptions WHERE hadm_id = ( SELECT max(hadm_id) FROM admissions WHERE subject_id = 10021487 ) GROUP BY drug HAVING count(drug) > 2",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['acetazolamide'], ['acetazolamide sodium'], ['bisacodyl'], ['calcium carbonate'], ['chlorhexidine gluconate 0.12% oral rinse'], ['diphenhydramine'], ['famotidine'], ['furosemide'], ['insulin'], ['iso-osmotic dextrose'], ['lactated ringers'], ['metronidazole (flagyl)'], ['midazolam'], ['neutra-phos'], ['ns'], ['plasmalyte'], ['potassium chloride (powder)'], ['sodium bicarbonate'], ['sodium chloride 0.9%  flush'], ['soln'], ['vancomycin']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['famotidine'], ['furosemide'], ['insulin'], ['iso-osmotic dextrose'], ['lactated ringers'], ['metronidazole (flagyl)'], ['midazolam'], ['ns'], ['plasmalyte'], ['sodium chloride 0.9%  flush'], ['soln'], ['vancomycin']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic of the query in two ways: (1) It identifies the \"current hospital visit\" by taking MAX(hadm_id) instead of using admissions.dischtime IS NULL, and (2) it uses HAVING COUNT(drug) > 2 rather than >= 2. These changes span multiple clauses (the admission\u2010selection subquery and the HAVING condition) and thus affect the overall intended semantics of the query, not just a localized clause.",
            "error_type": "Global: incorrect admission selection and threshold logic"
        }
    },
    "llm2sql": {
        "43e58def8b692bf30cd5500a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the minimum total hospital cost that involved other incision of brain?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other incision of brain' ) ) GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(c.total_cost) AS min_total_hospital_cost FROM ( SELECT p.hadm_id, SUM(c.cost) AS total_cost FROM cost AS c JOIN procedures_icd AS p ON c.hadm_id = p.hadm_id JOIN d_icd_procedures AS d ON p.icd_code = d.icd_code WHERE c.event_type = 'procedures_icd' AND d.long_title = 'other incision of brain' GROUP BY p.hadm_id ) AS c;",
            "gold_answer": "[['1721.22']]",
            "pred_answer": "[['25.16']]",
            "is_correct": false,
            "reasoning": "The ground truth query sums all cost entries for each admission that involved the specified brain incision procedure and then finds the minimum of those total costs. The predicted query, however, filters cost.event_type = 'procedures_icd' and only sums procedure-specific costs for those admissions. This fundamentally changes the intended logic\u2014only a subset of costs are aggregated rather than the total hospital cost. Fixing this requires removing or altering the event_type filter and reworking the join/filter logic, not just a localized clause tweak.",
            "error_type": "Global: incorrect event_type filter"
        },
        "3ec41e32d5474dbca8ba9aa3": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the daily maximum systolic blood pressure for patient 10013049 since 03/07/2100?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10013049 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure systolic' AND d_items.linksto = 'chartevents' ) AND strftime('%Y-%m-%d',chartevents.charttime) >= '2100-03-07' GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT DATE(chartevents.charttime) AS day, MAX(chartevents.valuenum) AS max_systolic_bp FROM chartevents JOIN d_items ON chartevents.itemid = d_items.itemid WHERE chartevents.subject_id = 10013049 AND d_items.label = 'arterial blood pressure systolic' AND DATE(chartevents.charttime) >= DATE('2100-07-03') GROUP BY day;",
            "gold_answer": "[['129.0'], ['133.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only differs in small parts of the WHERE clause and grouping\u2014but the overall structure (grouping by day, taking MAX, joining d_items) is unchanged. The major mistake is the date literal: it uses DATE('2100-07-03') (July 3rd) instead of '2100-03-07' (March 7th), causing no rows to match. This is a localized value/parsing error in the WHERE clause and does not alter the query\u2019s overall logic or joins.",
            "error_type": "Local: value parsing error"
        }
    },
    "dinsql": {
        "3ca78723b4c115f0d4b675be": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Which are the top three most frequent output events in 2100?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT outputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM outputevents WHERE strftime('%Y',outputevents.charttime) = '2100' GROUP BY outputevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT itemid FROM outputevents WHERE strftime('%Y', charttime) = '2100' GROUP BY itemid ORDER BY COUNT(*) DESC LIMIT 3",
            "gold_answer": "[['chest tube #1'], ['foley'], ['void']]",
            "pred_answer": "[['226559.0'], ['226560.0'], ['226588.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL groups and limits directly on outputevents.itemid, returning raw item IDs instead of the required labels from d_items. The error is confined to the SELECT clause and the lack of a join to d_items; the overall query structure (filtering by year, grouping, ordering, and limiting) is correct. This is a fixable, isolated mistake (adding a JOIN and selecting d_items.label) without altering the query\u2019s logical flow or relationships.",
            "error_type": "Local: missing join/column reference error"
        },
        "82e01448e95df2146a69a821": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the name of the organism in the first bronchoalveolar lavage microbiology test of patient 10021487 this month?",
            "gold_sql": "SELECT microbiologyevents.org_name FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) AND microbiologyevents.spec_type_desc = 'bronchoalveolar lavage' AND microbiologyevents.org_name IS NOT NULL AND datetime(microbiologyevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT org_name FROM microbiologyevents WHERE subject_id = 10021487 AND spec_type_desc = 'bronchoalveolar lavage' AND datetime(charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') ORDER BY charttime ASC LIMIT 1",
            "gold_answer": "[['staph aureus coag +']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters by subject_id, spec_type_desc, and month, but it omits the condition AND org_name IS NOT NULL. As a result, the first record returned has a NULL org_name, whereas the ground truth explicitly excludes NULL organism names. This is an error confined to the WHERE clause and can be fixed by adding the missing filter without changing the overall query structure.",
            "error_type": "Local: missing non-null filter"
        },
        "6ede363c7d448d9e34bdc68d": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the top five most frequent diagnoses that patients are given within 2 months after being diagnosed with body mass index 35.0-35.9, adult in 2100?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'body mass index 35.0-35.9, adult' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH initial_diagnosis AS ( SELECT DISTINCT di.subject_id, di.hadm_id, di.charttime FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code WHERE dicd.long_title = 'body mass index 35.0-35.9, adult' AND strftime('%Y', di.charttime) = '2100' ), subsequent_diagnoses AS ( SELECT DISTINCT di.subject_id, di.icd_code, dicd.long_title FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code JOIN initial_diagnosis id ON di.subject_id = id.subject_id AND di.hadm_id = id.hadm_id WHERE di.charttime > id.charttime AND di.charttime <= datetime(id.charttime, '+2 months') ) SELECT long_title FROM subsequent_diagnoses GROUP BY long_title ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['atrial fibrillation'], ['autistic disorder, current or active state'], ['long-term (current) use of anticoagulants'], ['personal history of sudden cardiac arrest'], ['postprocedural fever'], ['unspecified essential hypertension']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL restricts subsequent diagnoses to the same hadm_id (hospital admission) as the initial BMI diagnosis, whereas the correct logic should consider any diagnosis within two months after the initial BMI diagnosis regardless of admission. This erroneous join condition fundamentally changes the query\u2019s intended meaning by excluding valid diagnoses in other admissions, requiring more than a localized fix.",
            "error_type": "Global: incorrect join condition"
        }
    },
    "deepseek": {
        "ed2471d0dc2d2a5bbd67ea07": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most frequently given microbiology tests for the patients who had previously received introduction of other thrombolytic into peripheral vein, percutaneous approach within the same month, this year?",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'introduction of other thrombolytic into peripheral vein, percutaneous approach' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH ranked_tests AS ( SELECT test_name, COUNT(*) as test_count, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) as rank FROM microbiologyevents WHERE EXISTS ( SELECT 1 FROM procedures_icd p_icd JOIN d_icd_procedures d_icd_p ON p_icd.icd_code = d_icd_p.icd_code WHERE d_icd_p.long_title = 'int;",
            "gold_answer": "[['urine culture']]",
            "pred_answer": "unrecognized token: \"'int;\"",
            "is_correct": false,
            "reasoning": "The predicted SQL is truncated and contains an unrecognized token \"'int;\" which is a syntax error in that specific part of the query. This does not reflect a misunderstanding of table relationships or the overall intended logic, but rather a localized parsing issue.",
            "error_type": "Local: syntax error"
        },
        "649c3bfef97979fa17d56b18": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was the last time the sao2 of patient 10014078 registered less than 94.0 since 269 days ago?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10014078 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND chartevents.valuenum < 94.0 AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-269 day') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT T1.charttime FROM chartevents T1 JOIN admissions T2 ON T1.hadm_id = T2.hadm_id JOIN patients T3 ON T2.subject_id = T3.subject_id WHERE T3.subject_id = 10014078 AND T1.label = 'o2 saturation pulseoxymetry' AND T1.valueuom = '%' AND T1.valuenum < 94.0 AND T1.charttime >= datetime('2100-12-31 23:59:00', '-269 day') ORDER BY T1.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-09-09 04:02:00']]",
            "pred_answer": "no such column: T1.label",
            "is_correct": false,
            "reasoning": "The predicted SQL attempts to reference a column T1.label, but the chartevents table does not have a column named label. This is a column reference error in the WHERE clause and can be fixed by joining the d_items table to filter by label. The overall query structure and logic remain unchanged.",
            "error_type": "Local: column reference error"
        },
        "b5bc2ad81764fb6d8d587b9a": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the minimum total cost for the hospital when a immunoglobulin m lab test is involved this year?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'immunoglobulin m' ) ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) AS min_total_cost FROM ( SELECT hadm_id, SUM(cost) AS total_cost FROM cost JOIN labevents ON cost.event_id = labevents.row_id JOIN d_labitems ON labevents.itemid = d_labitems.itemid WHERE d_labitems.label LIKE '%immunoglobulin m%' AND STRFTIME('%Y', labevents.charttime) = '2100' GROUP BY hadm_id ) ;",
            "gold_answer": "[['8671.46']]",
            "pred_answer": "ambiguous column name: hadm_id",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to an ambiguous column reference on hadm_id: both cost and labevents tables have a hadm_id, and the unqualified hadm_id in the SELECT and GROUP BY is ambiguous. This is a localized naming error that can be fixed by prefixing the column with its table alias (e.g., cost.hadm_id) without changing the overall query structure.",
            "error_type": "Local: ambiguous-column reference error"
        },
        "38b6a4148e29acfdd741bed1": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What was the name of the output which patient 10027602 had for the first time since 03/08/2100?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT outputevents.itemid FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) ) AND strftime('%Y-%m-%d',outputevents.charttime) >= '2100-03-08' ORDER BY outputevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT d_items.label FROM outputevents JOIN d_items ON outputevents.itemid = d_items.itemid WHERE outputevents.subject_id = 10027602;",
            "gold_answer": "[['foley']]",
            "pred_answer": "[['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1'], ['cerebral ventricular #1']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the date filter (since 03/08/2100), the logic to restrict to the patient's ICU stays via icustays and admissions, and the ordering/limit to pick the first event. Instead, it simply joins outputevents by subject_id, returning all distinct outputs for the patient. Fixing this requires restructuring the entire query\u2014adding the date condition, joining through admissions and icustays, and applying ORDER BY and LIMIT\u2014so this is not a local fix but a global rewrite of the query logic.",
            "error_type": "Global: incorrect query logic"
        }
    },
    "o4-mini": {
        "e180639ff26c46761fd5b8f7": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Count the number of times that patient 10008454 took labetalol since 148 days ago.",
            "gold_sql": "SELECT COUNT(*) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10008454 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'labetalol' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime) >= datetime('2100-12-31 23:59:00','-148 day')",
            "pred_sql": "SELECT COUNT(*) AS labetalol_count FROM prescriptions WHERE subject_id = 10008454 AND drug LIKE 'labetalol%' AND datetime(starttime) >= datetime('2100-12-31 23:59:00','-148 days');",
            "gold_answer": "[['6.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the data source and structure: it queries prescriptions rather than inputevents and omits the necessary joins/subqueries through admissions and icustays. This is not a minor syntax fix but a fundamental logical error that alters the intended meaning of the query.",
            "error_type": "Global: wrong table reference"
        }
    },
    "qwen3": {
        "3572f327afb3fe1874b00603": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "For patients that are given heparin flush (100 units/ml) in 2100, what are the top four drugs that they are prescribed afterwards within the same month?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'heparin flush (100 units/ml)' AND strftime('%Y',prescriptions.starttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE strftime('%Y',prescriptions.starttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.starttime < T2.starttime AND datetime(T1.starttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH heparin_item AS ( SELECT itemid FROM d_items WHERE label = 'heparin flush (100 units/ml)' ) SELECT p.drug FROM prescriptions p WHERE EXISTS ( SELECT 1 FROM inputevents i JOIN heparin_item h ON i.itemid = h.itemid WHERE i.subject_id = p.subject_id AND strftime('%Y-%m', i.starttime) = strftime('%Y-%m', p.starttime) AND p.starttime > i.starttime AND i.starttime BETWEEN '2100-01-01' AND '2100-12-31' ) GROUP BY p.drug ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['0.83% sodium chloride'], ['acetaminophen iv'], ['aspirin'], ['clonidine patch 0.3 mg/24 hr'], ['iso-osmotic dextrose'], ['lactated ringers'], ['nicardipine iv'], ['olanzapine (disintegrating tablet)'], ['ondansetron'], ['pantoprazole'], ['phosphorus'], ['potassium chloride replacement (critical care and oncology)'], ['quetiapine fumarate'], ['ranitidine (liquid)'], ['sodium chloride 0.9%'], ['sodium chloride 0.9%  flush'], ['vancomycin'], ['vial']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets where to find the heparin flush events and which table drives the logic. It uses inputevents joined to d_items to identify 'heparin flush (100 units/ml)' instead of directly querying the prescriptions table (as the ground truth does) and then joining to subsequent prescriptions. This error spans multiple clauses and alters the entire structure and meaning of the query\u2014it uses the wrong source table for the key event and thus never matches any prescriptions afterward, leading to an empty result.",
            "error_type": "Global: incorrect-table-join error"
        }
    }
}