{
    "llm2sql": {
        "c883954cdceee4237e2e39f4": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the top five most frequent procedures that patients received within the same month after being diagnosed with other nonspecific abnormal finding of lung field?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other nonspecific abnormal finding of lung field' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT P.long_title, COUNT(*) as frequency nFROM procedures_icd PI nJOIN diagnoses_icd DI ON PI.subject_id = DI.subject_id AND PI.hadm_id = DI.hadm_id nJOIN d_icd_procedures P ON PI.icd_code = P.icd_code nJOIN d_icd_diagnoses D ON DI.icd_code = D.icd_code nWHERE D.long_title = 'other nonspecific abnormal finding of lung field' n AND strftime('%Y-%m', PI.charttime) = strftime('%Y-%m', DI.charttime) nGROUP BY P.long_title nORDER BY frequency DESC nLIMIT 5;",
            "gold_answer": "[['hemodialysis']]",
            "pred_answer": "near \"nFROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains stray 'n' characters before FROM and JOIN clauses (e.g., 'nFROM'), causing a syntax error. This is a localized issue in the SELECT statement formatting and does not affect the overall query logic or table relationships.",
            "error_type": "Local: syntax error"
        },
        "fb4971631664b9b3ac5360bd": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When was patient 10019568's first minimum urea nitrogen in 02/this year?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019568 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'urea nitrogen' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '02' ORDER BY labevents.valuenum ASC, labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(T1.charttime) AS first_minimum_time FROM labevents AS T1 INNER JOIN d_labitems AS T2 ON T1.itemid = T2.itemid WHERE T1.subject_id = 10019568 AND T2.label = 'urea nitrogen' AND datetime(T1.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') AND strftime('%m', T1.charttime) = '02' ORDER BY T1.valuenum ASC, T1.charttime ASC LIMIT 1",
            "gold_answer": "[['2100-02-05 04:30:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic used to find the first minimum urea nitrogen measurement in February. It replaces the hadm_id subquery with a direct subject_id filter, misapplies the date filter by using 'start of month' instead of restricting to February of this year via 'start of year' plus month check, and aggregates MIN(charttime) instead of ordering by valuenum then charttime. These errors span multiple clauses and change the intended query structure and logic, making this a global error.",
            "error_type": "Global: incorrect overall query logic"
        },
        "1279690fd165c9155890598f": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was the total void output in patient 10022880 on 04/03/2100?",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022880 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'void' AND d_items.linksto = 'outputevents' ) AND strftime('%Y-%m-%d',outputevents.charttime) = '2100-04-03'",
            "pred_sql": "SELECT SUM(value) FROM outputevents WHERE subject_id = 10022880 AND datetime(charttime, 'start of day') = datetime('2100-04-03 23:59:00', 'start of day', '-0 day')",
            "gold_answer": "[['200.0']]",
            "pred_answer": "[['1260.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters by subject_id and date, but omits the condition restricting outputevents to the 'void' item label (and linksto). This is a missing WHERE clause filter, which is a localized issue that can be fixed by adding the appropriate condition. The overall query structure and joins remain correct.",
            "error_type": "Local: missing-where-filter"
        }
    },
    "codes": {
        "8a0fa0a4a8e13bb1dbb8d82f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What's the weight difference of patient 10004720's body last measured on the last hospital visit compared to the value second to last measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "SELECT sum(CASE WHEN charttime = ( SELECT max(charttime) FROM chartevents WHERE itemid IN ( 226559, 226560 ) AND subject_id = 10004720 ) THEN value ELSE 0 END) - sum(CASE WHEN charttime = ( SELECT max(charttime) - 1 FROM chartevents WHERE itemid IN ( 226559, 226560 ) AND subject_id = 10004720 ) THEN value ELSE 0 END) AS weight_difference FROM chartevents WHERE itemid IN ( 226559, 226560 ) AND subject_id = 10004720",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: value",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'value' in the chartevents table instead of using the correct 'valuenum' column. This is a simple column reference error and does not change the overall structure or logic of the query.",
            "error_type": "Local: table/column reference error"
        },
        "f789a1f2bea694b1a95b300a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of current patients whose ages fall within the 20s.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime IS NULL AND admissions.age BETWEEN 20 AND 29",
            "pred_sql": "SELECT count(DISTINCT patients.subject_id) FROM admissions INNER JOIN patients ON admissions.subject_id = patients.subject_id WHERE patients.dob LIKE '20%'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['94.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates only in the WHERE clause by filtering on patients.dob with a LIKE pattern instead of using the admissions.age BETWEEN 20 AND 29 condition. The overall query structure (JOIN, COUNT, DISTINCT) is correct, and no major re-structuring is needed\u2014only the filtering logic in the WHERE clause is wrong.",
            "error_type": "Local: incorrect filter condition"
        },
        "eb2c3da7830258ad243c4ce5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What did patient 10020740 receive on 03/25/this year as their last input?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT inputevents.itemid FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) ) AND datetime(inputevents.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',inputevents.starttime) = '03-25' ORDER BY inputevents.starttime DESC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT inputevents.itemid FROM inputevents INNER JOIN patients ON inputevents.subject_id = patients.subject_id WHERE patients.subject_id = 10020740 AND strftime('%m/%d/%Y', inputevents.charttime) = '03/25/2100'",
            "gold_answer": "[['insulin - regular']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non\u2010existent column inputevents.charttime (the inputevents table uses starttime, not charttime), causing a column reference error. This is a mistake localized to a specific part of the query and can be corrected without changing the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "1bf033967053a3e88b60ffec": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many current patients are there?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime is null",
            "pred_sql": "SELECT count(DISTINCT icustays.subject_id) FROM icustays INNER JOIN patients ON icustays.subject_id = patients.subject_id WHERE patients.dod IS NULL",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['64.0']]",
            "is_correct": false,
            "reasoning": "The predicted query completely changes the intent by using icustays and filtering on patients.dod IS NULL (alive patients) rather than using admissions.dischtime IS NULL (current hospital admissions). It joins the wrong tables and applies the wrong condition, altering the overall logic of what constitutes a \u201ccurrent patient.\u201d This is not a minor syntax or reference fix but a fundamental misuse of tables and conditions that requires restructuring the query.",
            "error_type": "Global: wrong table and logic"
        },
        "cc60f6274f29d0fdfd7ff5a3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the name of the lab test that patient 10027445 received first on the first hospital visit?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT labevents.itemid FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027445 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT labevents.itemid FROM labevents INNER JOIN d_labitems ON labevents.itemid = d_labitems.itemid WHERE labevents.subject_id = 10027445 ORDER BY labevents.charttime ASC LIMIT 1",
            "gold_answer": "[['chloride']]",
            "pred_answer": "[['50902.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict to the patient's first hospital visit (it uses subject_id instead of hadm_id and omits the admissions/dischtime filter) and also selects the itemid rather than the lab test label. These mistakes are not localized to a single clause but alter the fundamental logic and table relationships of the query, requiring structural changes (adding subqueries, correct joins, and correct SELECT target) to fix.",
            "error_type": "Global: join error"
        },
        "ae2f9d639134a58571aa307a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients had a immunology (cmv) test administered since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT microbiologyevents.hadm_id FROM microbiologyevents WHERE microbiologyevents.spec_type_desc = 'immunology (cmv)' AND strftime('%Y',microbiologyevents.charttime) >= '2100' )",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM chartevents WHERE itemid IN (612, 613, 614, 615, 618)",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the chartevents table with arbitrary itemids and omits any filter on spec_type_desc or charttime, completely ignoring the microbiologyevents table and the time constraint since 2100. This is not a minor fix in a single clause but a fundamental mis\u2010use of the wrong table and logic, altering the intent of the query. Therefore, this is a Global error.",
            "error_type": "Global: incorrect table/logical error"
        },
        "5fd041e7cb3585b0f8296e59": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Can you tell me the maximum total hospital cost if it involves a rbc, pleural lab test in 2100?",
            "gold_sql": "SELECT MAX(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'rbc, pleural' ) ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT sum(cost.cost) AS num FROM cost INNER JOIN labevents ON cost.event_id = labevents.row_id WHERE labevents.itemid IN (40549, 50512)",
            "gold_answer": "[['15682.43']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the query structure: it joins cost to labevents via cost.event_id = labevents.row_id (missing cost.event_type filter), omits the year filter on cost.chargetime, fails to group by hadm_id to compute per-admission totals, and never takes the MAX of those totals. These are not isolated typos but a fundamentally different and incorrect join/aggregation approach, requiring extensive restructuring to fix. Therefore this is a Global error.",
            "error_type": "Global: incorrect join logic"
        },
        "7f27ac59bfdf5715227dfcb0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the total multivitamins amount given for patient 10038933 on the last icu visit?",
            "gold_sql": "SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038933 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'multivitamins' AND d_items.linksto = 'inputevents' )",
            "pred_sql": "SELECT sum(inputevents.totalamount) FROM icustays INNER JOIN inputevents ON icustays.stay_id = inputevents.stay_id WHERE inputevents.subject_id = 10038933 AND inputevents.itemid IN (226559, 226560, 226561, 226562, 226584, 226585, 226586, 226587, 226588, 226589, 226590, 226591, 227510, 227511, 227512, 227513, 227514, 227515, 227517, 227518, 227519, 227520, 227521, 227522, 22752",
            "gold_answer": "[['1001.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the logic to identify the patient\u2019s last ICU stay (no filter on icustays.outtime, no ORDER BY icustays.intime DESC LIMIT 1). Instead it simply joins all stays and filters by subject_id, fundamentally changing the query\u2019s meaning. Correcting this requires restructuring the query to select the single latest stay, not just fixing a minor clause.",
            "error_type": "Global: join-logic error"
        },
        "4e1182880796183b551f7b41": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last procedure patient 10018081 had since 2100?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT procedures_icd.icd_code FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ORDER BY procedures_icd.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures INNER JOIN procedures_icd ON d_icd_procedures.row_id = procedures_icd.icd_code WHERE procedures_icd.subject_id = 10018081 ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "gold_answer": "[['enteral infusion of concentrated nutritional substances']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL has two localized mistakes: it joins d_icd_procedures.row_id to procedures_icd.icd_code (instead of matching on icd_code) and it omits the WHERE clause filtering procedures since 2100. Both errors are confined to specific clauses (JOIN and WHERE) and do not require restructuring the overall query logic or table relationships, so this is a Local error.",
            "error_type": "Local: table/column reference error"
        },
        "69100638c56508dbe7fa9a28": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate the total number of patients diagnosed with unspecified disease of pericardium who didn't revisit the hospital within the same month this year.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified disease of pericardium' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified disease of pericardium' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 JOIN admissions ON T2.subject_id = admissions.subject_id WHERE T2.charttime < admissions.admittime AND datetime(admissions.admittime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND datetime(T2.charttime,'start of month') = datetime(admissions.admittime,'start of month') )",
            "pred_sql": "SELECT count(icustays.subject_id) FROM icustays INNER JOIN diagnoses_icd ON icustays.hadm_id = diagnoses_icd.hadm_id WHERE diagnoses_icd.icd_code = '25005' AND strftime('%Y-%m', icustays.intime) != strftime('%Y-%m', diagnoses_icd.charttime)",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally departs from the intended logic. The question asks for counting patients diagnosed with \u2018unspecified disease of pericardium\u2019 within this year (using the d_icd_diagnoses lookup) who did not have another hospital admission in the same month of their diagnosis. The ground truth uses admissions and diagnoses_icd, filters by the diagnosis long_title, restricts to this year, and subtracts those with a same-month readmission. The predicted SQL instead: \u2022 Uses icustays rather than admissions to track hospital visits. \u2022 Hard-codes icd_code = '25005' instead of looking up the code by long_title. \u2022 Joins icustays to diagnoses_icd on hadm_id, counting ICU stays not in the same year-month rather than counting patients without a same-month readmission. \u2022 Uses a simple strftime comparison for month inequality, which does not implement the \u201cno revisit in the same month\u201d logic correctly. These errors span multiple clauses (wrong tables, wrong code lookup, wrong joins, and the wrong filtering strategy) and require reworking the entire query structure to align with the question\u2019s intent. This is a Global error.",
            "error_type": "Global: incorrect join and filtering logic"
        }
    },
    "qwen3": {
        "f832db59e9dda84d9dd14c15": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What are the four most frequent procedures for patients who are in their 60 or above this year?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM patients JOIN procedures_icd ON patients.subject_id = procedures_icd.subject_id JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE 2100 - CAST(strftime('%Y', patients.dob) AS INTEGER) >= 60 GROUP BY d_icd_procedures.long_title ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['arterial catheterization'], ['arteriography of cerebral arteries'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['coronary arteriography using two catheters'], ['enteral infusion of concentrated nutritional substances'], ['extirpation of matter from left lower lung lobe, via natural or artificial opening endoscopic'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['insertion of endotracheal tube'], ['left heart cardiac catheterization'], ['single internal mammary-coronary artery bypass']]",
            "pred_answer": "[['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['single internal mammary-coronary artery bypass']]",
            "is_correct": false,
            "reasoning": "The predicted SQL makes two localized mistakes in the WHERE clause: it calculates age manually from patients.dob instead of using the admissions.age column (violating the age calculation assumption) and it omits the filter to restrict procedures to the current year. These errors are confined to a specific part of the query (the WHERE conditions) and do not require restructuring the overall query logic or JOINs.",
            "error_type": "Local: value parsing error"
        },
        "07e6a4280f821838e9ccbeae": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What is the number of times that patient 10020740 had a or ebl event since 05/10/2100?",
            "gold_sql": "SELECT COUNT(*) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'or ebl' AND d_items.linksto = 'outputevents' ) AND strftime('%Y-%m-%d',outputevents.charttime) >= '2100-05-10'",
            "pred_sql": "SELECT COUNT(*) AS event_count FROM chartevents c JOIN d_items d ON c.itemid = d.itemid WHERE c.subject_id = 10020740 AND d.label IN ('a', 'ebl') AND c.charttime >= '2100-05-10';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses chartevents rather than outputevents and filters on c.subject_id directly, looking for labels 'a' or 'ebl', which fundamentally changes the data source and join logic. The correct query must aggregate from outputevents linked via stay_id and admissions for the given patient and filter for the specific 'or ebl' item. This is not a minor typo or syntax issue but a change in the core tables and relationships, requiring a restructuring of the query.",
            "error_type": "Global: join error"
        },
        "5a69bd52eccb00f7bc5a5dbd": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Give me the three most frequent procedures performed in 2100.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE strftime('%Y',procedures_icd.charttime) = '2100' GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT d.long_title FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE strftime('%Y', p.charttime) = '2100' GROUP BY p.icd_code, d.long_title ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['central venous catheter placement with guidance'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery'], ['other electroshock therapy'], ['percutaneous abdominal drainage']]",
            "pred_answer": "[['central venous catheter placement with guidance'], ['enteral infusion of concentrated nutritional substances'], ['extracorporeal circulation auxiliary to open heart surgery']]",
            "is_correct": false,
            "reasoning": "The predicted query uses a simple GROUP BY with ORDER BY COUNT DESC and LIMIT 3 to pick the \u201ctop 3\u201d procedures, whereas the ground truth uses DENSE_RANK to include all procedures tied at the 3rd most frequent count. This is not simply a typo or missing column\u2014it is a fundamental logical difference in how rankings and ties are handled, requiring a restructure of the query (adding a window function and subquery) to match the intended behavior. Therefore, it is a Global error.",
            "error_type": "Global: incorrect ranking strategy"
        }
    },
    "dinsql": {
        "0a6429bfa48cdb76d66bf9a6": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "How many days has it been since patient 10021487's hospital admission?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', admissions.admittime) ) FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL",
            "pred_sql": "SELECT julianday('2100-12-31') - julianday(admittime) AS days_since_admission FROM admissions WHERE subject_id = 10021487 AND dischtime IS NULL ORDER BY admittime DESC LIMIT 1",
            "gold_answer": "[['24.983']]",
            "pred_answer": "[['23.984']]",
            "is_correct": false,
            "reasoning": "The predicted query localizes its mistake to the date literal and function used for computing the difference. Instead of using the full timestamp '2100-12-31 23:59:00' with strftime('%J',\u2026) as in the ground truth, it uses julianday('2100-12-31') and julianday(admittime), thereby dropping the time component and producing an off-by-one-day result. This is a contained error in the temporal calculation (date literal/format) and does not affect the overall query structure or logic of filtering the current admission. Hence it is a Local error.",
            "error_type": "Local: other-temporal error"
        }
    },
    "deepseek": {
        "e2c04c788a4363be6217e18e": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What was the output of patient 10019172 for the last time on last month/20?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT outputevents.itemid FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019172 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '20' ORDER BY outputevents.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT outputevents.subject_id, outputevents.charttime, outputevents.value, outputevents.valueuom FROM outputevents WHERE outputevents.subject_id = 10019172 AND datetime(outputevents.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', outputevents.charttime) = '20' ORDER BY outputevents.charttime DESC;",
            "gold_answer": "[['foley']]",
            "pred_answer": "[['10019172.0', '2100-11-20 11:30:00', '120.0', 'ml'], ['10019172.0', '2100-11-20 11:30:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 11:30:00', '700.0', 'ml'], ['10019172.0', '2100-11-20 11:45:00', '10.0', 'ml'], ['10019172.0', '2100-11-20 12:06:00', '270.0', 'ml'], ['10019172.0', '2100-11-20 12:07:00', '15.0', 'ml'], ['10019172.0', '2100-11-20 12:35:00', '40.0', 'ml'], ['10019172.0', '2100-11-20 13:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 13:00:00', '340.0', 'ml'], ['10019172.0', '2100-11-20 14:00:00', '400.0', 'ml'], ['10019172.0', '2100-11-20 14:00:00', '70.0', 'ml'], ['10019172.0', '2100-11-20 15:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 15:00:00', '290.0', 'ml'], ['10019172.0', '2100-11-20 16:00:00', '250.0', 'ml'], ['10019172.0', '2100-11-20 16:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 17:01:00', '160.0', 'ml'], ['10019172.0', '2100-11-20 17:01:00', '50.0', 'ml'], ['10019172.0', '2100-11-20 18:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 18:00:00', '80.0', 'ml'], ['10019172.0', '2100-11-20 19:00:00', '100.0', 'ml'], ['10019172.0', '2100-11-20 19:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 20:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 20:00:00', '40.0', 'ml'], ['10019172.0', '2100-11-20 21:00:00', '30.0', 'ml'], ['10019172.0', '2100-11-20 21:00:00', '40.0', 'ml'], ['10019172.0', '2100-11-20 22:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 22:00:00', '32.0', 'ml'], ['10019172.0', '2100-11-20 23:00:00', '20.0', 'ml'], ['10019172.0', '2100-11-20 23:00:00', '25.0', 'ml']]",
            "is_correct": false,
            "reasoning": "The predicted SQL drops the critical joins through admissions \u2192 icustays \u2192 outputevents and instead filters only by subject_id, completely altering which outputevents are considered. It also selects the wrong columns (subject_id, charttime, value, valueuom) rather than the item label via d_items, and omits the LIMIT 1. These mistakes span multiple clauses (SELECT, FROM, WHERE, ORDER BY) and require substantial restructuring of the query\u2019s logic and joins to fix. Hence this is a Global error.",
            "error_type": "Global: incorrect join and selection logic"
        },
        "e084964b489539a157781a7c": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the minimum total cost of the hospital visits that involve a procedure called excision of right saphenous vein, percutaneous endoscopic approach since 2100?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of right saphenous vein, percutaneous endoscopic approach' ) ) AND strftime('%Y',cost.chargetime) >= '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) AS minimum_total_cost FROM ( SELECT c.hadm_id, SUM(c.cost) AS total_cost FROM cost c JOIN procedures_icd p ON c.event_id = p.row_id AND c.event_type = 'procedures_icd' JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title = 'excision of right saphenous vein, percutaneous endoscopic approach' AND p.charttime >= '2100-01-01' GROUP BY c.hadm_id ) ;",
            "gold_answer": "[['3838.48']]",
            "pred_answer": "[['8.6']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only sums costs directly tied to the specific procedure (c.event_type = 'procedures_icd' joined to procedures_icd) and filters by the procedure\u2019s charttime, rather than summing all costs (diagnoses, procedures, labs, prescriptions) for the entire hospital admission and filtering by cost.chargetime\u2019s year as required. This error alters the overall logic and intended aggregation scope of the query, requiring a structural rewrite to include all cost event types and apply the correct date filter. Hence, it is not a localized mistake but a fundamental logical/structural flaw.",
            "error_type": "Global: join logic error"
        },
        "bd6c49415465061eab77c735": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Count the number of patients diagnosed with catatonic type schizophrenia, unspecified who did not come to the hospital within 2 months this year.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'catatonic type schizophrenia, unspecified' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'catatonic type schizophrenia, unspecified' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 JOIN admissions ON T2.subject_id = admissions.subject_id WHERE T2.charttime < admissions.admittime AND datetime(admissions.admittime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND datetime(admissions.admittime) BETWEEN datetime(T2.charttime) AND datetime(T2.charttime,'+2 month') )",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) AS patient_count FROM patients p JOIN diagnoses_icd d_icd ON p.subject_id = d_icd.subject_id JOIN d_icd_diagnoses diag ON d_icd.icd_code = diag.icd_code LEFT JOIN admissions a ON p.subject_id = a.subject_id WHERE diag.long_title = 'catatonic type schizophrenia, unspecified' AND a.admittime > '2100-10-01 00:00:00' OR a.admittime IS NULL;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to capture the intended logic in several fundamental ways. It does not restrict diagnoses to this year, uses a hard\u2010coded date literal ('2100-10-01') instead of dynamically computing the two\u2010month window from each diagnosis date, misplaces the OR condition (causing incorrect inclusion of patients with no admissions), and generally restructures the joins to patients and admissions incorrectly. These issues span multiple clauses (JOINs, WHERE filters, date computations) and require a wholesale rewrite of the query logic rather than a simple fix in one clause.",
            "error_type": "Global: logical inconsistency across query components"
        }
    }
}