{
    "qwen3": {
        "e5859415e36580c3966e0e1a": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Get me the top four most frequent lab tests that patients had within the same hospital visit after receiving venous catheterization, not elsewhere classified since 1 year ago.",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime, admissions.hadm_id FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 4 )",
            "pred_sql": "SELECT d.label FROM labevents l JOIN procedures_icd p ON l.hadm_id = p.hadm_id JOIN d_icd_procedures dproc ON p.icd_code = dproc.icd_code JOIN d_labitems d ON l.itemid = d.itemid WHERE dproc.long_title = 'insertion of venous catheter, not elsewhere classified' AND p.charttime >= datetime('2100-12-31 23:59:00', '-1 year') AND l.charttime > p.charttime GROUP BY l.itemid, d.label ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['hematocrit'], ['hemoglobin'], ['mch'], ['mchc'], ['mcv'], ['platelet count'], ['rdw'], ['rdw-sd'], ['red blood cells'], ['white blood cells']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to match any procedure rows because it filters on d_icd_procedures.long_title = 'insertion of venous catheter, not elsewhere classified', whereas the correct title is 'venous catheterization, not elsewhere classified'. This is a mismatch in the literal value used in the WHERE clause, causing no rows to pass the filter. The rest of the query structure (joins, grouping, ordering) aligns with the intended logic, so only the literal in the WHERE clause is wrong.",
            "error_type": "Local: value parsing error"
        },
        "0c7b4ddbcc44458591fe84e6": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Among patients who are prescribed with albumin 5% (25g / 500ml), what are the top three most frequently prescribed drugs at the same time?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'albumin 5% (25g / 500ml)' ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT p2.drug FROM prescriptions p1 JOIN prescriptions p2 ON p1.hadm_id = p2.hadm_id WHERE p1.drug = 'albumin 5% (25g / 500ml)' AND p2.drug != 'albumin 5% (25g / 500ml)' GROUP BY p2.drug ORDER BY COUNT(DISTINCT p2.hadm_id) DESC LIMIT 3;",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen iv'], ['albumin 5% (25g / 500ml)'], ['aspirin'], ['aspirin ec'], ['chlorhexidine gluconate 0.12% oral rinse'], ['ciprofloxacin iv'], ['docusate sodium'], ['docusate sodium (liquid)'], ['ezetimibe'], ['fish oil (omega 3)'], ['ketorolac'], ['metronidazole (flagyl)'], ['milk of magnesia'], ['ns'], ['sodium bicarbonate'], ['soln']]",
            "pred_answer": "[['furosemide'], ['lactated ringers'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins prescriptions on hadm_id only, grouping co\u2010prescriptions by admission rather than by the exact same start time for patients prescribed albumin. This violates the requirement to match on both subject and datetime for simultaneous prescriptions and fundamentally changes the query\u2019s intended logic. Fixing it requires restructuring the join to include starttime equality and using a proper ranking mechanism rather than a simple GROUP BY on hadm_id, so this is a global error.",
            "error_type": "Global: incorrect join logic"
        }
    },
    "codes": {
        "de36ead34dfb7547293729b8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Find the number of current patients in the 40s age group.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime IS NULL AND admissions.age BETWEEN 40 AND 49",
            "pred_sql": "SELECT count(subject_id) FROM patients WHERE strftime('%Y', admissions.admittime) - strftime('%Y', patients.dob) >= 40 AND strftime('%Y', admissions.admittime) - strftime('%Y', patients.dob) <= 49",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: admissions.admittime",
            "is_correct": false,
            "reasoning": "The predicted query simply selects from patients and tries to reference admissions.admittime without including the admissions table or a JOIN. This is a column/table reference mistake localized to the FROM clause and column usage, not a fundamental restructuring or logical flaw across multiple clauses.",
            "error_type": "Local: table/column reference error"
        },
        "c0bd2660fb0bdea2e7a08d76": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the last time that patient 10026255 has been prescribed medication via the po/ng route since 02/2100?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10026255 ) AND prescriptions.route = 'po/ng' AND strftime('%Y-%m',prescriptions.starttime) >= '2100-02' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM prescriptions INNER JOIN transfers ON prescriptions.subject_id = transfers.subject_id WHERE prescriptions.route = 'po/ng' AND prescriptions.subject_id = 10026255 AND transfers.intime LIKE '2100-02%' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "gold_answer": "[['2100-10-20 20:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query logic by joining prescriptions to transfers on subject_id, rather than restricting prescriptions by the patient's hospital admission (hadm_id) and filtering prescriptions.starttime by month. It also uses transfers.intime LIKE '2100-02%' instead of strftime on prescriptions.starttime to capture all dates since February 2100. This incorrect join and date filtering changes the intended relationships and cannot be fixed by a minor, localized edit; it requires restructuring the query logic.",
            "error_type": "Global: join error"
        },
        "fa62adda98cb0b27d5a3c37b": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Get me patient 10020187's total output volume on 02/04/2100.",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020187 ) ) AND strftime('%Y-%m-%d',outputevents.charttime) = '2100-02-04'",
            "pred_sql": "SELECT sum(outputevents.amount) FROM patients INNER JOIN outputevents ON patients.subject_id = outputevents.subject_id WHERE patients.subject_id = 10020187 AND strftime('%Y-%m-%d', outputevents.charttime) = '2021-04-02'",
            "gold_answer": "[['2275.0']]",
            "pred_answer": "no such column: outputevents.amount",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a non\u2010existent column outputevents.amount instead of outputevents.value, causing a column reference error. It also incorrectly filters the date as '2021-04-02' instead of '2100-02-04', but the primary failure is the wrong column name. These are confined to specific parts (SELECT and WHERE) and do not alter the overall query structure or table relationships.",
            "error_type": "Local: table/column reference error"
        },
        "ddda565150df1f4e6bed69dd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10021118 get his/her first prescription of atorvastatin this month?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021118 ) AND prescriptions.drug = 'atorvastatin' AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM prescriptions INNER JOIN patients ON prescriptions.subject_id = patients.subject_id WHERE patients.subject_id = 10021118 AND prescriptions.drug = 'atorvastatin' AND strftime('%Y-%m', prescriptions.starttime) = '2100-11' LIMIT 1",
            "gold_answer": "[['2100-12-01 20:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s main logical error is in the WHERE clause time filter: it uses strftime('%Y-%m') = '2100-11', which filters for November instead of December (\u201cthis month\u201d). This is a misinterpretation of the \u2018this month\u2019 requirement. The rest of the query structure (JOIN, subject_id filter, drug filter, LIMIT) is intact, so fixing the month literal or time expression corrects the query without altering its overall structure or joins. This is therefore a localized error in the temporal filter.",
            "error_type": "Local: temporal filter error"
        }
    },
    "o4-mini": {
        "f40498776baac42e6f79ba96": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What is the total hospital cost for patient 10007058's stay?",
            "gold_sql": "SELECT SUM(cost.cost) FROM cost WHERE cost.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10007058 )",
            "pred_sql": "SELECT SUM(c.cost) AS total_cost FROM cost c JOIN admissions a ON c.hadm_id = a.hadm_id WHERE a.subject_id = 10007058  AND a.dischtime IS NULL;",
            "gold_answer": "[['1689.76']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds a filter on admissions.dischtime IS NULL, thus summing only ongoing admissions rather than all admissions for the patient as required. This changes the overall logic and scope of the query, not just a minor syntax or column issue, and requires restructuring the filter logic across clauses. Therefore, it is a global error affecting the intended result.",
            "error_type": "Global: incorrect filter logic"
        },
        "1c7f17a31b72c45986809025": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "What is the number of patients who died after diagnosis of hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified within the same month this year?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified' ) ) AS T1 GROUP BY T1.subject_id HAVING MIN(T1.charttime) = T1.charttime AND datetime(T1.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 JOIN ( SELECT patients.subject_id, admissions.hadm_id, patients.dod FROM admissions join patients ON patients.subject_id = admissions.subject_id WHERE patients.dod IS NOT NULL AND datetime(patients.dod,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) as T3 ON T2.subject_id = T3.subject_id AND datetime(T2.charttime,'start of month') = datetime(T3.dod,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) AS num_patients FROM diagnoses_icd d JOIN d_icd_diagnoses diag ON d.icd_code = diag.icd_code JOIN patients p ON d.subject_id = p.subject_id WHERE diag.long_title = 'hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified' AND p.dod IS NOT NULL AND datetime(p.dod) > datetime(d.charttime) -- same month and same year (this year = 2100) AND strftime('%Y-%m', d.charttime) = strftime('%Y-%m', p.dod) AND strftime('%Y', d.charttime) = '2100';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply joins diagnoses_icd to patients and counts rows where the death date is after the diagnosis date in the same YYYY-MM period of 2100. It never enforces that the diagnosis is the patient\u2019s first diagnosis of that code in 2100 (no GROUP BY/HAVING or subquery to take MIN(charttime)). This fundamentally changes the query logic and leads to overcounting. Fixing it would require restructuring the query (adding a subquery or aggregation) rather than a simple tweak in one clause. Therefore, this is a global logic error.",
            "error_type": "Global: incorrect first\u2010diagnosis filtering"
        }
    },
    "deepseek": {
        "3baa8855ca9c0ddb5058fd05": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "List the top four most common lab tests for patients with age 60 or above, since 2100.",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age >= 60 ) AND strftime('%Y',labevents.charttime) >= '2100' GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT T3.label AS lab_test, COUNT(T1.row_id) AS test_count FROM labevents T1 INNER JOIN admissions T2 ON T1.hadm_id = T2.hadm_id INNER JOIN d_labitems T3 ON T1.itemid = T3.itemid WHERE T2.age >= 60 AND T1.charttime >= '2100-01-01' GROUP BY T3.label ORDER BY test_count DESC LIMIT 4;",
            "gold_answer": "[['bicarbonate'], ['chloride'], ['creatinine'], ['hematocrit'], ['sodium'], ['urea nitrogen']]",
            "pred_answer": "[['chloride', '497.0'], ['glucose', '538.0'], ['hemoglobin', '478.0'], ['sodium', '497.0']]",
            "is_correct": false,
            "reasoning": "The error lies in how the top\u2010four ranking is performed. The ground truth uses DENSE_RANK() over the counts to include all items tied in the top four, per the SQL assumptions. The predicted query instead simply orders by count and applies LIMIT 4, which is a localized mistake in the ranking method\u2014it doesn\u2019t require restructuring joins, filters, or aggregation logic. This is a local error in the ranking approach.",
            "error_type": "Local: ranking error"
        },
        "14963d54e851b1a8321a7a99": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "On the current hospital visit, how many hours have passed since the first time patient 10018081 was prescribed iso-osmotic sodium chloride?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', prescriptions.starttime) ) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'iso-osmotic sodium chloride' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "WITH current_admission AS ( SELECT hadm_id FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL ) SELECT ( (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', ( SELECT MIN(starttime) FROM prescriptions WHERE subject_id = 10018081 AND hadm_id = (SELECT hadm_id FROM current_admission) AND drug = 'iso-osmotic sodium chloride' ))) / 3600 ) AS hours_since_first_prescription;",
            "gold_answer": "[['125.983']]",
            "pred_answer": "[['125.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses Unix timestamps divided by 3600 to calculate hours, which in SQLite results in truncation to one decimal place (125.0) instead of the more precise 125.983 derived from Julian day differences. This is a localized arithmetic/precision issue in the SELECT clause and does not affect the overall query structure or table relationships.",
            "error_type": "Local: arithmetic precision error"
        },
        "c9f901a6e53a46bf81c90cce": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most commonly prescribed drugs for patients diagnosed with alcohol abuse, unspecified previously within the same hospital visit, since 1 year ago?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'alcohol abuse, unspecified' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT drug, COUNT(*) as count FROM prescriptions JOIN diagnoses_icd ON prescriptions.hadm_id = diagnoses_icd.hadm_id WHERE diagnoses_icd.icd_code = 'icd10|F10.9' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY drug ORDER BY count DESC LIMIT 4;",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['furosemide'], ['lactated ringers']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the step of linking each prescription to the specific diagnosis event (it only joins on hadm_id, not subject_id, and never enforces that the prescription occurs after the alcohol\u2010abuse diagnosis). It also fails to restrict the diagnosis itself to within the past year. These missing filters and the flawed join strategy fundamentally change the query\u2019s logic (it will count any prescriptions in the past year for any patient admissions with an alcohol\u2010abuse code, even if the diagnosis happened long before or the prescription preceded the diagnosis). Fixing this would require restructuring multiple clauses, not just tweaking a single condition. Therefore it\u2019s a Global error.",
            "error_type": "Global: join/logical error"
        },
        "151e424b9f47e06ee148af99": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How much does the diastolic blood pressure change in patient 10018081 last measured on the current icu visit compared to the second to last value measured on the current icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure diastolic' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure diastolic' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "SELECT (SELECT valueuom FROM chartevents WHERE subject_id = 10018081 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure diastolic')) AS unit, (SELECT valuenum FROM ( SELECT charttime, valuenum FROM chartevents WHERE subject_id = 10018081 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure diastolic') AND stay_id = (SELECT stay_id FROM icustays WHERE subject_id = 10018081 AND outtime IS NULL) ORDER BY charttime DESC LIMIT 1 )) AS last_value, (SELECT valuenum FROM ( SELECT charttime, valuenum FROM chartevents WHERE subject_id = 10018081 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure diastolic') AND stay_id = (SELECT stay_id FROM icustays WHERE subject_id = 10018081 AND outtime IS NULL) ORDER BY charttime DESC;",
            "gold_answer": "[['11.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is truncated and has a syntax error near the final ORDER BY clause (missing closing parentheses, missing OFFSET and LIMIT termination). This is a localized syntax issue that does not alter the overall intended query logic structure. Therefore, it is a local error.",
            "error_type": "Local: syntax error"
        },
        "31688f4b90a3c63cc2398fdb": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me patient 10018081's maximum systolic blood pressure yesterday.",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure systolic' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT MAX(ce.valuenum) AS max_sys_bp FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10018081 AND di.label = 'arterial blood pressure systolic' AND date(ce.charttime) = date('2100-12-31') - 1;",
            "gold_answer": "[['163.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails only in its WHERE clause date filtering: it uses `date(ce.charttime) = date('2100-12-31') - 1`, which is not a valid SQLite expression for \u201cyesterday,\u201d causing no rows to match. The rest of the query structure (joins, column references) is correct. This is a localized error in the temporal comparison logic that can be fixed by using the proper SQLite date/datetime functions.",
            "error_type": "Local: other-temporal error"
        }
    },
    "macsql": {
        "d5c558ebb05afce593fd36f1": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Count the number of times that patient 10001725 received a laboratory test for specific gravity this year.",
            "gold_sql": "SELECT COUNT(*) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'specific gravity' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10001725 ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "pred_sql": "SELECT COUNT(*) FROM `labevents` WHERE `subject_id` = 10001725 AND `itemid` = ( SELECT `itemid` FROM `d_labitems` WHERE `label` = 'specific gravity' ) AND datetime(`charttime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict lab events by the patient\u2019s admission (hadm_id) and instead filters directly on labevents.subject_id. The ground truth joins through admissions to get the correct hadm_id(s) for subject 10001725. This mistake lies entirely in the WHERE clause (wrong table/column filter) and does not require restructuring joins or the overall logic beyond that clause. Therefore it is a Local error.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "6a1efc65db66092dc4f820c8": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What was the total amount of nicardipine iv that they prescribed to patient 10004733 on the current hospital visit?",
            "gold_sql": "SELECT SUM(prescriptions.dose_val_rx) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'nicardipine iv'",
            "pred_sql": "SELECT SUM(CAST(`dose_val_rx` AS REAL)) AS total_nicardipine_iv FROM prescriptions WHERE `subject_id` = 10004733 AND `hadm_id` = ( SELECT `hadm_id` FROM admissions WHERE `subject_id` = 10004733 AND `dischtime` IS NULL ) AND `drug` = 'nicardipine' AND `route` = 'iv'",
            "gold_answer": "[['160.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates only in the WHERE clause\u2019s drug filter: it splits the drug name into drug = 'nicardipine' AND route = 'iv', whereas the ground truth and schema assume the drug column stores 'nicardipine iv' as a single value. All other parts of the query structure (SUM aggregation, current visit subquery, hadm_id linkage) are correct. This is a localized mistake in the value filtering rather than a structural or multi-clause logic error.",
            "error_type": "Local: value parsing error"
        }
    },
    "llm2sql": {
        "d11cac5a8a03f36ce1590487": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the count of patients who stayed in neurology?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT transfers.hadm_id FROM transfers WHERE transfers.careunit = 'neurology' )",
            "pred_sql": "SELECT COUNT(DISTINCT icustays.subject_id) FROM icustays WHERE first_careunit = 'neurology' OR last_careunit = 'neurology';",
            "gold_answer": "[['19.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the icustays table and its first_careunit/last_careunit columns instead of using the transfers table with careunit to identify neurology stays. This is a fundamental change in data source and logic\u2014ICU stays are not the same as careunit transfers, and it fails to capture all neurology stays. The error spans the main FROM clause and entire filtering logic, so it requires restructuring the query rather than a minor fix.",
            "error_type": "Global: wrong table/structure error"
        },
        "10ba039ccd53a6d92815ad48": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Among those who are prescribed with fish oil (omega 3), what are the five most commonly prescribed drugs at the same time this year?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'fish oil (omega 3)' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH FishOilAdmissions AS ( SELECT DISTINCT hadm_id FROM prescriptions WHERE drug = 'fish oil (omega 3)' AND strftime('%Y', starttime) = '2100' ) SELECT drug FROM prescriptions WHERE hadm_id IN (SELECT hadm_id FROM FishOilAdmissions) AND strftime('%Y', starttime) = '2100' AND drug != 'fish oil (omega 3)' GROUP BY drug ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['albumin 5% (25g / 500ml)'], ['aspirin'], ['aspirin ec'], ['chlorhexidine gluconate 0.12% oral rinse'], ['clopidogrel'], ['docusate sodium'], ['docusate sodium (liquid)'], ['fenofibrate'], ['fish oil (omega 3)'], ['ibuprofen'], ['omeprazole'], ['pravastatin'], ['topiramate (topamax)'], ['vitamin d']]",
            "pred_answer": "[['0.9% sodium chloride'], ['0.9% sodium chloride (mini bag plus)'], ['albumin 25% (12.5g / 50ml)'], ['insulin'], ['metoprolol tartrate']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simply selects all other drugs prescribed at any time during the same admission year for patients who ever got fish oil, rather than only those drugs given at the exact same timestamp as the fish oil prescription. It never joins or equates the prescription times, so it fundamentally changes the intended logic of \u201cat the same time.\u201d Fixing this requires restructuring the query to join the prescriptions table on the time of the fish oil event, not just filtering by admission or year. This is a global logic/join error.",
            "error_type": "Global: incorrect join logic"
        },
        "6db688d075e7d3fad38757ad": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When was the first time patient 10008454 came to the hospital through emergency room?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10008454 AND admissions.admission_location = 'emergency room' AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT admittime FROM admissions WHERE subject_id = 10008454 AND admission_type = 'emergency' ORDER BY admittime ASC LIMIT 1",
            "gold_answer": "[['2100-12-02 06:31:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column in the WHERE clause (admission_type = 'emergency' instead of admission_location = 'emergency room') and omits the dischtime IS NOT NULL filter. This is a localized mistake in the predicate without altering the overall query structure or table relationships.",
            "error_type": "Local: table/column reference error"
        },
        "a90dde825f1d13fd69a5b809": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was the last medication that patient 10019385 was prescribed?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019385 ) ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT drug FROM prescriptions WHERE subject_id = 10019385 ORDER BY stoptime DESC LIMIT 1",
            "gold_answer": "[['acetaminophen']]",
            "pred_answer": "[['aspirin']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the correct table but orders by the wrong timestamp column (stoptime instead of starttime). This is a mistake confined to the ORDER BY clause and does not alter the overall query structure or table relationships.",
            "error_type": "Local: ORDER BY column error"
        }
    },
    "dinsql": {
        "8c9285958fb27bcd8453fdc9": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Tell me the average total hospital cost that includes other specified diseases of pancreas this year.",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other specified diseases of pancreas' ) ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT AVG(total_cost) FROM ( SELECT SUM(DISTINCT C.cost) AS total_cost FROM cost AS C INNER JOIN diagnoses_icd AS D ON C.event_id = D.row_id AND C.event_type = 'diagnoses_icd' INNER JOIN d_icd_diagnoses AS DI ON D.icd_code = DI.icd_code WHERE DI.long_title = 'other specified diseases of pancreas' AND strftime('%Y', C.chargetime) = '2100' GROUP BY C.hadm_id )",
            "gold_answer": "[['1276.33']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended logic by restricting cost to only diagnosis-related events (C.event_type = 'diagnoses_icd' and join on D.row_id) and by using SUM(DISTINCT C.cost), instead of summing all cost types for each admission as required. This isn\u2019t a simple typo or syntax fix; it alters multiple clauses (JOIN, WHERE, aggregation) and thus the overall meaning of the query. Therefore, this is a global error in the join/aggregation logic.",
            "error_type": "Global: incorrect join/aggregation logic"
        }
    }
}