{
    "dinsql": {
        "98c9e3b4254aa471e55df9d2": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "In 2100, what are the top five most frequently given diagnoses that patients received within 2 months after being diagnosed with body mass index 35.0-35.9, adult?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'body mass index 35.0-35.9, adult' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH initial_diagnosis AS ( SELECT di.subject_id, di.hadm_id, di.charttime AS diagnosis_time FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code WHERE dicd.long_title = 'body mass index 35.0-35.9, adult' AND strftime('%Y', di.charttime) = '2100' ), subsequent_diagnoses AS ( SELECT di.subject_id, di.icd_code, dicd.long_title, di.charttime FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code JOIN initial_diagnosis id ON di.subject_id = id.subject_id AND di.hadm_id = id.hadm_id WHERE di.charttime > id.diagnosis_time AND di.charttime <= datetime(id.diagnosis_time, '+2 months') ) SELECT sd.long_title FROM subsequent_diagnoses sd GROUP BY sd.long_title ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['atrial fibrillation'], ['autistic disorder, current or active state'], ['long-term (current) use of anticoagulants'], ['personal history of sudden cardiac arrest'], ['postprocedural fever'], ['unspecified essential hypertension']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a join on both subject_id and hadm_id when linking subsequent diagnoses to the initial BMI diagnosis. The ground truth logic only ties on subject_id (tracking diagnoses within two months regardless of hospital admission), but the prediction erroneously restricts subsequent diagnoses to the same hadm_id. This fundamentally changes the intended meaning and yields no results. This is not a simple syntax or column reference issue but a flawed join strategy that alters the query\u2019s logic.",
            "error_type": "Global: incorrect join condition"
        },
        "0857ac0dceb68b3f5ee03095": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "When was the last lab test that patient 10000032 received?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10000032 ) ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "MAX(labevents.charttime) AS last_lab_test_time FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE admissions.subject_id = 10000032 AND admissions.dischtime IS NOT NULL",
            "gold_answer": "[['2100-08-27 06:15:00']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an extra filter on admissions.dischtime IS NOT NULL, which limits the search to only discharged admissions. The question asks for the last lab test across all admissions for patient 10000032, regardless of discharge status. Removing or correcting this WHERE-clause condition would fix the error without altering the overall query structure or requiring a different join strategy\u2014so it is a localized issue in the WHERE clause.",
            "error_type": "Local: incorrect WHERE-clause filter"
        },
        "5d688256a3f07899fb175ce8": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "When was patient 10010471's first hospital visit?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10010471 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "admissions.admittime FROM admissions WHERE admissions.subject_id = 10010471 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "gold_answer": "[['2100-05-21 17:05:00']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the SELECT keyword before the column list, which is a localized syntax error in the SELECT clause and does not affect the overall logic or table relationships.",
            "error_type": "Local: syntax error"
        }
    },
    "macsql": {
        "c91416da12da067358414797": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "How many patients were given gt flush in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT icustays.hadm_id FROM icustays WHERE icustays.stay_id IN ( SELECT inputevents.stay_id FROM inputevents WHERE inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'gt flush' AND d_items.linksto = 'inputevents' ) AND strftime('%Y',inputevents.starttime) = '2100' ) )",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT DISTINCT `subject_id` FROM `prescriptions` WHERE `drug` = 'gt flush' AND strftime('%Y', `starttime`) = '2100' ) AS unique_patients",
            "gold_answer": "[['16.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and the drug column to find 'gt flush', whereas the correct query must use inputevents joined through d_items (inputevents.itemid filtered by d_items.label = 'gt flush'). This is not a simple column or syntax mistake but a fundamental misplacement of the data source, altering the entire logic and relationships of the query.",
            "error_type": "Global: wrong table for event"
        },
        "34dca7a12c49d40177dee03e": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Which care unit did patient 10038999 last enter since 1 year ago, based on the transfer record?",
            "gold_sql": "SELECT transfers.careunit FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038999 ) AND transfers.careunit IS NOT NULL AND datetime(transfers.intime) >= datetime('2100-12-31 23:59:00','-1 year') ORDER BY transfers.intime DESC LIMIT 1",
            "pred_sql": "SELECT `careunit` FROM transfers WHERE `subject_id` = 10038999 AND datetime(`intime`) >= datetime('2100-12-31 23:59:00', '-1 year') ORDER BY `intime` DESC LIMIT 1",
            "gold_answer": "[['med/surg']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the condition AND transfers.careunit IS NOT NULL, so it returns a NULL careunit rather than filtering it out and returning the last non-null careunit. This is a small, localized mistake in the WHERE clause and does not alter the overall query structure or logic of joining via hadm_id and ordering by intime.",
            "error_type": "Local: missing IS NOT NULL filter"
        }
    },
    "deepseek": {
        "ed7541fc091169b8d4b3e386": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was patient 10021312's metronidazole input the last time on the last icu visit?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'metronidazole' AND d_items.linksto = 'inputevents' ) ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT inputevents.charttime AS last_metronidazole_input FROM inputevents INNER JOIN icustays ON inputevents.subject_id = icustays.subject_id AND inputevents.hadm_id = icustays.hadm_id WHERE inputevents.subject_id = 10021312 AND inputevents.drug = 'metronidazole' AND icustays.outtime IS NOT NULL ORDER BY inputevents.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-09-18 07:35:00']]",
            "pred_answer": "no such column: inputevents.charttime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column `inputevents.charttime` (the correct column is `inputevents.starttime`). This is a localized column reference error and does not involve a fundamental flaw in the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "29a295d23e2237b2efe347a6": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients since 2100 underwent a stool test?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT microbiologyevents.hadm_id FROM microbiologyevents WHERE microbiologyevents.spec_type_desc = 'stool' AND strftime('%Y',microbiologyevents.charttime) >= '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT le.subject_id) AS num_patients FROM labevents le JOIN d_labitems dli ON le.itemid = dli.itemid WHERE dli.label LIKE '%stool%' AND le.charttime >= '2100-01-01';",
            "gold_answer": "[['16.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the labevents and d_labitems tables to look for a 'stool' label in lab tests, whereas the ground truth correctly queries the microbiologyevents table filtering on spec_type_desc = 'stool'. This is not a minor syntax or column naming mistake; it fundamentally alters the query\u2019s logic by targeting the wrong table and event type. Fixing this would require restructuring the query to use microbiologyevents instead of labevents, which is beyond a localized change.",
            "error_type": "Global: incorrect table usage"
        }
    },
    "codes": {
        "1afa09110562c5799d193e9a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Please list the monthly maximum volume of straight cath that was output by patient 10015931.",
            "gold_sql": "SELECT MAX(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015931 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'straight cath' AND d_items.linksto = 'outputevents' ) GROUP BY strftime('%Y-%m',outputevents.charttime)",
            "pred_sql": "SELECT sum(outputevents.volume) FROM outputevents INNER JOIN transfers ON outputevents.subject_id = transfers.subject_id WHERE transfers.subject_id = 10015931 AND outputevents.itemid IN (226560, 225664) AND transfers.intime = ( SELECT min(transfers.intime) FROM transfers INNER JOIN outputevents ON transfers.subject_id = outputevents.subject_id WHERE transfers.subject_id = 10015931 AND outputevents.itemid IN (226560, 225664) )",
            "gold_answer": "[['200.0']]",
            "pred_answer": "no such column: outputevents.volume",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column outputevents.volume, causing a column reference error. This is a localized mistake in the SELECT clause and does not affect the overall query structure or intended logic of grouping by month and taking maxima.",
            "error_type": "Local: column reference error"
        },
        "bc8d116f59e4fd75687ea181": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What new medications were introduced for patient 10004733 today compared to the ones yesterday?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') EXCEPT SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND datetime(prescriptions.starttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT sum(CASE WHEN strftime('%Y %m %d', inputevents.starttime) = strftime('%Y %m %d', 'now') THEN 1 ELSE 0 END) - sum(CASE WHEN strftime('%Y %m %d', inputevents.starttime) = strftime('%Y %m %d', 'now', '-1 day') THEN 1 ELSE 0 END) AS difference FROM inputevents INNER JOIN prescriptions ON inputevents.subject_id = prescriptions.subject_id WHERE inputevents.subject_id = 10004733",
            "gold_answer": "[['iso-osmotic dextrose'], ['pantoprazole'], ['quetiapine fumarate'], ['ranitidine (liquid)'], ['sodium chloride 0.9%'], ['vial']]",
            "pred_answer": "[['-1360.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely deviates from the intended logic: it uses inputevents instead of prescriptions, sums counts with CASE statements, and joins tables improperly on subject_id rather than focusing on prescriptions.starttime and EXCEPT logic. This is not a minor typo or local reference issue but a fundamental restructuring that fails to retrieve distinct new medications. The join strategy, choice of tables, and aggregation approach all break the overall query logic.",
            "error_type": "Global: flawed join strategy"
        },
        "4e6fc34a2642a459d3ea7032": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the first time that the respiratory rate readings of patient 10018423 were recorded?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018423 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT min(CHARTTIME) FROM chartevents WHERE subject_id = 10018423 AND itemid = 220052",
            "gold_answer": "[['2100-05-21 19:16:00']]",
            "pred_answer": "[['2100-05-21 19:10:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to follow the required multi\u2010step filtering: it does not filter chartevents by ICU stay (via admissions and icustays) nor does it map the 'respiratory rate' label through the d_items table. Instead it directly uses subject_id and a hardcoded itemid, which fundamentally changes the data scope and joins. This is not a simple typo or clause\u2010level fix but rather a flawed overall join and filtering strategy that alters the intended query logic.",
            "error_type": "Global: incorrect join strategy"
        },
        "1c43c26e60ec75a3a8e84c7e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last monocytes result for patient 10006053 last month?",
            "gold_sql": "SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006053 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'monocytes' ) AND datetime(labevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT itemid, value, unitname FROM chartevents WHERE subject_id = 10006053 AND itemid IN (1278, 1280, 1281, 1282, 1283, 1284, 1285) AND strftime('%Y-%m', charttime) = '2015-05' AND value IS NOT NULL ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['5.0']]",
            "pred_answer": "no such column: value",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly references non-existent columns (\u201cvalue\u201d and \u201cunitname\u201d) in chartevents instead of the correct columns (e.g., valuenum, valueuom) or using the labevents table. This is a localized mistake in column naming and table choice and does not reflect a deeper logical flaw across multiple clauses.",
            "error_type": "Local: table/column reference error"
        },
        "a05f7f0e4fab0a67cd2533c2": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the difference in the respiratory rate value for patient 10022281 second measured on the first icu visit compared to the first value measured on the first icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022281 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 OFFSET 1 ) - ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022281 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT julianday(icustays.outtime) - julianday(icustays.intime) AS days , outputevents.value - lead(outputevents.value) OVER (PARTITION BY icustays.subject_id ORDER BY icustays.intime) AS diff FROM outputevents INNER JOIN icustays ON outputevents.stay_id = icustays.stay_id WHERE icustays.subject_id = 10022281 AND outputevents.itemid IN (618, 220051)",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely diverges from the intended logic: it uses the outputevents table (meant for fluid/output data) instead of chartevents (where vital signs like respiratory rate are stored), employs a window function and unintended time difference calculation instead of selecting the first and second respiratory rate measurements, and fails to isolate the first ICU stay or properly filter by the 'respiratory rate' item. This error spans multiple clauses and requires restructuring the entire query logic, making it a global error.",
            "error_type": "Global: incorrect table usage and query structure"
        },
        "70a08b004ba90593af33dfab": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Could you provide me with patient 10035185's hospital admission time since 2100?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10035185 AND strftime('%Y',admissions.admittime) >= '2100'",
            "pred_sql": "SELECT admissions.admittime FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10035185 AND admissions.dischtime IS NULL",
            "gold_answer": "[['2100-05-17 12:53:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by introducing an unnecessary INNER JOIN to icustays and filtering on admissions.dischtime IS NULL (i.e., current admissions) instead of selecting all admissions since year 2100. It omits the required date filter and changes the query\u2019s intent from \u201cadmissions since 2100\u201d to \u201ccurrent admissions,\u201d which is a structural flaw affecting the overall logic rather than a localized syntax mistake.",
            "error_type": "Global: incorrect join strategy"
        },
        "d3b5eb094434de4c257a6007": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What does it cost to have percutaneous transluminal coronary angioplasty [ptca]?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'procedures_icd' AND cost.event_id IN ( SELECT procedures_icd.row_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'percutaneous transluminal coronary angioplasty [ptca]' ) )",
            "pred_sql": "SELECT DISTINCT cost.cost FROM procedures_icd INNER JOIN cost ON procedures_icd.row_id = cost.event_id INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Percutaneous transluminal coronary angioplasty [ptca]'",
            "gold_answer": "[['6.67']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails simply because it doesn\u2019t match the procedure title text (it uses a capital \u201cPercutaneous\u2026\u201d whereas the stored long_title is lowercase) and also omits the cost.event_type filter \u2013 both of which are local WHERE\u2010clause issues. The overall join structure is correct; only the predicate is wrong and fixable without reworking the joins or logic. Therefore this is a Local error (value parsing/mismatch in the WHERE clause).",
            "error_type": "Local: value parsing error"
        }
    },
    "llm2sql": {
        "d1ec7624c207f188607287da": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "When on the last hospital visit did patient 10019777 receive their last venous catheterization, not elsewhere classified procedure?",
            "gold_sql": "SELECT procedures_icd.charttime FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019777 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) ORDER BY procedures_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT T2.charttime FROM admissions AS T1 JOIN procedures_icd AS T2 ON T1.hadm_id = T2.hadm_id WHERE T1.subject_id = 10019777 AND T1.dischtime IS NOT NULL AND T2.icd_code = 'icd9|3961' ORDER BY T1.admittime DESC, T2.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-03-05 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters procedures_icd.icd_code = 'icd9|3961' instead of looking up the correct ICD code via d_icd_procedures.long_title and then matching that code. This is a localized mistake in the literal/value used for icd_code, not a flaw in the overall join structure or query logic. It can be fixed by correcting the icd_code filter to the proper code (or by joining to d_icd_procedures), without changing the global query flow. Therefore, it\u2019s a Local error.",
            "error_type": "Local: value parsing error"
        },
        "cd938096c303c8fb80cb2cba": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Which are the five most common lab tests?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT d_labitems.label, COUNT(*) AS test_count FROM labevents JOIN d_labitems ON labevents.itemid = d_labitems.itemid GROUP BY d_labitems.label ORDER BY test_count DESC LIMIT 5",
            "gold_answer": "[['chloride'], ['creatinine'], ['hematocrit'], ['sodium'], ['urea nitrogen']]",
            "pred_answer": "[['chloride', '1004.0'], ['glucose', '1113.0'], ['hematocrit', '982.0'], ['hemoglobin', '985.0'], ['sodium', '1001.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only uses ORDER BY \u2026 LIMIT 5 and returns both the label and the count, which (1) violates the requirement to use DENSE_RANK() for a top-N query when ties matter, and (2) returns the counts even though the question asks only for the test names. These are localized mistakes in the SELECT/ORDER BY/LIMIT portion rather than a fundamental restructuring of the query logic or table relationships.",
            "error_type": "Local: top-N ranking and projection error"
        },
        "9f32059ca482dff67930ffcd": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was patient 10015860's last value of anion gap in 10/this year?",
            "gold_sql": "SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015860 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'anion gap' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '10' ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT ce.valuenum FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10015860 AND di.label = 'anion gap' AND datetime(ce.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m', ce.charttime) = '10' ORDER BY ce.charttime DESC LIMIT 1;",
            "gold_answer": "[['11.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the wrong table and joins the wrong dimension table. It uses chartevents and d_items for anion gap instead of labevents and d_labitems, fundamentally changing the data source and filtering. This is not a simple column typo or filter mistake but a flawed table join that breaks the intended logic of retrieving lab results. Therefore, it is a global error involving an incorrect join and table selection.",
            "error_type": "Global: incorrect join/table selection"
        },
        "1a8e3475cdccab3b8d13051c": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What was the length of stay in days for patient 10005817's first hospital visit?",
            "gold_sql": "SELECT strftime('%J',admissions.dischtime) - strftime('%J',admissions.admittime) FROM admissions WHERE admissions.subject_id = 10005817 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT CAST(JULIANDAY(dischtime) - JULIANDAY(admittime) AS INTEGER) AS length_of_stay_days FROM admissions WHERE subject_id = 10005817 ORDER BY admittime ASC LIMIT 1",
            "gold_answer": "[['8.556']]",
            "pred_answer": "[['8.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in two localized ways: it omits the filter `dischtime IS NOT NULL` and it casts the day difference to an integer, truncating the fractional part. These are errors confined to the WHERE clause and the SELECT expression, and do not alter the overall query structure or table relationships. The query\u2019s logic (computing length of stay) remains intact, but the missing filter and the integer cast lead to an incorrect, truncated value. Hence, this is a localized error in value parsing/post-processing rather than a fundamental structural mistake.",
            "error_type": "Local: value parsing error"
        }
    },
    "o4-mini": {
        "0cd28c037e9d62e1fe5d608a": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "List the top five most common diagnoses in 2100.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE strftime('%Y',diagnoses_icd.charttime) = '2100' GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT d.long_title FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE strftime('%Y', di.charttime) = '2100' GROUP BY d.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['atrial fibrillation'], ['coronary atherosclerosis of native coronary artery'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "pred_answer": "[['atrial fibrillation'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a simple GROUP BY with ORDER BY and LIMIT 5 instead of the DENSE_RANK() approach mandated for a \u201ctop N\u201d query. As a result, it drops entries that tie on the fifth rank (e.g., \u201ccoronary atherosclerosis of native coronary artery\u201d). Fixing this requires restructuring the query to use a ranking window function and then filtering by rank, not just tweaking a WHERE clause or column name. This is therefore a global error affecting the overall logic of retrieving the correct top-N results with ties.",
            "error_type": "Global: incorrect ranking logic"
        }
    },
    "qwen3": {
        "877c95aa1614afdd964a50bb": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What was patient 10018081's first output time of foley yesterday?",
            "gold_sql": "SELECT outputevents.charttime FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'foley' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day') ORDER BY outputevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(outputevents.charttime) AS first_output_time FROM outputevents JOIN d_items ON outputevents.itemid = d_items.itemid WHERE outputevents.subject_id = 10018081 AND d_items.label = 'Foley catheter' AND datetime(outputevents.charttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-1 day');",
            "gold_answer": "[['2100-12-30 00:00:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query fails because it uses the wrong label ('Foley catheter' instead of the correct 'foley') in the WHERE clause against d_items.label. This is a localized mistake in the value literal\u2014not a structural issue with joins, subqueries, or overall query logic. Correcting the label string would allow the query to return the proper result without altering its structure or join logic.",
            "error_type": "Local: value parsing error"
        }
    }
}