{
    "qwen3": {
        "8f72cf5cec372b0b9bd6f99a": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Among patients diagnosed with staphylococcus infection in conditions classified elsewhere and of unspecified site, staphylococcus, unspecified, what are the top three most common lab tests conducted within the same hospital visit since 1 year ago?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'staphylococcus infection in conditions classified elsewhere and of unspecified site, staphylococcus, unspecified' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime, admissions.hadm_id FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "SELECT d.label FROM diagnoses_icd JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id JOIN labevents ON admissions.hadm_id = labevents.hadm_id JOIN d_labitems d ON labevents.itemid = d.itemid WHERE d_icd_diagnoses.long_title = 'staphylococcus infection in conditions classified elsewhere and of unspecified site, staphylococcus, unspecified' AND datetime(admissions.admittime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY d.label ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['chloride'], ['hematocrit'], ['magnesium'], ['ph'], ['sodium']]",
            "pred_answer": "[['glucose'], ['hematocrit'], ['sodium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic of the query by joining diagnoses, admissions, and lab events directly and only filtering on admissions.admittime within the last year. It fails to:\n1. Restrict lab tests to those occurring after the diagnosis time within the same hospital visit.\n2. Filter lab events based on their own charttime relative to one year ago.\n3. Use a ranking function or subquery to ensure the correct top-N logic based on the count of lab tests following diagnosis.\nThese issues span multiple clauses (JOIN conditions, WHERE filters, grouping/ordering logic) and require restructuring the query\u2019s core approach, not just a local fix to a specific clause or reference.",
            "error_type": "Global: incorrect join and filtering logic"
        },
        "fc6b47d6d81425ba4da6b862": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many prescriptions were prescribed for oxycodone-acetaminophen (5mg-325mg)?",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.drug = 'oxycodone-acetaminophen (5mg-325mg)'",
            "pred_sql": "SELECT COUNT(*) FROM prescriptions WHERE drug = 'oxycodone-acetaminophen 5mg-325mg';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong string literal in the WHERE clause: 'oxycodone-acetaminophen 5mg-325mg' instead of the correct 'oxycodone-acetaminophen (5mg-325mg)'. This is a localized mistake in the value used for filtering and can be fixed by correcting the literal without altering the overall structure or logic of the query.",
            "error_type": "Local: value parsing error"
        }
    },
    "codes": {
        "447895a7f06300c849dd8e20": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many times did patient 10039997 have other computer assisted surgery this year?",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other computer assisted surgery' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039997 ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "pred_sql": "SELECT count(procedures_icd.hadm_id) FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE procedures_icd.subject_id = 10039997 AND d_icd_procedures.long_title LIKE '%other computer-assisted surgery%' AND strftime('%Y', procedures_icd.charttime) = '2100'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses a LIKE pattern with a hyphen ('other computer-assisted surgery') that does not match the actual long_title ('other computer assisted surgery'). This is a localized mistake in the WHERE clause\u2019s string comparison and can be fixed by correcting the literal. The overall query structure (joins, filters by year and patient) remains appropriate, so the error is local.",
            "error_type": "Local: value parsing error"
        },
        "bda21105f3950d0b64ef875d": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me the average overall hospital cost that includes cyanocobalamin this year?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'cyanocobalamin' ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT sum(cost.cost) / count(DISTINCT admissions.hadm_id) FROM cost INNER JOIN prescriptions ON cost.event_id = prescriptions.row_id WHERE strftime('%Y', prescriptions.starttime) = '2100' AND prescriptions.drug = 'cyanocobalamin'",
            "gold_answer": "[['1605.733']]",
            "pred_answer": "no such column: admissions.hadm_id",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s error arises from referencing admissions.hadm_id in the COUNT(DISTINCT ...) clause even though the admissions table is never included in the FROM or JOIN clauses. This is a localized column/table reference mistake and can be corrected by adding the appropriate JOIN to admissions without restructuring the overall query logic.",
            "error_type": "Local: column reference error"
        },
        "d7ab6fd0f27ec7c921da3de4": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Tell me patient 10021487's minimum magnesium, urine value since 12 months ago?",
            "gold_sql": "SELECT MIN(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'magnesium, urine' ) AND datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-12 month')",
            "pred_sql": "SELECT itemid, value FROM outputevents WHERE subject_id = 10021487 AND itemid IN (225168, 51301) AND charttime > datetime('now', '-12 month') GROUP BY itemid, value ORDER BY value LIMIT 1",
            "gold_answer": "[['13.8']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the outputevents table instead of the labevents table, selects incorrect itemids, uses the wrong columns (value vs valuenum), filters by subject_id rather than hadm_id via admissions, and employs 'now' instead of the fixed datetime. These errors span multiple clauses and fundamentally alter the intended data source and logic, requiring a full restructuring of the query.",
            "error_type": "Global: wrong table/logic"
        },
        "52fcf1b4e0958500d0e297af": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10004720 have their last chest tube #1 output since 174 days ago?",
            "gold_sql": "SELECT outputevents.charttime FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'chest tube #1' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime) >= datetime('2100-12-31 23:59:00','-174 day') ORDER BY outputevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT outputevents.charttime FROM chartevents INNER JOIN outputevents ON chartevents.row_id = outputevents.row_id WHERE outputevents.subject_id = 10004720 AND chartevents.itemid = 225667 AND outputevents.itemid = 225667 AND outputevents.charttime >= ( SELECT max(charttime) - 174 FROM outputevents WHERE subject_id = 10004720 AND itemid = 225667 )",
            "gold_answer": "[['2100-12-08 18:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses an INNER JOIN between chartevents and outputevents on row_id, which is not a valid relationship and fundamentally changes the query\u2019s logic. It also misuses chartevents rather than outputevents for the chest tube output, applies an incorrect subquery for the date filter (subtracting 174 from a timestamp), and fails to follow the schema\u2019s correct linking via d_items. These issues span multiple clauses and require restructuring the overall query (fixing the join, using the correct table, and properly computing the date filter), so this is not a localized typo but a structural, logical error.",
            "error_type": "Global: join error"
        }
    },
    "dinsql": {
        "cb642fb7cc4c268dd4595162": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the minimum lymphs value for patient 10027602 since 11/2100?",
            "gold_sql": "SELECT MIN(labevents.valuenum) FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'lymphs' ) AND strftime('%Y-%m',labevents.charttime) >= '2100-11'",
            "pred_sql": "SELECT MIN(valuenum) AS min_lymphs_value FROM labevents WHERE subject_id = 10027602 AND itemid = (SELECT itemid FROM d_labitems WHERE label = 'lymphs') AND datetime(charttime,'start of month') = datetime('2100-12-31 23:59:00','start of year') AND strftime('%m',charttime) >= '11'",
            "gold_answer": "[['8.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL mis\u2010filters the charttime date. Instead of using the correct strftime('%Y-%m',\u2026) \u2265 '2100-11', it incorrectly compares datetime(charttime,'start of month') to datetime('2100-12-31 23:59:00','start of year') and then only checks the month part. This is a local mistake in the WHERE clause\u2019s date condition and does not alter the overall query structure or table relationships. It\u2019s a localized temporal filter error.",
            "error_type": "Local: temporal filter error"
        },
        "f359ba8d9b0dd17dff365533": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "When was patient 10027602 last discharged from the hospital?",
            "gold_sql": "SELECT admissions.dischtime FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.dischtime IS NOT NULL ORDER BY admissions.dischtime DESC LIMIT 1",
            "pred_sql": "dischtime FROM admissions WHERE subject_id = 10027602 AND dischtime IS NOT NULL ORDER BY dischtime DESC LIMIT 1",
            "gold_answer": "[['2100-12-14 14:45:00']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the SELECT keyword before the column list, causing a syntax error in the SELECT clause. This is a localized mistake that does not alter the overall intended query logic and can be fixed by adding the missing keyword.",
            "error_type": "Local: syntax error"
        },
        "c3f597f199ba2bb53d59195e": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Could you tell me the name of the drug that was prescribed two or more times since 10/2100 to patient 10009628?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, COUNT( prescriptions.starttime ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10009628 ) AND strftime('%Y-%m',prescriptions.starttime) >= '2100-10' GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT drug FROM prescriptions WHERE subject_id = 10009628 AND datetime(starttime) >= datetime('2100-10-01') GROUP BY drug HAVING COUNT(DISTINCT starttime) >= 2",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['5% dextrose (excel bag)'], ['acetaminophen'], ['aspirin ec'], ['atorvastatin'], ['docusate sodium'], ['furosemide'], ['glipizide'], ['glucagon'], ['insulin'], ['insulin human regular'], ['iso-osmotic dextrose'], ['isosorbide dinitrate'], ['ketorolac'], ['metformin (glucophage)'], ['metoclopramide'], ['metoprolol tartrate'], ['milk of magnesia'], ['mupirocin nasal ointment 2%'], ['ondansetron'], ['sodium chloride 0.9%  flush'], ['syringe'], ['vancomycin'], ['warfarin']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen'], ['aspirin ec'], ['atorvastatin'], ['docusate sodium'], ['furosemide'], ['glipizide'], ['glucagon'], ['insulin'], ['insulin human regular'], ['isosorbide dinitrate'], ['ketorolac'], ['metformin (glucophage)'], ['metoclopramide'], ['metoprolol tartrate'], ['milk of magnesia'], ['mupirocin nasal ointment 2%'], ['ondansetron'], ['sodium chloride 0.9%  flush'], ['warfarin']]",
            "is_correct": false,
            "reasoning": "The only structural difference is within the aggregation: the predicted SQL uses COUNT(DISTINCT starttime) instead of COUNT(starttime). This incorrect DISTINCT suppresses multiple prescriptions that happen at the same timestamp, causing drugs to be dropped from the results. No joins or overall query logic are altered\u2014only a localized misuse of DISTINCT in the HAVING clause\u2014so it\u2019s a local aggregation error.",
            "error_type": "Local: COUNT-distinct error"
        }
    },
    "deepseek": {
        "db61cc4531aaaed9ef749ac0": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients died after having been diagnosed with other specified places as the place of occurrence of the external cause within the same hospital visit?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT T1.subject_id, T1.charttime, T1.hadm_id FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other specified places as the place of occurrence of the external cause' ) ) AS T1 GROUP BY T1.subject_id HAVING MIN(T1.charttime) = T1.charttime ) AS T2 JOIN ( SELECT patients.subject_id, admissions.hadm_id, patients.dod FROM admissions join patients ON patients.subject_id = admissions.subject_id WHERE patients.dod IS NOT NULL ) as T3 ON T2.subject_id = T3.subject_id AND T2.hadm_id = T3.hadm_id",
            "pred_sql": "SELECT COUNT(D;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is just \u201cSELECT COUNT(D;\u201d which is an incomplete statement causing a \u201cnear \";\": syntax error\u201d. This error is confined to the SELECT clause and is purely a syntax mistake, not affecting the overall logic or table relationships. It can be fixed by completing the SELECT expression without altering the query\u2019s structure.",
            "error_type": "Local: syntax error"
        },
        "ddd17c278574bd46086445c4": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How much is the total hospital cost of patient 10001217 during the stay on the last hospital visit?",
            "gold_sql": "SELECT SUM(cost.cost) FROM cost WHERE cost.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10001217 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 )",
            "pred_sql": "SELECT SUM(c.cost) AS total_cost FROM cost c WHERE c.subject_id = 10001217 AND c.hadm_id = ( SELECT a.hadm_id FROM admissions a WHERE a.subject_id = 10001217 AND a.dischtime IS NOT NULL ORDER BY a.admittime DESC LIMIT 1;",
            "gold_answer": "[['1721.22']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error: it is missing a closing parenthesis for the subquery and the semicolon is placed incorrectly. This is a localized mistake in the WHERE clause/subquery and does not alter the overall query structure or logic.",
            "error_type": "Local: syntax error"
        },
        "14430504237cf96f2e13d98f": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Find the length in days of the last icu stay for patient 10012552.",
            "gold_sql": "SELECT strftime('%J', icustays.outtime)-strftime('%J', icustays.intime) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10012552 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1",
            "pred_sql": "SELECT ROUND( (strftime('%j', datetime(outtime)) - strftime('%j', datetime(intime))) + ((strftime('%H:%M:%S', outtime) - strftime('%H:%M:%S', intime)) > '00:00:00'), 0 ) AS length_days FROM icustays WHERE subject_id = 10012552 AND outtime IS NOT NULL ORDER BY intime DESC LIMIT 1;",
            "gold_answer": "[['3.159']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s main mistake lies in the SELECT expression: it uses strftime('%j') (day of year) and a custom boolean time\u2010offset trick, rather than strftime('%J') (Julian day) to compute precise day differences. This error is confined to the date arithmetic in the SELECT clause and does not alter the overall query structure (joins, filters, ordering remain intact). Therefore, it is a local error in the temporal calculation.",
            "error_type": "Local: other-temporal error"
        }
    },
    "llm2sql": {
        "de9eceabb0bb25042457a7fc": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Count the number of patients who underwent a microbiological blood culture ( myco/f lytic bottle) test.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT microbiologyevents.hadm_id FROM microbiologyevents WHERE microbiologyevents.spec_type_desc = 'blood culture ( myco/f lytic bottle)' )",
            "pred_sql": "SELECT COUNT(DISTINCT subject_id) FROM microbiologyevents WHERE test_name = 'myco/f lytic bottle';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on the wrong column: it uses test_name = 'myco/f lytic bottle' instead of spec_type_desc = 'blood culture ( myco/f lytic bottle)'. This is a localized column reference error in the WHERE clause and does not require restructuring the query\u2019s overall logic.",
            "error_type": "Local: table/column reference error"
        }
    },
    "macsql": {
        "4a482741fa67f8148276f6d1": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Is the respiratory rate level of patient 10039997 last measured on the last icu visit greater than the second to last measurement on the last icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039997 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) > ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039997 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'respiratory rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "WITH LastTwoMeasurements AS ( SELECT `valuenum` FROM chartevents WHERE `subject_id` = 10039997 AND `stay_id` = ( SELECT `stay_id` FROM icustays WHERE `subject_id` = 10039997 ORDER BY `intime` DESC LIMIT 1 ) AND `itemid` = ( SELECT `itemid` FROM d_items WHERE `label` = 'respiratory rate' ) ORDER BY `charttime` DESC LIMIT 2 ) SELECT CASE WHEN COUNT(*) < 2 THEN 'Insufficient data' WHEN MAX(`valuenum`) = MIN(`valuenum`) THEN 'False' ELSE 'True' END AS result FROM LastTwoMeasurements",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['True']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic in multiple ways. First, it picks the most recent icustay for the patient simply by ORDER BY intime without requiring that it be a completed (outtime IS NOT NULL) ICU visit or even linking through admissions.hadm_id. Second, it ignores the d_items.linksto filter when finding the respiratory rate itemid. Third\u2014and most critically\u2014it aggregates the two measurements with MAX and MIN, which can tell only if the two values differ, not whether the latest measurement is greater than the second\u2010latest. Fixing these issues requires rewriting major parts of the query (subquery criteria and comparison logic), not just tweaking a single clause. Therefore this is a global logic/structure error.",
            "error_type": "Global: flawed logical comparison"
        },
        "f85c9633e450cf4188a98ce3": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Get me the top three most common diagnoses.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT `icd_code` FROM ( SELECT `icd_code`, DENSE_RANK() OVER (ORDER BY `count` DESC) AS `rank` FROM ( SELECT `icd_code`, COUNT(*) AS `count` FROM `diagnoses_icd` GROUP BY `icd_code` ) AS `subquery` ) AS `ranked_diagnoses` WHERE `rank` <= 3",
            "gold_answer": "[['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other and unspecified hyperlipidemia'], ['unspecified essential hypertension']]",
            "pred_answer": "[['icd9|25000'], ['icd9|2724'], ['icd9|4019']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly ranks the top three ICD codes by frequency but then returns the raw `icd_code` values rather than the corresponding diagnosis names (`long_title`). It never joins to `d_icd_diagnoses` to map codes to descriptions. This is a localized mistake in the SELECT clause/table reference and doesn\u2019t require restructuring the query logic or joins beyond adding the missing join and selecting the correct column.",
            "error_type": "Local: table/column reference error"
        },
        "85fb28226e6564139064b07b": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What are the five most frequently ordered medications that male patients aged 60 or above are prescribed within the same month after they are diagnosed with chronic kidney disease, stage iii (moderate)?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic kidney disease, stage iii (moderate)' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.subject_id IN ( SELECT patients.subject_id FROM patients WHERE patients.gender = 'm' ) AND admissions.age >= 60 ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T1.charttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT T4.`drug` FROM `prescriptions` AS T4 JOIN ( SELECT DISTINCT T1.`subject_id`, T1.`hadm_id`, T1.`charttime` FROM `diagnoses_icd` AS T1 JOIN `patients` AS T2 ON T1.`subject_id` = T2.`subject_id` JOIN `admissions` AS T3 ON T1.`hadm_id` = T3.`hadm_id` WHERE T1.`icd_code` = (SELECT `icd_code` FROM `d_icd_diagnoses` WHERE `long_title` = 'chronic kidney disease, stage iii (moderate)') AND T2.`gender` = 'm' AND (strftime('%Y', T1.`charttime`) - strftime('%Y', T2.`dob`)) - (strftime('%m-%d', T1.`charttime`) < strftime('%m-%d', T2.`dob`)) >= 60 ) AS T5 ON T4.`subject_id` = T5.`subject_id` AND T4.`hadm_id` = T5.`hadm_id` WHERE datetime(T4.`starttime`, 'start of month') = datetime(T5.`charttime`, 'start of month') GROUP BY T4.`drug` ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen'], ['albuterol inhaler'], ['allopurinol'], ['aspirin'], ['furosemide'], ['insulin'], ['ipratropium-albuterol neb'], ['isosorbide dinitrate'], ['losartan potassium'], ['multivitamin iv'], ['pantoprazole'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['0.9% sodium chloride'], ['allopurinol'], ['furosemide'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins prescriptions back to the diagnosis using both subject_id AND hadm_id, restricting medications to the same hospital admission rather than any prescription within the same month post\u2010diagnosis. It also fails to enforce the prescription time being after the diagnosis time and even computes age manually instead of using the stored age column. These mistakes alter the fundamental logic of which prescriptions get counted and require restructuring the JOIN/WHERE logic, so this is a global logic error.",
            "error_type": "Global: join relationship error"
        },
        "9816a74cbdf3f4145eb7ec03": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Tell me the top four most frequent procedures that patients received within the same hospital visit after being diagnosed with spontaneous bacterial peritonitis since 1 year ago.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'spontaneous bacterial peritonitis' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 4 )",
            "pred_sql": "SELECT `icd_code` FROM ( SELECT `icd_code`, COUNT(*) AS `frequency` FROM `procedures_icd` WHERE `hadm_id` IN ( SELECT DISTINCT `hadm_id` FROM `diagnoses_icd` WHERE `icd_code` = (SELECT `icd_code` FROM `d_icd_diagnoses` WHERE `long_title` LIKE '%spontaneous bacterial peritonitis%') AND datetime(`charttime`) >= datetime('2100-12-31 23:59:00', '-1 year') ) GROUP BY `icd_code` ORDER BY `frequency` DESC LIMIT 4 )",
            "gold_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['control of epistaxis by anterior nasal packing'], ['insertion of endotracheal tube'], ['laryngoscopy and other tracheoscopy'], ['other endoscopy of small intestine'], ['percutaneous abdominal drainage'], ['rhinoscopy'], ['venous catheterization, not elsewhere classified']]",
            "pred_answer": "[['icd9|4513'], ['icd9|5491'], ['icd9|9604'], ['icd9|9671']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic. It selects procedures for any admission that had a spontaneous bacterial peritonitis diagnosis in the last year, but it does not enforce that each procedure occurred after the diagnosis time within the same visit. It also fails to join on patient and admission to compare charttimes and does not retrieve the procedure names. These errors span multiple clauses (WHERE, subquery logic, missing temporal join) and alter the intended meaning of the query, requiring a restructuring of the overall query logic.",
            "error_type": "Global: incorrect temporal join and filtering logic"
        },
        "ffcc6f49ecc0514e980b88a6": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Among patients in their 20s , what are the top four prescribed drugs since 1 year ago?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 20 AND 29 ) AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT drug FROM ( SELECT PR.drug, COUNT(*) AS count, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS rank FROM prescriptions PR JOIN ( SELECT DISTINCT A.subject_id FROM admissions A JOIN patients P ON A.subject_id = P.subject_id WHERE A.age BETWEEN 20 AND 29 ) AS SubQuery ON PR.subject_id = SubQuery.subject_id WHERE datetime(PR.starttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY PR.drug ) AS RankedDrugs WHERE rank <= 4",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['insulin'], ['metoprolol tartrate'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['acetaminophen'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters patients by subject_id instead of hadm_id, thereby aggregating prescriptions across all admissions for those patients rather than only those admissions where age is between 20 and 29. This incorrect join on subject_id versus hadm_id fundamentally changes the query logic and results, requiring a structural correction beyond a local fix.",
            "error_type": "Global: incorrect join relationship"
        },
        "55a31e2e8ac1f0d6f0026827": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Since 1 year ago, how many patients received 0.9% sodium chloride within 2 months after being diagnosed with candidal stomatitis?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'candidal stomatitis' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = '0.9% sodium chloride' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month')",
            "pred_sql": "SELECT COUNT(DISTINCT T1.subject_id) AS num_patients FROM ( SELECT DISTINCT T1.subject_id, T1.hadm_id, T1.charttime AS diagnosis_time FROM diagnoses_icd AS T1 JOIN d_icd_diagnoses AS T2 ON T1.icd_code = T2.icd_code WHERE T2.long_title = 'candidal stomatitis' AND datetime(T1.charttime) >= datetime('2100-12-31 23:59:00', '-1 year') ) AS D JOIN prescriptions AS P ON D.subject_id = P.subject_id AND D.hadm_id = P.hadm_id WHERE P.drug = '0.9% sodium chloride' AND datetime(P.starttime) <= datetime(D.diagnosis_time, '+2 months') AND datetime(P.starttime) >= datetime(D.diagnosis_time)",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: T1.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL references T1.subject_id in the outer SELECT, but the subquery is aliased as D, not T1. This is a simple aliasing/column reference mistake\u2014T1 is undefined in that scope. It can be fixed locally by changing T1.subject_id to D.subject_id without altering the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        }
    },
    "o4-mini": {
        "6573046adda4f59cefa283c3": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "On last month/28, when did patient 10027602 the first time have a cerebral ventricular #1 event?",
            "gold_sql": "SELECT outputevents.charttime FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'cerebral ventricular #1' AND d_items.linksto = 'outputevents' ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '28' ORDER BY outputevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT ce.charttime FROM chartevents ce JOIN d_items di ON ce.itemid = di.itemid WHERE ce.subject_id = 10027602 AND di.label = 'cerebral ventricular #1' AND datetime(ce.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', ce.charttime) = '28' ORDER BY ce.charttime LIMIT 1;",
            "gold_answer": "[['2100-11-28 00:00:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the chartevents table instead of the outputevents table, fundamentally altering the data source and join logic. The ground truth query correctly restricts to outputevents linked via icustays and admissions, whereas the prediction treats the event as a chart event. This is not a simple column or syntax mistake but changes the overall structure and intended meaning of the query.",
            "error_type": "Global: wrong-table join error"
        }
    }
}