{
    "codes": {
        "df83a028a4e46a89dbf24e19": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Reveal the drug name that was first prescribed to patient 10004733 via the pr route on the current hospital visit?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND prescriptions.route = 'pr' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT prescriptions.drug FROM prescriptions INNER JOIN transfers ON prescriptions.subject_id = transfers.subject_id INNER JOIN admissions ON transfers.hadm_id = admissions.hadm_id WHERE prescriptions.subject_id = 10004733 AND prescriptions.route = 'pr' AND admissions.dischtime IS NULL",
            "gold_answer": "[['bisacodyl']]",
            "pred_answer": "[['bisacodyl'], ['bisacodyl'], ['bisacodyl'], ['bisacodyl']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended query logic by introducing unnecessary JOINs with the transfers table and failing to restrict to a single result via ORDER BY and LIMIT. Instead of selecting the patient\u2019s current admission directly from admissions and then finding the earliest pr prescription, it joins through transfers (using subject_id rather than hadm_id) and omits ordering and limiting. This is not a simple syntax or value error but a structural flaw in how tables are related, requiring a rework of the query\u2019s join strategy.",
            "error_type": "Global: incorrect-join error"
        },
        "b9a2ff88ca010b7574afeae1": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Show me patient 10015272's daily maximum weight since 1 year ago.",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10015272 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'daily weight' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY strftime('%Y-%m-%d',chartevents.charttime)",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE patients.subject_id = 10015272 AND chartevents.itemid IN (226739) AND chartevents.valuenum IS NOT NULL AND chartevents.charttime >= datetime('now', 'start of day', '-1 year')",
            "gold_answer": "[['53.2']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in local parts of the query: it fails to GROUP BY day to produce daily maxima, uses the wrong datetime reference (\u2018now\u2019 instead of the fixed '2100-12-31 23:59:00'), and joins unnecessarily on patients.subject_id rather than filtering stays. None of these require reworking the overall query structure or table relationships\u2014they are localized mistakes in the SELECT, WHERE, and GROUP BY logic.",
            "error_type": "Local: missing-group-by error"
        },
        "ee927aa21353efb9824a2eec": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the name of the last medication that was prescribed to patient 10021312 on the first hospital visit?",
            "gold_sql": "SELECT prescriptions.drug FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021312 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT prescriptions.drug FROM patients INNER JOIN prescriptions ON patients.subject_id = prescriptions.subject_id WHERE patients.subject_id = 10021312 ORDER BY prescriptions.starttime LIMIT 1",
            "gold_answer": "[['morphine sulfate (oral solution) 2 mg/ml']]",
            "pred_answer": "[['alprazolam']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict prescriptions to the first hospital visit and omits the necessary join or subquery on the admissions table to identify that visit. Instead, it simply joins patients to prescriptions by subject_id and orders by starttime (ascending by default), returning the patient\u2019s very first prescription across all admissions. This is a structural flaw affecting multiple clauses (missing admissions filter, wrong ORDER BY direction) and changes the intended logical flow of the query. Therefore, it is a global error.",
            "error_type": "Global: incorrect join logic"
        },
        "a17e722e5ac11cdc8a2d53bb": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10003400 come for the last time to the hospital?",
            "gold_sql": "SELECT admissions.admittime FROM admissions WHERE admissions.subject_id = 10003400 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1",
            "pred_sql": "SELECT admissions.dischtime FROM transfers INNER JOIN admissions ON transfers.hadm_id = admissions.hadm_id WHERE transfers.subject_id = 10003400 ORDER BY transfers.intime DESC LIMIT 1",
            "gold_answer": "[['2100-06-14 02:25:00']]",
            "pred_answer": "[['2100-06-15 15:05:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the transfers table and its intime field rather than the admissions table and the admittime field. It also omits the filter to exclude ongoing admissions (dischtime IS NOT NULL). By joining the wrong table and selecting the wrong timestamp, it fundamentally alters the query logic and intended meaning. Fixing this requires restructuring the query (changing the FROM table, JOIN, selected column, and WHERE condition), not just a minor tweak to one clause.",
            "error_type": "Global: join error"
        },
        "1197269430f8f6a19263132a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When was the last time when patient 10011398 was prescribed simvastatin and aspirin at the same time on the first hospital visit?",
            "gold_sql": "SELECT T1.starttime FROM ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10011398 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND prescriptions.drug = 'simvastatin' ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10011398 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND prescriptions.drug = 'aspirin' ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) ORDER BY T1.starttime DESC LIMIT 1",
            "pred_sql": "SELECT prescriptions.starttime FROM prescriptions INNER JOIN transfers ON transfers.subject_id = prescriptions.subject_id WHERE prescriptions.subject_id = 10011398 AND prescriptions.drug IN ('simvastatin', 'aspirin') ORDER BY transfers.intime DESC LIMIT 1",
            "gold_answer": "[['2100-12-27 10:00:00']]",
            "pred_answer": "[['2100-12-26 10:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets the question\u2019s logic. It joins the prescriptions table to the transfers table (irrelevant for determining the first hospital visit from admissions), fails to restrict to the first hospital admission, and uses a simple IN clause rather than ensuring simvastatin and aspirin were prescribed at the exact same time. Fixing this would require restructuring the query with subqueries or self-joins on prescriptions and correctly targeting admissions, not just a minor tweak in a single clause.",
            "error_type": "Global: join error"
        },
        "ff6f325e6403ec05870aa381": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Is the mcv value second measured on the first hospital visit in patient 10006053 less than the value from the first measurement on the first hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006053 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mcv' ) ORDER BY labevents.charttime ASC LIMIT 1 OFFSET 1 ) < ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006053 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mcv' ) ORDER BY labevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT CASE WHEN sum(CASE WHEN itemid = 723 AND valuenum < 0 THEN 1 ELSE 0 END) > 0 THEN 'Yes' ELSE 'No' END FROM labevents WHERE subject_id = 10006053 AND valuenum < 0",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['No']]",
            "is_correct": false,
            "reasoning": "The predicted SQL does not address the comparison between the second and first MCV measurements on the first hospital visit. Instead, it filters for negative valuenum values and sums them, completely ignoring the required ordering, item filtering for 'mcv', and visit selection logic. This error spans the entire query logic, not just a small syntactic mistake, and thus requires a fundamental restructuring to match the intended meaning.",
            "error_type": "Global: logical inconsistency"
        },
        "398bf7703c6f1d987464d94f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Find out how many patients were discharged from the hospital this year.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime is NOT NULL AND datetime(admissions.dischtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "pred_sql": "SELECT count(subject_id) FROM admissions WHERE dischtime BETWEEN '2100-1-1 00:00:00' AND '2100-12-31 23:59:59'",
            "gold_answer": "[['90.0']]",
            "pred_answer": "[['38.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id), causing it to count admissions rather than unique patients. This is a mistake in the aggregation clause and can be fixed by adding DISTINCT without changing the overall query structure or logic.",
            "error_type": "Local: missing DISTINCT in COUNT"
        }
    },
    "llm2sql": {
        "c1ade02d0de383345c62fe4b": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Count the number of times patient 10025612 received dextrose 5% since 186 days ago.",
            "gold_sql": "SELECT COUNT(*) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10025612 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'dextrose 5%' AND d_items.linksto = 'inputevents' ) AND datetime(inputevents.starttime) >= datetime('2100-12-31 23:59:00','-186 day')",
            "pred_sql": "SELECT COUNT(*) FROM prescriptions WHERE subject_id = 10025612 AND drug = 'dextrose 5%' AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-186 days');",
            "gold_answer": "[['37.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and the drug column to count occurrences of 'dextrose 5%', whereas the correct logic requires counting entries in inputevents (joined through icustays and admissions) for input administrations. This is not a simple reference or syntax mistake but a fundamental change in the query\u2019s structure and data relationships, altering the intended meaning. Fixing it requires replacing the entire source table and join logic, so it is a global error.",
            "error_type": "Global: incorrect table selection"
        },
        "c114793bdbb1cbf102c8f53a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the top five drugs that are prescribed to patients within the same month after they are prescribed ondansetron odt this year?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'ondansetron odt' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.starttime < T2.starttime AND datetime(T1.starttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT P2.drug FROM prescriptions AS P1 INNER JOIN prescriptions AS P2 ON P1.subject_id = P2.subject_id AND strftime('%Y-%m', P1.starttime) = strftime('%Y-%m', P2.starttime) WHERE P1.drug = 'ondansetron odt' AND strftime('%Y', P1.starttime) = '2100' AND P2.drug != 'ondansetron odt' GROUP BY P2.drug ORDER BY COUNT(P2.drug) DESC LIMIT 5;",
            "gold_answer": "[['5% dextrose (excel bag)'], ['bag'], ['iso-osmotic dextrose'], ['lidocaine 1%'], ['midodrine'], ['sodium chloride 0.9%'], ['vancomycin']]",
            "pred_answer": "[['bag'], ['heparin'], ['iso-osmotic dextrose'], ['sodium chloride 0.9%'], ['vancomycin']]",
            "is_correct": false,
            "reasoning": "The predicted query correctly joins prescriptions on the same patient and same month/year, but it omits the crucial condition that the second drug prescription must occur after the ondansetron odt prescription (i.e., P1.starttime < P2.starttime). This missing temporal filter leads to including drugs prescribed before ondansetron odt. The error is localized to the WHERE/JOIN condition without changing the overall query structure or table relationships.",
            "error_type": "Local: missing temporal filter"
        },
        "330a5ea38d543d09f93a3e3c": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Can you tell me the first value of the mchc test for patient 10039708 on the last hospital visit?",
            "gold_sql": "SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039708 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'mchc' ) ORDER BY labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT L.valuenum FROM labevents L + JOIN d_labitems D ON L.itemid = D.itemid + JOIN admissions A ON L.hadm_id = A.hadm_id + WHERE A.subject_id = 10039708 AND D.label = 'mchc' + AND A.admittime = (SELECT MAX(admittime) FROM admissions WHERE subject_id = 10039708 AND dischtime IS NOT NULL) + ORDER BY L.charttime ASC LIMIT 1;",
            "gold_answer": "[['32.3']]",
            "pred_answer": "near \"+\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces stray '+' characters between clauses (e.g., \"FROM labevents L + JOIN d_labitems...\") causing a syntax error near '+'. This is a localized syntax mistake in the JOIN/SELECT construction and does not involve any major logical restructuring or incorrect table relationships. It can be fixed by removing the '+' signs. Therefore, it is a Local error.",
            "error_type": "Local: syntax error"
        }
    },
    "dinsql": {
        "2b72047f0f9ba7d0dd23455f": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "How many people were given procedure on single vessel two or more times?",
            "gold_sql": "SELECT COUNT(T1.subject_id) FROM ( SELECT admissions.subject_id, COUNT(*) AS C1 FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'procedure on single vessel' ) GROUP BY admissions.subject_id ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT COUNT(DISTINCT procedures_icd.subject_id) FROM procedures_icd JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'single vessel' GROUP BY procedures_icd.subject_id HAVING COUNT(*) >= 2",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong long_title filter: it checks d_icd_procedures.long_title = 'single vessel' instead of the correct 'procedure on single vessel'. This is a localized mistake in the WHERE clause value rather than a structural issue with joins, grouping, or overall logic. Therefore, it is a fixable error within a single clause without altering the overall query structure.",
            "error_type": "Local: value parsing error"
        },
        "d66bbe740798c836c157d171": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the four most frequently ordered lab tests this year for patients of age 20s?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 20 AND 29 ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT T2.label FROM labevents AS T1 JOIN d_labitems AS T2 ON T1.itemid = T2.itemid JOIN admissions AS T3 ON T1.hadm_id = T3.hadm_id WHERE strftime('%Y', T1.charttime) = '2100' AND T3.age BETWEEN 20 AND 29 GROUP BY T2.label ORDER BY COUNT(*) DESC LIMIT 4",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['mch'], ['mchc'], ['mcv'], ['platelet count'], ['rdw'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "[['glucose'], ['hematocrit'], ['hemoglobin'], ['white blood cells']]",
            "is_correct": false,
            "reasoning": "The only mistake in the predicted SQL is in the top-N logic: it uses ORDER BY \u2026 LIMIT 4, which returns exactly four tests, whereas the ground-truth uses DENSE_RANK() and includes all tied tests at the fourth rank. All other parts of the query (joins, filters on year and age) match the intent. This is a localized error in the ranking clause and does not require restructuring the overall query logic.",
            "error_type": "Local: incorrect top-N ranking"
        },
        "c30451c0093dfcdea0f2b0bb": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the average total hospital expense that includes unspecified psychosis in 2100?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified psychosis' ) ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT AVG(total_cost) FROM ( SELECT SUM(DISTINCT C.cost) AS total_cost FROM cost AS C INNER JOIN diagnoses_icd AS D ON C.event_id = D.row_id AND C.event_type = 'diagnoses_icd' INNER JOIN d_icd_diagnoses AS DI ON D.icd_code = DI.icd_code INNER JOIN admissions AS A ON C.hadm_id = A.hadm_id WHERE DI.long_title LIKE '%unspecified psychosis%' AND strftime('%Y', A.admittime) = '2100' GROUP BY C.hadm_id ) AS hospital_expenses",
            "gold_answer": "[['1055.09']]",
            "pred_answer": "[['8.23']]",
            "is_correct": false,
            "reasoning": "The predicted SQL errs in the WHERE clause by filtering admissions.admittime instead of cost.chargetime, and joins to admissions needlessly. It also uses SUM(DISTINCT C.cost) instead of SUM(cost.cost). These are confined to a specific clause (the WHERE and aggregate), not altering the overall join strategy or query structure. Hence, this is a localized mistake rather than a fundamental structural or logical flaw.",
            "error_type": "Local: incorrect column reference in WHERE and aggregate clause"
        }
    },
    "deepseek": {
        "9617359518e066b622aeda10": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me the length of stay in days for patient 10008454's first hospital stay.",
            "gold_sql": "SELECT strftime('%J',admissions.dischtime) - strftime('%J',admissions.admittime) FROM admissions WHERE admissions.subject_id = 10008454 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%j', dischtime) - strftime('%j', admittime)) + 1 AS length_of_stay FROM admissions WHERE subject_id = 10008454 ORDER BY admittime ASC LIMIT 1;",
            "gold_answer": "[['10.39']]",
            "pred_answer": "[['11.39']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistake is confined to the way it computes the date difference: it uses the '%j' specifier (day of year) instead of '%J' (Julian day) and arbitrarily adds +1, rather than correctly subtracting Julian days. This is a localized error in the temporal calculation and does not affect other parts of the query\u2019s structure or logic.",
            "error_type": "Local: other-temporal error"
        },
        "b509e60b483133af19e1b147": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "On the last hospital visit, when was the last prescription of fexofenadine issued for patient 10019385?",
            "gold_sql": "SELECT prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019385 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND prescriptions.drug = 'fexofenadine' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(p.starttime) AS last_prescription_time FROM prescriptions p WHERE p.subject;",
            "gold_answer": "[['2100-03-24 04:00:00']]",
            "pred_answer": "no such column: p.subject",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column p.subject, causing a column reference error. This is a localized mistake in the WHERE clause and can be fixed without changing the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "6742c3d30fd69b03c9e78123": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you provide the top three most frequent lab tests that within the same month took place after patients had other repair of vessel since 1 year ago?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'other repair of vessel' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH vessel_repair AS ( SELECT p.subject_id, p.hadm_id, p.charttime AS procedure_date FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dp.long_title = 'other repair of vessel' AND p.charttime >= '2099-12-31' AND p.charttime <= '2100-12-31' ) SELECT DISTINCT dl.label AS lab_test FROM labevents le JOIN d_labitems dl ON le.itemid =;",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['mch'], ['mchc'], ['mcv'], ['platelet count'], ['rdw'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is incomplete and contains a syntax error in the JOIN clause: 'JOIN d_labitems dl ON le.itemid =;'. It is missing the right-hand side of the equality (dl.itemid) and thus fails to form a valid join. This is a localized mistake in the JOIN syntax and does not reflect a broader logical misunderstanding of the query\u2019s intent.",
            "error_type": "Local: syntax error"
        },
        "89925fb3592920a7a4050587": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me what was the total rectal tube output of patient 10029291 on the last icu visit.",
            "gold_sql": "SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) AND outputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'rectal tube' AND d_items.linksto = 'outputevents' )",
            "pred_sql": "SELECT SUM(oe.value) AS total_rectal_tube_output FROM outputevents oe INNER JOIN d_items di ON oe.itemid = di.itemid INNER JOIN icustays icu ON oe.stay_id = icu.stay_id WHERE oe.subject_id;",
            "gold_answer": "[['775.0']]",
            "pred_answer": "[['693796.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to implement the core logic of the question: it does not filter for the specific patient (10029291), does not restrict to the last ICU stay, and does not filter outputevents to \u2018rectal tube\u2019 via d_items.label and linksto. Instead, it leaves an incomplete WHERE clause and broadly joins all stays and all outputevents. These omissions break the overall query structure and intended multi\u2010step filtering logic, so it is not a localized typo or minor JOIN mistake but a fundamental logical/structural error.",
            "error_type": "Global: missing filter conditions"
        }
    },
    "macsql": {
        "6ec3d822a1210a2412249fd7": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What were the number of patients who received a creatine kinase (ck) test?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatine kinase (ck)' ) )",
            "pred_sql": "SELECT COUNT(DISTINCT `subject_id`) AS num_patients FROM labevents WHERE `itemid` IN ( SELECT DISTINCT `itemid` FROM d_labitems WHERE `label` LIKE '%creatine kinase%' OR `label` LIKE '%ck%' )",
            "gold_answer": "[['40.0']]",
            "pred_answer": "[['46.0']]",
            "is_correct": false,
            "reasoning": "The predicted query errs only in the WHERE clause\u2019s filter: it uses a broad LIKE '%creatine kinase%' OR '%ck%' match on d_labitems.label, which pulls in additional tests containing the substring \"ck\". This is a localized mistake in the filtering logic and does not alter the overall query structure or relationships between tables. Hence, it\u2019s a local error.",
            "error_type": "Local: incorrect filter condition"
        }
    },
    "o4-mini": {
        "b5e2d13489c812114277fe37": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Can you list the top five most frequent procedures that patients received within 2 months after having venous catheterization, not elsewhere classified this year?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "WITH venous AS ( SELECT pi.subject_id, pi.hadm_id, pi.charttime AS ven_time FROM procedures_icd pi JOIN d_icd_procedures dp ON pi.icd_code = dp.icd_code WHERE dp.long_title = 'venous catheterization, not elsewhere classified' AND strftime('%Y', pi.charttime) = '2100' ) SELECT dp.long_title FROM procedures_icd pi JOIN d_icd_procedures dp ON pi.icd_code = dp.icd_code JOIN venous v ON pi.subject_id = v.subject_id AND pi.hadm_id = v.hadm_id AND pi.charttime > v.ven_time AND pi.charttime <= datetime(v.ven_time, '+2 months') WHERE dp.long_title <> 'venous catheterization, not elsewhere classified' GROUP BY dp.long_title ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['arteriography of femoral and other lower extremity arteries'], ['arteriography of other intra-abdominal arteries'], ['arteriography of other specified sites'], ['central venous catheter placement with guidance'], ['closed [aspiration] [needle] [percutaneous] biopsy of pancreas'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['control of epistaxis by anterior nasal packing'], ['diagnostic ultrasound of digestive system'], ['insertion of endotracheal tube'], ['laryngoscopy and other tracheoscopy'], ['other endoscopy of small intestine'], ['other endovascular procedures on other vessels'], ['rhinoscopy'], ['transcatheter embolization for gastric or duodenal bleeding']]",
            "pred_answer": "[['arteriography of other intra-abdominal arteries'], ['insertion of endotracheal tube'], ['other endoscopy of small intestine'], ['rhinoscopy'], ['transcatheter embolization for gastric or duodenal bleeding']]",
            "is_correct": false,
            "reasoning": "The predicted SQL restricts the follow-up procedures to the same hadm_id (hospital admission) as the venous catheterization, whereas the question asks for any procedures received by the same patient within two months after venous catheterization (regardless of admission). This incorrect join on hadm_id fundamentally changes which procedures are counted and excludes valid procedures in other admissions, altering the overall logic of the query rather than just a small clause.",
            "error_type": "Global: incorrect join relationship"
        }
    },
    "qwen3": {
        "170e423407c175442b5fc342": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "When did patient 10004733 have a last creatine kinase (ck) laboratory test in 12/this year?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'creatine kinase (ck)' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '12' ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT l.charttime FROM labevents l JOIN d_labitems d ON l.itemid = d.itemid WHERE l.subject_id = 10004733 AND d.label = 'creatine kinase' AND strftime('%Y-%m', l.charttime) = '2100-12' ORDER BY l.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-12-31 21:15:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters on d.label = 'creatine kinase' instead of the exact label 'creatine kinase (ck)', so it returns no rows. This is a localized mistake in the WHERE clause value, not a structural or multi-clause logic issue.",
            "error_type": "Local: value parsing error"
        }
    }
}