{
    "codes": {
        "b73cc134ff0fb035aea49ce3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since 1 year ago, what are the top five most commonly taken specimens for patients within the same hospital visit after the diagnosis of disruption of external operation (surgical) wound?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'disruption of external operation (surgical) wound' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT inputevents.itemid FROM inputevents INNER JOIN icustays ON inputevents.hadm_id = icustays.hadm_id WHERE icustays.row_id = 1 AND inputevents.itemid IN (30046, 30049, 30015, 30227) AND inputevents.starttime > icustays.intime AND icustays.los >= 1 AND icustays.los <= 14 GROUP BY icustays.hadm_id ORDER BY count(icustays.hadm_id) DESC LIMIT 5",
            "gold_answer": "[['blood culture'], ['catheter tip-iv'], ['pleural fluid'], ['swab'], ['tissue']]",
            "pred_answer": "no such column: icustays.los",
            "is_correct": false,
            "reasoning": "The predicted SQL uses entirely the wrong tables and columns (inputevents and icustays with a non\u2010existent los column) instead of microbiologyevents and spec_type_desc. This is not a small typo or local reference fix: it fundamentally alters the query\u2019s logic and intended meaning, joining unrelated tables and filtering by incorrect fields. Hence it is a global structural/join error.",
            "error_type": "Global: join error"
        },
        "16cba15cbeaf0d1e2eb9b512": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last height record for patient 10003046?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10003046 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'height (cm)' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT charttime FROM chartevents WHERE subject_id = 10003046 AND itemid = 223835 ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['173.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL commits errors confined to specific clauses without altering the overall query structure. It selects the wrong column (charttime instead of valuenum) and uses a hard-coded itemid rather than joining through d_items to filter 'height (cm)'. These are localized reference mistakes in the SELECT and WHERE clauses that can be corrected without restructuring the query logic.",
            "error_type": "Local: column reference error"
        },
        "3721cde992fe90dd17278b65": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours have passed since the patient 10021487 was admitted to the hospital?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', admissions.admittime) ) FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL",
            "pred_sql": "SELECT strftime('%J', admissions.admittime) - strftime('%J', admissions.dischtime) AS days FROM admissions WHERE admissions.subject_id = 10021487",
            "gold_answer": "[['599.6']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL stays within the same admissions table and does not alter the overall query structure or logical flow (no additional joins or subqueries). Its mistakes are confined to specific parts: it subtracts admissions.dischtime (instead of using the fixed current time) from admissions.admittime, fails to filter for the current admission (dischtime IS NULL), and omits multiplying the day difference by 24 to get hours. These are localized errors in column reference, filtering, and calculation, not a structural or multi-clause logic flaw.",
            "error_type": "Local: column reference and calculation errors"
        },
        "6a5ef8d7e70eca0770704d97": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of patients who were tested this year for absolute basophil count.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT labevents.hadm_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'absolute basophil count' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') )",
            "pred_sql": "SELECT count(DISTINCT subject_id) FROM microbiologyevents WHERE strftime('%Y', charttime) = '2100' AND test_name = 'Absolute basophil count (abs)'",
            "gold_answer": "[['18.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the microbiologyevents table and the test_name column to find 'Absolute basophil count (abs)', whereas the question and ground truth require checking labevents.itemid via d_labitems.label for 'absolute basophil count'. This is not a simple typo or missing alias but a completely incorrect table and column choice that changes the entire logic of the query.",
            "error_type": "Global: incorrect-table selection"
        },
        "90e45765e5cde71e74fdd56f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "among patients in their 50s , what are the top three prescribed drugs?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 50 AND 59 ) GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT prescriptions.drug FROM patients INNER JOIN admissions ON patients.subject_id = admissions.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE strftime('%Y', patients.dob) > strftime('%Y', 'now') - 50 ORDER BY prescriptions.starttime DESC LIMIT 3",
            "gold_answer": "[['0.9% sodium chloride'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['glucagon'], ['insulin'], ['vial']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets both the age filter and the ranking requirement. It uses a birth\u2010year comparison instead of filtering admissions.age between 50 and 59, and it orders by prescription start time rather than counting and ranking drugs by frequency. This is not a small fix within a single clause but a pervasive logic error affecting the overall structure and intended meaning of the query.",
            "error_type": "Global: incorrect age filter and ranking logic"
        },
        "c7cc751a4a76814634ed9b9a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What does a procedure under the name excision of axillary lymph node cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'procedures_icd' AND cost.event_id IN ( SELECT procedures_icd.row_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'excision of axillary lymph node' ) )",
            "pred_sql": "SELECT procedures_icd.icd_code FROM d_icd_procedures INNER JOIN procedures_icd ON d_icd_procedures.icd_code = procedures_icd.icd_code WHERE d_icd_procedures.long_title = 'Excision of axillary lymph node'",
            "gold_answer": "[['8.16']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the cost table and the filtering on cost.event_type, selecting only the ICD code instead of retrieving the cost. Fixing this would require adding the cost table, its WHERE clause, and the nested subquery logic, which alters the overall structure and purpose of the query rather than a minor clause adjustment.",
            "error_type": "Global"
        }
    },
    "macsql": {
        "8cf537f8c0490a2c2e20f1e4": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Among patients who received percutaneous abdominal drainage , what are the top three most frequent medications that followed within 2 months since 1 year ago?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'percutaneous abdominal drainage' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drug FROM ( SELECT T3.drug, COUNT(*) AS freq FROM prescriptions AS T3 JOIN ( SELECT DISTINCT T1.subject_id, T1.hadm_id FROM procedures_icd AS T1 JOIN d_icd_procedures AS T2 ON T1.icd_code = T2.icd_code WHERE T2.long_title LIKE '%percutaneous abdominal drainage%' ) AS T4 ON T3.subject_id = T4.subject_id AND T3.hadm_id = T4.hadm_id WHERE datetime(T3.starttime) >= datetime('2100-12-31 23:59:00', '-1 year') AND datetime(T3.starttime) < datetime('2100-12-31 23:59:00', '-1 year', '+2 months') GROUP BY T3.drug ORDER BY freq DESC LIMIT 3 )",
            "gold_answer": "[['furosemide'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic by applying a fixed two-month window from exactly one year ago across all patients, rather than finding medications that occurred within two months after each patient\u2019s individual procedure date. It also drops the critical condition tying prescriptions\u2019 start times to the procedure charttime. Fixing this requires restructuring the query (using a correlated join or subquery) to compare each procedure\u2019s charttime to prescription times, not simply filtering on a static date range. This is not a simple typo or local syntax fix but a global change in query structure and logic.",
            "error_type": "Global: incorrect time filter and join logic"
        },
        "f3e413b353d189c95c0a9693": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Show me the top three most common input events.",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.itemid IN ( SELECT T1.itemid FROM ( SELECT inputevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM inputevents GROUP BY inputevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "WITH RankedEvents AS ( SELECT `itemid`, COUNT(*) AS `event_count`, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS `rank` FROM `inputevents` GROUP BY `itemid` ) SELECT `itemid` FROM RankedEvents WHERE `rank` <= 3",
            "gold_answer": "[['dextrose 5%'], ['nacl 0.9%'], ['solution']]",
            "pred_answer": "[['220949.0'], ['225158.0'], ['225943.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly ranks the top three inputevents by count but only selects the itemid values from the inputevents table and never joins to d_items to retrieve the human-readable labels. The missing JOIN to d_items and the incorrect SELECT clause are confined to a specific part of the query and can be fixed by adding the join and selecting d_items.label. This does not require restructuring the overall logic of the ranking query.",
            "error_type": "Local: table reference error"
        },
        "aa55ddc8a4466fa3f1890200": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Count the number of medicines that patient 10011398 was prescribed this month.",
            "gold_sql": "SELECT COUNT(*) FROM prescriptions WHERE prescriptions.hadm_id IN( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10011398 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month')",
            "pred_sql": "SELECT COUNT(DISTINCT `drug`) AS num_medicines FROM prescriptions WHERE `subject_id` = 10011398 AND strftime('%Y-%m', `starttime`) = '2100-12'",
            "gold_answer": "[['53.0']]",
            "pred_answer": "[['28.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in its aggregation and minor filtering details within the SELECT/WHERE clauses: it uses COUNT(DISTINCT drug) instead of COUNT(*) and filters on subject_id directly rather than via hadm_id subquery. These are local mistakes in how the count and filter are applied\u2014not a fundamentally different join strategy or overall query structure. Hence this is a localized aggregation/filter error.",
            "error_type": "Local: aggregation error"
        }
    },
    "qwen3": {
        "6adc7de0c5d946d7b38ae9ad": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What is the total number of patients who underwent insertion of non-drug-eluting peripheral (non-coronary) vessel stent(s) since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'insertion of non-drug-eluting peripheral (non-coronary) vessel stent(s)' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT p.subject_id) FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title = 'insertion of four or more vascular stents' AND p.charttime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong procedure name in the WHERE clause ('insertion of four or more vascular stents' instead of the target 'insertion of non-drug-eluting peripheral (non-coronary) vessel stent(s)'). This is a localized filter value error; the overall query structure, joins, and aggregations are correct, but the literal used to filter the desired procedure is incorrect.",
            "error_type": "Local: value parsing error"
        },
        "f5f51233cf706b9edea9efe9": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "How many patients received unilateral simple mastectomy within the same hospital visit after being diagnosed with malignant neoplasm of central portion of female breast?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'malignant neoplasm of central portion of female breast' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'unilateral simple mastectomy' ) ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) FROM diagnoses_icd d JOIN d_icd_diagnoses dd ON d.icd_code = dd.icd_code JOIN procedures_icd p ON d.subject_id = p.subject_id AND d.hadm_id = p.hadm_id JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dd.long_title = 'Malignant neoplasm of central portion of female breast' AND dp.long_title = 'Unilateral simple mastectomy' AND d.charttime <= p.charttime;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the comparison operator: it uses d.charttime <= p.charttime instead of the strict < used in the ground truth. This is a minor change in the WHERE clause that doesn\u2019t alter the overall query structure or the join logic, and can be fixed by replacing <= with <. Therefore, it is a localized error in the condition, not a flaw in the global logic or joins.",
            "error_type": "Local: comparison operator error"
        },
        "4431d6ea6f7771a84c1d5374": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Which care unit did patient 10027602 last enter on the first hospital visit, as recorded in the transfer record?",
            "gold_sql": "SELECT transfers.careunit FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1 ) AND transfers.careunit IS NOT NULL ORDER BY transfers.intime DESC LIMIT 1",
            "pred_sql": "SELECT t.careunit FROM transfers t JOIN admissions a ON t.hadm_id = a.hadm_id WHERE a.subject_id = 10027602 AND a.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10027602 ORDER BY admittime ASC LIMIT 1 ) ORDER BY t.intime DESC LIMIT 1;",
            "gold_answer": "[['neurology']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is almost correct in structure\u2014it joins transfers to admissions and picks the first admission by admittime, then orders transfers by intime to get the last careunit. However, it fails to filter out admissions without a discharge time (dischtime IS NOT NULL) and does not exclude NULL careunit values (transfers.careunit IS NOT NULL). These omissions are local mistakes in the WHERE clauses and do not require restructuring the whole query.",
            "error_type": "Local: missing filter condition"
        }
    },
    "deepseek": {
        "ca991e457c98ee7a9ad5a817": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How many patients since 1 year ago were diagnosed with other ascites within the same hospital visit after the diagnosis of unspecified viral hepatitis c without hepatic coma?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'unspecified viral hepatitis c without hepatic coma' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other ascites' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT d1.subject_id) AS patient_count FROM diagnoses_icd d1 JOIN d_icd_diagnoses diag1 ON d1.icd_code = diag1.icd_code JOIN diagnoses_icd d2 ON d1.subject_id = d2.subject_id AND d1.hadm_id = d2.hadm_id JOIN d_icd_diagnoses diag2 ON d2.icd_code = diag2.icd_code WHERE diag1.long_title = 'unspecified viral hepatitis c without hepatic coma' AND diag2.long_title = 'other ascites' AND datetime(d2.charttime) > datetime(d1.charttime) AND datetime(d1.charttime) >= datetime('2100-12-31 23:59:00', '-1 year') AND d1.hadm_id IN ( SELECT hadm_id FROM admissions WHERE datetime;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL ends abruptly in the WHERE clause with an incomplete IN subquery (`AND d1.hadm_id IN ( SELECT hadm_id FROM admissions WHERE datetime;`), leading to a syntax error near \";\". This is a localized mistake in the WHERE clause rather than a fundamental misunderstanding of the query\u2019s overall structure or logic.",
            "error_type": "Local: syntax error"
        },
        "1d46dfdd771d6cc0e5fd9703": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the cost of a acute pulmonary edema diagnosis?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'diagnoses_icd' AND cost.event_id IN ( SELECT diagnoses_icd.row_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'acute pulmonary edema' ) )",
            "pred_sql": "SELECT DISTINCT c.cost;",
            "gold_answer": "[['3.23']]",
            "pred_answer": "no such column: c.cost",
            "is_correct": false,
            "reasoning": "The predicted SQL \u2018SELECT DISTINCT c.cost;\u2019 references alias \u2018c\u2019 and column \u2018cost\u2019 without a FROM clause or table alias definition. This is a localized column/table reference issue and can be fixed by adding the proper FROM cost AS c and appropriate WHERE conditions. It does not alter overall query logic, only the SELECT clause\u2019s table reference. Therefore, it is a Local error.",
            "error_type": "Local: table/column reference error"
        },
        "04a0aa7059bb30df4a066313": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What was the name of the lab test that patient 10004733 last had on the current hospital visit?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT labevents.itemid FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ORDER BY labevents.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT dl.label FROM labevents le JOIN admissions a ON le.hadm_id = a.hadm_id JOIN d_labitems dl ON le.itemid = dl.itemid WHERE le.subject_id = 10004733 AND a.dischtime IS NULL -- current hospital visit ORDER BY le.charttime DESC LIMIT 1;",
            "gold_answer": "[['creatine kinase (ck)']]",
            "pred_answer": "[['% hemoglobin a1c'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['alanine aminotransferase (alt)'], ['albumin'], ['albumin'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['alkaline phosphatase'], ['ammonia'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anion gap'], ['anticardiolipin antibody igg'], ['anticardiolipin antibody igm'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['asparate aminotransferase (ast)'], ['base excess'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['basophils'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bicarbonate'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['bilirubin, total'], ['c-reactive protein'], ['c3'], ['c4'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calcium, total'], ['calculated total co2'], ['chloride'], ['chloride'], ['chloride'], ['chloride'], ['chloride'], ['chloride'], ['chloride'], ['chloride']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is structurally the same as the ground truth\u2014it joins labevents to admissions and d_labitems\u2014but its WHERE and ORDER BY usage causes it to return all lab tests sharing the latest charttime rather than the single most recent test. This is a localized mistake in the filtering/ordering logic, not a fundamental join or structural error, so it can be fixed by refining the WHERE/ORDER BY clauses. Therefore it is a Local error.",
            "error_type": "Local: other filter/order error"
        },
        "3a03752928a5b5d2dc708683": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Can you show me the top three most frequent lab tests in 2100?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T1.itemid FROM ( SELECT labevents.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM labevents WHERE strftime('%Y',labevents.charttime) = '2100' GROUP BY labevents.itemid ) AS T1 WHERE T1.C1 <= 3 )",
            "pred_sql": "SELECT T1.label, COUNT(*) as frequency FROM d_labitems T1 INNER JOIN labevents T2 ON T1.itemid = T2.itemid;",
            "gold_answer": "[['chloride'], ['hematocrit'], ['sodium']]",
            "pred_answer": "[['phosphate', '26426.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely omits the WHERE filter on year 2100, the GROUP BY clause, the DENSE_RANK/window function logic, and the ORDER/LIMIT to retrieve the top three tests. Instead, it returns an aggregate over the entire dataset without grouping or filtering, altering the query\u2019s fundamental logic and structure. Fixing this would require adding multiple clauses and restructuring the query, not just a minor tweak.",
            "error_type": "Global"
        },
        "a2cc63130b02eeb31226b139": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When did patient 10025612 receive the lr for the last time on 10/01/2100?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10025612 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'lr' AND d_items.linksto = 'inputevents' ) AND strftime('%Y-%m-%d',inputevents.starttime) = '2100-10-01' ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(starttime) AS last_lr_time FROM inputevents WHERE subject_id = 10025612 AND datetime(starttime,'start of day') = datetime('2100-10-01 00:00:00','start of day','-0 day');",
            "gold_answer": "[['2100-10-01 14:40:00']]",
            "pred_answer": "[['2100-10-01 23:05:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to filter inputevents for the \u2018lr\u2019 label (missing the JOIN/WHERE on d_items.itemid with label = 'lr') and also omits the hospital admission/ICU stay linkage; it simply uses subject_id and date. This is a localized mistake in the WHERE clause (missing condition), not a fundamental restructuring of the query\u2019s logic.",
            "error_type": "Local: missing filter on item"
        },
        "76f0161e2704b0fb1eaf5eae": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Count the number of patients for whom ibuprofen was prescribed within the same hospital visit post their diagnosis of panic disorder without agoraphobia.",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'panic disorder without agoraphobia' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'ibuprofen' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) AS patient_count FROM ( SELECT di.subject_id, MIN(di.charttime) AS first_diagnosis_time FROM diagnoses_icd di INNER JOIN d_icd_diagnoses dd ON di.icd_code = dd.icd_code WHERE dd.long_title = 'Panic disorder without agoraphobia' GROUP BY di.subject_id ) d INNER JOIN prescriptions p ON d.subject_id = p.subject_id AND d.first_diagnosis_time < p.start;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: p.start",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column p.start instead of the correct p.starttime in the prescriptions table. This is a simple column reference mistake and does not alter the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        }
    },
    "llm2sql": {
        "9b48cb64daf9115952eca1ce": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Among patients diagnosed with pneumonitis due to inhalation of food and vomit, what are the top five most common lab tests that followed within the same month this year?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'pneumonitis due to inhalation of food and vomit' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT D.label FROM ( SELECT L.itemid, COUNT(*) as frequency FROM diagnoses_icd AS DI JOIN d_icd_diagnoses as DID ON DI.icd_code = DID.icd_code JOIN labevents AS L ON DI.subject_id = L.subject_id AND DI.hadm_id = L.hadm_id WHERE DID.long_title = 'pneumonitis due to inhalation of food and vomit' AND datetime(DI.charttime, 'start of month') = datetime(L.charttime, 'start of month') AND datetime(DI.charttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') GROUP BY L.itemid ORDER BY frequency DESC LIMIT 5 ) AS TopLabs JOIN d_labitems AS D ON TopLabs.itemid = D.itemid;",
            "gold_answer": "[['alanine aminotransferase (alt)'], ['alkaline phosphatase'], ['anion gap'], ['asparate aminotransferase (ast)'], ['base excess'], ['bicarbonate'], ['bilirubin, total'], ['calcium, total'], ['calculated total co2'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['magnesium'], ['mch'], ['mchc'], ['mcv'], ['pco2'], ['ph'], ['phosphate'], ['platelet count'], ['po2'], ['pt'], ['ptt'], ['rdw'], ['rdw-sd'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "[['glucose'], ['magnesium'], ['phosphate'], ['sodium'], ['urea nitrogen']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended logic by joining diagnoses_icd and labevents on both subject_id and hadm_id, thereby restricting lab results to the same hospital admission rather than simply the same calendar month. Furthermore, it omits the requirement that the lab test occur after the diagnosis (DI.charttime < L.charttime). These errors span multiple clauses (JOIN and WHERE) and alter the overall meaning and result set, not just a localized syntax mistake.",
            "error_type": "Global: incorrect join and missing temporal condition"
        }
    },
    "dinsql": {
        "a0a67e4692f54ef3b4118ab9": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "On the last hospital visit, when did patient 10027602 receive the first csf;spinal fluid microbiology test?",
            "gold_sql": "SELECT microbiologyevents.charttime FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND microbiologyevents.spec_type_desc = 'csf;spinal fluid' ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(charttime) FROM microbiologyevents WHERE subject_id = 10027602 AND spec_type_desc = 'csf",
            "gold_answer": "[['2100-11-30 04:46:00']]",
            "pred_answer": "unrecognized token: \"'csf\"",
            "is_correct": false,
            "reasoning": "The predicted SQL has a localized syntax error: the string literal for spec_type_desc is not closed (unrecognized token \"'csf\"). This does not change the overall query structure or logic, and can be fixed by properly closing the quote. Therefore, it is a Local error.",
            "error_type": "Local: value parsing error"
        }
    }
}