{
    "deepseek": {
        "5fe8240c72712d70d057790d": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "How many patients had a diagnosis of hypomagnesemia in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT diagnosis.patientunitstayid FROM diagnosis WHERE diagnosis.diagnosisname = 'hypomagnesemia' AND strftime('%Y',diagnosis.diagnosistime) = '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT p.patientunitstayid) AS number_of_patients FROM patient p JOIN diagnosis d ON p.patientunitstayid = d.patientunitstayid WHERE d.diagnosisname = 'hypomagnesemia' AND strftime('%Y', d.diagnosistime) = '2100';",
            "gold_answer": "[['22.0']]",
            "pred_answer": "[['24.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts DISTINCT p.patientunitstayid (unique stays) instead of DISTINCT p.uniquepid (unique patients). This is a column reference mistake in the SELECT clause that leads to counting stays rather than patients, resulting in an inflated count. The overall query structure and logic remain correct; only the referenced column needs to be changed.",
            "error_type": "Local: column reference error"
        },
        "81131ba93ea7e3e5e2d9bf82": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "How many days has it been since patient 025-1000's last stay in ward 908 on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',patient.unitadmittime) ) FROM patient WHERE patient.uniquepid = '025-1000' AND patient.wardid = 908 AND patient.hospitaldischargetime IS NULL ORDER BY patient.unitadmittime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%J', '2100-12-31 23:59:00') - strftime('%J', p.unitdischargetime)) AS days_since_last_stay FROM patient p WHERE p.uniquepid = '025-1000' AND p.wardid = 908 ORDER BY p.unitadmittime DESC LIMIT 1;",
            "gold_answer": "[['9.574']]",
            "pred_answer": "[['6.167']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to restrict to the current hospital visit (it never filters WHERE hospitaldischargetime IS NULL) and furthermore subtracts the Julian day of unitdischargetime instead of unitadmittime. These mistakes change the overall set of rows considered and the core date calculation, so fixing it requires altering both the WHERE clause logic and the SELECT expression. This is not a simple typo or local syntax fix but a fundamental logic error across clauses.",
            "error_type": "Global: incorrect filter and column usage"
        },
        "a75bc868e8e28198c8b93ff2": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What is the length of the first hospital stay in days for patient 022-101650?",
            "gold_sql": "SELECT strftime('%J',patient.hospitaldischargetime) - strftime('%J',patient.hospitaladmittime) FROM patient WHERE patient.uniquepid = '022-101650' AND patient.hospitaladmittime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%j', hospitaldischargetime) - strftime('%j', hospitaladmittime)) + 1 AS length_of_stay FROM patient WHERE uniquepid = '022-101650' AND hospitaldischargetime IS NOT NULL;",
            "gold_answer": "[['0.747']]",
            "pred_answer": "[['1.747']]",
            "is_correct": false,
            "reasoning": "The predicted SQL miscalculates the length of stay by using strftime('%j') (day\u2010of\u2010year) instead of strftime('%J') (Julian day number) and then arbitrarily adds +1. This is an error localized to the date arithmetic in the SELECT clause. The overall query structure (table, WHERE clause) remains intact.",
            "error_type": "Local: other\u2010temporal error"
        },
        "943bc16041b3fcf7d4976352": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What was the last ward that patient 002-10157 stayed in on the last hospital visit?",
            "gold_sql": "SELECT patient.wardid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-10157' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ORDER BY patient.unitadmittime DESC LIMIT 1",
            "pred_sql": "SELECT wardid FROM patient WHERE uniquepid = '002-10157' AND hospitaladmittime = (SELECT MAX(hospitaladmittime) FROM patient WHERE uniquepid = '002-10157');",
            "gold_answer": "[['93.0']]",
            "pred_answer": "[['93.0'], ['93.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL picks rows where hospitaladmittime is the maximum for that patient, rather than first identifying the last completed hospital visit (using hospitaldischargetime) and then selecting the last ward within that visit (using unitadmittime). It fails to filter on hospitaldischargetime IS NOT NULL and does not retrieve the ward with the maximum unitadmittime for that specific visit. This is not a simple typo or single\u2010clause fix but a flaw in the overall query logic and structure, requiring reworking the visit identification and ward selection steps.",
            "error_type": "Global: incorrect visit and ward selection logic"
        },
        "131699dc7495ab5b40aa2d69": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What was the name of the specimen test that was last given to patient 025-10060 on the last hospital visit?",
            "gold_sql": "SELECT microlab.culturesite FROM microlab WHERE microlab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '025-10060' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) ORDER BY microlab.culturetakentime DESC LIMIT 1",
            "pred_sql": "SELECT lab.labname FROM lab WHERE lab.patientunitstayid = ( SELECT p.patientunitstayid FROM patient p WHERE p.uniquepid = '025-10060' ORDER BY p.hospitaladmittime DESC LIMIT 1 ) ORDER BY lab.labresulttime DESC LIMIT 1;",
            "gold_answer": "[['sputum, expectorated']]",
            "pred_answer": "[['paco2']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the lab table instead of the microlab table to retrieve the specimen test name, and it fails to enforce the discharge condition for selecting the last hospital visit. This is not a minor syntax or column reference fix but a fundamental misselection of the source table and join logic, altering the intended meaning of the query.",
            "error_type": "Global: join/table selection error"
        }
    },
    "codes": {
        "842cbb900711080a31ff45c7": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Find out the monthly minimum urine output volume of patient 012-1111 since 05/2100.",
            "gold_sql": "SELECT MIN(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '012-1111' ) ) AND intakeoutput.celllabel = 'urine' AND intakeoutput.cellpath LIKE '%output%' AND strftime('%Y-%m',intakeoutput.intakeoutputtime) >= '2100-05' GROUP BY strftime('%Y-%m',intakeoutput.intakeoutputtime)",
            "pred_sql": "SELECT intakeoutput.cellvaluenumeric FROM patient INNER JOIN intakeoutput ON patient.patientunitstayid = intakeoutput.patientunitstayid WHERE patient.uniquepid = '012-1111' AND intakeoutput.cellpath = 'PregnancyUrineOutput' AND intakeoutput.intakeoutputtime >= '2005-01-01' AND intakeoutput.cellvaluenumeric > 0",
            "gold_answer": "[['100.0'], ['150.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely misses the core logic of the task. Instead of computing the monthly minimum urine output since May 2100 and grouping by month, it selects raw intakeoutput.cellvaluenumeric values without aggregation or grouping. It uses the wrong date filter (\u20182005-01-01\u2019 instead of '2100-05'), the wrong cellpath filter (equality to 'PregnancyUrineOutput' rather than celllabel = 'urine' and cellpath LIKE '%output%'), and omits the MIN() and GROUP BY strftime('%Y-%m', ...) entirely. These mistakes span multiple clauses and fundamentally alter the query\u2019s intended meaning, requiring a full restructuring of the query logic.",
            "error_type": "Global: incorrect aggregation and filtering"
        },
        "bdaee084287bac68d7820aa5": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Count how many times in 2100 patient 004-12030 had tracheal suctioning.",
            "gold_sql": "SELECT COUNT(*) FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-12030' ) ) AND treatment.treatmentname = 'tracheal suctioning' AND strftime('%Y',treatment.treatmenttime) = '2100'",
            "pred_sql": "SELECT count(treatment.treatmentid) FROM patient INNER JOIN treatment ON patient.patientunitstayid = treatment.patientunitstayid WHERE patient.uniquepid = '004-12030' AND treatment.treatmentname = 'tracheostomy tube, suctioning'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the year filter (strftime('%Y', treatment.treatmenttime) = '2100') and uses the wrong treatment name literal ('tracheostomy tube, suctioning' instead of 'tracheal suctioning'). Both issues are isolated to the WHERE clause and can be corrected by adjusting these conditions without changing the overall query structure or joins.",
            "error_type": "Local: value/condition error"
        },
        "21c120826fba86457b825834": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Can you tell me the name of the allergy patient 025-10477 had on the current hospital visit?",
            "gold_sql": "SELECT allergy.allergyname FROM allergy WHERE allergy.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '025-10477' AND patient.hospitaldischargetime IS NULL ) )",
            "pred_sql": "SELECT allergy.allergyname FROM allergy INNER JOIN patient ON allergy.patientunitstayid = patient.patientunitstayid WHERE allergy.patientunitstayid = 2510477",
            "gold_answer": "[['levaquin'], ['pcn']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters on allergy.patientunitstayid = 2510477, completely missing the JOIN condition on the patient\u2019s unique PID and the requirement that hospitaldischargetime IS NULL for the current visit. It uses the wrong predicate value instead of filtering by patient.uniquepid and current admission. This is a localized mistake in the WHERE clause and does not require restructuring the query\u2019s overall flow or JOIN logic. It\u2019s a simple incorrect filter condition (value parsing/reference error).",
            "error_type": "Local: incorrect filter condition"
        },
        "30bec392082c2be51f011477": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Tell me patient 016-10268's minimum sao2 since 348 days ago.",
            "gold_sql": "SELECT MIN(vitalperiodic.sao2) FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '016-10268' ) ) AND vitalperiodic.sao2 IS NOT NULL AND datetime(vitalperiodic.observationtime) >= datetime('2100-12-31 23:59:00','-348 day')",
            "pred_sql": "SELECT min(vitalperiodic.sao2) FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '016-10268' AND vitalperiodic.observationtime - patient.admissiontime > 348 * 24 * 60 * 60",
            "gold_answer": "[['88.0']]",
            "pred_answer": "no such column: patient.admissiontime",
            "is_correct": false,
            "reasoning": "The predicted SQL references patient.admissiontime, which does not exist in the schema (the patient table has hospitaladmittime and unitadmittime but no admissiontime). This is a localized column reference error and does not require restructuring the overall query logic.",
            "error_type": "Local: column reference error"
        },
        "3693b0ad14990ea6976343c2": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "among patients in their 60 or above , what are the top four prescribed drugs?",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age >= 60 ) GROUP BY medication.drugname ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT medication.drugname FROM vitalperiodic INNER JOIN medication ON vitalperiodic.patientunitstayid = medication.patientunitstayid WHERE vitalperiodic.sao2 >= 60 ORDER BY medication.drugname ASC LIMIT 4",
            "gold_answer": "[['1000 ml flex cont : sodium chloride 0.9 % iv soln'], ['nacl 0.9% mbp'], ['potassium chloride'], ['tylenol']]",
            "pred_answer": "[['.5 ml vial : pneumococcal vac polyvalent 25 mcg/0.5ml ij inj'], ['.5 ml vial : pneumococcal vac polyvalent 25 mcg/0.5ml ij inj'], ['.5 ml vial : pneumococcal vac polyvalent 25 mcg/0.5ml ij inj'], ['.5 ml vial : pneumococcal vac polyvalent 25 mcg/0.5ml ij inj']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the intended logic: it joins medication to vitalperiodic and filters on sao2 rather than selecting patients aged 60 or above. It also orders alphabetically and limits without any aggregation or ranking by prescription frequency. This is not a localized syntax fix but a complete restructuring of the query\u2019s logic and relationships, thus a global error.",
            "error_type": "Global: incorrect join/logic"
        },
        "faed071c4640fac3eb46ce91": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the first procedure patient 017-100002 had on the first hospital visit?",
            "gold_sql": "SELECT treatment.treatmentname FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '017-100002' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) ORDER BY treatment.treatmenttime ASC LIMIT 1",
            "pred_sql": "SELECT treatment.treatmentname FROM patient INNER JOIN treatment ON patient.patientunitstayid = treatment.patientunitstayid WHERE patient.patientunitstayid = 111170",
            "gold_answer": "[['chest x-ray']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely replaces the multi-step logic needed to (1) identify the correct patient via uniquepid, (2) select their first completed hospital visit, and (3) then pick the earliest treatment on that visit. Instead, it hard-codes a single patientunitstayid (111170), omits any uniquepid filter, ignores hospitaldischargetime, and drops the ORDER BY\u2026LIMIT to get the first procedure. This is not a small typo or missing alias\u2014it fundamentally breaks the intended query flow and must be restructured to fix.",
            "error_type": "Global: join/logic error"
        },
        "3c0cb3bd0156ed794e5cd31e": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Which was the first procedure that patient 017-101737 received since 2100?",
            "gold_sql": "SELECT treatment.treatmentname FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '017-101737' ) ) AND strftime('%Y',treatment.treatmenttime) >= '2100' ORDER BY treatment.treatmenttime ASC LIMIT 1",
            "pred_sql": "SELECT procedureevents.itemid FROM patient INNER JOIN procedureevents ON patient.patientunitstayid = procedureevents.patientunitstayid WHERE patient.uniquepid = '017-101737' AND procedureevents.itemid IN (40314, 40313, 40315, 40312) AND procedureevents.procedureordercategoryname = 'Evaluation' ORDER BY procedureevents.procedureordercategorydescription DESC LIMIT 1",
            "gold_answer": "[['cabg']]",
            "pred_answer": "no such table: procedureevents",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a table named procedureevents, which does not exist in the provided schema. This is a localized error in the FROM/JOIN clause (a table reference error) and does not affect the overall intended logic of selecting the first treatment since 2100. It can be fixed by replacing the nonexistent table with the correct one (treatment) without restructuring the query.",
            "error_type": "Local: table reference error"
        }
    },
    "o4-mini": {
        "0ab92f68e27dbc8a5e3a463a": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Show me patient 027-104050's daily maximum weight since 10 months ago.",
            "gold_sql": "SELECT MAX(patient.admissionweight) FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-104050' ) AND patient.admissionweight IS NOT NULL AND datetime(patient.unitadmittime) >= datetime('2100-12-31 23:59:00','-10 month') GROUP BY strftime('%Y-%m-%d',patient.unitadmittime)",
            "pred_sql": "WITH weight_events AS ( SELECT date(hospitaladmittime) AS obs_date, admissionweight AS weight FROM patient WHERE uniquepid = '027-104050' AND hospitaladmittime >= datetime('2100-12-31 23:59:00', '-10 months') AND admissionweight IS NOT NULL UNION ALL SELECT date(hospitaldischargetime) AS obs_date, dischargeweight AS weight FROM patient WHERE uniquepid = '027-104050' AND hospitaldischargetime >= datetime('2100-12-31 23:59:00', '-10 months') AND dischargeweight IS NOT NULL ) SELECT obs_date, MAX(weight) AS daily_max_weight FROM weight_events GROUP BY obs_date ORDER BY obs_date;",
            "gold_answer": "[['61.9'], ['61.9'], ['63.95']]",
            "pred_answer": "[['2100-09-04', '63.95']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of the original request. Instead of using only admission weights and grouping by the ICU admission time (unitadmittime) as in the ground truth, it introduces a UNION ALL with discharge weights, uses hospital admission/discharge times, and applies an incorrect time offset syntax (\u2018-10 months\u2019 rather than \u2018-10 month\u2019). These changes alter the intended meaning of the query and require a full restructuring rather than a simple fix in one clause.",
            "error_type": "Global: structural error"
        },
        "c05dacc0e794b065b0057789": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Among patients who received cardiac angiography , what are the top three most frequent medications that followed within the same month?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'cardiac angiography' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.drugstarttime AND datetime(T1.treatmenttime,'start of month') = datetime(T2.drugstarttime,'start of month') GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT med.drugname FROM treatment AS angio JOIN medication AS med ON angio.patientunitstayid = med.patientunitstayid WHERE angio.treatmentname LIKE '%cardiac angiography%' AND med.drugstarttime > angio.treatmenttime AND strftime('%Y-%m', med.drugstarttime) = strftime('%Y-%m', angio.treatmenttime) GROUP BY med.drugname ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['furosemide 10mg/ml sso'], ['potassium chloride'], ['potassium chloride tab cr'], ['sodium chloride 0.9%  sso']]",
            "pred_answer": "[['1000 ml flex cont : sodium chloride 0.9 % iv soln'], ['potassium chloride'], ['sodium chloride 0.9%  sso']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly references existing tables and columns and uses valid SQLite functions, so there is no localized syntax or parsing issue. However, it alters the intended logic by joining and grouping on patientunitstayid rather than uniquepid. The ground truth query tracks unique patients across potentially multiple stays (using uniquepid), whereas the prediction counts per unit stay. This changes how frequencies are aggregated and thus the top\u20103 results differ. Fixing this requires changing the join key and aggregation logic\u2014i.e., restructuring the query\u2019s core logic\u2014so it is a global error.",
            "error_type": "Global: join-key error"
        },
        "fc3fa0924d7ccd7d27a6951a": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "How many patients this year were diagnosed with hypoglycemia within 2 months after the diagnosis of acute respiratory failure?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'acute respiratory failure' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'hypoglycemia' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 WHERE T1.diagnosistime < T2.diagnosistime AND datetime(T2.diagnosistime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime,'+2 month')",
            "pred_sql": "SELECT COUNT(DISTINCT d1.patientunitstayid) AS patient_count FROM diagnosis d1 JOIN diagnosis d2 ON d1.patientunitstayid = d2.patientunitstayid AND d2.diagnosisname = 'hypoglycemia' AND datetime(d2.diagnosistime) BETWEEN datetime(d1.diagnosistime) AND datetime(d1.diagnosistime, '+2 months') WHERE d1.diagnosisname = 'acute respiratory failure' AND strftime('%Y', d1.diagnosistime) = '2100';",
            "gold_answer": "[['151.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only uses a different join key (patientunitstayid instead of uniquepid) but also fails to apply the same year filter to the hypoglycemia diagnosis. These mistakes change the overall logic\u2014linking events by stay rather than by patient and mis-filtering dates\u2014resulting in a fundamentally different result. This error spans multiple clauses (JOIN and WHERE) and requires restructuring the query rather than a simple fix in one clause.",
            "error_type": "Global: incorrect join key and filter logic"
        }
    },
    "macsql": {
        "0ec4d0ab8e32f4bcc5d9151e": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "What is the minimum total hospital cost that involved oxygen therapy (< 40%) - 30-40% since 2100?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.patientunitstayid IN ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'oxygen therapy (< 40%) - 30-40%' ) ) AND strftime('%Y',cost.chargetime) >= '2100' GROUP BY cost.patienthealthsystemstayid ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) AS minimum_total_cost FROM ( SELECT T1.patienthealthsystemstayid, SUM(T2.cost) AS total_cost FROM ( SELECT DISTINCT T1.patienthealthsystemstayid FROM treatment AS T1 JOIN patient AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.treatmentname LIKE '%oxygen therapy%' AND T1.treatmentname LIKE '%30-40%' AND datetime(T1.treatmenttime) >= datetime('2100-01-01 00:00:00') ) AS T1 JOIN cost AS T2 ON T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T1.patienthealthsystemstayid ) AS total_costs",
            "gold_answer": "[['486.69']]",
            "pred_answer": "no such column: T1.patienthealthsystemstayid",
            "is_correct": false,
            "reasoning": "The failure occurs because the subquery labels the treatment table as T1 and then tries to select T1.patienthealthsystemstayid, but patienthealthsystemstayid is a column of the patient table (aliased T2), not of treatment. This is a straightforward column reference mistake in a single clause and can be fixed by selecting T2.patienthealthsystemstayid. No overall query logic or structure needs changing, so it\u2019s a Local error.",
            "error_type": "Local: column-reference error"
        },
        "e132de5413f8581089ebc590": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Could you tell me the maximum total cost in a hospital where lorazepam 2 mg/1 ml 1 ml inj is used since 1 year ago?",
            "gold_sql": "SELECT MAX(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.patientunitstayid IN ( SELECT medication.patientunitstayid FROM medication WHERE medication.drugname = 'lorazepam 2 mg/1 ml 1 ml inj' ) ) AND datetime(cost.chargetime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY cost.patienthealthsystemstayid ) AS T1",
            "pred_sql": "SELECT MAX(total_cost) AS max_total_cost FROM ( SELECT T2.hospitalid, SUM(T3.cost) AS total_cost FROM patient AS T2 JOIN cost AS T3 ON T2.patienthealthsystemstayid = T3.patienthealthsystemstayid WHERE T2.hospitalid IN ( SELECT DISTINCT T2.hospitalid FROM medication AS T1 JOIN patient AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.drugname = 'lorazepam 2 mg/1 ml 1 ml inj' AND datetime(T1.drugstarttime) >= datetime('2100-12-31 23:59:00', '-1 year') ) GROUP BY T2.hospitalid ) AS hospital_costs",
            "gold_answer": "[['10138.72']]",
            "pred_answer": "[['105838.2']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by grouping costs at the hospital level rather than per patient health system stay. The ground truth sums costs per patient stay (patienthealthsystemstayid) and then takes the maximum of those sums. The predicted version joins and groups by hospitalid, summing costs across all stays in each hospital, and then takes the maximum, which is a different aggregation scope. This is not a localized syntax or reference mistake but a structural error that alters the intended meaning of the query.",
            "error_type": "Global: incorrect grouping/aggregation"
        },
        "f7fbfcef8db77d835ccb2925": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Retrieve the top four most frequent diagnoses of patients with age in their 60 or above since 1 year ago.",
            "gold_sql": "SELECT T1.diagnosisname FROM ( SELECT diagnosis.diagnosisname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnosis WHERE diagnosis.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age >= 60 ) AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY diagnosis.diagnosisname ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT `diagnosisname` FROM ( SELECT D.`diagnosisname`, COUNT(*) AS frequency FROM ( SELECT DISTINCT `patientunitstayid` FROM patient WHERE CAST(`age` AS INTEGER) >= 60 ) AS P INNER JOIN ( SELECT DISTINCT `patientunitstayid`, `diagnosisname` FROM diagnosis WHERE datetime(`diagnosistime`) >= datetime('2100-12-31 23:59:00', '-1 year') ) AS D ON P.`patientunitstayid` = D.`patientunitstayid` GROUP BY D.`diagnosisname` ORDER BY frequency DESC LIMIT 4 ) AS TopDiagnoses",
            "gold_answer": "[['acute renal failure'], ['acute respiratory failure'], ['congestive heart failure'], ['hypoxemia']]",
            "pred_answer": "[['acute renal failure'], ['acute respiratory failure'], ['congestive heart failure'], ['pneumonia']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the intended aggregation logic by introducing DISTINCT on diagnosis records per patient before counting. This causes it to count the number of patients with each diagnosis rather than the total number of diagnosis events, and it also uses LIMIT instead of a dense rank to handle ties. Fixing this requires restructuring the aggregation and windowing logic, which is beyond a simple one-clause correction.",
            "error_type": "Global: flawed aggregation logic"
        }
    },
    "dinsql": {
        "1de044d1478a726d732f23df": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "What are the five most commonly prescribed drugs for patients that received antibacterials within the same month?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'antibacterials' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.drugstarttime AND datetime(T1.treatmenttime,'start of month') = datetime(T2.drugstarttime,'start of month') GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH AntibacterialPatients AS ( SELECT DISTINCT patientunitstayid, strftime('%Y-%m', drugstarttime) AS month FROM medication WHERE drugname LIKE '%antibacterial%' ), PrescribedDrugs AS ( SELECT drugname FROM medication WHERE (patientunitstayid, strftime('%Y-%m', drugstarttime)) IN (SELECT patientunitstayid, month FROM AntibacterialPatients) ) SELECT drugname FROM ( SELECT drugname, COUNT(*) AS count FROM PrescribedDrugs GROUP BY drugname ORDER BY count DESC LIMIT 5 )",
            "gold_answer": "[['dextrose 50%'], ['nacl 0.9% mbp'], ['piperacillin-tazobactam'], ['potassium chloride'], ['potassium chloride crys er 20 meq po tbcr']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely bypasses the treatment table, instead looking for drugs with \u2018antibacterial\u2019 in their name in the medication table. The question asks for drugs prescribed in the same month as a treatment event named 'antibacterials'. The predicted query never references treatment and so fails to capture the intended patient set, requiring a fundamental restructuring (correct joins between treatment and medication tables) rather than a small fix. This is a Global error.",
            "error_type": "Global: incorrect join/logic error"
        },
        "42c5e6a86e2af4026e2293e2": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Please provide the count of patients who had a volume (ml)-levofloxacin (levaquin) ivpb 750 mg input since 2 year ago.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT intakeoutput.patientunitstayid FROM intakeoutput WHERE intakeoutput.celllabel = 'volume (ml)-levofloxacin (levaquin) ivpb 750 mg' AND intakeoutput.cellpath LIKE '%input%' AND datetime(intakeoutput.intakeoutputtime) >= datetime('2100-12-31 23:59:00','-2 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT patientunitstayid) FROM intakeoutput WHERE cellpath LIKE '%intake%' AND celllabel LIKE '%volume (ml)-levofloxacin (levaquin) ivpb 750 mg%' AND datetime(intakeoutputtime) >= datetime('2100-12-31 23:59:00', '-2 years')",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['7.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only queries the intakeoutput table, using cellpath LIKE '%intake%' (instead of '%input%') and counts DISTINCT patientunitstayid. It omits the patient table and thus counts ICU stays rather than unique patients. This is not a simple typo; it fundamentally alters the query\u2019s logic and relationships, requiring restructuring (adding a join or subquery) to count distinct patients based on their uniqepid.",
            "error_type": "Global: incorrect join/relationship"
        }
    }
}