{
    "deepseek": {
        "478c3255e0407d25b4d06849": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "In total, what was the input amount of zosyn that patient 005-10238 received on 03/26/this year?",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '005-10238' ) ) AND intakeoutput.celllabel = 'zosyn' AND intakeoutput.cellpath LIKE '%intake%' AND datetime(intakeoutput.intakeoutputtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',intakeoutput.intakeoutputtime) = '03-26'",
            "pred_sql": "SELECT SUM(CAST(SUBSTR(T2.dosage, 0, INSTR(T2.dosage, ' ') - 1) AS REAL)) AS total_zosyn_input FROM patient AS T1 INNER JOIN medication AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.uniquepid = '005-10238' AND T2.drugname LIKE '%zosyn%' AND DATE(T2.drugstarttime) = '2100-03-26';",
            "gold_answer": "[['250.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL retrieves data from the medication table (joining patient to medication and using drugstarttime and dosage) rather than from the intakeoutput table with celllabel = 'zosyn' and cellpath LIKE '%intake%'. This is not a simple column or date formatting mistake; it fundamentally changes the source of data and the logic of the query. Fixing it requires restructuring the query to use intakeoutput instead of medication and applying the proper filters, so it is a global error.",
            "error_type": "Global: incorrect table/join error"
        },
        "91bea95ae8d28879a583b9d7": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What is the total cost for patient 011-10149's hospital stay?",
            "gold_sql": "SELECT SUM(cost.cost) FROM cost WHERE cost.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '011-10149' )",
            "pred_sql": "SELECT SUM(DISTINCT c.cost) AS total_cost FROM patient p JOIN cost c ON p.patienthealthsystemstayid = c.patienthealthsystemstayid WHERE p.uniquepid = '011-10149';",
            "gold_answer": "[['530.59']]",
            "pred_answer": "[['355.51']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly applies DISTINCT within the SUM aggregation, summing only unique cost values rather than all cost entries for the patient's stay. This mistake is confined to the SELECT clause's aggregate function and does not involve changes to the overall query structure or logic (joins and filters are correct). Hence, it is a localized error in the aggregate function.",
            "error_type": "Local: incorrect use of DISTINCT in SUM"
        },
        "74ea0cbd79ce583aa647a2d5": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Count the number of patients that were admitted to the hospital since 1 year ago.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE datetime(patient.hospitaladmittime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT COUNT(DISTINCT patienthealthsystemstayid) FROM patient WHERE hospitaladmittime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['1384.0']]",
            "pred_answer": "[['1542.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses COUNT(DISTINCT patienthealthsystemstayid) instead of COUNT(DISTINCT patient.uniquepid). This is simply the wrong column reference for distinct patients (it counts stays rather than unique patients) but doesn\u2019t alter the overall structure or logic of the query beyond that one column. Hence it\u2019s a local column reference error.",
            "error_type": "Local: column-reference error"
        }
    },
    "macsql": {
        "fb5a2c37faa8aebf3fbe4240": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "What was patient 004-15751's first output time of urine on 04/11/this year?",
            "gold_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-15751' ) ) AND intakeoutput.cellpath LIKE '%output%' AND intakeoutput.celllabel = 'urine' AND datetime(intakeoutput.intakeoutputtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',intakeoutput.intakeoutputtime) = '04-11' ORDER BY intakeoutput.intakeoutputtime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(`intakeoutputtime`) AS first_output_time FROM intakeoutput WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '004-15751' ) AND datetime(`intakeoutputtime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') AND strftime('%m-%d', `intakeoutputtime`) = '04-11' AND `cellpath` LIKE '%output%' AND `celllabel` LIKE '%urine%'",
            "gold_answer": "[['2100-04-11 16:00:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simplifies the patient lookup by directly matching patientunitstayid to the one returned for uniquepid, rather than following the two\u2010step link via patienthealthsystemstayid as in the ground truth. This alters the intended relationship across unit stays and can miss the correct intakeoutput records. The error spans the core join logic (choice of key) and cannot be fixed by a minor tweak in a single clause\u2014it requires restructuring the patient\u2192unitstay link. Therefore, it's a global error.",
            "error_type": "Global: incorrect join key"
        },
        "1bbfae72fa70ce468d41d6dc": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "What is the maximum total hospital expense that includes dka in 2100?",
            "gold_sql": "SELECT MAX(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.patientunitstayid IN ( SELECT diagnosis.patientunitstayid FROM diagnosis WHERE diagnosis.diagnosisname = 'dka' ) ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.patienthealthsystemstayid ) AS T1",
            "pred_sql": "SELECT MAX(total_expense) AS max_total_expense FROM ( SELECT T1.patienthealthsystemstayid, SUM(T2.cost) AS total_expense FROM ( SELECT DISTINCT patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname LIKE '%dka%' AND strftime('%Y', patient.hospitaladmittime) = '2100' ) AS T1 JOIN cost AS T2 ON T1.patienthealthsystemstayid = T2.patienthealthsystemstayid WHERE T2.eventtype IN ('diagnosis', 'treatment', 'lab', 'medication') GROUP BY T1.patienthealthsystemstayid )",
            "gold_answer": "[['6005.92']]",
            "pred_answer": "[['42041.44']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic in several ways. First, it filters on strftime('%Y', patient.hospitaladmittime) = '2100' instead of filtering cost records by strftime('%Y', cost.chargetime) = '2100'. Second, it restricts cost.eventtype to a subset ('diagnosis','treatment','lab','medication') rather than summing all cost records for each stay as intended. These changes span multiple clauses (FROM, JOIN, WHERE) and alter the set of cost records being aggregated, producing an entirely different result. Fixing this requires restructuring the query logic rather than a minor local correction.",
            "error_type": "Global: incorrect date filter and cost aggregation logic"
        }
    },
    "codes": {
        "9e28a8849f1adf8f2f7622cd": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Do you know the minimum lipase value of patient 027-100559?",
            "gold_sql": "SELECT MIN(lab.labresult) FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-100559' ) ) AND lab.labname = 'lipase'",
            "pred_sql": "SELECT vitalperiodic.lipase FROM vitalperiodic INNER JOIN patient ON vitalperiodic.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '027-100559' ORDER BY vitalperiodic.observationtime DESC LIMIT 1",
            "gold_answer": "[['192.0']]",
            "pred_answer": "no such column: vitalperiodic.lipase",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column vitalperiodic.lipase and even joins the wrong table (vitalperiodic instead of lab). This is a simple column/table reference error that does not require restructuring the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "607553a977127c390af6f571": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Tell me patient 002-1185's minimum lactate value this month?",
            "gold_sql": "SELECT MIN(lab.labresult) FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-1185' ) ) AND lab.labname = 'lactate' AND datetime(lab.labresulttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month')",
            "pred_sql": "SELECT vitalaperiodic.patientunitstayid, min(vitalaperiodic.lactate) FROM vitalaperiodic WHERE strftime('%Y-%m', vitalaperiodic.observationtime) = '2016-06' AND patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '002-1185' )",
            "gold_answer": "[['1.9']]",
            "pred_answer": "no such table: vitalaperiodic",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent table \"vitalaperiodic\" (it should be \"vitalperiodic\" or more appropriately the \"lab\" table for lactate values) and a non-existent column \"lactate\" in that table. This is a localized table/column reference error and does not involve the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        },
        "584bb8420b45d70a62b843c6": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "On the current icu visit, what was the average value of the mean blood pressure of patient 022-1011?",
            "gold_sql": "SELECT AVG(vitalperiodic.systemicmean) FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '022-1011' ) AND patient.unitdischargetime IS NULL ) AND vitalperiodic.systemicmean IS NOT NULL",
            "pred_sql": "SELECT vitalperiodic.systemicmean FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '022-1011'",
            "gold_answer": "[['76.145']]",
            "pred_answer": "[['-2.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['103.0'], ['103.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only selects raw systemicmean values joined on patientuniquepid without computing an average or filtering to the current ICU visit (unitdischargetime IS NULL). These mistakes are confined to the SELECT and WHERE clauses (missing AVG and missing current\u2010visit filter) and do not alter the overall join structure or logic across multiple clauses. Therefore, this is a Local error.",
            "error_type": "Local: missing-aggregation-and-filter error"
        },
        "c14463e7894fdd618987127c": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Could you tell me the last procedure that patient 009-10008 had this year?",
            "gold_sql": "SELECT treatment.treatmentname FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '009-10008' ) ) AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ORDER BY treatment.treatmenttime DESC LIMIT 1",
            "pred_sql": "SELECT DISTINCT treatment.treatmentname FROM treatment INNER JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '009-10008' AND strftime('%Y',treatment.treatmenttime) = '2100'",
            "gold_answer": "[['antiarrhythmics']]",
            "pred_answer": "[['administration of electrolytes'], ['antiarrhythmics'], ['antibacterials'], ['bronchodilator'], ['chest x-ray'], ['cultures'], ['glucocorticoid administration'], ['insulin'], ['mechanical ventilation'], ['mechanical ventilation'], ['mechanical ventilation'], ['oxygen therapy (40% to 60%)'], ['vasopressors - norepinephrine <= 0.1 micrograms/kg/min'], ['vasopressors - norepinephrine <= 0.1 micrograms/kg/min']]",
            "is_correct": false,
            "reasoning": "The predicted query returns all distinct procedures performed this year for the patient, rather than the single \u2018last\u2019 procedure. It omits the ORDER BY treatment time in descending order and the LIMIT 1 clause that would select the most recent event. This is not a minor typo or column reference issue but a structural flaw that fails to enforce the intended \u2018last procedure\u2019 logic.",
            "error_type": "Global: missing ORDER BY/LIMIT"
        },
        "b33c72567d5a653d849243e5": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Today, when did patient 027-103257 the last time have a stool event?",
            "gold_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-103257' ) ) AND intakeoutput.cellpath LIKE '%output%' AND intakeoutput.celllabel = 'stool' AND datetime(intakeoutput.intakeoutputtime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "pred_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput INNER JOIN patient ON intakeoutput.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '027-103257' AND intakeoutput.cellpath = 'Cath Lab Stool' ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "gold_answer": "[['2100-12-31 07:52:05']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters on intakeoutput.cellpath = 'Cath Lab Stool' and omits the required filters: intakeoutput.cellpath LIKE '%output%', intakeoutput.celllabel = 'stool', and the restriction to today\u2019s events. These mistakes are confined to the WHERE clause\u2019s filtering conditions without changing the overall query structure or joins. Thus, it\u2019s a localized column/value reference error.",
            "error_type": "Local: table/column reference error"
        },
        "eba36664c2a331fc498ecae9": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When did patient 030-12393 have a lab test for the last time last month?",
            "gold_sql": "SELECT lab.labresulttime FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '030-12393' ) ) AND datetime(lab.labresulttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY lab.labresulttime DESC LIMIT 1",
            "pred_sql": "SELECT lab.labresulttime FROM lab INNER JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '030-12393' AND strftime('%Y %m',lab.labresulttime) = ( SELECT strftime('%Y %m', max(lab.labresulttime)) FROM lab INNER JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '030-12393' )",
            "gold_answer": "[['2100-11-25 04:15:00']]",
            "pred_answer": "[['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 17:15:00'], ['2100-11-24 21:40:00'], ['2100-11-25 02:00:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00'], ['2100-11-25 04:15:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the overall logic of the query rather than just fixing a small typo or syntax issue. Instead of explicitly filtering for last month relative to the fixed current date and then ordering and limiting to one result, it attempts to match the YEAR and MONTH of every labresulttime against the YEAR and MONTH of the single maximum labresulttime. This not only misidentifies the correct month period (it doesn\u2019t anchor to \u201clast month\u201d per the assumptions) but also fails to apply LIMIT 1, leading to a fundamentally different result set. Because these problems span the WHERE filtering logic and subquery structure\u2014and alter the intended meaning of the query\u2014this is a Global error.",
            "error_type": "Global: incorrect filtering logic"
        },
        "c185698e6119b9fb668ad531": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was patient 004-10948's last heart rate on 06/21/this year?",
            "gold_sql": "SELECT vitalperiodic.heartrate FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-10948' ) ) AND vitalperiodic.heartrate IS NOT NULL AND datetime(vitalperiodic.observationtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',vitalperiodic.observationtime) = '06-21' ORDER BY vitalperiodic.observationtime DESC LIMIT 1",
            "pred_sql": "SELECT vitalperiodic.heartrate FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE strftime('%m %d %Y', vitalperiodic.observationtime) = '06 21 2100' AND patient.uniquepid = '004-10948'",
            "gold_answer": "[['99.0']]",
            "pred_answer": "[['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['100.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['101.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['102.0'], ['103.0'], ['103.0'], ['103.0'], ['103.0'], ['103.0'], ['103.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['104.0'], ['105.0'], ['105.0'], ['105.0'], ['105.0'], ['105.0'], ['105.0'], ['105.0'], ['106.0'], ['107.0'], ['107.0'], ['108.0'], ['110.0'], ['111.0'], ['112.0'], ['113.0'], ['114.0'], ['114.0'], ['114.0'], ['115.0'], ['115.0'], ['116.0'], ['117.0'], ['117.0'], ['118.0'], ['118.0'], ['119.0'], ['120.0'], ['121.0'], ['121.0'], ['123.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL retrieves all heart rate measurements on 06/21/2100 for patient 004-10948 but never orders them by observation time nor limits the result to the last measurement. The ground truth uses ORDER BY observationtime DESC LIMIT 1 to get exactly the most recent reading. Fixing this requires altering the overall query structure (adding ORDER BY and LIMIT), not just correcting a column name or minor syntax. Therefore, this is a global error.",
            "error_type": "Global: missing ORDER BY and LIMIT"
        },
        "f1e2ffce2af23bd974a8761a": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many patients were given a prescription of amiodarone (cordarone) 150 mg in d5w 100ml bolus since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid = ( SELECT medication.patientunitstayid FROM medication WHERE medication.drugname = 'amiodarone (cordarone) 150 mg in d5w 100ml bolus' AND datetime(medication.drugstarttime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT count(patient.patientunitstayid) FROM medication INNER JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'amiodarone 150 mg prn 100 d5w' AND medication.drugstarttime > datetime(CURRENT_TIMESTAMP, '-1 year')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query deviates from the ground truth only in localized parts: it uses the wrong drug name literal ('amiodarone 150 mg prn 100 d5w' vs. 'amiodarone (cordarone) 150 mg in d5w 100ml bolus'), employs CURRENT_TIMESTAMP instead of the fixed '2100-12-31 23:59:00' per assumptions, and counts patientunitstayid rather than DISTINCT uniquepid. None of these errors alter the overall logical structure or the table relationships; they are confined to value literals, datetime usage, and aggregation. Hence, this is a Local error.",
            "error_type": "Local: value parsing error"
        },
        "e90c9b066d2e1ffa492a8488": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What is the total number of patients who underwent nitroglycerin this year?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'nitroglycerin' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') )",
            "pred_sql": "SELECT count(patient.patientunitstayid) FROM patient INNER JOIN medication ON patient.patientunitstayid = medication.patientunitstayid WHERE medication.routeadmin = 'N/A' AND strftime('%Y', medication.drugstarttime) = '2100'",
            "gold_answer": "[['21.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the medication table (with a filter on routeadmin) rather than the treatment table and the treatmentname 'nitroglycerin'. It joins the wrong table, applies the wrong filter, and even omits DISTINCT on unique patient IDs. These mistakes alter the entire logic and require rewriting multiple clauses, not just a localized fix.",
            "error_type": "Global: incorrect table/join selection"
        }
    },
    "dinsql": {
        "750c6907adf544b9533322db": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Tell me what was the total estimated blood loss output of patient 002-10018 on last month/15.",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-10018' ) ) AND intakeoutput.celllabel = 'estimated blood loss' AND intakeoutput.cellpath LIKE '%output%' AND datetime(intakeoutput.intakeoutputtime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',intakeoutput.intakeoutputtime) = '15'",
            "pred_sql": "SELECT SUM(T2.cellvaluenumeric) AS total_blood_loss FROM patient AS T1 INNER JOIN intakeoutput AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.uniquepid = '002-10018' AND datetime(T2.intakeoutputtime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', T2.intakeoutputtime) = '15' AND T2.cellpath LIKE '%output%' AND T2.celllabel = 'Estimated Blood Loss'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "We compare the two queries: both correctly restrict to the 15th of last month, filter on output events, restrict to the same patient\u2019s unit stays, and sum the numeric cellvaluenumeric. The only substantive difference is the WHERE clause on celllabel: the ground truth uses lowercase 'estimated blood loss' whereas the predicted query uses capitalized 'Estimated Blood Loss'. This typo/case mismatch in the literal string causes no rows to match and thus no result, but does not change the overall query structure or logic. Therefore this is a local error (a value parsing/filtering mistake).",
            "error_type": "Local: value parsing error"
        },
        "07fea0d5ec85b1acf9d8910a": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Since 2100, when was the first time that patient 005-10606 was diagnosed with enteritis - colitis, presumed infectious?",
            "gold_sql": "SELECT diagnosis.diagnosistime FROM diagnosis WHERE diagnosis.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '005-10606' ) ) AND diagnosis.diagnosisname = 'enteritis - colitis, presumed infectious' AND strftime('%Y',diagnosis.diagnosistime) >= '2100' ORDER BY diagnosis.diagnosistime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(diagnosistime) FROM diagnosis WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '005-10606' ) AND diagnosisname = 'enteritis - colitis, presumed infectious' AND datetime(diagnosistime) >= datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "gold_answer": "[['2100-08-17 15:23:00']]",
            "pred_answer": "[['2100-08-27 00:51:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a single patientunitstayid subquery instead of retrieving all unit stays for the patient's health system stay. The ground truth correctly accounts for all unit stays by first selecting the patienthealthsystemstayid for the given uniquepid and then all associated patientunitstayid values. By restricting to one unitstayid, the predicted query misses earlier diagnoses and returns the wrong date. This is not a simple syntax fix but an incorrect join/relationship logic that changes the overall result.",
            "error_type": "Global: incorrect join relationship"
        }
    },
    "o4-mini": {
        "c3c11e1cbbde15b360b8218e": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "What is the number of patients who died after diagnosis of acute pulmonary edema within the same month since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.uniquepid ) FROM ( SELECT T1.uniquepid, T1.diagnosistime FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'acute pulmonary edema' ) AS T1 GROUP BY T1.uniquepid HAVING MIN(T1.diagnosistime) = T1.diagnosistime AND strftime('%Y',T1.diagnosistime) >= '2100' ) AS T2 JOIN ( SELECT patient.uniquepid, patient.patienthealthsystemstayid, patient.hospitaldischargetime FROM patient WHERE patient.hospitaldischargestatus = 'expired' ) as T3 ON T2.uniquepid = T3.uniquepid AND datetime(T2.diagnosistime,'start of month') = datetime(T3.hospitaldischargetime,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT p.uniquepid) AS num_patients FROM diagnosis d JOIN patient p ON d.patientunitstayid = p.patientunitstayid WHERE d.diagnosisname = 'acute pulmonary edema' AND d.diagnosistime >= '2100-01-01' AND p.hospitaldischargestatus = 'expired' AND datetime(p.hospitaldischargetime) > datetime(d.diagnosistime) AND strftime('%Y-%m', d.diagnosistime) = strftime('%Y-%m', p.hospitaldischargetime');",
            "gold_answer": "[['3.0']]",
            "pred_answer": "unrecognized token: \"');\"",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to parse due to an extra/mismatched quote and parenthesis in the WHERE clause (unrecognized token \"');\"). This is a localized syntax issue that can be fixed without changing the overall logic or join structure.",
            "error_type": "Local: syntax error"
        },
        "8c3bf289979d474499b2aa49": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "How many days have elapsed since patient 025-1000 stayed in ward 908 for the first time on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',patient.unitadmittime) ) FROM patient WHERE patient.uniquepid = '025-1000' AND patient.wardid = 908 AND patient.hospitaldischargetime IS NULL ORDER BY patient.unitadmittime ASC LIMIT 1",
            "pred_sql": "WITH current_hospital AS ( SELECT patienthealthsystemstayid FROM patient WHERE uniquepid = '025-1000' AND hospitaldischargetime IS NULL LIMIT 1 ), first_ward AS ( SELECT MIN(unitadmittime) AS first_time FROM patient WHERE patienthealthsystemstayid = (SELECT patienthealthsystemstayid FROM current_hospital) AND wardid = 908 ) SELECT CAST(julianday('2100-12-31 23:59:00') - julianday(first_time) AS INTEGER) AS days_elapsed FROM first_ward;",
            "gold_answer": "[['9.574']]",
            "pred_answer": "[['9.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the two queries lies in how they compute the day difference. The ground truth uses strftime('%J',\u2026) to get the Julian day number (including fractional days), yielding a precise value (9.574). The predicted SQL uses julianday(\u2026) and then CASTs the result to INTEGER, truncating it to 9.0 and losing the fractional component. All other logic (filtering on current visit, selecting the first ward admission) is correct. This is a localized mistake in the date-difference calculation, not a flaw in the overall query structure.",
            "error_type": "Local: other-temporal error"
        }
    },
    "llm2sql": {
        "e8121858a0141075cd31178d": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "What are the charges for a procedure called pulmonary resection?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.eventtype = 'treatment' AND cost.eventid IN ( SELECT treatment.treatmentid FROM treatment WHERE treatment.treatmentname = 'pulmonary resection' )",
            "pred_sql": "SELECT DISTINCT cost FROM cost WHERE eventtype = 'treatment'",
            "gold_answer": "[['8.39']]",
            "pred_answer": "[['10.01'], ['4.17'], ['4.33'], ['4.39'], ['4.4'], ['4.64'], ['4.74'], ['4.79'], ['4.8'], ['4.82'], ['4.88'], ['4.92'], ['4.98'], ['5.04'], ['5.06'], ['5.11'], ['5.13'], ['5.14'], ['5.15'], ['5.16'], ['5.17'], ['5.19'], ['5.28'], ['5.29'], ['5.3'], ['5.31'], ['5.32'], ['5.33'], ['5.34'], ['5.35'], ['5.45'], ['5.47'], ['5.48'], ['5.51'], ['5.52'], ['5.55'], ['5.56'], ['5.57'], ['5.58'], ['5.61'], ['5.62'], ['5.64'], ['5.65'], ['5.66'], ['5.69'], ['5.7'], ['5.71'], ['5.72'], ['5.73'], ['5.74'], ['5.75'], ['5.76'], ['5.78'], ['5.79'], ['5.8'], ['5.83'], ['5.84'], ['5.85'], ['5.87'], ['5.88'], ['5.89'], ['5.9'], ['5.91'], ['5.93'], ['5.94'], ['5.96'], ['5.97'], ['5.98'], ['6.0'], ['6.01'], ['6.02'], ['6.03'], ['6.04'], ['6.05'], ['6.06'], ['6.07'], ['6.08'], ['6.09'], ['6.1'], ['6.11'], ['6.12'], ['6.13'], ['6.14'], ['6.15'], ['6.17'], ['6.18'], ['6.19'], ['6.21'], ['6.22'], ['6.23'], ['6.24'], ['6.25'], ['6.26'], ['6.27'], ['6.28'], ['6.29'], ['6.3'], ['6.31'], ['6.32'], ['6.33']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the filter linking cost.eventid to the treatment table for 'pulmonary resection'. It simply selects all treatment costs without constraining to the specific procedure. This is a localized mistake in the WHERE clause (a missing filter) and does not require altering the overall query structure or joins.",
            "error_type": "Local: missing WHERE condition"
        }
    },
    "qwen3": {
        "78e443fce3a9afcc620fcb1d": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What is patient 009-10046's difference in weight from the second value measured on the first hospital visit compared to the first value measured on the first hospital visit?",
            "gold_sql": "SELECT ( SELECT patient.admissionweight FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '009-10046' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) AND patient.admissionweight IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1 OFFSET 1 ) - ( SELECT patient.admissionweight FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '009-10046' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) AND patient.admissionweight IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1 )",
            "pred_sql": "SELECT (dischargeweight - admissionweight) AS weight_difference FROM patient WHERE uniquepid = '009-10046' AND hospitaladmittime = ( SELECT MIN(hospitaladmittime) FROM patient WHERE uniquepid = '009-10046' ) AND admissionweight IS NOT NULL AND dischargeweight IS NOT NULL;",
            "gold_answer": "[['-19.7']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic by using a single patient row\u2019s dischargeweight and admissionweight instead of comparing the first and second admissionweight measurements within the first hospital visit. It ignores the requirement to identify the first hospital visit\u2019s unit stays and select the two admissionweight values in chronological order. This is not a simple typo or missing alias\u2014it alters the entire approach to retrieving and comparing the correct records, so it is a global logical error.",
            "error_type": "Global: logical error"
        }
    }
}