{
    "qwen3": {
        "926c6da813c51e098482d768": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "Calculate the total volume of urine catheter for patient 022-1011 on the current icu visit.",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '022-1011' ) AND patient.unitdischargetime IS NULL ) AND intakeoutput.celllabel = 'urine catheter' AND intakeoutput.cellpath LIKE '%output%'",
            "pred_sql": "SELECT SUM(io.cellvaluenumeric) AS total_volume FROM patient p JOIN intakeoutput io ON p.patientunitstayid = io.patientunitstayid WHERE p.uniquepid = '022-1011' AND p.hospitaldischargetime IS NULL AND io.celllabel = 'urine' AND io.cellpath LIKE '%output%';",
            "gold_answer": "[['113237.0']]",
            "pred_answer": "[['100.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL mistakenly filters on p.hospitaldischargetime IS NULL instead of p.unitdischargetime IS NULL (to identify the current ICU visit) and uses io.celllabel = 'urine' instead of the correct 'urine catheter'. These are errors confined to the WHERE clause (wrong column and wrong literal value) and do not alter the overall join structure or query logic.",
            "error_type": "Local: column reference and literal value error"
        },
        "eb89b2270e848e175f5257f9": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "Since 1 year ago, what are the top five most frequently tested specimens for patients within 2 months after receiving neurology consultation?",
            "gold_sql": "SELECT T3.culturesite FROM ( SELECT T2.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'neurology consultation' AND datetime(treatment.treatmenttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, microlab.culturesite, microlab.culturetakentime FROM microlab JOIN patient ON microlab.patientunitstayid = patient.patientunitstayid WHERE datetime(microlab.culturetakentime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.culturetakentime AND datetime(T2.culturetakentime) BETWEEN datetime(T1.treatmenttime) AND datetime(T1.treatmenttime,'+2 month') GROUP BY T2.culturesite ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT lab.labname FROM diagnosis JOIN lab ON diagnosis.patientunitstayid = lab.patientunitstayid WHERE diagnosis.diagnosistime >= datetime('2100-12-31 23:59:00', '-1 year') AND diagnosis.diagnosistime <= datetime('2100-12-31 23:59:00') AND diagnosis.diagnosisname LIKE '%neurology%' AND lab.labresulttime >= diagnosis.diagnosistime AND lab.labresulttime <= datetime(diagnosis.diagnosistime, '+2 months') AND lab.labresulttime <= datetime('2100-12-31 23:59:00') GROUP BY lab.labname ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['blood, venipuncture'], ['nasopharynx'], ['sputum, expectorated']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely misinterprets the question by using the diagnosis and lab tables rather than the treatment and microlab tables. It treats \u201cneurology consultation\u201d as a diagnosis name and retrieves lab.labname instead of microlab.culturesite. This isn\u2019t a simple typo or local clause mistake but a fundamental change to the query\u2019s structure and intended logic, requiring rewriting multiple joins and clauses to correct it.",
            "error_type": "Global: incorrect table usage"
        }
    },
    "codes": {
        "70857240166bae2581b96559": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the name of the last input that patient 016-10029 had yesterday?",
            "gold_sql": "SELECT intakeoutput.celllabel FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '016-10029' ) ) AND intakeoutput.cellpath LIKE '%intake%' AND datetime(intakeoutput.intakeoutputtime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day') ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "pred_sql": "SELECT intakeoutput.intakeoutputname FROM patient INNER JOIN intakeoutput ON patient.patientunitstayid = intakeoutput.patientunitstayid WHERE intakeoutput.intakeoutputtime LIKE '2100-12-31' AND patient.patientunitstayid = '016-10029' AND intakeoutput.cellpath LIKE '%input%' ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "gold_answer": "[['p.o.']]",
            "pred_answer": "no such column: intakeoutput.intakeoutputname",
            "is_correct": false,
            "reasoning": "The predicted query references a non-existent column `intakeoutput.intakeoutputname`, causing the error \u201cno such column\u201d. This is a simple column reference mistake and does not affect the overall query structure or logic, so it is a localized error.",
            "error_type": "Local: column reference error"
        },
        "bd2f163685751c157c0c3933": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What is the known allergy of patient 025-13060 since 08/2100?",
            "gold_sql": "SELECT allergy.allergyname FROM allergy WHERE allergy.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '025-13060' ) ) AND strftime('%Y-%m',allergy.allergytime) >= '2100-08'",
            "pred_sql": "SELECT allergyname FROM allergy WHERE patientunitstayid = '025-13060' AND allergytime >= '2100-08-01 00:00:00'",
            "gold_answer": "[['amoxicillin'], ['cefuroxime'], ['levofloxacin'], ['prednisone']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL directly compares allergy.patientunitstayid to the patient\u2019s unique PID ('025-13060') instead of mapping the unique PID to the correct patientunitstayid via the patient table. This is a mistake in the column used for filtering\u2014it doesn\u2019t require restructuring the entire query logic but simply the correct join/column reference.",
            "error_type": "Local: table/column reference error"
        },
        "7bd605f170632966619d3d2f": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How much does albuterol/ipratropium neb 3ml cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.eventtype = 'medication' AND cost.eventid IN ( SELECT medication.medicationid FROM medication WHERE medication.drugname = 'albuterol/ipratropium neb 3ml' )",
            "pred_sql": "SELECT DISTINCT cost.cost FROM medication INNER JOIN cost ON medication.patientunitstayid = cost.patienthealthsystemstayid WHERE medication.drugname = 'albuterol/ipratropium' AND medication.dosage = '3'",
            "gold_answer": "[['13.8']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters how cost is associated with the medication. Instead of joining cost.eventid to medication.medicationid and filtering cost.eventtype = 'medication', it joins medication.patientunitstayid to cost.patienthealthsystemstayid. It also misnames the drug and dosage. These join and filter mistakes change the overall query logic and require restructuring the query to correct the relationships, not just a simple typo fix.",
            "error_type": "Global: incorrect join relationship"
        },
        "28d92a418e13b07d795a817c": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When was the last total protein test that patient 011-10034 received?",
            "gold_sql": "SELECT lab.labresulttime FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '011-10034' ) ) AND lab.labname = 'total protein' ORDER BY lab.labresulttime DESC LIMIT 1",
            "pred_sql": "SELECT lab.labresulttime FROM lab INNER JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE patient.patientunitstayid = 11110034 AND lab.labname = 'Total Protein' ORDER BY lab.labresulttime DESC LIMIT 1",
            "gold_answer": "[['2100-05-14 08:50:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on patient.patientunitstayid = 11110034 rather than matching the uniquepid = '011-10034' (and even misconstrues the ID format). It also joins on patientunitstayid instead of using the patienthealthsystemstayid/uniquepid relationship required by the ground truth. This is a localized mistake in the WHERE clause (wrong column and value), not a fundamental structural or multi-clause logic issue, so it is a Local error.",
            "error_type": "Local: column reference error"
        },
        "b77c2915664d4f7faa9af936": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When did patient 002-12250 get input since 08/11/2100 for the last time?",
            "gold_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-12250' ) ) AND intakeoutput.cellpath LIKE '%intake%' AND strftime('%Y-%m-%d',intakeoutput.intakeoutputtime) >= '2100-08-11' ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "pred_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput INNER JOIN patient ON intakeoutput.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '002-12250' AND intakeoutput.intakeoutputtime >= '2100-08-11'",
            "gold_answer": "[['2100-09-24 20:00:00']]",
            "pred_answer": "[['2100-09-19 06:38:00'], ['2100-09-19 06:38:00'], ['2100-09-19 09:23:00'], ['2100-09-19 15:00:00'], ['2100-09-19 18:46:00'], ['2100-09-19 18:46:00'], ['2100-09-19 18:50:00'], ['2100-09-19 19:59:00'], ['2100-09-19 22:32:00'], ['2100-09-20 09:00:00'], ['2100-09-20 10:00:00'], ['2100-09-20 13:48:00'], ['2100-09-20 19:59:00'], ['2100-09-20 19:59:00'], ['2100-09-20 20:34:00'], ['2100-09-21 00:00:00'], ['2100-09-21 02:55:00'], ['2100-09-21 02:55:00'], ['2100-09-21 02:55:00'], ['2100-09-21 06:00:00'], ['2100-09-21 09:00:00'], ['2100-09-21 11:00:00'], ['2100-09-21 11:00:00'], ['2100-09-21 11:00:00'], ['2100-09-21 15:29:00'], ['2100-09-21 19:30:00'], ['2100-09-21 19:30:00'], ['2100-09-21 19:30:00'], ['2100-09-22 02:47:00'], ['2100-09-22 11:53:00'], ['2100-09-22 11:53:00'], ['2100-09-22 11:53:00'], ['2100-09-22 14:32:00'], ['2100-09-22 14:32:00'], ['2100-09-22 17:00:00'], ['2100-09-23 05:00:00'], ['2100-09-23 05:00:00'], ['2100-09-23 09:00:00'], ['2100-09-23 23:00:00'], ['2100-09-24 01:55:00'], ['2100-09-24 08:42:00'], ['2100-09-24 19:58:00'], ['2100-09-24 20:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the correct tables (intakeoutput to patient) and filters by patient.uniquepid and date, preserving the overall query structure. However, it fails to restrict to intake events (missing intakeoutput.cellpath LIKE '%intake%') and does not apply ORDER BY intakeoutputtime DESC LIMIT 1 to return only the last time. These are localized mistakes in the WHERE clause and missing ORDER BY/LIMIT rather than a structural or multi-clause logic error.",
            "error_type": "Local: filter/order error"
        },
        "0e04981caf3dfd83794866ec": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Provide the id list of patients who were diagnosed with cerebral subdural hematoma - secondary to anticoagulation since 2100.",
            "gold_sql": "SELECT DISTINCT patient.uniquepid FROM patient WHERE patient.patientunitstayid IN ( SELECT diagnosis.patientunitstayid FROM diagnosis WHERE diagnosis.diagnosisname = 'cerebral subdural hematoma - secondary to anticoagulation' AND strftime('%Y',diagnosis.diagnosistime) >= '2100' )",
            "pred_sql": "SELECT patient.patientunitstayid FROM diagnosis INNER JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'cerebral subdural hematoma - secondary to anticoagulation' AND strftime('%Y', diagnosis.diagnosistime) < '2100'",
            "gold_answer": "[['004-13216'], ['016-10104']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s mistakes are confined to column selection and the WHERE condition. They selected patientunitstayid instead of uniquepid, omitted DISTINCT, and reversed the comparison operator (\u2018<\u2019 instead of \u2018>=\u2019). These are local, fixable changes within the SELECT and WHERE clauses and do not require restructuring the overall join or query logic.",
            "error_type": "Local: value parsing error"
        },
        "4cd480a8ce675198f964e1d4": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "After getting diagnosed with change in mental status, how many patients were found to have dka within the same month since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'change in mental status' AND strftime('%Y',diagnosis.diagnosistime) >= '2100' ) AS T1 JOIN ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'dka' AND strftime('%Y',diagnosis.diagnosistime) >= '2100' ) AS T2 WHERE T1.diagnosistime < T2.diagnosistime AND datetime(T1.diagnosistime,'start of month') = datetime(T2.diagnosistime,'start of month')",
            "pred_sql": "SELECT count(patient.patientunitstayid) FROM patient INNER JOIN diagnosis ON patient.patientunitstayid = diagnosis.patientunitstayid WHERE strftime('%Y-%m',diagnosis.diagnosistime) > '2099-12' AND diagnosis.diagnosisname = 'Deep vein thrombosis (dVT)'",
            "gold_answer": "[['88.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only uses a single JOIN between patient and diagnosis and filters for 'Deep vein thrombosis (dVT)' after a date, completely omitting the two-step logic of first finding patients diagnosed with 'change in mental status' and then checking for 'dka' within the same month. It also uses the wrong diagnosis name, fails to join two diagnosis subqueries, and does not enforce the same-month constraint correctly. This is a fundamental logic and structure mismatch with the intended query.",
            "error_type": "Global: incorrect query logic"
        },
        "d55a6a2b9901cd04ec7a4599": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the first drug that patient 035-1003 was prescribed on the first hospital visit?",
            "gold_sql": "SELECT medication.drugname FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '035-1003' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) ORDER BY medication.drugstarttime ASC LIMIT 1",
            "pred_sql": "SELECT medication.drugname FROM patient INNER JOIN medication ON patient.patientunitstayid = medication.patientunitstayid WHERE patient.patientunitstayid = '035-1003' ORDER BY patient.hospitaladmittime ASC LIMIT 1",
            "gold_answer": "[['azithromycin']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets the patient identifier and hospital visit logic. Instead of finding the first admission for the given uniquepid and then ordering medications by start time, it directly equates patientunitstayid to the string \u2018035-1003\u2019 (which is actually uniquepid) and orders by hospitaladmittime, not drugstarttime. This incorrect join and WHERE clause change the query\u2019s overall structure and intended relationships, so it cannot be fixed by a small tweak in one clause.",
            "error_type": "Global: incorrect join logic"
        },
        "e2e9aeb5439fe4143b0fbcaa": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Find out the number of hospital visits patient 002-13199 had since 1 year ago.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.patienthealthsystemstayid ) FROM patient WHERE patient.uniquepid = '002-13199' AND datetime(patient.hospitaladmittime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(patient.patientunitstayid) FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '002-13199' AND vitalperiodic.observationtime >= date_sub(now(), interval 1 year)",
            "gold_answer": "[['1.0']]",
            "pred_answer": "near \"1\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL not only uses non-SQLite functions (date_sub, now()) but also joins the patient table to vitalperiodic and filters on observationtime instead of hospitaladmittime. It counts patientunitstayid instead of distinct patienthealthsystemstayid. These mistakes span multiple clauses (JOIN, WHERE, aggregation) and fundamentally change the query\u2019s logic, requiring a major restructuring rather than a small localized fix.",
            "error_type": "Global: incorrect join"
        }
    },
    "dinsql": {
        "9f2f892efa9cbefc44db2be8": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Count the number of patients who were diagnosed with hepatic dysfunction - with cirrhosis but didn't come back to the hospital within the same month since 1 year ago.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'hepatic dysfunction - with cirrhosis' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'hepatic dysfunction - with cirrhosis' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 JOIN patient ON T2.uniquepid = patient.uniquepid WHERE T2.diagnosistime < patient.hospitaladmittime AND datetime(patient.hospitaladmittime) >= datetime('2100-12-31 23:59:00','-1 year') AND datetime(T2.diagnosistime,'start of month') = datetime(patient.hospitaladmittime,'start of month') )",
            "pred_sql": "SELECT COUNT(DISTINCT T1.patientunitstayid) FROM diagnosis AS T1 JOIN patient AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.diagnosisname = 'hepatic dysfunction - with cirrhosis' AND datetime(T1.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') AND NOT EXISTS ( SELECT 1 FROM patient AS T3 WHERE T3.uniquepid = T2.uniquepid AND datetime(T3.hospitaladmittime,'start of month') = datetime(T2.hospitaladmittime,'start of month') AND datetime(T3.hospitaladmittime) > datetime(T2.hospitaldischargetime) )",
            "gold_answer": "[['12.0']]",
            "pred_answer": "[['16.0']]",
            "is_correct": false,
            "reasoning": "1. The predicted SQL counts DISTINCT patientunitstayid instead of DISTINCT uniquepid, changing the aggregation from patients to unit stays.\n2. The NOT EXISTS subquery logic is fundamentally flawed: it compares future hospitaladmittime against the original admission\u2019s discharge and matches months on T2.hospitaladmittime rather than the diagnosis time, breaking the intended \"same month return\" check.\n3. These mistakes alter the overall query logic and patient-level scope, requiring restructuring beyond a simple fix in one clause.\n4. Therefore, this is a Global error affecting the core logic and structure of the query.",
            "error_type": "Global: incorrect join and aggregation logic"
        },
        "a9a5042d9f74ed9c9f7a7f53": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Since 1 year ago, what are the top four most frequently ordered laboratory tests for patients within 2 months after diagnosed with acute respiratory failure - drug related?",
            "gold_sql": "SELECT T3.labname FROM ( SELECT T2.labname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'acute respiratory failure - drug related' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, lab.labname, lab.labresulttime FROM lab JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE datetime(lab.labresulttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.labresulttime AND datetime(T2.labresulttime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime,'+2 month') GROUP BY T2.labname ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT T2.labname FROM diagnosis AS T1 JOIN lab AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.diagnosisname = 'acute respiratory failure - drug related' AND datetime(T2.labresulttime) <= datetime(T1.diagnosistime, '+2 months') AND datetime(T1.diagnosistime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY T2.labname ORDER BY COUNT(T2.labname) DESC LIMIT 4",
            "gold_answer": "[['bedside glucose'], ['bicarbonate'], ['bun'], ['chloride'], ['creatinine'], ['glucose'], ['hct'], ['hgb'], ['mch'], ['mchc'], ['mcv'], ['platelets x 1000'], ['potassium'], ['rbc'], ['sodium'], ['wbc x 1000']]",
            "pred_answer": "[['bedside glucose'], ['potassium'], ['rbc'], ['wbc x 1000']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict lab events to those occurring after the diagnosis. It only applies an upper bound (labresulttime \u2264 diagnosis time + 2 months) but omits the lower bound (labresulttime \u2265 diagnosis time). This mistake is confined to the WHERE clause and can be corrected by adding that one temporal condition, without restructuring the overall query.",
            "error_type": "Local: missing time\u2010bound condition"
        },
        "447f96c8b5fb35865f80afac": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Calculate the total number of patients diagnosed with hypotension who didn't revisit the hospital within the same month since 1 year ago.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'hypotension' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'hypotension' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 JOIN patient ON T2.uniquepid = patient.uniquepid WHERE T2.diagnosistime < patient.hospitaladmittime AND datetime(patient.hospitaladmittime) >= datetime('2100-12-31 23:59:00','-1 year') AND datetime(T2.diagnosistime,'start of month') = datetime(patient.hospitaladmittime,'start of month') )",
            "pred_sql": "SELECT COUNT(DISTINCT T1.uniquepid) FROM patient AS T1 JOIN diagnosis AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T2.diagnosisname = 'hypotension' AND datetime(T1.hospitaladmittime) >= datetime('2100-12-31 23:59:00', '-1 year') AND NOT EXISTS ( SELECT 1 FROM patient AS T3 WHERE T3.uniquepid = T1.uniquepid AND datetime(T3.hospitaladmittime, 'start of month') = datetime(T1.hospitaladmittime, 'start of month') AND T3.patientunitstayid != T1.patientunitstayid AND datetime(T3.hospitaladmittime) >= datetime('2100-12-31 23:59:00', '-1 year') )",
            "gold_answer": "[['68.0']]",
            "pred_answer": "[['59.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic across multiple clauses: it filters on patient.hospitaladmittime instead of diagnosis.diagnosistime, and its NOT EXISTS subquery checks for other hospital admissions in the same month rather than revisits based on diagnosis time. These errors involve the overall join strategy and core query structure rather than a single clause, so they require major restructuring to fix.",
            "error_type": "Global: incorrect logical structure"
        }
    },
    "macsql": {
        "83bdb7920140393376346beb": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "How many patients were 1 ml crtrdg-ndl : lorazepam 2 mg/ml ij soln prescribed within the same hospital visit after undergoing vasopressors treatment?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'vasopressors' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugstarttime, patient.patienthealthsystemstayid FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = '1 ml crtrdg-ndl : lorazepam 2 mg/ml ij soln' ) AS T2 WHERE T1.treatmenttime < T2.drugstarttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid",
            "pred_sql": "SELECT COUNT(DISTINCT P.uniquepid) AS num_patients FROM patient AS P JOIN ( SELECT DISTINCT T1.patientunitstayid FROM treatment AS T1 WHERE T1.treatmentname LIKE '%vasopressors%' ) AS V ON P.patientunitstayid = V.patientunitstayid JOIN ( SELECT DISTINCT T2.patientunitstayid FROM medication AS T2 WHERE T2.drugname = '1 ml crtrdg-ndl : lorazepam 2 mg/ml ij soln' ) AS M ON P.patientunitstayid = M.patientunitstayid WHERE P.patienthealthsystemstayid = ( SELECT patienthealthsystemstayid FROM patient WHERE patientunitstayid = V.patientunitstayid LIMIT 1 )",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL drops the critical temporal condition (vasopressors treatment before lorazepam prescription) and, more fundamentally, joins the two events on patientunitstayid rather than patienthealthsystemstayid as required by \u201csame hospital visit.\u201d This wrong join key alters the overall logic (events may be linked within the same ICU stay rather than the broader hospital stay) and thus requires restructuring the query. Therefore, this is a global error.",
            "error_type": "Global: incorrect join relationship"
        },
        "e15ae33a051b2d3894c4e0e7": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Can you provide the top three most frequent lab tests that within the same month took place after patients had antihypertensive drug - labetalol?",
            "gold_sql": "SELECT T3.labname FROM ( SELECT T2.labname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'antihypertensive drug - labetalol' ) AS T1 JOIN ( SELECT patient.uniquepid, lab.labname, lab.labresulttime FROM lab JOIN patient ON lab.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.labresulttime AND datetime(T1.treatmenttime,'start of month') = datetime(T2.labresulttime,'start of month') GROUP BY T2.labname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT T3.labname FROM ( SELECT T2.labname, COUNT(*) AS frequency FROM lab AS T2 JOIN medication AS T1 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.drugname LIKE '%labetalol%' AND datetime(T2.labresulttime, 'start of month') = datetime(T1.drugstarttime, 'start of month') AND T2.labresulttime > T1.drugstarttime GROUP BY T2.labname ) AS T3 ORDER BY T3.frequency DESC LIMIT 3",
            "gold_answer": "[['anion gap'], ['bedside glucose'], ['bicarbonate'], ['bun'], ['calcium'], ['chloride'], ['creatinine'], ['fio2'], ['glucose'], ['hct'], ['hgb'], ['lpm o2'], ['potassium'], ['sodium']]",
            "pred_answer": "[['bedside glucose'], ['glucose'], ['potassium']]",
            "is_correct": false,
            "reasoning": "The predicted query is joining the lab table to the medication table and filtering on medication.drugname rather than the treatment table\u2019s treatmentname = 'antihypertensive drug - labetalol'. This is a mistaken table/column reference (using medication.drugname and drugstarttime instead of treatment.treatmentname and treatmenttime). It can be fixed by swapping medication for treatment and using the correct columns without restructuring the rest of the query logic. Hence it\u2019s a local error.",
            "error_type": "Local: table reference error"
        }
    },
    "deepseek": {
        "fe732f7c6bbbbe8bc019c631": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What was the last mch result for patient 007-10076 on the first hospital visit?",
            "gold_sql": "SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '007-10076' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) AND lab.labname = 'mch' ORDER BY lab.labresulttime DESC LIMIT 1",
            "pred_sql": "SELECT (SELECT MAX(labresult) FROM lab WHERE patientunitstayid = (SELECT patientunitstayid FROM patient WHERE uniquepid = '007-10076' ORDER BY hospitaladmittime LIMIT 1) AND labname = 'mch') AS last_mch_result;",
            "gold_answer": "[['30.6']]",
            "pred_answer": "[['28.9']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic for identifying the patient\u2019s first hospital visit and the last \u201cmch\u201d result. Instead of: 1) filtering for a completed (discharged) first hospital visit via hospitaldischargetime IS NOT NULL and 2) ordering lab results by labresulttime, it simply picks the earliest patient.admission record (regardless of discharge) and uses MAX(labresult) (the highest numeric value) rather than the most recent by timestamp. These alterations span multiple clauses (the visit\u2010selection subquery and the aggregation of lab results) and thus require restructuring the overall query, not just a minor tweak within one clause.",
            "error_type": "Global: incorrect visit selection and aggregation logic"
        },
        "6ddde3148b0940760c8a3a29": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What are the five most commonly received procedures for patients in their 60 or above since 2100?",
            "gold_sql": "SELECT T1.treatmentname FROM ( SELECT treatment.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age >= 60 ) AND strftime('%Y',treatment.treatmenttime) >= '2100' GROUP BY treatment.treatmentname ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "WITH procedures AS ( SELECT t.treatmentname AS procedure_name FROM treatment t INNER JOIN patient p ON t.patientunitstayid = p.patientunitstayid WHERE CAST(p.age AS INT) >= 60 AND datetime(t.treatmenttime) >= datetime('2100-01-01') UNION SELECT m.drugname AS procedure_name FROM medication m INNER JOIN patient p ON m.patientunitstayid = p.patientunitstayid WHERE CAST(p.age AS INT) >= 60 AND datetime(m.drugstarttime) >= datetime('2100-01-01') ) SELECT procedure_name, COUNT(DISTINCT patientunitstayid) AS count FROM ( SELECT procedure_name, patientunitstayid FROM procedures ) AS combined GROUP BY procedure_name ORDER BY count DESC LIMIT 5;",
            "gold_answer": "[['antiplatelet agent - aspirin'], ['cardiology consultation'], ['chest x-ray'], ['mechanical ventilation'], ['stress ulcer prophylaxis - pantoprazole']]",
            "pred_answer": "no such column: patientunitstayid",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it references patientunitstayid in the outer query, but the CTE \u201cprocedures\u201d only selects procedure_name (no patientunitstayid). This is a column reference error localized to the SELECT/CTE definition and can be fixed by including patientunitstayid in the CTE. It does not affect overall query structure or logic beyond this missing column.",
            "error_type": "Local: column reference error"
        },
        "9fe9bada749eb3c056c66c6a": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Show me the total amount of patient 021-101378's output on the first icu visit.",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '021-101378' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1 ) AND intakeoutput.cellpath LIKE '%output%'",
            "pred_sql": "SELECT SUM(io.cellvaluenumeric) AS total_output FROM patient p JOIN intakeoutput io ON p.patientunitstayid = io.patientunitstayid WHERE p.uniquepid = '021-101378' AND io.cellpath LIKE '%output%' AND io.intakeoutputtime >= p.unitadmittime AND io.intakeoutputtime <= p.unitdischargetime ORDER BY p.unitadmittime ASC LIMIT 1;",
            "gold_answer": "[['22735.0']]",
            "pred_answer": "[['3685.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes how the \u201cfirst ICU visit\u201d is identified and how the output records are selected. Instead of isolating exactly one patientunitstayid via a sub\u2010query that filters on unitdischargetime IS NOT NULL and orders by unitadmittime, it simply joins patient to intakeoutput and tries to bound on time conditions. This not only omits the IS NOT NULL filter but also misuses ORDER BY/LIMIT on an aggregated result, altering the intended logic of selecting the first ICU visit. Fixing this would require restructuring the query (correct sub\u2010query or join conditions), not just tweaking a single clause. Hence, it is a global error.",
            "error_type": "Global: incorrect join strategy"
        }
    },
    "llm2sql": {
        "8b621dd46769f8f16098888e": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "What was the change in chloride in patient 021-105581 last measured on the last hospital visit compared to the second to last value measured on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '021-105581' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AND lab.labname = 'chloride' ORDER BY lab.labresulttime DESC LIMIT 1 ) - ( SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '021-105581' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AND lab.labname = 'chloride' ORDER BY lab.labresulttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "SELECT MAX(LAB2.LABRESULT) - MIN(LAB2.LABRESULT) AS chloride_change FROM (SELECT LAB.labresult FROM lab LAB JOIN patient PAT ON LAB.patientunitstayid = PAT.patientunitstayid WHERE PAT.uniquepid = '021-105581' AND PAT.hospitaldischargetime IS NOT NULL AND LAB.labname = 'chloride' ORDER BY LAB.labresulttime DESC LIMIT 2) AS LAB2;",
            "gold_answer": "[['-1.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls the two most recent chloride values but then uses MAX() - MIN(), which computes an absolute difference rather than \u201cmost recent minus second most recent.\u201d This is a mistake confined to the SELECT clause\u2019s aggregation logic and does not alter joins or overall query structure. Hence, it\u2019s a local error.",
            "error_type": "Local: aggregate misuse"
        }
    }
}