{
    "codes": {
        "3a01a286c1423e44aaa74e8b": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What drug was patient 022-102442 allergic to last month?",
            "gold_sql": "SELECT allergy.drugname FROM allergy WHERE allergy.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '022-102442' ) ) AND datetime(allergy.allergytime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month')",
            "pred_sql": "SELECT allergy.drugname FROM allergy INNER JOIN patient ON patient.patientunitstayid = allergy.patientunitstayid WHERE strftime('%Y %m', allergy.allergytime) = '2100 12' AND patient.uniquepid = '022-102442'",
            "gold_answer": "[['codeine phosphate'], ['lidocaine'], ['penicillin g benzathine']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly joins allergy to patient and filters by the given patient identifier, but it applies the wrong date filter for \"last month.\" Instead of using the 'start of month','-1 month' logic to target November (the month before the fixed current date of 2100-12-31), it hardcodes strftime('%Y %m') = '2100 12', which erroneously selects December. This is a mistake localized to the WHERE clause\u2019s temporal condition and does not require restructuring the query\u2019s overall logic or joins.",
            "error_type": "Local: temporal filter error"
        },
        "686a213e1cb9376d7b32c3c5": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What is the cost of a diagnosis of upper gi bleeding - esophageal?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.eventtype = 'diagnosis' AND cost.eventid IN ( SELECT diagnosis.diagnosisid FROM diagnosis WHERE diagnosis.diagnosisname = 'upper gi bleeding - esophageal' )",
            "pred_sql": "SELECT sum(cost.cost) FROM diagnosis INNER JOIN cost ON diagnosis.patientunitstayid = cost.patientunitstayid WHERE diagnosis.diagnosisname = 'upper gastrointestinal bleeding - esophageal'",
            "gold_answer": "[['7.86']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended logic by joining cost and diagnosis on patientunitstayid rather than filtering cost.eventid to match diagnosis.diagnosisid and cost.eventtype = 'diagnosis'. It also uses the wrong diagnosis name text ('upper gastrointestinal bleeding - esophageal' vs 'upper gi bleeding - esophageal'), resulting in no matching rows. These issues span multiple parts of the query (JOIN condition, WHERE filters) and change the query\u2019s meaning, requiring a restructuring of the query logic.",
            "error_type": "Global: join error"
        },
        "fb6a8797ebccfafdd3738ab6": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the name of the lab test that patient 027-101119 first had since 03/2100?",
            "gold_sql": "SELECT lab.labname FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-101119' ) ) AND strftime('%Y-%m',lab.labresulttime) >= '2100-03' ORDER BY lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT lab.labname FROM cost INNER JOIN patient ON cost.patienthealthsystemstayid = patient.patienthealthsystemstayid INNER JOIN lab ON lab.patientunitstayid = patient.patientunitstayid WHERE cost.eventtype = 'lab' AND patient.uniquepid = '027-101119' AND datetime(cost.eventtime) >= datetime('2100 03 21 23:59:00') LIMIT 1",
            "gold_answer": "[['ptt ratio']]",
            "pred_answer": "no such column: cost.eventtime",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it references a non-existent column cost.eventtime. This is a localized error in the SELECT\u2019s WHERE clause (a column reference issue), not a fundamental flaw in the overall query structure or join logic. It can be fixed by correcting the column name without restructuring the query\u2019s logic.",
            "error_type": "Local: table/column reference error"
        },
        "7f90858ee680596075465460": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many patients received a lithium lab test since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT lab.patientunitstayid FROM lab WHERE lab.labname = 'lithium' AND strftime('%Y',lab.labresulttime) >= '2100' )",
            "pred_sql": "SELECT count(patientunitstayid) FROM lab WHERE labname LIKE '%lithium%' AND labresulttime LIKE '2100%'",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts the number of lab rows (tests) rather than the distinct number of patients, and it omits the DISTINCT and patient table reference entirely. It thus fundamentally changes the metric being computed (tests vs. patients) and requires restructuring the aggregation logic, not just a minor fix in one clause. This spans multiple parts of the query (FROM, SELECT, aggregation), altering the overall intent.",
            "error_type": "Global: aggregation error"
        },
        "a29b614ffd343441d111352e": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When was the first time that patient 013-10167 has been prescribed medication via the rectal route on the current hospital visit?",
            "gold_sql": "SELECT medication.drugstarttime FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '013-10167' AND patient.hospitaldischargetime IS NULL ) ) AND medication.routeadmin = 'rectal' ORDER BY medication.drugstarttime ASC LIMIT 1",
            "pred_sql": "SELECT medication.drugstarttime FROM patient INNER JOIN medication ON patient.patientunitstayid = medication.patientunitstayid WHERE patient.patientunitstayid = '013-10167' AND medication.routeadmin = 'oral' AND medication.drugname = 'clonidine hydrochloride 100 mg'",
            "gold_answer": "[['2100-12-15 07:28:06']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails to capture the overall structure required: it joins patient to medication but then filters patientunitstayid directly against the patient\u2019s unique ID, uses the wrong route ('oral' instead of 'rectal'), adds an extraneous drug filter, and omits the subquery logic to restrict to the current hospital visit. These issues span multiple clauses (JOIN, WHERE) and break the intended logic of identifying the current visit and the correct route, so they cannot be fixed by a minor local tweak.",
            "error_type": "Global: incorrect join and filter logic"
        },
        "f88531ecfa6e7dda7883c2ed": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Calculate the number of patients diagnosed with acute copd exacerbation - with baseline hypoxemia in 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT diagnosis.patientunitstayid FROM diagnosis WHERE diagnosis.diagnosisname = 'acute copd exacerbation - with baseline hypoxemia' AND strftime('%Y',diagnosis.diagnosistime) = '2100' )",
            "pred_sql": "SELECT count(patient.patientunitstayid) FROM patient INNER JOIN diagnosis ON patient.patientunitstayid = diagnosis.patientunitstayid WHERE diagnosis.diagnosisname = 'acute copd exacerbation' AND substr(diagnosis.diagnosistime, 1, 4) = '2100'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['191.0']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s JOIN structure is acceptable, but the WHERE clause is incorrect in two local ways: it filters for diagnosisname = 'acute copd exacerbation' instead of the full 'acute copd exacerbation - with baseline hypoxemia', and it uses count(patient.patientunitstayid) instead of COUNT(DISTINCT patient.uniquepid). These mistakes are confined to the SELECT/WHERE clause and do not alter the overall query logic or table relationships.",
            "error_type": "Local: incorrect WHERE value and missing DISTINCT"
        },
        "eefab9f6dfaede81ed784769": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What new medications were introduced for patient 015-100025 today compared to the ones yesterday?",
            "gold_sql": "SELECT medication.drugname FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.uniquepid = '015-100025' ) AND datetime(medication.drugstarttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') EXCEPT SELECT medication.drugname FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.uniquepid = '015-100025' ) AND datetime(medication.drugstarttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT count(CASE WHEN datetime(drugstarttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-1 day') THEN drugname ELSE NULL END) - count(CASE WHEN datetime(drugstarttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-1 day') THEN drugname ELSE NULL END) FROM medication WHERE patientunitstayid = 150025",
            "gold_answer": "[['1 ml : heparin sodium (porcine) 5000 unit/ml ij soln'], ['1 ml vial : heparin sodium (porcine) 1000 unit/ml ij soln'], ['100 ml flex cont : sodium chloride 0.9 % iv soln'], ['acetaminophen 325 mg po tabs'], ['alteplase 2 mg ij solr'], ['aspirin 81 mg po chew'], ['atorvastatin calcium 40 mg po tabs'], ['carvedilol 25 mg po tabs'], ['diphenhydramine hcl 25 mg po caps'], ['famotidine 20 mg po tabs'], ['nitroglycerin 0.4 mg sl subl'], ['oxycodone hcl 5 mg po tabs'], ['prednisone 10 mg po tabs'], ['prednisone 20 mg po tabs'], ['prednisone 5 mg po tabs'], ['sodium chloride 0.9 % ij soln']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the intended set-difference logic into an aggregate count subtraction, never filters for today (uses yesterday\u2019s date twice), and even uses a hard\u2010coded patientunitstayid instead of mapping uniquepid. This goes beyond a simple column or filter mistake and requires rewriting multiple clauses (SELECT, WHERE, the core logic). It is a global error in the overall query structure and logic.",
            "error_type": "Global: flawed query logic"
        }
    },
    "qwen3": {
        "b11dccba6b0fa27a522c37c7": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What drug was patient 013-10215 prescribed with after the diagnosis of bone fracture(s) - left lower extremity within 2 days this month?",
            "gold_sql": "SELECT T2.drugname FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '013-10215' ) AND diagnosis.diagnosisname = 'bone fracture(s) - left lower extremity' AND datetime(diagnosis.diagnosistime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '013-10215' ) AND datetime(medication.drugstarttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.drugstarttime AND datetime(T2.drugstarttime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime,'+2 day')",
            "pred_sql": "SELECT DISTINCT m.drugname FROM patient p JOIN diagnosis d ON p.patientunitstayid = d.patientunitstayid JOIN medication m ON p.patientunitstayid = m.patientunitstayid WHERE p.uniquepid = '013-10215' AND d.diagnosisname = 'bone fracture(s) - left lower extremity' AND strftime('%Y-%m', d.diagnosistime) = '2100-12' AND m.drugstarttime >= d.diagnosistime AND m.drugstarttime <= datetime(d.diagnosistime, '+2 days') AND strftime('%Y-%m', m.drugstarttime) = '2100-12' AND d.diagnosistime = ( SELECT MIN(diagnosistime) FROM diagnosis WHERE patientunitstayid = p.patientunitstayid AND diagnosisname = 'bone fracture(s) - left lower extremity' AND strftime('%Y-%m', diagnosistime) = '2100-12' );",
            "gold_answer": "[['bisacodyl'], ['bisacodyl'], ['bisacodyl'], ['bisacodyl'], ['cefazolin'], ['diphenhydramine'], ['diphenhydramine'], ['docusate sodium'], ['docusate sodium'], ['enoxaparin'], ['enoxaparin'], ['enoxaparin'], ['flumazenil'], ['hydrocodone-acetaminophen'], ['hydrocodone-acetaminophen'], ['hydrocodone-acetaminophen'], ['hydrocodone-acetaminophen'], ['hydrocodone-acetaminophen'], ['hydrocodone-acetaminophen'], ['hydromorphone'], ['hydromorphone'], ['hydromorphone'], ['ipratropium-albuterol'], ['naloxone'], ['naloxone'], ['naloxone'], ['ondansetron'], ['ondansetron'], ['ondansetron'], ['pantoprazole']]",
            "pred_answer": "[['bisacodyl'], ['bisacodyl'], ['cefazolin'], ['diphenhydramine'], ['docusate sodium'], ['enoxaparin'], ['hydrocodone-acetaminophen'], ['hydrocodone-acetaminophen'], ['naloxone'], ['ondansetron'], ['pantoprazole']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a filter limiting the diagnosis time to the earliest (\u2018MIN(diagnosistime)\u2019) within the month, whereas the ground truth considers all diagnosis events in that month and all corresponding medications within two days of each. This change alters the overall logic\u2014omitting drugs tied to later diagnoses\u2014and is not a simple typo or localized syntax issue but a structural/logical difference across clauses.",
            "error_type": "Global: incorrect logical filter on diagnosis time"
        },
        "51480a6db604cb44753b4aa9": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "How many patients received a bronchodilator procedure within the same hospital visit after having a non-invasive ventilation procedure this year?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'non-invasive ventilation' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'bronchodilator' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 WHERE T1.treatmenttime < T2.treatmenttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid",
            "pred_sql": "SELECT COUNT(DISTINCT p.uniquepid) FROM treatment t1 JOIN treatment t2 ON t1.patientunitstayid = t2.patientunitstayid AND t2.treatmenttime > t1.treatmenttime JOIN patient p ON t1.patientunitstayid = p.patientunitstayid WHERE t1.treatmentname LIKE '%non-invasive ventilation%' AND t2.treatmentname LIKE '%bronchodilator%' AND strftime('%Y', t1.treatmenttime) = '2100';",
            "gold_answer": "[['10.0']]",
            "pred_answer": "[['23.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict the bronchodilator events to the current year and furthermore joins treatments on patientunitstayid rather than patienthealthsystemstayid (hospital visit). This changes the query\u2019s core logic for \u201csame hospital visit\u201d and miscounts patients. These are not simple typos but fundamental join and filtering mistakes that require restructuring the query, so it is a global error.",
            "error_type": "Global: incorrect join and filter logic"
        }
    },
    "o4-mini": {
        "3ad2a55fd673d7cc2caa8aca": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "What is the 6-month survival rate of patients who received 1 ml vial : diphenhydramine hcl 50 mg/ml ij soln treatment following a s/p embolectomy diagnosis?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN (patient.hospitaldischargestatus = 'expired') AND(strftime('%J',patient.hospitaldischargetime) - strftime('%J',T1.diagnosistime)) < 365 / 2 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 's/p embolectomy' AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) >= 365 / 2 GROUP BY patient.uniquepid HAVING MIN(diagnosis.diagnosistime) = diagnosis.diagnosistime ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = '1 ml vial : diphenhydramine hcl 50 mg/ml ij soln' GROUP BY patient.uniquepid HAVING MAX(medication.drugstarttime) = medication.drugstarttime ) AS T2 ON T1.uniquepid = T2.uniquepid JOIN patient ON T1.uniquepid = patient.uniquepid WHERE T1.diagnosistime < T2.drugstarttime GROUP BY patient.uniquepid HAVING MAX(patient.hospitaldischargetime) = patient.hospitaldischargetime )",
            "pred_sql": "WITH first_embo AS ( SELECT patientunitstayid, MIN(diagnosistime) AS first_diag_time FROM diagnosis WHERE diagnosisname = 's/p embolectomy' GROUP BY patientunitstayid ), med_after_embo AS ( SELECT DISTINCT e.patientunitstayid, e.first_diag_time FROM first_embo e JOIN medication m ON m.patientunitstayid = e.patientunitstayid AND m.drugname = '1 ml - diphenhydramine hcl 50 mg/ml ij soln' AND m.drugstarttime >= e.first_diag_time ), survival_calc AS ( SELECT m.patientunitstayid, p.hospitaldischargetime, p.hospitaldischargestatus, m.first_diag_time, CASE WHEN p.hospitaldischargestatus = 'expired' AND datetime(p.hospitaldischargetime) <= datetime(m.first_diag_time, '+183 days') THEN 0 ELSE 1 END AS survived FROM med_after_embo m JOIN patient p ON p.patientunitstayid = m.patientunitstayid ) SELECT AVG(survived) AS six_month_survival_rate FROM survival_calc;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to match any medication records because the drug name literal in the WHERE clause is incorrect ('1 ml - diphenhydramine...' instead of '1 ml vial : diphenhydramine...'), leading to no rows in the CTE and ultimately NULL for the average. This is a localized mistake in the WHERE condition\u2019s string literal and can be corrected without changing the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "3162159f1b6f94c9ef55cdaf": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Among the patients who are prescribed mag/alum hydroxide w/simeth, what are the five most frequently prescribed drugs at the same time since 2100?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'mag/alum hydroxide w/simeth' AND strftime('%Y',medication.drugstarttime) >= '2100' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid AND strftime('%Y',medication.drugstarttime) >= '2100' ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE datetime(T1.drugstarttime) = datetime(T2.drugstarttime) GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT med2.drugname FROM medication AS med1 JOIN medication AS med2 ON med1.patientunitstayid = med2.patientunitstayid AND med1.drugname = 'mag/alum hydroxide w/simeth' AND med2.drugname <> 'mag/alum hydroxide w/simeth' AND med1.drugstarttime >= '2100-01-01 00:00:00' AND med2.drugstarttime >= '2100-01-01 00:00:00' AND med1.drugstarttime <= med2.drugstoptime AND med2.drugstarttime <= med1.drugstoptime GROUP BY med2.drugname ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['hydralazine'], ['mag/alum hydroxide w/simeth'], ['morphine'], ['ondansetron (pf)'], ['oxycodone']]",
            "pred_answer": "[['amiodarone'], ['morphine'], ['oxycodone'], ['pantoprazole'], ['potassium chloride']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic of how \u201cat the same time\u201d is determined. Instead of matching exact drugstarttime timestamps between mag/alum hydroxide w/simeth prescriptions and other drugs (as in the ground truth), it uses an interval overlap condition (med1.drugstarttime <= med2.drugstoptime AND med2.drugstarttime <= med1.drugstoptime). This alters the join relationship and thus the intended meaning of \u201cat the same time.\u201d Fixing this requires rewriting the join condition, not just correcting a column or alias, so it is a global error in join strategy.",
            "error_type": "Global: join strategy error"
        },
        "cbe78f4e084965ff31d7fb0b": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "How many hours have gone by since the first instance patient 033-10044 had a intake, oral amount input on the current icu visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',intakeoutput.intakeoutputtime) ) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '033-10044' ) AND patient.unitdischargetime IS NULL ) AND intakeoutput.celllabel = 'intake, oral amount' AND intakeoutput.cellpath LIKE '%intake%' ORDER BY intakeoutput.intakeoutputtime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', (SELECT MIN(io.intakeoutputtime) FROM intakeoutput io JOIN patient p ON io.patientunitstayid = p.patientunitstayid WHERE p.uniquepid = '033-10044' AND p.unitdischargetime IS NULL AND io.cellpath LIKE '%intake%' AND io.celllabel = 'p.o.' ) )) / 3600.0 AS hours_since;",
            "gold_answer": "[['16.483']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses the wrong celllabel value ('p.o.') instead of the correct 'intake, oral amount'. This is a localized mistake in the WHERE clause literal, causing no rows to match. The overall query structure\u2014joining intakeoutput to patient, filtering for current ICU stay, selecting the minimum time, and computing the hour difference\u2014is otherwise correct according to the question. Hence, this is a local error confined to a specific value mismatch in the query.",
            "error_type": "Local: value parsing error"
        }
    },
    "llm2sql": {
        "b6aa0837ace2e0c90b15be92": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "Tell me the top four most frequent procedures that patients received within the same hospital visit after being diagnosed with encephalitis - viral.",
            "gold_sql": "SELECT T3.treatmentname FROM ( SELECT T2.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime, patient.patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'encephalitis - viral' ) AS T1 JOIN ( SELECT patient.uniquepid, treatment.treatmentname, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.treatmenttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.treatmentname ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT T.treatmentname, COUNT(*) as frequency FROM diagnosis D JOIN treatment T ON D.patientunitstayid = T.patientunitstayid WHERE D.diagnosisname = 'encephalitis - viral' AND T.treatmenttime > D.diagnosistime GROUP BY T.treatmentname ORDER BY frequency DESC LIMIT 4;",
            "gold_answer": "[['anticonvulsant - levetiracetam'], ['antiviral therapy - acyclovir'], ['eeg'], ['electrolyte administration - intravenous'], ['electrolyte administration - oral'], ['electrolyte administration - potassium'], ['foley catheter'], ['head ct scan'], ['insulin - continuous infusion'], ['lumbar puncture'], ['mri - head'], ['normal saline administration'], ['oxygen therapy (< 40%) - nasal cannula'], ['vasodilating agent - iv - nicardipine'], ['vte prophylaxis - compression boots'], ['vte prophylaxis - conventional heparin therapy']]",
            "pred_answer": "[['insulin - continuous infusion', '9.0'], ['lumbar puncture', '9.0'], ['oxygen therapy (< 40%) - nasal cannula', '9.0'], ['vte prophylaxis - compression boots', '9.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins diagnosis and treatment on patientunitstayid, but the question asks for procedures within the same hospital visit (patienthealthsystemstayid). By using the wrong key, it fundamentally alters the data association and scope of the query. This is not a simple column typo\u2014it changes the join logic and requires restructuring the query to use patienthealthsystemstayid, so it is a global logic error.",
            "error_type": "Global: incorrect join key"
        },
        "90fc8fa81b2c8abb424a1228": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "What are the four most frequently prescribed medications for patients in their 20s?",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age BETWEEN 20 AND 29 ) GROUP BY medication.drugname ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT medication.drugname, COUNT(medication.medicationid) AS frequency FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE CAST(patient.age AS INTEGER) BETWEEN 20 AND 29 GROUP BY medication.drugname ORDER BY frequency DESC LIMIT 4;",
            "gold_answer": "[['100 ml flex cont : sodium chloride 0.9 % iv soln'], ['acetaminophen 325 mg po tabs'], ['ativan'], ['potassium chloride'], ['sodium chloride 0.9 % ij soln'], ['sodium chloride 0.9 % iv : 1000 ml']]",
            "pred_answer": "[['100 ml flex cont : sodium chloride 0.9 % iv soln', '17.0'], ['ativan', '22.0'], ['potassium chloride', '18.0'], ['sodium chloride 0.9 % iv : 1000 ml', '15.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly joins medication with patient and filters ages 20\u201329, and it groups and orders by prescription count. However, it uses ORDER BY \u2026 LIMIT 4 instead of applying DENSE_RANK() to include ties for the fourth rank, and it returns both drug names and their counts rather than only the drug names. These are errors localized to the SELECT and ranking logic without altering table relationships or the overall query structure.",
            "error_type": "Local: top-N ranking and projection error"
        }
    },
    "macsql": {
        "dd2fff7121aa317a158ef08e": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "What are the ways to consume 150 ml flex cont : levofloxacin in d5w 5 mg/ml iv soln?",
            "gold_sql": "SELECT DISTINCT medication.routeadmin FROM medication WHERE medication.drugname = '150 ml flex cont : levofloxacin in d5w 5 mg/ml iv soln'",
            "pred_sql": "SELECT DISTINCT `routeadmin` FROM medication WHERE `drugname` = 'flex cont : levofloxacin in d5w 5 mg/ml iv soln' AND `dosage` LIKE '150%'",
            "gold_answer": "[['intravenous']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in the WHERE clause: it splits the drug identification into drugname and dosage rather than matching the full drugname string. This incorrect filter condition does not require restructuring the query\u2014only fixing the WHERE clause\u2014so it is a localized error.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "e172b906c0bf5c99f8e7c2db": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Since patient 022-1011's admission to the icu, how many days have passed?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',patient.unitadmittime) ) FROM patient WHERE patient.uniquepid = '022-1011' AND patient.unitdischargetime IS NULL",
            "pred_sql": "SELECT julianday('2100-12-31') - julianday( (SELECT `unitadmittime` FROM patient WHERE `uniquepid` = '022-1011') ) AS days_passed",
            "gold_answer": "[['32.806']]",
            "pred_answer": "[['31.806']]",
            "is_correct": false,
            "reasoning": "The predicted SQL diverges from the ground truth only in how it handles the date literal and omits the explicit filter on unitdischargetime. Instead of using the prescribed fixed datetime '2100-12-31 23:59:00' and strftime('%J', \u2026) to compute the Julian day difference, it uses julianday('2100-12-31'), which refers to midnight of that day. This single change in the datetime literal shifts the result by approximately one day (31.806 vs. 32.806) but does not alter the query\u2019s overall structure or joins. All other logic\u2014selecting admission time and calculating a difference\u2014remains intact. Therefore, this is a localized date/time formatting error, fixable by adjusting the literal to include the time component or using strftime as specified.",
            "error_type": "Local: other-temporal error"
        }
    },
    "dinsql": {
        "3f431d7d1d47504f2a86e31f": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "What time was patient 002-12407 prescribed a drug for the last time on the last hospital visit?",
            "gold_sql": "SELECT medication.drugstarttime FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-12407' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) ORDER BY medication.drugstarttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(drugstoptime) FROM medication WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '002-12407' AND hospitaldischargetime IS NOT NULL ORDER BY hospitaladmittime DESC LIMIT 1 )",
            "gold_answer": "[['2100-03-05 00:00:40']]",
            "pred_answer": "[['2100-03-06 21:43:40']]",
            "is_correct": false,
            "reasoning": "The predicted SQL locally mis\u2010references the wrong time column: it uses MAX(drugstoptime) instead of retrieving medication.drugstarttime. This is confined to the SELECT expression and does not alter the overall query structure or join logic, so it\u2019s a localized column reference error.",
            "error_type": "Local: column reference error"
        },
        "9914665feeedfe52a91bea48": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "What are the four most frequent drugs prescribed to male patients with an age of 60 or above within the same month after they are diagnosed with anemia - anemia of critical illness?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'anemia - anemia of critical illness' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.gender = 'male' AND patient.age >= 60 ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.drugstarttime AND datetime(T1.diagnosistime,'start of month') = datetime(T2.drugstarttime,'start of month') GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH AnemiaPatients AS ( SELECT DISTINCT p.patientunitstayid FROM patient p JOIN diagnosis d ON p.patientunitstayid = d.patientunitstayid WHERE p.gender = 'male' AND CAST(p.age AS INTEGER) >= 60 AND d.diagnosisname = 'anemia - anemia of critical illness' ), PrescribedDrugs AS ( SELECT m.drugname FROM medication m JOIN AnemiaPatients ap ON m.patientunitstayid = ap.patientunitstayid JOIN diagnosis d ON m.patientunitstayid = d.patientunitstayid WHERE d.diagnosisname = 'anemia - anemia of critical illness' AND datetime(d.diagnosistime, 'start of month') = datetime(m.drugstarttime, 'start of month') ) SELECT drugname FROM ( SELECT drugname, COUNT(*) as freq, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) as rnk FROM PrescribedDrugs GROUP BY drugname ) ranked_drugs WHERE rnk <= 4",
            "gold_answer": "[['albumin human 5%'], ['ativan'], ['dextrose 50% abboject'], ['haldol'], ['morphine'], ['normal saline flush']]",
            "pred_answer": "[['albumin human 5%'], ['ativan'], ['dextrose 50% abboject'], ['normal saline flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the crucial condition requiring the medication start time to occur after the diagnosis time (T1.diagnosistime < T2.drugstarttime). This is a missing WHERE\u2010clause filter rather than a flaw in the overall query structure, joins, or ranking logic. Adding the time-comparison condition back into the WHERE clause would align the predicted SQL with the intended logic, so the error is confined to a specific part of the query.",
            "error_type": "Local: missing time comparison filter"
        }
    },
    "deepseek": {
        "e978805c9c431a4af765f712": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What is the total number of patients who were discharged from the hospital this year?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.hospitaldischargetime IS NOT NULL AND datetime(patient.hospitaldischargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year')",
            "pred_sql": "SELECT COUNT(*) FROM patient WHERE STRFTIME('%Y', hospitaldischargetime) = '2100';",
            "gold_answer": "[['1367.0']]",
            "pred_answer": "[['1758.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses COUNT(*) instead of COUNT(DISTINCT patient.uniquepid) and does not explicitly check for hospitaldischargetime IS NOT NULL. This is a localized mistake in the aggregation and DISTINCT usage rather than a structural or multi-clause logic error. The overall query structure and filtering logic remain intact, but the aggregation is incorrect.",
            "error_type": "Local: aggregation error"
        },
        "40448499d12131f2f43e9143": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "On the last hospital visit, what was patient 016-10349's last weight measurement?",
            "gold_sql": "SELECT patient.admissionweight FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '016-10349' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) AND patient.admissionweight IS NOT NULL ORDER BY patient.unitadmittime DESC LIMIT 1",
            "pred_sql": "SELECT dischargeweight FROM patient WHERE uniquepid = '016-10349' ORDER BY hospitaladmittime DESC LIMIT 1;",
            "gold_answer": "[['121.56']]",
            "pred_answer": "[['121.6']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects the wrong column (dischargeweight) instead of the required admissionweight. This is a localized mistake in the SELECT clause\u2019s column reference and does not affect the overall query structure or logic.",
            "error_type": "Local: column reference error"
        }
    }
}