{
    "deepseek": {
        "51f6daf8480d7c0ebaa6546b": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Keep count of how many times patient 027-101164 outputted urine since 02/2100.",
            "gold_sql": "SELECT COUNT(*) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-101164' ) ) AND intakeoutput.cellpath LIKE '%output%' AND intakeoutput.celllabel = 'urine' AND strftime('%Y-%m',intakeoutput.intakeoutputtime) >= '2100-02'",
            "pred_sql": "SELECT COUNT(*) FROM intakeoutput JOIN patient ON intakeoutput.patientunitstayid = patient.patientunitstayid WHERE patient.patient = '027-101164' AND celllabel = 'urine' AND datetime(intakeoutputtime) >= datetime('2100-02-01');",
            "gold_answer": "[['31.0']]",
            "pred_answer": "no such column: patient.patient",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column patient.patient instead of using the correct column (e.g., patient.uniquepid or joining via patienthealthsystemstayid), causing a simple column reference error. This is localized to the WHERE clause and doesn\u2019t alter the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "ec8b06d3d0d215017406de8d": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What was the maximum -monos value for patient 004-16896 last month?",
            "gold_sql": "SELECT MAX(lab.labresult) FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-16896' ) ) AND lab.labname = '-monos' AND datetime(lab.labresulttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month')",
            "pred_sql": "SELECT MAX(v.systemicmean) AS max_monos FROM vitalperiodic v JOIN patient p ON v.patientunitstayid = p.patientunitstayid WHERE p.uniquepid = '004-16896' AND strftime('%Y-%m', v.observationtime) = '2100-11';",
            "gold_answer": "[['8.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets the question by querying the vitalperiodic table for systemicmean rather than the lab table for the '-monos' lab result. It also applies an incorrect time filter (using strftime('%Y-%m') = '2100-11' instead of the specified start-of-month filter) and fails to link through the patienthealthsystemstayid hierarchy. These are not localized typos or minor clause fixes but a complete mis-structuring of the query logic, table selection, and time filtering.",
            "error_type": "Global: incorrect table join and logic"
        },
        "1f665d965c0a35901cd6f9f6": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Count the number of times that patient 007-10182 received the stress ulcer treatment procedure since 2100.",
            "gold_sql": "SELECT COUNT(*) FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '007-10182' ) ) AND treatment.treatmentname = 'stress ulcer treatment' AND strftime('%Y',treatment.treatmenttime) >= '2100'",
            "pred_sql": "SELECT COUNT(t.treatmentid) AS count_stress_ulcer_treatments FROM patient p JOIN treatment t ON p.patientunitstayid = t.patientunitstayid WHERE p.uniquepid = '007-10182' AND t.treatmentname = 'stress ulcer treatment - omeprazole' AND t.treatmenttime >= '2100-01-01 00:00:00';",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses an incorrect treatment name filter ('stress ulcer treatment - omeprazole' instead of 'stress ulcer treatment'). The JOIN, date filter, and overall structure are correct and align with the intent, but the specific WHERE condition on treatmentname is wrong. This is a confined mistake in a single clause and does not affect the broader query logic.",
            "error_type": "Local: incorrect literal in WHERE clause"
        }
    },
    "o4-mini": {
        "890c4c363cef0eeb6026ba30": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "How many patients were treated with carotid endarterectomy this year?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'carotid endarterectomy' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT p.uniquepid) AS num_patients FROM treatment t JOIN patient p ON t.patientunitstayid = p.patientunitstayid WHERE t.treatmentname LIKE '%carotid endarterectomy%' AND strftime('%Y', t.treatmenttime) = '2100';",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only errs in the WHERE clause by using a broad LIKE '%carotid endarterectomy%' match instead of the exact treatmentname = 'carotid endarterectomy'. This localized mistake overcounts patients without changing the query\u2019s overall structure or join logic. Hence it\u2019s a local error in the filtering condition.",
            "error_type": "Local: incorrect filter condition"
        },
        "78089756da598e878d999d58": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Which are the top three most frequent procedures patients had within the same hospital visit after being diagnosed with syncope?",
            "gold_sql": "SELECT T3.treatmentname FROM ( SELECT T2.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime, patient.patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'syncope' ) AS T1 JOIN ( SELECT patient.uniquepid, treatment.treatmentname, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.treatmenttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.treatmentname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "WITH syncope_diag AS ( SELECT p.patienthealthsystemstayid, MIN(d.diagnosistime) AS first_syncope_time FROM diagnosis d JOIN patient p ON d.patientunitstayid = p.patientunitstayid WHERE d.diagnosisname = 'syncope' GROUP BY p.patienthealthsystemstayid ) SELECT t.treatmentname FROM syncope_diag s JOIN patient p2 ON p2.patienthealthsystemstayid = s.patienthealthsystemstayid JOIN treatment t ON t.patientunitstayid = p2.patientunitstayid WHERE t.treatmenttime > s.first_syncope_time GROUP BY t.treatmentname ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['analgesics - narcotic analgesic'], ['antiemetic'], ['antihyperlipidemic agent - hmg-coa reductase inhibitor'], ['electrolyte administration'], ['foley catheter'], ['insulin'], ['oxygen therapy (< 40%)'], ['vte prophylaxis - compression boots']]",
            "pred_answer": "[['analgesics - narcotic analgesic'], ['oxygen therapy (< 40%)'], ['vte prophylaxis - compression boots']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly identifies the first syncope time per hospital visit and joins treatments after that time. The only deviation is in the final selection of top procedures: it uses ORDER BY COUNT(*) DESC LIMIT 3, which drops procedures tied for third place, instead of using DENSE_RANK() to include all ties as specified in the ground truth. This mistake is confined to the LIMIT/selection logic and does not alter the overall query structure or joins.",
            "error_type": "Local: incorrect top-N implementation"
        },
        "b260d9b209ffbd72ff0df60d": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Since 1 year ago, what are the top four most frequently given diagnoses that patients received within the same month after being diagnosed with hypothyroidism?",
            "gold_sql": "SELECT T3.diagnosisname FROM ( SELECT T2.diagnosisname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'hypothyroidism' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, diagnosis.diagnosisname, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.diagnosistime AND datetime(T1.diagnosistime,'start of month') = datetime(T2.diagnosistime,'start of month') GROUP BY T2.diagnosisname ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT d.diagnosisname, COUNT(*) AS frequency FROM diagnosis d JOIN diagnosis h ON d.patientunitstayid = h.patientunitstayid AND h.diagnosisname = 'hypothyroidism' AND h.diagnosistime >= datetime('2100-12-31 23:59:00','-1 year') AND d.diagnosisid <> h.diagnosisid AND d.diagnosistime BETWEEN h.diagnosistime AND datetime(h.diagnosistime, '+1 month') WHERE d.diagnosisname <> 'hypothyroidism' GROUP BY d.diagnosisname ORDER BY frequency DESC LIMIT 4;",
            "gold_answer": "[['acute respiratory failure'], ['diabetes mellitus'], ['hypothyroidism'], ['metabolic acidosis']]",
            "pred_answer": "[['acute respiratory failure', '643.0'], ['diabetes mellitus', '646.0'], ['hypoxemia', '628.0'], ['metabolic acidosis', '633.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the core logic of the question in two ways: 1) it uses a BETWEEN h.diagnosistime AND datetime(h.diagnosistime, '+1 month') filter rather than matching diagnoses in the same calendar month (start of month equality), and 2) it explicitly excludes 'hypothyroidism' from the results, whereas the ground truth includes it if it recurs. These are not mere syntax or localized fixes but alter the intended time-window semantics and result set. Thus this requires restructuring the filtering logic, making it a global error.",
            "error_type": "Global: incorrect join/time-filter logic"
        }
    },
    "codes": {
        "755f81df3ee4fb5f9851f75c": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the first height reading for patient 002-10122 on the current hospital visit?",
            "gold_sql": "SELECT patient.admissionheight FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-10122' AND patient.hospitaldischargetime IS NULL ) AND patient.admissionheight IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1",
            "pred_sql": "SELECT vitalperiodic.observationtime FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '002-10122' ORDER BY vitalperiodic.observationtime LIMIT 1",
            "gold_answer": "[['157.5']]",
            "pred_answer": "[['2100-12-29 17:50:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects vitalperiodic.observationtime instead of patient.admissionheight and does not restrict to the current hospital visit (no hospitaldischargetime IS NULL filter). The mistake is localized to using the wrong table/column and missing the simple WHERE clause for the current visit, rather than a fundamental restructuring of the query logic.",
            "error_type": "Local: column reference error"
        },
        "02cfc24a25bbc3f65344b938": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Tell me the top three most commonly prescribed drugs this year.",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM medication WHERE datetime(medication.drugstarttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY medication.drugname ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT medication.drugname FROM medication INNER JOIN cost ON medication.patientunitstayid = cost.patienthealthsystemstayid WHERE strftime('%Y', medication.drugstarttime) = '2100' GROUP BY medication.drugname ORDER BY count(medication.drugname) DESC LIMIT 3",
            "gold_answer": "[['1000 ml flex cont : sodium chloride 0.9 % iv soln'], ['potassium chloride'], ['tylenol']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN to the cost table on medication.patientunitstayid = cost.patienthealthsystemstayid, which is the wrong relationship and isn\u2019t needed to answer the question. This join filters out all medication records and changes the overall logic of the query. Fixing it requires removing or restructuring the join, not just tweaking a column reference in a single clause.",
            "error_type": "Global: incorrect join"
        },
        "3a7f1b2a7cc6efd048eaa761": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Tell me the name of the output patient 004-13203 had last on the last icu visit?",
            "gold_sql": "SELECT intakeoutput.celllabel FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-13203' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime DESC LIMIT 1 ) AND intakeoutput.cellpath LIKE '%output%' ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "pred_sql": "SELECT patient.patientunitstayid FROM patient INNER JOIN intakeoutput ON patient.patientunitstayid = intakeoutput.patientunitstayid WHERE patient.patientunitstayid = '004-13203' ORDER BY patient.hospitaldischargeoffset DESC LIMIT 1",
            "gold_answer": "[['urine']]",
            "pred_answer": "no such column: patient.hospitaldischargeoffset",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column `patient.hospitaldischargeoffset`, which is a column reference error. This mistake is confined to an incorrect column name in the SELECT and ORDER BY clauses and does not affect the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "b775d3303dae346b324ba667": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What are the top five most commonly tested specimens?",
            "gold_sql": "SELECT T1.culturesite FROM ( SELECT microlab.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM microlab GROUP BY microlab.culturesite ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "SELECT labname FROM lab GROUP BY labname ORDER BY count(labname) DESC LIMIT 5",
            "gold_answer": "[['blood, venipuncture'], ['nasopharynx'], ['sputum, tracheal specimen'], ['urine, catheter specimen'], ['urine, voided specimen']]",
            "pred_answer": "[['bedside glucose'], ['chloride'], ['glucose'], ['potassium'], ['sodium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the lab table and labname column, returning lab test names (e.g., 'chloride', 'potassium') instead of specimen sites from the microlab.culturesite column. This is not a minor syntax or alias mistake but a fundamentally wrong choice of table and field, altering the overall query logic. Hence, this is a global error.",
            "error_type": "Global: incorrect table/column"
        },
        "81cd13f00eb5a013f9338eb9": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When was patient 025-13916's last procedure conducted since 1 year ago?",
            "gold_sql": "SELECT treatment.treatmenttime FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '025-13916' ) ) AND datetime(treatment.treatmenttime) >= datetime('2100-12-31 23:59:00','-1 year') ORDER BY treatment.treatmenttime DESC LIMIT 1",
            "pred_sql": "SELECT DISTINCT cost.chargetime FROM patient INNER JOIN cost ON patient.patienthealthsystemstayid = cost.patienthealthsystemstayid WHERE patient.uniquepid = '025-13916' AND strftime('%Y',cost.chargetime) >= strftime('%Y', 'now') - 1",
            "gold_answer": "[['2100-07-30 13:53:00']]",
            "pred_answer": "[['2100-07-22 08:00:00'], ['2100-07-22 08:00:00'], ['2100-07-22 08:00:00'], ['2100-07-22 08:00:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 08:20:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-22 14:45:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-23 10:33:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 09:55:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00'], ['2100-07-24 22:46:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the cost table and patient.patienthealthsystemstayid join instead of the treatment table, completely changing the target entity from procedures (treatments) to billing events. It also misapplies a year filter via strftime('%Y',\u2026) on 'now' rather than using the fixed timestamp and does not order or limit to the last event. These issues are not confined to a single clause but alter the overall logic and results, requiring a structural rewrite to use the correct table and date filtering.",
            "error_type": "Global: incorrect table/join logic"
        }
    },
    "dinsql": {
        "d4101fa929c237ae9386cf10": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Get me the patient ids who were diagnosed with cellulitis - abdomen/pelvis in 2100.",
            "gold_sql": "SELECT DISTINCT patient.uniquepid FROM patient WHERE patient.patientunitstayid IN ( SELECT diagnosis.patientunitstayid FROM diagnosis WHERE diagnosis.diagnosisname = 'cellulitis - abdomen/pelvis' AND strftime('%Y',diagnosis.diagnosistime) = '2100' )",
            "pred_sql": "SELECT DISTINCT patient.patientunitstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'cellulitis - abdomen/pelvis' AND strftime('%Y', diagnosis.diagnosistime) = '2100'",
            "gold_answer": "[['004-15269'], ['016-10986']]",
            "pred_answer": "[['1554049.0'], ['389703.0']]",
            "is_correct": false,
            "reasoning": "The predicted query differs only in the SELECT clause: it returns patient.patientunitstayid instead of patient.uniquepid. The JOIN and WHERE logic correctly filters diagnoses in 2100 for cellulitis - abdomen/pelvis, but the wrong column is projected. This is a localized column reference mistake that doesn\u2019t affect the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "5a547c9a0a0b185342a34529": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Give me the top three most commonly prescribed drugs since 2100.",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM medication WHERE strftime('%Y',medication.drugstarttime) >= '2100' GROUP BY medication.drugname ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT drugname FROM medication WHERE datetime(drugstarttime) >= datetime('2100-12-31 23:59:00','-0 year') GROUP BY drugname ORDER BY COUNT(*) DESC LIMIT 3",
            "gold_answer": "[['1000 ml flex cont : sodium chloride 0.9 % iv soln'], ['potassium chloride'], ['tylenol']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The discrepancy lies solely in the WHERE clause\u2019s time filter. The ground truth uses strftime('%Y', medication.drugstarttime) >= '2100' to capture all prescriptions from the start of year 2100 onward. The predicted query instead filters for medication.drugstarttime >= '2100-12-31 23:59:00', effectively only including prescriptions on or after the very last moment of 2100, leading to no results. This is an isolated issue in the temporal condition and does not impact the overall query structure or logic beyond that clause.",
            "error_type": "Local: other-temporal error"
        }
    },
    "qwen3": {
        "e634a2c2d4fe2d94757686eb": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What is the 3-month survival rate for cardiac arrest - witnessed, < 15 minutes cpr patients who were prescribed 20.3 ml cup : acetaminophen 160 mg/5ml po soln after the diagnosis?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN (patient.hospitaldischargestatus = 'expired') AND(strftime('%J',patient.hospitaldischargetime) - strftime('%J',T1.diagnosistime)) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'cardiac arrest - witnessed, < 15 minutes cpr' AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) >= 365 / 4 GROUP BY patient.uniquepid HAVING MIN(diagnosis.diagnosistime) = diagnosis.diagnosistime ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = '20.3 ml cup : acetaminophen 160 mg/5ml po soln' GROUP BY patient.uniquepid HAVING MAX(medication.drugstarttime) = medication.drugstarttime ) AS T2 ON T1.uniquepid = T2.uniquepid JOIN patient ON T1.uniquepid = patient.uniquepid WHERE T1.diagnosistime < T2.drugstarttime GROUP BY patient.uniquepid HAVING MAX(patient.hospitaldischargetime) = patient.hospitaldischargetime )",
            "pred_sql": "WITH eligible_patients AS ( SELECT DISTINCT p.patientunitstayid, d.diagnosistime FROM diagnosis d JOIN patient p ON d.patientunitstayid = p.patientunitstayid JOIN treatment t ON p.patientunitstayid = t.patientunitstayid JOIN medication m ON p.patientunitstayid = m.patientunitstayid WHERE d.diagnosisname = 'cardiac arrest - witnessed' AND t.treatmentname LIKE '%CPR%' AND t.treatmenttime >= d.diagnosistime AND t.treatmenttime <= datetime(d.diagnosistime, '+15 minutes') AND m.drugname = '20.3 ml cup : acetaminophen 160 mg/5ml po soln' AND m.drugstarttime > d.diagnosistime ) SELECT (COUNT(*) - SUM(CASE WHEN p.hospitaldischargestatus = 'expired' AND datetime(p.hospitaldischargetime) <= datetime(ep.diagnosistime, '+91.25 days') THEN 1 ELSE 0 END)) * 1.0 / COUNT(*) AS survival_rate FROM eligible_patients ep JOIN patient p ON ep.patientunitstayid = p.patientunitstayid;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally alters the logic by introducing a JOIN on the treatment table (filtering by CPR treatment time) rather than relying on the diagnosis name that already encodes \u201c< 15 minutes cpr.\u201d It also mismatches diagnosis names, misuses treatment conditions, and computes the time window differently. These changes affect the entire query structure and intended patient cohort selection, so the error is not a localized typo or syntax mistake but a global logical/join error.",
            "error_type": "Global: incorrect join/logic error"
        },
        "1244ff6a6a69b74074964f0a": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "When was patient 035-10028's nutrition total input the last time on this month/22?",
            "gold_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '035-10028' ) ) AND intakeoutput.cellpath LIKE '%intake%' AND intakeoutput.celllabel = 'nutrition total' AND datetime(intakeoutput.intakeoutputtime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') AND strftime('%d',intakeoutput.intakeoutputtime) = '22' ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "pred_sql": "SELECT i.intakeoutputtime FROM intakeoutput i JOIN patient p ON i.patientunitstayid = p.patientunitstayid WHERE p.uniquepid = '035-10028' AND i.cellpath LIKE '%intake%' AND datetime(i.intakeoutputtime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND strftime('%d', i.intakeoutputtime) = '22' ORDER BY i.intakeoutputtime DESC LIMIT 1;",
            "gold_answer": "[['2100-12-22 18:00:00']]",
            "pred_answer": "[['2100-12-22 23:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in its WHERE clause: it omits the condition AND i.celllabel = 'nutrition total'. As a result, it picks up any intake event on the 22nd of the month rather than specifically the \u201cnutrition total\u201d input. This is a localized mistake in the WHERE clause filters and does not require restructuring the joins or overall query logic.",
            "error_type": "Local: missing-filter error"
        }
    },
    "llm2sql": {
        "f36aeace3637e368c0ee332f": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "For patients that are given glucagon 1 mg (1 unit) inj since 1 year ago, what are the top three drugs that they are prescribed afterwards within the same month?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'glucagon 1 mg (1 unit) inj' AND datetime(medication.drugstarttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE datetime(medication.drugstarttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.drugstarttime < T2.drugstarttime AND datetime(T1.drugstarttime,'start of month') = datetime(T2.drugstarttime,'start of month') GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drugname FROM medication WHERE medicationid IN (SELECT DISTINCT subsequent.medicationid FROM medication AS initial JOIN medication AS subsequent ON initial.patientunitstayid = subsequent.patientunitstayid WHERE initial.drugname = 'glucagon 1 mg (1 unit) inj' AND datetime(initial.drugstarttime) >= datetime('2100-12-31 23:59:00', '-1 year') AND datetime(initial.drugstarttime, 'start of month') = datetime(subsequent.drugstarttime, 'start of month') AND subsequent.drugname <> 'glucagon 1 mg (1 unit) inj') GROUP BY drugname ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['fentanyl (pf) 50 mcg/1 ml 2 ml inj'], ['insulin-glargine *unit* inj'], ['magnesium sulfate 2 gm/50 ml ivpb']]",
            "pred_answer": "[['dextrose 50% 25 gram/50 ml syr'], ['glucose 15 gram/60 ml bq liq'], ['sodium chloride 0.9% 1000 ml lvp']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the requirement that the \u201csubsequent\u201d drug event must occur after the initial glucagon administration (i.e. initial.drugstarttime < subsequent.drugstarttime). This is a missing condition in the WHERE clause/join logic but does not alter the overall structure of the query. It is a localized error in the temporal condition, so it is classified as a Local error.",
            "error_type": "Local: other-temporal error"
        },
        "e01c792f66d80f91b861252e": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "For patients who underwent cpap/peep therapy - 10-15 cm h2o since 1 year ago, what are the four most common diagnoses within the same month?",
            "gold_sql": "SELECT T3.diagnosisname FROM ( SELECT T2.diagnosisname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'cpap/peep therapy - 10-15 cm h2o' AND datetime(treatment.treatmenttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, diagnosis.diagnosisname, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.diagnosistime AND datetime(T1.treatmenttime,'start of month') = datetime(T2.diagnosistime,'start of month') GROUP BY T2.diagnosisname ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH RecentTreatments AS ( SELECT patientunitstayid, treatmenttime FROM treatment WHERE treatmentname LIKE '%cpap/peep therapy%' AND treatmenttime >= datetime('2100-12-31 23:59:00', '-1 year') AND treatmentname LIKE '%10-15 cm h2o%' ) SELECT d.diagnosisname FROM diagnosis d JOIN RecentTreatments rt ON d.patientunitstayid = rt.patientunitstayid WHERE strftime('%Y-%m', d.diagnosistime) = strftime('%Y-%m', rt.treatmenttime) GROUP BY d.diagnosisname ORDER BY COUNT(*) DESC LIMIT 4",
            "gold_answer": "[['acute coronary syndrome - myocardial ischemia'], ['acute respiratory failure'], ['ards'], ['atrial fibrillation'], ['bladder ca'], ['hematological effect of infection - bandemia'], ['hematological effect of infection - leukocytosis'], ['lower urinary tract infection'], ['septic shock']]",
            "pred_answer": "[['hematological effect of infection - bandemia'], ['hematological effect of infection - leukocytosis'], ['lower urinary tract infection'], ['septic shock']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic by (1) joining diagnosis and treatment on patientunitstayid instead of patient.uniquepid (thus tying events to the same ICU stay rather than the same patient) and (2) omitting the required condition that the treatment time precede the diagnosis time. These are not isolated syntax mistakes but core logical flaws spanning multiple clauses (JOIN and WHERE). Hence, this is a Global error.",
            "error_type": "Global: join logic error"
        },
        "602b90586b2027249be814c5": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "What is the monthly average volume of urine that patient 012-10772 had on the first hospital visit?",
            "gold_sql": "SELECT AVG(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '012-10772' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) AND intakeoutput.celllabel = 'urine' AND intakeoutput.cellpath LIKE '%output%' GROUP BY strftime('%Y-%m',intakeoutput.intakeoutputtime)",
            "pred_sql": "SELECT SUM(io.cellvaluenumeric) / (JULIANDAY(p.hospitaldischargetime) - JULIANDAY(p.hospitaladmittime)) * 30 AS monthly_average FROM intakeoutput io INNER JOIN patient p ON io.patientunitstayid = p.patientunitstayid WHERE p.uniquepid = '012-10772' AND io.cellpath LIKE '%urine%' AND p.hospitaladmittime = ( SELECT MIN(hospitaladmittime) FROM patient WHERE uniquepid = '012-10772' );",
            "gold_answer": "[['156.667'], ['97.5']]",
            "pred_answer": "[['12297.987']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the aggregation logic: instead of grouping by month and averaging monthly totals (as required), it sums all urine outputs over the first stay and divides by the length of stay to estimate a monthly rate. It also uses io.cellpath LIKE '%urine%' instead of filtering on celllabel = 'urine'. These errors alter the core structure and intended meaning of the query, not just a minor clause fix.",
            "error_type": "Global: incorrect aggregation and filtering logic"
        },
        "718699d9458da53c8e30bbaa": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "How many hours have passed since the last time patient 012-10585 got the antibacterials on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',treatment.treatmenttime) ) FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '012-10585' AND patient.hospitaldischargetime IS NULL ) ) AND treatment.treatmentname = 'antibacterials' ORDER BY treatment.treatmenttime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', MAX(m.drugstoptime))) / 3600 AS hours_since_last_antibacterial FROM medication m JOIN patient p ON m.patientunitstayid = p.patientunitstayid WHERE p.uniquepid = '012-10585' AND p.hospitaldischargetime IS NULL AND m.drugname LIKE '%antibacterial%';",
            "gold_answer": "[['97.367']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the medication table and drugstoptime (filtered by drugname LIKE '%antibacterial%') instead of the treatment table and treatmenttime with treatmentname = 'antibacterials'. This is not a local typo or minor syntax issue\u2014it fundamentally changes which events are considered and from which table, altering the query\u2019s logic and intended meaning. Therefore, this is a global error.",
            "error_type": "Global: wrong table/column usage"
        }
    },
    "macsql": {
        "f9e17f8c9c58b6ed7aeb9d39": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "When did patient 010-10317 get in 02/this year a lab test for the first time?",
            "gold_sql": "SELECT lab.labresulttime FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '010-10317' ) ) AND datetime(lab.labresulttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',lab.labresulttime) = '02' ORDER BY lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(`labresulttime`) AS first_lab_test_time FROM lab WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '010-10317' ) AND datetime(`labresulttime`, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND strftime('%d', `labresulttime`) = '02'",
            "gold_answer": "[['2100-02-01 03:49:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistake is confined to the WHERE clause date filter. It uses `datetime(...,'start of month')` and `strftime('%d') = '02'` (i.e. day = 02 of the current month) instead of filtering for month = '02' of this year. The overall query structure (finding the first lab test for the patient) remains correct; only the temporal condition is wrong. This is a localized error in the date\u2010filtering logic.",
            "error_type": "Local: other-temporal error"
        }
    }
}