{
    "dinsql": {
        "35cca7494423f9c053a7198c": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Tell me the top three diagnoses with the highest 3-month mortality rate.",
            "gold_sql": "SELECT T4.diagnosisname FROM ( SELECT T3.diagnosisname, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C2 FROM ( SELECT T2.diagnosisname, AVG(C1) AS C2 FROM ( SELECT T1.diagnosisname, ( CASE WHEN (patient.hospitaldischargestatus = 'expired') AND(strftime('%J',patient.hospitaldischargetime) - strftime('%J',T1.diagnosistime)) < 365 / 4 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosisname, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) >= 365 / 4 GROUP BY patient.uniquepid, diagnosis.diagnosisname HAVING MIN(diagnosis.diagnosistime) = diagnosis.diagnosistime ) AS T1 JOIN patient ON T1.uniquepid = patient.uniquepid ) AS T2 GROUP BY T2.diagnosisname ) AS T3 ) AS T4 WHERE T4.C2 <= 3",
            "pred_sql": "WITH FirstDiagnosis AS ( SELECT patientunitstayid, diagnosisname, MIN(diagnosistime) AS first_diagnosistime FROM diagnosis GROUP BY patientunitstayid, diagnosisname ), MortalityWithin3Months AS ( SELECT FD.diagnosisname, COUNT(DISTINCT P.patientunitstayid) AS total_patients, SUM(CASE WHEN P.hospitaldischargestatus = 'expired' AND datetime(P.hospitaldischargetime) <= datetime(FD.first_diagnosistime, '+91.25 days') THEN 1 ELSE 0 END) AS deaths_within_3_months FROM FirstDiagnosis FD JOIN patient P ON FD.patientunitstayid = P.patientunitstayid GROUP BY FD.diagnosisname ), MortalityRate AS ( SELECT diagnosisname, (CAST(deaths_within_3_months AS FLOAT) / total_patients) AS mortality_rate FROM MortalityWithin3Months ) SELECT diagnosisname FROM MortalityRate ORDER BY mortality_rate DESC LIMIT 3",
            "gold_answer": "[['acute copd exacerbation - with baseline hypoxemia'], ['anuria'], ['ards - pulmonary etiology'], ['arterial thromboembolism'], ['arterial thromboembolism - kidney'], ['av block - pacemaker in place, pacing and capturing'], ['bacteremia - gram negative rod'], ['bacteremia - respiratory source'], ['cardiac arrest - initial rhythm: asystole'], ['cardiac arrest - initial rhythm: pulseless electrical activity'], ['cardiac arrest - initial rhythm: ventricular tachycardia'], ['cardiac arrest - un-witnessed'], ['cerebral subdural hematoma - secondary to anticoagulation'], ['cholecystitis - acute'], ['dvt - left upper extremity'], ['dvt - right upper extremity'], ['encephalopathy - toxic'], ['enteritis - colitis, presumed infectious'], ['facial bone fracture - maxillary fracture'], ['facial bone fracture - orbital fracture'], ['gastric ca'], ['hyperglycemia - corticosteroid related'], ['hypernatremia - severe (> 155 meq/dl)'], ['hypothermia'], ['liver ca'], ['melanoma - nodular'], ['mouth and jaw tumor - oral cavity squamous cell ca'], ['myopathy - malignant hyperthermia'], ['necrotizing fasciitis - abdomen/pelvis'], ['peripheral vascular ischemia - atherosclerotic thrombo-occlusion'], ['peripheral vascular ischemia - postoperative'], ['pleural effusion - due to heart failure'], ['pneumothorax - spontaneous'], ['s/p aneurysm resection/repair abdominal aorta'], ['seizures - from ischemia/infarction'], ['sepsis - sepsis with single organ dysfunction- circulatory system failure'], ['valve repair >= 7 days - aortic / mitral'], ['vascular insufficiency of intestine - due to extrinsic compression'], ['ventricular tachycardia - with hemodynamic compromise']]",
            "pred_answer": "[['acute copd exacerbation - with baseline hypoxemia'], ['anal ca'], ['anuria']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict the cohort to only those diagnoses that occurred at least three months ago, so it includes recent diagnoses that have not yet had a full 3-month follow-up. This omission fundamentally alters the mortality rate calculation and requires restructuring the query (adding a filter on diagnosis time) rather than a simple fix within one clause. Therefore, it is a global logic error.",
            "error_type": "Global: missing follow-up time filter"
        },
        "bfc77af7df4b45fa5ec0579d": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "When was the time of patient 027-100241's last hospital discharge?",
            "gold_sql": "SELECT patient.hospitaldischargetime FROM patient WHERE patient.uniquepid = '027-100241' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaldischargetime DESC LIMIT 1",
            "pred_sql": "hospitaldischargetime FROM patient WHERE uniquepid = '027-100241' ORDER BY hospitaldischargetime DESC LIMIT 1",
            "gold_answer": "[['2100-12-01 22:31:00']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the SELECT keyword before the column list and also omits the IS NOT NULL filter on hospitaldischargetime. These are syntactic/local mistakes in the SELECT clause rather than a fundamental re\u2010structuring of the query logic or table relationships.",
            "error_type": "Local: syntax error"
        }
    },
    "codes": {
        "3371b2f9debfbbaecaf88aaa": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Show me the top five most common input events this year.",
            "gold_sql": "SELECT T1.celllabel FROM ( SELECT intakeoutput.celllabel, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM intakeoutput WHERE intakeoutput.cellpath LIKE '%intake%' AND datetime(intakeoutput.intakeoutputtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY intakeoutput.celllabel ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "SELECT intakeoutput.cellpath FROM intakeoutput INNER JOIN patient ON intakeoutput.patientunitstayid = patient.patientunitstayid WHERE intakeoutput.cellpath LIKE '%input%' AND strftime('%Y', intakeoutput.intakeoutputtime) = '2100' GROUP BY intakeoutput.cellpath ORDER BY count(intakeoutput.cellpath) DESC LIMIT 5",
            "gold_answer": "[['continuous infusion meds'], ['ns'], ['ns ivf'], ['oral'], ['p.o.']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query selects and filters on the wrong column (cellpath instead of celllabel), uses LIKE '%input%' instead of LIKE '%intake%', and applies an incorrect year filter with strftime rather than the required datetime start-of-year condition. These mistakes are confined to the SELECT and WHERE clauses and do not alter the overall join structure or logical flow, so they are localized reference and filtering errors.",
            "error_type": "Local: column reference error"
        },
        "ecc45c5ab8349e2cebba606b": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What are the top three most commonly prescribed medicines this year?",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM medication WHERE datetime(medication.drugstarttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY medication.drugname ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT medication.drugname FROM medication INNER JOIN cost ON medication.patientunitstayid = cost.patientunitstayid WHERE cost.eventtype = 'medication' AND strftime('%Y', medication.drugstarttime) = strftime('%Y', 'now') GROUP BY medication.drugname ORDER BY count(medication.drugname) DESC LIMIT 3",
            "gold_answer": "[['1000 ml flex cont : sodium chloride 0.9 % iv soln'], ['potassium chloride'], ['tylenol']]",
            "pred_answer": "[['aspart insulin'], ['nacl 0.9% mbp'], ['potassium chloride']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the intended logic in several ways. It introduces an unnecessary INNER JOIN with the cost table (linking medications to cost records) which alters which medication counts are considered, rather than simply counting prescriptions. It also misuses strftime('now') instead of the fixed current date ('2100-12-31 23:59:00') as required by the assumptions. These changes affect the overall structure and meaning of the query rather than a local clause, so this is a global logic/join error.",
            "error_type": "Global: incorrect join/logic error"
        },
        "e196d71a57da821d3fea4ffc": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Is the respiratory rate level of patient 032-10123 second measured on the last icu visit less than the first measurement on the last icu visit?",
            "gold_sql": "SELECT ( SELECT vitalperiodic.respiration FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '032-10123' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime DESC LIMIT 1 ) AND vitalperiodic.respiration IS NOT NULL ORDER BY vitalperiodic.observationtime ASC LIMIT 1 OFFSET 1 ) < ( SELECT vitalperiodic.respiration FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '032-10123' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime DESC LIMIT 1 ) AND vitalperiodic.respiration IS NOT NULL ORDER BY vitalperiodic.observationtime ASC LIMIT 1 )",
            "pred_sql": "SELECT vitalperiodic.respiration < ( SELECT min(respiration) FROM vitalperiodic WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient ORDER BY hospitaldischargetime DESC LIMIT 1 ) ) FROM vitalperiodic WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient ORDER BY hospitaldischargetime DESC LIMIT 1 )",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0'], ['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally alters the intended logic: it fails to filter by the specific patient '032-10123', uses hospitaldischargetime instead of unitdischargetime to identify the last ICU visit, and compares every respiration reading to the minimum respiration rather than comparing only the second measurement to the first. Fixing this would require restructuring the query\u2019s joins, filtering, and comparison logic\u2014not just a localized tweak.",
            "error_type": "Global: join error"
        },
        "a3569887096547747b83607e": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many people were admitted to the hospital?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient",
            "pred_sql": "SELECT count(patientunitstayid) FROM patient",
            "gold_answer": "[['1384.0']]",
            "pred_answer": "[['1788.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts the number of patientunitstayid values (i.e., admissions) rather than the number of unique patients. This is a column reference mistake in the SELECT clause: using patientunitstayid instead of DISTINCT uniquepid. The overall query structure (a single-table aggregate) remains correct; only the chosen column is wrong.",
            "error_type": "Local: column reference error"
        },
        "0ba85fee6d6f0714d590db19": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How frequently were stress ulcer prophylaxis - lansoprazole procedures performed since 1 year ago?",
            "gold_sql": "SELECT COUNT(*) FROM treatment WHERE treatment.treatmentname = 'stress ulcer prophylaxis - lansoprazole' AND datetime(treatment.treatmenttime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(DISTINCT patientunitstayid) FROM treatment WHERE treatmentstring LIKE '%lansoprazole%' AND strftime('%Y',treatmenttime) >= strftime('%Y', 'now', '-1 year')",
            "gold_answer": "[['15.0']]",
            "pred_answer": "no such column: treatmentstring",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column treatmentstring instead of treatmentname, causing a column reference error. This is a localized mistake that does not require restructuring the query's overall logic or flows, only correcting the column name and possibly the date condition.",
            "error_type": "Local: table/column reference error"
        },
        "1bf033967053a3e88b60ffec": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many current patients are there?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.hospitaldischargetime IS NULL",
            "pred_sql": "SELECT count(patientunitstayid) FROM patient WHERE hospitaldischargeoffset IS NULL",
            "gold_answer": "[['23.0']]",
            "pred_answer": "no such column: hospitaldischargeoffset",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column hospitaldischargeoffset instead of the correct hospitaldischargetime. This is a simple column reference error and does not affect the overall query structure or logic. It can be fixed by using the correct column name without altering the query\u2019s logical flow.",
            "error_type": "Local: column reference error"
        }
    },
    "deepseek": {
        "e6c1ce3d1f2119c0c4efd410": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "How many patients had antidepressant two or more times?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, COUNT(*) AS C1 FROM patient WHERE patient.patientunitstayid = ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'antidepressant' ) GROUP BY patient.uniquepid ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT COUNT(DISTINCT m.patientunitstayid) AS count_patients FROM medication m WHERE m.drugname IN ('sertraline', 'fluoxetine', 'paroxetine', 'amitriptyline', 'duloxetine') GROUP BY m.patientunitstayid HAVING COUNT(DISTINCT m.medicationid) >= 2;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the medication table and specific drug names (sertraline, fluoxetine, etc.) instead of using the treatment table and the generic 'antidepressant' treatmentname as specified. It also groups by patientunitstayid rather than uniquepid, thus altering the fundamental logic of which patients are counted and how. Fixing this would require changing the primary table, the grouping key, and the condition, which is more than a localized adjustment.",
            "error_type": "Global: wrong-table error"
        },
        "3a8fb984bbdae02fefa70866": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What is the name of the medication that patient 006-106259 received two or more times on the current hospital visit?",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, COUNT( medication.drugstarttime ) AS C1 FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-106259' AND patient.hospitaldischargetime IS NULL ) ) GROUP BY medication.drugname ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT DISTINCT T2.drugname FROM patient AS T1 INNER JOIN medication AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.uniquepid = '006-106259' AND T1.hospitaldischargetime IS NULL -- current hospital visit GROUP BY T2.drugname, T2.routeadmin HAVING COUNT(T2.medicationid) >= 2;",
            "gold_answer": "[['duoneb 3 mg-0.5 mg/3 ml ud inh soln'], ['lisinopril 10 mg tab'], ['metoprolol tartrate 25 mg tab']]",
            "pred_answer": "[['azithromycin 500 mg in ns 250 ml iv'], ['clopidogrel 75 mg tab'], ['duoneb 3 mg-0.5 mg/3 ml ud inh soln'], ['duoneb 3 mg-0.5 mg/3 ml ud inh soln'], ['enoxaparin 40 mg/0.4 ml syr'], ['famotidine 20 mg tab'], ['lisinopril 10 mg tab'], ['lisinopril 10 mg tab'], ['methylprednisolone 40 mg inj'], ['metoprolol tartrate 25 mg tab'], ['metoprolol tartrate 25 mg tab'], ['prednisone 20 mg tab'], ['vancomycin 1 gm in ns 250 ml ivpb'], ['vancomycin 750 mg in ns 250 ml ivpb']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in its GROUP BY clause: it groups by both drugname and routeadmin, then SELECTs only drugname (with DISTINCT). This splitting by route makes counts incorrect and duplicates appear, but it doesn\u2019t change the tables joined or the overall query structure. It\u2019s a localized mistake in the GROUP BY, so it\u2019s a local error.",
            "error_type": "Local: grouping error"
        },
        "b133ec5f24c3cd166147709c": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What is the 3-month survival rate for patients diagnosed with gi obstruction / ileus - small bowel obstruction?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN (patient.hospitaldischargestatus = 'expired') AND(strftime('%J',patient.hospitaldischargetime) - strftime('%J',T1.diagnosistime)) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'gi obstruction / ileus - small bowel obstruction' AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) >= 365 / 4 GROUP BY patient.uniquepid HAVING MIN(diagnosis.diagnosistime) = diagnosis.diagnosistime ) AS T1 JOIN patient ON T1.uniquepid = patient.uniquepid GROUP BY patient.uniquepid HAVING MAX(patient.hospitaldischargetime) = patient.hospitaldischargetime )",
            "pred_sql": "WITH diagnosed_patients AS ( SELECT p.uniquepid, p.hospitaldischargestatus, MIN(d.diagnosistime) AS first_diagnosis_time FROM patient p JOIN diagnosis d ON p.patientunitstayid = d.patientunitstayid WHERE d.diagnosisname = 'gi obstruction / ileus - small bowel obstruction' GROUP BY p.uniquepid, p.hospitaldischargestatus ) SELECT COUNT(CASE WHEN (hospitaldischargestatus = 'expired' AND datetime(hospitaldischargetime) <= datetime(first_diagnosis_time, '+91 days')) THEN NULL ELSE 1 END) / COUNT(1) AS survival_rate FROM diagnosed_patients;",
            "gold_answer": "[['0.833']]",
            "pred_answer": "no such column: hospitaldischargetime",
            "is_correct": false,
            "reasoning": "The predicted SQL references the column hospitaldischargetime in the outer SELECT, but that column is not selected in the diagnosed_patients CTE. This is a simple column reference error that can be fixed by including hospitaldischargetime in the CTE, without altering the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "e18271222fca09e96024c7ae": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "List the top four most frequent specimens that patients are tested within the same month after receiving glucocorticoid administration this year.",
            "gold_sql": "SELECT T3.culturesite FROM ( SELECT T2.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'glucocorticoid administration' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, microlab.culturesite, microlab.culturetakentime FROM microlab JOIN patient ON microlab.patientunitstayid = patient.patientunitstayid WHERE datetime(microlab.culturetakentime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.culturetakentime AND datetime(T1.treatmenttime,'start of month') = datetime(T2.culturetakentime,'start of month') GROUP BY T2.culturesite ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH ranked_specs AS ( SELECT l.labname, COUNT(*) AS count_specimen, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS rank_specimen FROM medication m JOIN lab l ON m.patientunitstayid = l.patientunitstayid WHERE m.drugname LIKE '%cortisone%' OR m.drugname LIKE '%prednisone%' OR m.drugname LIKE '%dexamethasone%' OR m.drugname LIKE '%hydrocortisone%' AND strftime('%Y-%m', m.drugstarttime) = strftime('%Y-%m', l.labresulttime) AND strftime('%Y', m.drugstarttime) = '2100' GROUP BY l.labname ) SELECT labname FROM ranked_specs WHERE rank_specimen <= 4;",
            "gold_answer": "[['sputum, tracheal specimen']]",
            "pred_answer": "[['bedside glucose'], ['creatinine'], ['glucose'], ['potassium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the medication and lab tables instead of the treatment and microlab tables, matches generic steroid drug names rather than the specific 'glucocorticoid administration' treatment name, and counts lab tests rather than culture specimens. It also misplaces the time relationship (using same-month but not ensuring culture occurs after treatment) and entirely alters the query\u2019s structure and intended relationships. This is not a simple typo or clause mistake but a fundamental change in tables, joins, and logic.",
            "error_type": "Global: incorrect tables and join logic"
        },
        "d83e7ab0c5e37663deb01e91": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "On the current hospital visit, how many hours have passed since the first time patient 025-10477 was prescribed 1000 ml : sodium chloride 0.9 % 0.9 % iv solp?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',medication.drugstarttime) ) FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '025-10477' ) AND patient.hospitaldischargetime IS NULL ) AND medication.drugname = '1000 ml : sodium chloride 0.9 % 0.9 % iv solp' ORDER BY medication.drugstarttime ASC LIMIT 1",
            "pred_sql": "SELECT p.uniquepid, MIN(m.drugstarttime) AS first_prescription_time, ROUND((strftime('%s', '2100-12-31 23:59:00') - strftime('%s', MIN(m.drugstarttime))) / 3600, 0) AS hours_since_first_prescription FROM patient p JOIN medication m ON p.patientunitstayid = m.patientunitstayid WHERE p.uniquepid = '025-10477' AND p.hospitaldischargetime IS NULL AND m.drugname LIKE '%sodium chloride 0.9% 1000 ml iv soln%' GROUP BY p.uniquepid;",
            "gold_answer": "[['41.483']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses an incorrect LIKE pattern for the drugname, so no rows are returned. The mistake is localized to the WHERE clause\u2019s string matching of the drug name, and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        }
    },
    "qwen3": {
        "5b7e59064a534ba9cf05d2cd": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "How many days have elapsed since the first time on the current hospital visit patient 004-10202 received a lab test for alt (sgpt)?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',lab.labresulttime) ) FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-10202' AND patient.hospitaldischargetime IS NULL ) ) AND lab.labname = 'alt (sgpt)' ORDER BY lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT CAST((julianday('2100-12-31 23:59:00') - julianday(earliest_lab_time)) AS INTEGER) AS days_elapsed FROM ( SELECT MIN(labresulttime) AS earliest_lab_time FROM lab WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '004-10202' AND hospitaldischargetime IS NULL ) AND (labname = 'alt' OR labname = 'sgpt') );",
            "gold_answer": "[['4.02']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly splits the lab name filter into labname = 'alt' OR labname = 'sgpt' instead of matching the single value 'alt (sgpt)'. This is a localized mistake in the WHERE clause value filter and can be fixed by correcting the literal without altering the overall query structure.",
            "error_type": "Local: value parsing error"
        },
        "1a00dd0718094715b48766e9": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "How many patients had antithyroid therapy - methimazole done since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'antithyroid therapy - methimazole' AND strftime('%Y',treatment.treatmenttime) >= '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT p.uniquepid) FROM medication m JOIN patient p ON m.patientunitstayid = p.patientunitstayid WHERE m.drugname LIKE '%methimazole%' AND datetime(m.drugstarttime) >= '2100-01-01 00:00:00';",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the medication table and filters on m.drugname and m.drugstarttime, whereas the correct logic requires querying the treatment table for treatmentname = 'antithyroid therapy - methimazole'. This is not a minor syntax mistake but a fundamental misuse of tables and relationships, altering the intended semantics of the query. To fix it would require restructuring the query to target the treatment table rather than a simple local correction within a clause.",
            "error_type": "Global: incorrect table usage"
        },
        "ae12d280809717b148037f25": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "How many times was ondansetron hcl last month prescribed to patient 025-17363?",
            "gold_sql": "SELECT COUNT(*) FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '025-17363' ) ) AND medication.drugname = 'ondansetron hcl' AND datetime(medication.drugstarttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month')",
            "pred_sql": "SELECT COUNT(*) AS count FROM patient JOIN medication ON patient.patientunitstayid = medication.patientunitstayid WHERE patient.uniquepid = '025-17363' AND medication.drugname LIKE '%ondansetron hcl%' AND strftime('%Y-%m', medication.drugstarttime) = strftime('%Y-%m', datetime('2100-12-31 23:59:00', '-1 month'));",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s structure (JOIN vs nested IN) is fine and doesn\u2019t break the overall logic. The mistake lies in the WHERE clause\u2019s filtering of the prescription date: it uses strftime('%Y-%m', \u2026) against datetime('\u2026','-1 month') (which yields \u20182100-11-30 23:59:00\u2019, giving \u20182100-11\u2019) instead of matching the start\u2010of\u2010month as required by the spec. This localized date\u2010filtering error causes the predicate to fail and return 0 instead of 1. No other cross\u2010clause logic or joins need changing\u2014only the date condition does.",
            "error_type": "Local: date filter error"
        }
    },
    "o4-mini": {
        "e989ad2c573c923a5fa15813": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Since the first diagnosis of patient 027-105098 with sepsis - severe on the current hospital visit, how many days have passed?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) ) FROM diagnosis WHERE diagnosis.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-105098' AND patient.hospitaldischargetime IS NULL ) ) AND diagnosis.diagnosisname = 'sepsis - severe' ORDER BY diagnosis.diagnosistime ASC LIMIT 1",
            "pred_sql": "SELECT CAST(julianday('2100-12-31 23:59:00') - julianday(MIN(d.diagnosistime)) AS INTEGER) AS days_passed FROM patient p JOIN diagnosis d ON p.patientunitstayid = d.patientunitstayid WHERE p.uniquepid = '027-105098' AND p.hospitaldischargetime IS NULL AND d.diagnosisname = 'sepsis - severe';",
            "gold_answer": "[['17.529']]",
            "pred_answer": "[['17.0']]",
            "is_correct": false,
            "reasoning": "The predicted query correctly identifies the first diagnosis on the current hospital visit and computes the day difference, so the overall structure and joins are sound. The discrepancy comes from casting the result to an INTEGER (truncating the fractional days) instead of preserving the decimal precision (and rounding to three decimals) as the ground truth does. This is a localized issue in the SELECT expression and output formatting, not a flaw in the join logic or overall query structure.",
            "error_type": "Local: value parsing/truncation error"
        }
    },
    "llm2sql": {
        "dceb8374d4d0a4d482941c77": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "On the last hospital visit, what was the daily maximum dose of oral for patient 017-106093?",
            "gold_sql": "SELECT MAX(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '017-106093' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AND intakeoutput.celllabel = 'oral' AND intakeoutput.cellpath LIKE '%intake%' GROUP BY strftime('%Y-%m-%d',intakeoutput.intakeoutputtime)",
            "pred_sql": "SELECT MAX(total_daily_dose) AS max_daily_dose FROM ( SELECT date(m.drugstarttime) AS drug_date, SUM(CAST(dosage AS REAL)) AS total_daily_dose FROM patient p JOIN medication m ON p.patientunitstayid = m.patientunitstayid WHERE p.uniquepid = '017-106093' AND m.routeadmin = 'po' AND p.hospitaldischargetime IS NOT NULL ORDER BY p.hospitaladmittime DESC LIMIT 1 GROUP BY date(m.drugstarttime));",
            "gold_answer": "[['500.0']]",
            "pred_answer": "near \"GROUP\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL not only has a syntax error (ORDER BY and LIMIT before GROUP BY) but more fundamentally it uses the medication table instead of intakeoutput and sums dosage rather than intake volumes. This changes the overall logic and structure of the query\u2014joining the wrong table and misplacing clauses\u2014so a simple local fix won\u2019t suffice; the query needs to be restructured to follow the intended logic.",
            "error_type": "Global: incorrect table usage/structure error"
        }
    },
    "macsql": {
        "afb33348ed88ccb48d19b5cb": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "How many days have passed since the first time patient 027-105098 received a lab test of hgb on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',lab.labresulttime) ) FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-105098' AND patient.hospitaldischargetime IS NULL ) ) AND lab.labname = 'hgb' ORDER BY lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT JULIANDAY('2100-12-31') - JULIANDAY(( SELECT MIN(`labresulttime`) FROM lab WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '027-105098' AND `hospitaldischargetime` IS NULL ) AND `labname` = 'hgb' )) AS days_passed",
            "gold_answer": "[['29.548']]",
            "pred_answer": "[['28.548']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s error is localized to how the current date is specified and used in the date arithmetic. It uses JULIANDAY('2100-12-31') (midnight) instead of the full timestamp '2100-12-31 23:59:00' as in the ground truth, causing about a one-day offset in the result. This is a value\u2010parsing/time\u2010specification mistake within the SELECT expression and does not alter the overall query structure or logic.",
            "error_type": "Local: other-temporal error"
        },
        "7c6e86c631d2d02c2573b329": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Count the number of patients diagnosed with metastatic lung ca who did not come to the hospital within 2 months this year.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'metastatic lung ca' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'metastatic lung ca' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 JOIN patient ON T2.uniquepid = patient.uniquepid WHERE T2.diagnosistime < patient.hospitaladmittime AND datetime(patient.hospitaladmittime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND datetime(patient.hospitaladmittime) BETWEEN datetime(T2.diagnosistime) AND datetime(T2.diagnosistime,'+2 month') )",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT DISTINCT T1.`uniquepid` FROM patient AS T1 INNER JOIN diagnosis AS T2 ON T1.`patientunitstayid` = T2.`patientunitstayid` WHERE T2.`diagnosisname` = 'metastatic lung ca' ) AS diagnosed_patients WHERE diagnosed_patients.`uniquepid` IN ( SELECT DISTINCT T1.`uniquepid` FROM patient AS T1 WHERE datetime(T1.`hospitaladmittime`) < datetime('2100-12-31 23:59:00', '-2 months') )",
            "gold_answer": "[['8.0']]",
            "pred_answer": "[['14.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the intended logic: it fails to restrict diagnoses to this year, ignores the 2\u2010month window relative to each diagnosis, and compares hospital admission time to a fixed 'now \u2013 2 months' instead of between diagnosis time and diagnosis time + 2 months. It also misuses the IN subquery on the patient table rather than joining diagnosis records for the correct time frame. These errors affect multiple clauses (SELECT, WHERE, JOIN) and require rewriting the overall query structure and logic.",
            "error_type": "Global: incorrect logical structure"
        }
    }
}