{
    "codes": {
        "5578185fae1372b4229faeb9": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many hours has it been since patient 006-106828 got their first prescription of dext 5%-nacl 0.45%-kcl 20meq lvp on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',medication.drugstarttime) ) FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-106828' ) AND patient.hospitaldischargetime IS NULL ) AND medication.drugname = 'dext 5%-nacl 0.45%-kcl 20meq lvp' ORDER BY medication.drugstarttime ASC LIMIT 1",
            "pred_sql": "SELECT strftime('%J', medication.drugstoptime) - strftime('%J', medication.drugstarttime) FROM patient INNER JOIN medication ON patient.patientunitstayid = medication.patientunitstayid WHERE patient.uniquepid = '006-106828'",
            "gold_answer": "[['29.85']]",
            "pred_answer": "[['0.001'], ['0.003'], ['0.008'], ['0.008'], ['0.008'], ['0.012'], ['0.018'], ['0.04'], ['0.062'], ['0.115'], ['0.211'], ['0.285'], ['0.722'], ['1.006'], ['1.09'], ['1.407'], ['1.407'], ['1.407'], ['1.407'], ['1.407'], ['1.979'], ['2.008'], ['2.008'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None'], ['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to implement multiple crucial aspects of the requested query: it uses drugstoptime instead of the current time, omits filtering for the specific drug name, does not restrict to the current hospital visit, does not select only the first prescription (no ORDER BY/LIMIT), and does not multiply the day difference by 24. These omissions and miscalculations affect the overall logic and structure of the query, requiring substantial restructuring rather than a simple fix in one clause.",
            "error_type": "Global: flawed logic and structure"
        },
        "e54f79288280b63cd8d19288": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Show me the four most common specimens tested in 2100.",
            "gold_sql": "SELECT T1.culturesite FROM ( SELECT microlab.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM microlab WHERE strftime('%Y',microlab.culturetakentime) = '2100' GROUP BY microlab.culturesite ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT labname FROM lab GROUP BY labname ORDER BY count(labname) DESC LIMIT 4",
            "gold_answer": "[['blood, venipuncture'], ['nasopharynx'], ['sputum, tracheal specimen'], ['urine, catheter specimen']]",
            "pred_answer": "[['bedside glucose'], ['glucose'], ['potassium'], ['sodium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls from the lab table and uses labname, whereas the question concerns specimen cultures (microlab.culturesite) tested in year 2100. This is due to referencing the wrong table and column, not a deeper structural or multi-clause logical error.",
            "error_type": "Local: table reference error"
        },
        "e62f5265a62839d76b9c6085": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Tell me the total number of patients in ward 1027 since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.wardid = 1027 AND datetime(patient.unitadmittime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(patientunitstayid) FROM patient WHERE wardid = 1027 AND hospitaldischargetime IS NOT NULL AND hospitaldischargetime > (TIMESTAMP 'now' - 365)",
            "gold_answer": "[['1.0']]",
            "pred_answer": "near \"'2100-12-31 23:59:00'\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL not only contains a syntax error with the TIMESTAMP 'now' expression (violating the time\u2010handling assumptions) but also fundamentally changes the query\u2019s logic. It filters on hospitaldischargetime instead of unitadmittime, requires hospitaldischargetime IS NOT NULL (thus excluding current patients), and counts patientunitstayid rather than distinct patient.uniquepid. These mistakes span multiple clauses and alter the intended meaning, so correcting it requires restructuring the WHERE clause and the SELECT aggregation, not just tweaking a single condition.",
            "error_type": "Global: logical error"
        },
        "f909c0386f0ae38828a9dc22": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When was the last time patient 033-11873 came to the hospital through floor?",
            "gold_sql": "SELECT patient.hospitaladmittime FROM patient WHERE patient.uniquepid = '033-11873' AND patient.hospitaladmitsource = 'floor' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1",
            "pred_sql": "SELECT patient.unitadmittime FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '033-11873' ORDER BY patient.unitadmittime DESC LIMIT 1",
            "gold_answer": "[['2100-03-07 21:45:00']]",
            "pred_answer": "[['2100-03-18 02:57:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by joining the wrong table (vitalperiodic) and using unitadmittime instead of hospitaladmittime. It also omits the required filter on hospitaladmitsource = 'floor' and the discharge check. This is not a minor syntax or reference mistake but a complete change of the intended structure and meaning.",
            "error_type": "Global: join error"
        },
        "900d73ff4784d0bc4c545e13": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Find the length in days of the first icu stay for patient 012-11370.",
            "gold_sql": "SELECT strftime('%J',patient.unitdischargetime) - strftime('%J',patient.unitadmittime) FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '012-11370' ) AND patient.unitadmittime IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%J', patient.hospitaladmittime) - strftime('%J', patient.unitadmittime)) - 1 FROM patient INNER JOIN icustay ON patient.patientunitstayid = icustay.patientunitstayid INNER JOIN vitalaperiodic ON icustay.patientunitstayid = vitalaperiodic.patientunitstayid WHERE patient.uniquepid = '012-11370' ORDER BY patient.hospitaladmittime LIMIT 1",
            "gold_answer": "[['5.401']]",
            "pred_answer": "no such table: icustay",
            "is_correct": false,
            "reasoning": "The predicted query references a non-existent table `icustay` (and even misspells `vitalperiodic` as `vitalaperiodic`), causing a missing table error. This is a straightforward table/column reference mistake localized to the FROM/JOIN clauses and does not require restructuring the entire query logic.",
            "error_type": "Local: table reference error"
        },
        "1db95e938a46a412595101d6": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Among patients in their 50s , what are the top five prescribed drugs in 2100?",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age BETWEEN 50 AND 59 ) AND strftime('%Y',medication.drugstarttime) = '2100' GROUP BY medication.drugname ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "SELECT medication.drugname FROM medication INNER JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE strftime('%Y', patient.admissiontime) = '2100' AND strftime('%Y', medication.drugstarttime) = '2100' AND patient.patientage >= '50' ORDER BY medication.drugname DESC LIMIT 5",
            "gold_answer": "[['1000 ml flex cont : sodium chloride 0.9 % iv soln'], ['acetaminophen'], ['fentanyl'], ['midazolam'], ['potassium chloride']]",
            "pred_answer": "no such column: patient.admissiontime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column patient.admissiontime, causing a 'no such column' error. This is a localized column reference mistake that does not alter the overall query structure or logic.",
            "error_type": "Local: table/column reference error"
        }
    },
    "macsql": {
        "c3b217adda54fe59d3548b45": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "In patients who had the enteral feeds - nutritional supplement this year, what are the four most common diagnoses that followed within the same month?",
            "gold_sql": "SELECT T3.diagnosisname FROM ( SELECT T2.diagnosisname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'enteral feeds - nutritional supplement' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, diagnosis.diagnosisname, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.diagnosistime AND datetime(T1.treatmenttime,'start of month') = datetime(T2.diagnosistime,'start of month') GROUP BY T2.diagnosisname ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT T2.diagnosisname FROM diagnosis AS T2 JOIN ( SELECT DISTINCT T1.patientunitstayid, T1.treatmenttime FROM treatment AS T1 WHERE T1.treatmentname = 'enteral feeds - nutritional supplement' AND datetime(T1.treatmenttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') ) AS T3 ON T2.patientunitstayid = T3.patientunitstayid AND datetime(T2.diagnosistime, 'start of month') = datetime(T3.treatmenttime, 'start of month') GROUP BY T2.diagnosisname ORDER BY COUNT(*) DESC LIMIT 4",
            "gold_answer": "[['acute respiratory distress'], ['anemia'], ['chronic respiratory failure'], ['copd'], ['dysphagia'], ['encephalopathy - post-anoxic'], ['esophagitis - regurgitant'], ['hyperglycemia'], ['hyperlipidemia'], ['neuropathy - polyneuropathy'], ['obesity - morbid'], ['pneumonia'], ['seizures'], ['sepsis']]",
            "pred_answer": "[['chronic respiratory failure'], ['hyperglycemia'], ['obesity - morbid'], ['sepsis']]",
            "is_correct": false,
            "reasoning": "The predicted query omits the crucial condition that the diagnosis must occur after the enteral feed treatment (T1.treatmenttime < T2.diagnosistime). It also joins on patientunitstayid instead of uniquepid, altering the patient grouping logic. These omissions fundamentally change which diagnoses are counted and require restructuring the query logic, not just a minor fix in one clause.",
            "error_type": "Global: incorrect join logic"
        },
        "71e4c636dcade292123624ea": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "What is the number of patients in ward 505 since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.wardid = 505 AND strftime('%Y',patient.unitadmittime) >= '2100'",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT DISTINCT `uniquepid` FROM `patient` WHERE `wardid` = 505 AND datetime(`hospitaladmittime`) >= datetime('2100-01-01 00:00:00') ) AS unique_patients",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[['12.0']]",
            "is_correct": false,
            "reasoning": "The predicted query filters on hospitaladmittime instead of unitadmittime (the date of ICU/ward admission) as required by the question. This is a local mistake in the WHERE clause\u2014using the wrong date column\u2014without altering the overall structure or logic of the query.",
            "error_type": "Local: column reference error"
        }
    },
    "qwen3": {
        "ead4af74ff9f344c2ee4482b": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "On the first hospital visit, what was patient 017-100472's last procedure time?",
            "gold_sql": "SELECT treatment.treatmenttime FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '017-100472' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) ORDER BY treatment.treatmenttime DESC LIMIT 1",
            "pred_sql": "WITH first_admission AS ( SELECT patientunitstayid, hospitaladmittime, hospitaldischargetime FROM patient WHERE uniquepid = '017-100472' ORDER BY hospitaladmittime ASC LIMIT 1 ) SELECT MAX(event_time) AS last_procedure_time FROM ( SELECT treatmenttime AS event_time FROM treatment WHERE patientunitstayid = (SELECT patientunitstayid FROM first_admission) AND treatmenttime BETWEEN (SELECT hospitaladmittime FROM first_admission) AND (SELECT hospitaldischargetime FROM first_admission) UNION ALL SELECT labresulttime AS event_time FROM lab WHERE patientunitstayid = (SELECT patientunitstayid FROM first_admission) AND labresulttime BETWEEN (SELECT hospitaladmittime FROM first_admission) AND (SELECT hospitaldischargetime FROM first_admission) UNION ALL SELECT drugstarttime AS event_time FROM medication WHERE patientunitstayid = (SELECT patientunitstayid FROM first_admission) AND drugstarttime BETWEEN (SELECT hospitaladmittime FROM first_admission) AND (SELECT hospitaldischargetime FROM first_admission) UNION ALL SELECT drugstoptime AS event_time FROM medication WHERE patientunitstayid = (SELECT patientunitstayid FROM first_admission) AND drugstoptime BETWEEN (SELECT hospitaladmittime FROM first_admission) AND (SELECT hospitaldischargetime FROM first_admission) UNION ALL SELECT culturetakentime AS event_time FROM microlab WHERE patientunitstayid = (SELECT patientunitstayid FROM first_admission) AND culturetakentime BETWEEN (SELECT hospitaladmittime FROM first_admission) AND (SELECT hospitaldischargetime FROM first_admission) ) AS all_events;",
            "gold_answer": "[['2100-02-16 07:45:00']]",
            "pred_answer": "[['2100-02-20 17:47:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic in two major ways. First, its CTE for the first admission does not enforce that the visit be completed (hospitaldischargetime IS NOT NULL), so it may pick the wrong stay. Second, it reinterprets \u201cprocedure\u201d to include labs, medications, and microlabs by unioning multiple tables and taking the max timestamp, rather than returning the last treatment time alone. These changes span multiple clauses and restructure the query\u2019s intended meaning, making this a global error.",
            "error_type": "Global: incorrect query logic"
        },
        "acc3a9c211f42e9d6d154a20": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What are the top five diagnoses associated with the highest 3-month mortality rate?",
            "gold_sql": "SELECT T4.diagnosisname FROM ( SELECT T3.diagnosisname, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C2 FROM ( SELECT T2.diagnosisname, AVG(C1) AS C2 FROM ( SELECT T1.diagnosisname, ( CASE WHEN (patient.hospitaldischargestatus = 'expired') AND(strftime('%J',patient.hospitaldischargetime) - strftime('%J',T1.diagnosistime)) < 365 / 4 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosisname, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) >= 365 / 4 GROUP BY patient.uniquepid, diagnosis.diagnosisname HAVING MIN(diagnosis.diagnosistime) = diagnosis.diagnosistime ) AS T1 JOIN patient ON T1.uniquepid = patient.uniquepid ) AS T2 GROUP BY T2.diagnosisname ) AS T3 ) AS T4 WHERE T4.C2 <= 5",
            "pred_sql": "SELECT d.diagnosisname FROM diagnosis d JOIN patient p ON d.patientunitstayid = p.patientunitstayid WHERE p.hospitaldischargestatus IS NOT NULL AND p.hospitaldischargetime IS NOT NULL GROUP BY d.diagnosisname ORDER BY SUM(CASE WHEN p.hospitaldischargestatus = 'expired' AND julianday(p.hospitaldischargetime) <= julianday(d.diagnosistime) + 91.25 THEN 1 ELSE 0 END) * 1.0 / COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['acute copd exacerbation - with baseline hypoxemia'], ['acute lung injury'], ['acute renal failure - due to sepsis'], ['acute renal failure - etiology unknown'], ['anuria'], ['ards - pulmonary etiology'], ['arterial thromboembolism'], ['arterial thromboembolism - kidney'], ['av block - pacemaker in place, pacing and capturing'], ['bacteremia - gram negative rod'], ['bacteremia - respiratory source'], ['biliary obstruction - cholelithiasis'], ['bone fracture(s) - right upper extremity'], ['cardiac arrest - initial rhythm: asystole'], ['cardiac arrest - initial rhythm: pulseless electrical activity'], ['cardiac arrest - initial rhythm: ventricular tachycardia'], ['cardiac arrest - un-witnessed'], ['cellulitis - abdomen/pelvis'], ['cerebral subdural hematoma - secondary to anticoagulation'], ['cholecystitis - acute'], ['congestive heart failure - left-sided'], ['dvt - left upper extremity'], ['dvt - right lower extremity'], ['dvt - right upper extremity'], ['encephalopathy - post-anoxic'], ['encephalopathy - toxic'], ['enteritis - colitis, presumed infectious'], ['facial bone fracture - maxillary fracture'], ['facial bone fracture - orbital fracture'], ['gastric ca'], ['gi obstruction / ileus'], ['hepatic encephalopathy'], ['hyperglycemia - corticosteroid related'], ['hypernatremia - severe (> 155 meq/dl)'], ['hypochloremia'], ['hypothermia'], ['hypovolemic shock - hemorrhagic'], ['intracranial injury'], ['liver ca'], ['melanoma - nodular'], ['mouth and jaw tumor - oral cavity squamous cell ca'], ['myopathy - malignant hyperthermia'], ['neck tumor'], ['necrotizing fasciitis - abdomen/pelvis'], ['peripheral vascular ischemia - atherosclerotic thrombo-occlusion'], ['peripheral vascular ischemia - postoperative'], ['pleural effusion - due to heart failure'], ['pneumothorax - spontaneous'], ['s/p aneurysm resection/repair abdominal aorta'], ['seizures - from etoh withdrawal'], ['seizures - from ischemia/infarction'], ['sepsis - sepsis with multi-organ dysfunction'], ['sepsis - sepsis with single organ dysfunction- circulatory system failure'], ['septic shock - cultures pending'], ['shock, etiology unclear'], ['thrombocytopenia - primary hematologic malignancy'], ['valve repair >= 7 days - aortic / mitral'], ['vascular insufficiency of intestine - due to extrinsic compression'], ['ventricular tachycardia - with hemodynamic compromise']]",
            "pred_answer": "[['thrombocytopenia - primary hematologic malignancy'], ['valve repair >= 7 days - aortic / mitral'], ['vascular insufficiency of intestine'], ['vascular insufficiency of intestine - due to extrinsic compression'], ['ventricular tachycardia - with hemodynamic compromise']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the logic for computing 3-month mortality. Instead of using each patient\u2019s first diagnosis and ensuring at least 3 months of follow-up, it simply aggregates over all diagnosis rows, fails to filter out diagnoses occurring within the last 3 months, and uses COUNT(*) of diagnosis records rather than distinct patients as the denominator. These mistakes span multiple clauses (WHERE, GROUP BY, ORDER BY) and change the intended meaning of the query. Fixing this would require restructuring the query to first select one diagnosis per patient and enforce the follow-up window, not merely tweaking a single clause.",
            "error_type": "Global: flawed mortality rate calculation"
        },
        "8698359fd57df45989fcd9a0": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What are the five most commonly prescribed drugs for patients diagnosed with chronic kidney disease previously within the same hospital visit, since 1 year ago?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime, patient.patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'chronic kidney disease' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime, patient.patienthealthsystemstayid FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE datetime(medication.drugstarttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.drugstarttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT drugname FROM ( SELECT m.drugname, COUNT(*) AS cnt FROM diagnosis d JOIN medication m ON d.patientunitstayid = m.patientunitstayid WHERE d.diagnosisname = 'chronic kidney disease' AND d.diagnosistime >= datetime('2100-12-31 23:59:00', '-1 year') AND m.drugstarttime > d.diagnosistime GROUP BY m.drugname ORDER BY cnt DESC LIMIT 5 );",
            "gold_answer": "[['carvedilol'], ['furosemide'], ['glycopyrrolate'], ['hydrocodone-acetaminophen'], ['lorazepam'], ['morphine']]",
            "pred_answer": "[['carvedilol'], ['furosemide'], ['glycopyrrolate'], ['hydrocodone-acetaminophen'], ['morphine']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only joins diagnosis and medication on patientunitstayid and does not ensure that the diagnosis and subsequent medication prescriptions occurred within the same hospital visit (patienthealthsystemstayid). The ground truth query explicitly joins through patient.patienthealthsystemstayid to enforce the same hospital admission and uses DENSE_RANK to pick the top five by frequency. The predicted query\u2019s missing join on patienthealthsystemstayid changes the overall logic and data associations, leading to incorrect drug counts and the wrong fifth drug. Fixing this requires restructuring the join logic to include hospital visit linkage, not just a local tweak.",
            "error_type": "Global: join-relationship error"
        }
    },
    "deepseek": {
        "7c21a831254af88eb13b28b7": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Can you get me the top three diagnoses that have the highest 6-month mortality rate?",
            "gold_sql": "SELECT T4.diagnosisname FROM ( SELECT T3.diagnosisname, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C2 FROM ( SELECT T2.diagnosisname, AVG(C1) AS C2 FROM ( SELECT T1.diagnosisname, ( CASE WHEN (patient.hospitaldischargestatus = 'expired') AND(strftime('%J',patient.hospitaldischargetime) - strftime('%J',T1.diagnosistime)) < 365 / 2 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosisname, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) >= 365 / 2 GROUP BY patient.uniquepid, diagnosis.diagnosisname HAVING MIN(diagnosis.diagnosistime) = diagnosis.diagnosistime ) AS T1 JOIN patient ON T1.uniquepid = patient.uniquepid ) AS T2 GROUP BY T2.diagnosisname ) AS T3 ) AS T4 WHERE T4.C2 <= 3",
            "pred_sql": "WITH mortality AS ( SELECT d.diagnosisname, COUNT(CASE WHEN p.hospitaldischargestatus = 'expired' AND p.hospitaldischargetime <= DATE(d.diagnosistime, '+182 day') THEN 1 END) AS deaths, COUNT(*) AS total FROM diagnosis d JOIN patient p ON d.patientunitstayid = p.patientunitstayid GROUP BY d.diagnosisname ) SELECT diagnosisname, CAST(deaths AS REAL) / total AS mortality_rate FROM mortality ORDER BY mortality_rate DESC LIMIT 3;",
            "gold_answer": "[['anuria'], ['ards - pulmonary etiology'], ['av block - pacemaker in place, pacing and capturing'], ['bacteremia - gram negative rod'], ['bacteremia - respiratory source'], ['cardiac arrest - initial rhythm: pulseless electrical activity'], ['cardiac arrest - initial rhythm: ventricular tachycardia'], ['cardiac arrest - un-witnessed'], ['cholecystitis - acute'], ['dvt - left upper extremity'], ['dvt - right upper extremity'], ['encephalopathy - toxic'], ['facial bone fracture - maxillary fracture'], ['facial bone fracture - orbital fracture'], ['gastric ca'], ['hyperglycemia - corticosteroid related'], ['hypernatremia - moderate (146 - 155 meq/dl)'], ['hypokalemia - moderate ( 2.8 - 3.5 meq/dl)'], ['hypothermia'], ['liver ca'], ['melanoma - nodular'], ['mouth and jaw tumor - oral cavity squamous cell ca'], ['peripheral vascular ischemia - atherosclerotic thrombo-occlusion'], ['peripheral vascular ischemia - postoperative'], ['pleural effusion - due to heart failure'], ['pneumothorax - spontaneous'], ['seizures - from ischemia/infarction'], ['sepsis - sepsis with single organ dysfunction- circulatory system failure'], ['shock, etiology unclear'], ['valve repair >= 7 days - aortic / mitral'], ['vascular insufficiency of intestine - due to extrinsic compression'], ['ventricular tachycardia - with hemodynamic compromise']]",
            "pred_answer": "[['acute copd exacerbation - with baseline hypoxemia', '1.0'], ['anal ca', '1.0'], ['anuria', '1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the logic for computing 6-month mortality: it counts all diagnosis rows (not first diagnoses per patient), does not filter out diagnoses within the last six months (so some patients lack full follow-up), and aggregates raw counts of events rather than computing the patient-level survival rate as specified. It also returns both the rate and name and orders by deaths/total, rather than ranking by average survival and selecting the top three diagnoses. Fixing this requires reworking multiple clauses (subquery, filtering by time, grouping, ranking), not just a minor tweak. Therefore this is a global error.",
            "error_type": "Global: incorrect mortality calculation and grouping logic"
        },
        "6e9ba38f419c202ab212ef98": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Compared to yesterday, what new drugs are present in the prescription of patient 015-100025 today?",
            "gold_sql": "SELECT medication.drugname FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.uniquepid = '015-100025' ) AND datetime(medication.drugstarttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') EXCEPT SELECT medication.drugname FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.uniquepid = '015-100025' ) AND datetime(medication.drugstarttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day')",
            "pred_sql": "SELECT DISTINCT m1.drugname FROM medication m1 JOIN patient p1 ON m1.patientunitstayid = p1.patientunitstayid WHERE p1.uniquepid = '015-100025' AND datetime(m1.drugstarttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-0 day') AND m1.drugname NOT IN ( SELECT DISTINCT m2.drugname FROM medication m2 JOIN patient p2 ON m2.patientunitstayid = p2.patientunitstayid WHERE p2.uniquepid = '015-100025' AND datetime(m2.drugstarttime, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-1 day') );",
            "gold_answer": "[['1 ml : heparin sodium (porcine) 5000 unit/ml ij soln'], ['1 ml vial : heparin sodium (porcine) 1000 unit/ml ij soln'], ['100 ml flex cont : sodium chloride 0.9 % iv soln'], ['acetaminophen 325 mg po tabs'], ['alteplase 2 mg ij solr'], ['aspirin 81 mg po chew'], ['atorvastatin calcium 40 mg po tabs'], ['carvedilol 25 mg po tabs'], ['diphenhydramine hcl 25 mg po caps'], ['famotidine 20 mg po tabs'], ['nitroglycerin 0.4 mg sl subl'], ['oxycodone hcl 5 mg po tabs'], ['prednisone 10 mg po tabs'], ['prednisone 20 mg po tabs'], ['prednisone 5 mg po tabs'], ['sodium chloride 0.9 % ij soln']]",
            "pred_answer": "[['1 ml : heparin sodium (porcine) 5000 unit/ml ij soln'], ['1 ml vial : heparin sodium (porcine) 1000 unit/ml ij soln'], ['100 ml flex cont : sodium chloride 0.9 % iv soln'], ['100 ml flex cont : sodium chloride 0.9 % iv soln'], ['acetaminophen 325 mg po tabs'], ['alteplase 2 mg ij solr'], ['aspirin 81 mg po chew'], ['atorvastatin calcium 40 mg po tabs'], ['carvedilol 25 mg po tabs'], ['diphenhydramine hcl 25 mg po caps'], ['famotidine 20 mg po tabs'], ['nitroglycerin 0.4 mg sl subl'], ['oxycodone hcl 5 mg po tabs'], ['prednisone 10 mg po tabs'], ['prednisone 20 mg po tabs'], ['prednisone 5 mg po tabs'], ['sodium chloride 0.9 % ij soln']]",
            "is_correct": false,
            "reasoning": "The user\u2019s predicted SQL mirrors the ground\u2010truth logic (finding today\u2019s drugs not in yesterday\u2019s list) and even uses a NOT IN subquery instead of EXCEPT. The only discrepancy is a duplicate row in the output, indicating a missing or misapplied DISTINCT removal. This is a confined, clause\u2010level issue and does not alter the overall join or filtering logic of the query. Thus it is a local error.",
            "error_type": "Local: missing-distinct error"
        },
        "e7377d5873adc22bdb17fa78": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Count the number of patients who were tested in 2100 for glucose - csf.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT lab.patientunitstayid FROM lab WHERE lab.labname = 'glucose - csf' AND strftime('%Y',lab.labresulttime) = '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT patientunitstayid) FROM lab WHERE labname = \"glucose - csf\" AND strftime('%Y', labresulttime) = \"2100\";",
            "gold_answer": "[['15.0']]",
            "pred_answer": "[['16.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts distinct patientunitstayid in the lab table, effectively counting unique visits where 'glucose - csf' was tested, rather than counting distinct patients. The ground truth joins through the patient table and counts DISTINCT patient.uniquepid, ensuring each patient is only counted once even if they had multiple unit stays. This mistake changes the fundamental logic of what is being counted (visits vs. patients) and cannot be fixed by a small localized edit\u2014it requires restructuring the query to join the patient table and count patient.uniquepid instead of patientunitstayid.",
            "error_type": "Global: wrong-entity-counting"
        }
    },
    "o4-mini": {
        "26cc7b020c66234c64fc04de": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Since 2100, how many patients died after having been diagnosed with mouth and jaw tumor - oral cavity squamous cell ca within the same month?",
            "gold_sql": "SELECT COUNT( DISTINCT T2.uniquepid ) FROM ( SELECT T1.uniquepid, T1.diagnosistime FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'mouth and jaw tumor - oral cavity squamous cell ca' ) AS T1 GROUP BY T1.uniquepid HAVING MIN(T1.diagnosistime) = T1.diagnosistime AND strftime('%Y',T1.diagnosistime) >= '2100' ) AS T2 JOIN ( SELECT patient.uniquepid, patient.patienthealthsystemstayid, patient.hospitaldischargetime FROM patient WHERE patient.hospitaldischargestatus = 'expired' ) as T3 ON T2.uniquepid = T3.uniquepid AND datetime(T2.diagnosistime,'start of month') = datetime(T3.hospitaldischargetime,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT p.uniquepid) AS num_patients FROM diagnosis d JOIN patient p ON d.patientunitstayid = p.patientunitstayid WHERE d.diagnosistime >= '2100-01-01' AND d.diagnosisname = 'mouth and jaw tumor - oral cavity squamous cell ca' AND p.hospitaldischargestatus = 'expired' AND p.hospitaldischargetime > d.diagnosistime AND strftime('%Y-%m', d.diagnosistime) = strftime('%Y-%m', p.hospitaldischargetime');",
            "gold_answer": "[['1.0']]",
            "pred_answer": "unrecognized token: \"');\"",
            "is_correct": false,
            "reasoning": "The failure \u201cunrecognized token: \"');\"\u201d indicates a small syntax mistake (an extra or misplaced quote/parenthesis) in the predicted SQL\u2019s WHERE clause, specifically around the strftime call. The underlying logic (joining diagnosis to patient, filtering by date, expired status, same month) is intact; only a local syntax/parsing error prevents execution. Therefore this is a Local error.",
            "error_type": "Local: syntax error"
        },
        "c408e0d5803d34c692a5b804": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "What are the five most frequent lab tests that patients had within 2 months after receiving therapeutic antibacterials - empiric antibacterial coverage?",
            "gold_sql": "SELECT T3.labname FROM ( SELECT T2.labname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'therapeutic antibacterials - empiric antibacterial coverage' ) AS T1 JOIN ( SELECT patient.uniquepid, lab.labname, lab.labresulttime FROM lab JOIN patient ON lab.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.labresulttime AND datetime(T2.labresulttime) BETWEEN datetime(T1.treatmenttime) AND datetime(T1.treatmenttime,'+2 month') GROUP BY T2.labname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT lab.labname, COUNT(*) AS frequency FROM treatment JOIN lab ON treatment.patientunitstayid = lab.patientunitstayid AND datetime(lab.labresulttime) > datetime(treatment.treatmenttime) AND datetime(lab.labresulttime) <= datetime(treatment.treatmenttime, '+2 months') WHERE treatment.treatmentname = 'therapeutic antibacterials - empiric antibacterial coverage' GROUP BY lab.labname ORDER BY frequency DESC LIMIT 5;",
            "gold_answer": "[['bedside glucose'], ['bicarbonate'], ['bun'], ['calcium'], ['chloride'], ['creatinine'], ['glucose'], ['potassium'], ['sodium']]",
            "pred_answer": "[['bedside glucose', '374.0'], ['creatinine', '113.0'], ['glucose', '113.0'], ['potassium', '128.0'], ['sodium', '113.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly filters for the treatment name, joins lab records, and applies the two-month time window. However, it returns both the lab name and its frequency count, whereas the question (and the assumptions) require only the lab names for the top five tests, excluding their counts. This is a confined issue in the SELECT clause that doesn\u2019t alter the overall logic of the query.",
            "error_type": "Local: wrong select columns"
        }
    },
    "llm2sql": {
        "ee5ae34a56ec77cfa4217ecd": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "What are the top four most frequent diagnoses that patients are given within the same hospital visit after being diagnosed with ventricular premature beats since 1 year ago?",
            "gold_sql": "SELECT T3.diagnosisname FROM ( SELECT T2.diagnosisname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime, patient.patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'ventricular premature beats' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, diagnosis.diagnosisname, diagnosis.diagnosistime, patient.patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.diagnosistime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.diagnosisname ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH initial_diagnosis AS ( SELECT D1.patientunitstayid, D1.diagnosistime FROM diagnosis AS D1 WHERE D1.diagnosisname = 'ventricular premature beats' AND datetime(D1.diagnosistime) >= datetime('2100-12-31 23:59:00', '-1 year') ), subsequent_diagnoses AS ( SELECT D2.diagnosisname FROM initial_diagnosis AS ID INNER JOIN diagnosis AS D2 ON ID.patientunitstayid = D2.patientunitstayid WHERE datetime(D2.diagnosistime) > datetime(ID.diagnosistime) AND D2.diagnosisname <> 'ventricular premature beats' ) SELECT D2.diagnosisname, COUNT(D2.diagnosisname) AS frequency FROM subsequent_diagnoses AS D2 GROUP BY D2.diagnosisname ORDER BY frequency DESC LIMIT 4",
            "gold_answer": "[['atrial flutter'], ['bipolar disorder'], ['breast ca'], ['cardiomyopathy'], ['coagulopathy - coumadin administration'], ['congestive heart failure'], ['copd'], ['coronary artery disease - known'], ['hyperlipidemia'], ['obstructive sleep apnea'], ['ventricular fibrillation'], ['ventricular tachycardia']]",
            "pred_answer": "[['congestive heart failure', '13.0'], ['obstructive sleep apnea', '13.0'], ['ventricular fibrillation', '13.0'], ['ventricular tachycardia', '13.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to enforce the \u201csame hospital visit\u201d requirement by joining only on patientunitstayid rather than patienthealthsystemstayid, fundamentally altering which diagnoses are grouped together. It also introduces a COUNT in the final SELECT, violating the assumption that only the diagnosis names should be returned for a top-N list. These issues go beyond a simple typo or syntax mistake in one clause and change the overall logic of the query, requiring significant restructuring to correct.",
            "error_type": "Global: incorrect join and output structure"
        }
    },
    "dinsql": {
        "47d54fff12028659efa8b8b1": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "This year, what are the top three most common drugs prescribed within the same hospital visit to male patients aged 60 or above after being diagnosed with delirium?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime, patient.patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'delirium' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime, patient.patienthealthsystemstayid FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.gender = 'male' AND patient.age >= 60 AND datetime(medication.drugstarttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.drugstarttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "WITH DeliriumPatients AS ( SELECT DISTINCT P.patientunitstayid, P.patienthealthsystemstayid FROM patient AS P INNER JOIN diagnosis AS D ON P.patientunitstayid = D.patientunitstayid WHERE P.gender = 'male' AND CAST(P.age AS INTEGER) >= 60 AND D.diagnosisname = 'delirium' AND datetime(D.diagnosistime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') ), PrescribedDrugs AS ( SELECT M.drugname FROM DeliriumPatients AS DP INNER JOIN medication AS M ON DP.patientunitstayid = M.patientunitstayid WHERE datetime(M.drugstarttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') AND DP.patienthealthsystemstayid = (SELECT patienthealthsystemstayid FROM patient WHERE patientunitstayid = M.patientunitstayid) ) SELECT DISTINCT drugname FROM ( SELECT drugname, DENSE_RANK() OVER (ORDER BY COUNT(*) DESC) AS rank FROM PrescribedDrugs GROUP BY drugname ) AS RankedDrugs WHERE rank <= 3",
            "gold_answer": "[['dextrose 50% adult'], ['magnesium oxide'], ['sodium chloride 0.9%']]",
            "pred_answer": "[['dextrose 50% adult'], ['lorazepam'], ['sodium chloride 0.9%']]",
            "is_correct": false,
            "reasoning": "The predicted SQL builds two CTEs to identify male delirium patients and their prescriptions in the same year, then ranks the drugs. However, it never enforces that the drugstarttime occurs after the delirium diagnosistime. In the ground truth SQL, there is a WHERE clause T1.diagnosistime < T2.drugstarttime to ensure prescriptions happen after diagnosis. Omitting this temporal join condition fundamentally changes which drugs are counted and thus alters the intended result. This is not a simple typo or localized syntax issue but a missing logical constraint spanning the diagnosis\u2013medication relationship.",
            "error_type": "Global: missing temporal join condition"
        },
        "ce170c7abf96e8b47789c54d": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "When was patient 016-10564 first discharged from the hospital?",
            "gold_sql": "SELECT patient.hospitaldischargetime FROM patient WHERE patient.uniquepid = '016-10564' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaldischargetime ASC LIMIT 1",
            "pred_sql": "MIN(patient.hospitaldischargetime) AS first_discharge_time FROM patient WHERE patient.uniquepid = '016-10564' AND patient.hospitaldischargetime IS NOT NULL",
            "gold_answer": "[['2100-01-17 13:49:52']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the SELECT keyword before the aggregate function, making it a localized syntax/SELECT clause error. The overall query structure (finding the earliest discharge time) is correct, but the missing SELECT renders it invalid. This is a local syntax error that can be fixed without changing the query\u2019s logic.",
            "error_type": "Local: syntax error"
        },
        "a08d68de4bba22b9d544a0e0": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "What is the daily maximum level of patient 006-100338's paco2 in 2100?",
            "gold_sql": "SELECT MAX(lab.labresult) FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-100338' ) ) AND lab.labname = 'paco2' AND strftime('%Y',lab.labresulttime) = '2100' GROUP BY strftime('%Y-%m-%d',lab.labresulttime)",
            "pred_sql": "SELECT strftime('%Y-%m-%d', labresulttime) AS date, MAX(labresult) AS max_paco2 FROM lab WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '006-100338' ) AND labname = 'paco2' AND strftime('%Y', labresulttime) = '2100' GROUP BY date",
            "gold_answer": "[['31.0'], ['35.0'], ['36.0'], ['37.0'], ['39.0'], ['41.0'], ['45.0']]",
            "pred_answer": "[['2100-09-08', '45.0'], ['2100-09-09', '41.0'], ['2100-09-10', '39.0'], ['2100-09-11', '37.0'], ['2100-09-12', '35.0'], ['2100-09-13', '36.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly restricts lab records to a single patientunitstayid derived directly from uniquepid, whereas the ground truth uses the patienthealthsystemstayid to include all relevant unit stays for that patient. This mistake affects the core logic of how patient records are associated across tables, altering the intended dataset. Fixing it would require changing the join/filter strategy rather than a simple typo or minor clause adjustment.",
            "error_type": "Global: join relationship error"
        }
    }
}