{
    "qwen3": {
        "faf27b3469012cf43bdd7181": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "When did patient 002-10915 enter the hospital the first time via icu to sdu?",
            "gold_sql": "SELECT patient.hospitaladmittime FROM patient WHERE patient.uniquepid = '002-10915' AND patient.hospitaladmitsource = 'icu to sdu' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1",
            "pred_sql": "SELECT p1.hospitaladmittime FROM patient p1 JOIN patient p2 ON p1.patienthealthsystemstayid = p2.patienthealthsystemstayid WHERE p1.uniquepid = '002-10915' AND p2.uniquepid = '002-10915' AND p1.wardid = 103 AND p2.wardid = 82 AND p1.unitadmittime < p2.unitadmittime ORDER BY p1.hospitaladmittime LIMIT 1;",
            "gold_answer": "[['2100-06-17 11:06:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally restructures the logic by using a self-join on the patient table with wardid filters instead of directly filtering hospitaladmitsource = 'icu to sdu' and checking discharge time. It omits the required hospitaladmitsource condition and introduces an incorrect join on patienthealthsystemstayid and wardid values, altering the intended query meaning. This is not a fixable typo in one clause but a flawed join strategy that changes the overall logic.",
            "error_type": "Global: flawed-join error"
        },
        "4bb4cea94cf2fb2ae68eee36": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What are the three most commonly taken specimens that patients are tested within the same month after receiving a sedative agent - propofol procedure since 1 year ago?",
            "gold_sql": "SELECT T3.culturesite FROM ( SELECT T2.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'sedative agent - propofol' AND datetime(treatment.treatmenttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, microlab.culturesite, microlab.culturetakentime FROM microlab JOIN patient ON microlab.patientunitstayid = patient.patientunitstayid WHERE datetime(microlab.culturetakentime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.culturetakentime AND datetime(T1.treatmenttime,'start of month') = datetime(T2.culturetakentime,'start of month') GROUP BY T2.culturesite ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT lab.labname FROM medication JOIN lab ON medication.patientunitstayid = lab.patientunitstayid WHERE medication.drugname LIKE '%propofol%' AND medication.drugstarttime >= datetime('2100-12-31 23:59:00', '-1 year') AND datetime(lab.labresulttime, 'start of month') = datetime(medication.drugstarttime, 'start of month') AND lab.labresulttime >= medication.drugstarttime GROUP BY lab.labname ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['blood, venipuncture']]",
            "pred_answer": "[['bedside glucose'], ['glucose'], ['sodium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the medication and lab tables instead of the treatment and microlab tables. It matches on medication.drugname LIKE '%propofol%' and lab.labresulttime rather than treatment.treatmentname = 'sedative agent - propofol' and microlab.culturetakentime, and it selects lab.labname instead of microlab.culturesite. This is not a minor syntax or column name mistake but a fundamental change in the query\u2019s logic and table relationships, requiring restructuring of the entire query to achieve the intended result.",
            "error_type": "Global: incorrect table join/structure"
        },
        "a5f9250cdf485bbb7612f00d": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "Tell me the difference in the systolic blood pressure value for patient 027-100313 last measured on the first icu visit compared to the first value measured on the first icu visit?",
            "gold_sql": "SELECT ( SELECT vitalperiodic.systemicsystolic FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-100313' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1 ) AND vitalperiodic.systemicsystolic IS NOT NULL ORDER BY vitalperiodic.observationtime DESC LIMIT 1 ) - ( SELECT vitalperiodic.systemicsystolic FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-100313' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1 ) AND vitalperiodic.systemicsystolic IS NOT NULL ORDER BY vitalperiodic.observationtime ASC LIMIT 1 )",
            "pred_sql": "SELECT (SELECT systemicsystolic FROM vitalperiodic WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '027-100313' ORDER BY unitadmittime ASC LIMIT 1 ) ORDER BY observationtime DESC LIMIT 1) - (SELECT systemicsystolic FROM vitalperiodic WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '027-100313' ORDER BY unitadmittime ASC LIMIT 1 ) ORDER BY observationtime ASC LIMIT 1) AS difference;",
            "gold_answer": "[['28.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to identify the same first ICU visit that the ground truth uses. It directly picks the first patientunitstayid by uniquepid without filtering for a completed ICU stay (unitdischargetime IS NOT NULL) or joining on patienthealthsystemstayid, leading to the wrong ICU visit (and thus no valid systolic values). This mis-selection is not a minor typo or missing alias but a fundamental logic flaw that spans the subquery structure and the overall meaning of the query.",
            "error_type": "Global: incorrect visit selection"
        }
    },
    "codes": {
        "e6f31fd573d77d5e1ae39ef3": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Tell me the total number of patients who were diagnosed with leukocytosis - leukemoid reaction since 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT diagnosis.patientunitstayid FROM diagnosis WHERE diagnosis.diagnosisname = 'leukocytosis - leukemoid reaction' AND strftime('%Y',diagnosis.diagnosistime) >= '2100' )",
            "pred_sql": "SELECT count(DISTINCT patientunitstayid) FROM diagnosis WHERE icd9code IN ( '5188', '5189', '5190', '5191', '5192', '5193', '5194', '5195' )",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query logic: it filters by icd9code instead of diagnosisname, omits the time constraint (since 2100), and counts directly from the diagnosis table rather than linking to the patient table for unique patients. These errors span multiple clauses (SELECT, WHERE) and alter the intended meaning, requiring significant restructuring to correct.",
            "error_type": "Global: logical inconsistency"
        },
        "a99eb67257698883467f88f8": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the length of stay in days for patient 017-101105's first hospital visit?",
            "gold_sql": "SELECT strftime('%J',patient.hospitaldischargetime) - strftime('%J',patient.hospitaladmittime) FROM patient WHERE patient.uniquepid = '017-101105' AND patient.hospitaladmittime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1",
            "pred_sql": "SELECT strftime('%J', patient.hospitaldischargetime) - strftime('%J', patient.hospitaladmittime) AS los FROM patient WHERE patientunitstayid = '017-101105'",
            "gold_answer": "[['1.536']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on patientunitstayid instead of uniquepid, using the wrong column to identify the patient. This is a localized mistake in the WHERE clause (column reference error) and can be fixed without restructuring the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "bf399a0a344947fcaf45438b": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When was patient 032-10119's last hospital visit?",
            "gold_sql": "SELECT patient.hospitaladmittime FROM patient WHERE patient.uniquepid = '032-10119' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1",
            "pred_sql": "SELECT patient.hospitaldischargedatetime FROM patient INNER JOIN cost ON patient.patientunitstayid = cost.patientunitstayid WHERE patient.uniquepid = '032-10119' ORDER BY patient.hospitaladmittime DESC LIMIT 1",
            "gold_answer": "[['2100-10-20 06:21:08']]",
            "pred_answer": "no such column: patient.hospitaldischargedatetime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column hospitaldischargedatetime and even includes an unnecessary JOIN to the cost table. The primary failure is a bad column reference (patient.hospitaldischargedatetime does not exist in the schema). This is a localized mistake that doesn\u2019t alter the overall query structure or intended logic beyond the wrong column name.",
            "error_type": "Local: table/column reference error"
        }
    },
    "macsql": {
        "862e1b50da063e296890ae8c": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Is the bedside glucose level of patient 022-105102 last measured on the last hospital visit greater than the first measurement on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '022-105102' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AND lab.labname = 'bedside glucose' ORDER BY lab.labresulttime DESC LIMIT 1 ) > ( SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '022-105102' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AND lab.labname = 'bedside glucose' ORDER BY lab.labresulttime ASC LIMIT 1 )",
            "pred_sql": "WITH GlucoseMeasurements AS ( SELECT `labresult`, `labresulttime` FROM `lab` WHERE `patientunitstayid` IN ( SELECT `patientunitstayid` FROM `patient` WHERE `patienthealthsystemstayid` = ( SELECT `patienthealthsystemstayid` FROM `patient` WHERE `uniquepid` = '022-105102' AND `hospitaldischargetime` IS NOT NULL ORDER BY `hospitaladmittime` DESC LIMIT 1 ) ) AND `labname` = 'glucose' ORDER BY `labresulttime` ) SELECT CASE WHEN (SELECT `labresult` FROM GlucoseMeasurements ORDER BY `labresulttime` DESC LIMIT 1) > (SELECT `labresult` FROM GlucoseMeasurements ORDER BY `labresulttime` ASC LIMIT 1) THEN 'True' ELSE 'False' END AS `IsLastGreaterThanFirst`",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['True']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses labname = 'glucose' instead of the required labname = 'bedside glucose'. This is a local issue with the filter condition (wrong literal/value) and does not require restructuring the query\u2019s logic or joins.",
            "error_type": "Local: value parsing error"
        },
        "3f78a402c7c8d638b4a4de3c": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Count the number of times patient 016-10197 received volume (ml)-levofloxacin (levaquin) ivpb 750 mg on last month/09.",
            "gold_sql": "SELECT COUNT(*) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '016-10197' ) ) AND intakeoutput.cellpath LIKE '%intake%' AND intakeoutput.celllabel = 'volume (ml)-levofloxacin (levaquin) ivpb 750 mg' AND datetime(intakeoutput.intakeoutputtime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',intakeoutput.intakeoutputtime) = '09'",
            "pred_sql": "SELECT COUNT(*) FROM medication WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '016-10197' ) AND `drugname` = 'volume (ml)-levofloxacin (levaquin) ivpb 750 mg' AND datetime(`drugstarttime`, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') AND strftime('%d', `drugstarttime`) = '09'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls from the medication table using drugname and drugstarttime, whereas the question and ground truth require using the intakeoutput table with celllabel and intakeoutputtime filtered by cellpath LIKE '%intake%'. This is not a simple column or syntax fix but a fundamental misuse of the table and join logic, altering the query\u2019s overall intent. Therefore it\u2019s a Global error.",
            "error_type": "Global: incorrect table usage"
        }
    },
    "o4-mini": {
        "3a9accf430c593518ab84181": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "How many days have passed since the last time patient 027-100843 was given nimodipine on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',treatment.treatmenttime) ) FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-100843' AND patient.hospitaldischargetime IS NULL ) ) AND treatment.treatmentname = 'nimodipine' ORDER BY treatment.treatmenttime DESC LIMIT 1",
            "pred_sql": "SELECT CAST( julianday('2100-12-31 23:59:00') - julianday(MAX(m.drugstarttime)) AS INTEGER) AS days_since_last_nimodipine FROM medication m JOIN patient p ON m.patientunitstayid = p.patientunitstayid WHERE p.patienthealthsystemstayid = ( SELECT patienthealthsystemstayid FROM patient WHERE uniquepid = '027-100843' AND hospitaldischargetime IS NULL LIMIT 1 ) AND m.drugname = 'nimodipine';",
            "gold_answer": "[['6.125']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the medication table and drugstarttime instead of the treatment table and treatmenttime specified in the ground truth. This is not a simple typo or syntax mistake\u2014it changes the data source and the join logic, fundamentally altering how the patient\u2019s nimodipine event is retrieved. As a result, it fails to capture the correct event from the treatment table for the current hospital visit.",
            "error_type": "Global: incorrect table/join"
        },
        "cb7daad13bef3b54aa70357d": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "What are the top five most frequent medications prescribed within the same month to the patients in their 20s after they are diagnosed with stroke - ischemic stroke?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'stroke - ischemic stroke' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.age BETWEEN 20 AND 29 ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.drugstarttime AND datetime(T1.diagnosistime,'start of month') = datetime(T2.drugstarttime,'start of month') GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT m.drugname FROM patient p JOIN diagnosis d ON p.patientunitstayid = d.patientunitstayid JOIN medication m ON p.patientunitstayid = m.patientunitstayid WHERE p.age BETWEEN 20 AND 29 AND d.diagnosisname = 'stroke - ischemic stroke' AND m.drugstarttime >= d.diagnosistime AND strftime('%Y-%m', m.drugstarttime) = strftime('%Y-%m', d.diagnosistime) GROUP BY m.drugname ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['5 ml vial : metoprolol tartrate 1 mg/ml iv soln'], ['acetaminophen 325 mg po tabs'], ['atorvastatin calcium 40 mg po tabs'], ['metoprolol tartrate 25 mg po tabs'], ['metoprolol tartrate 50 mg po tabs'], ['potassium chloride 20 meq po pack'], ['warfarin sodium 5 mg po tabs'], ['warfarin sodium 7.5 mg po tabs']]",
            "pred_answer": "[['acetaminophen 325 mg po tabs'], ['metoprolol tartrate 25 mg po tabs'], ['potassium chloride 20 meq po pack'], ['warfarin sodium 5 mg po tabs'], ['warfarin sodium 7.5 mg po tabs']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a simple ORDER BY \u2026 LIMIT 5 instead of applying DENSE_RANK() to include ties at the 5th rank as required by the \u201ctop N\u201d ranking assumption. All joins, filters, and groupings are otherwise correct\u2014the mistake is confined to the ranking logic in the SELECT/GROUP BY clause. This is a localized error that can be fixed by replacing LIMIT with DENSE_RANK(), without restructuring the query.",
            "error_type": "Local: ranking error"
        }
    }
}