{
    "qwen3": {
        "8a1c32c5bac4ca824a13e857": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What is the percentile of 8.9 in the total bilirubin lab test with the same age as patient 002-10845 on the first hospital visit?",
            "gold_sql": "SELECT DISTINCT T1.C1 FROM ( SELECT lab.labresult, PERCENT_RANK() OVER ( ORDER BY lab.labresult ) AS C1 FROM lab WHERE lab.labname = 'total bilirubin' AND lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age = ( SELECT patient.age FROM patient WHERE patient.uniquepid = '002-10845' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) ) AS T1 WHERE T1.labresult = 8.9",
            "pred_sql": "WITH first_admission_age AS ( SELECT uniquepid, age FROM patient WHERE (uniquepid, hospitaladmittime) IN ( SELECT uniquepid, MIN(hospitaladmittime) FROM patient GROUP BY uniquepid ) ), target_age AS ( SELECT age FROM patient WHERE uniquepid = '002-10845' ORDER BY hospitaladmittime ASC LIMIT 1 ) SELECT (COUNT(CASE WHEN lab.labresult <= 8.9 THEN 1 END) * 100.0 / COUNT(*)) AS percentile FROM lab JOIN patient ON lab.patientunitstayid = patient.patientunitstayid JOIN first_admission_age ON patient.uniquepid = first_admission_age.uniquepid CROSS JOIN target_age WHERE first_admission_age.age = target_age.age AND lab.labname = 'total bilirubin';",
            "gold_answer": "[['0.973']]",
            "pred_answer": "[['97.368']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the intended percentile computation in several ways: it replaces the window\u2010function PERCENT_RANK() with a custom COUNT-based calculation, multiplies the result by 100 (producing 97.368 instead of the expected 0.973), and reshapes the query logic around aggregations rather than a straightforward window function. This error spans the overall query logic (how the percentile is computed and formatted), not just a minor syntactic or reference issue.",
            "error_type": "Global: incorrect percentile calculation"
        },
        "c939da95b18a427be1e09ceb": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What methods are used to ingest haloperidol lactate?",
            "gold_sql": "SELECT DISTINCT medication.routeadmin FROM medication WHERE medication.drugname = 'haloperidol lactate'",
            "pred_sql": "SELECT DISTINCT routeadmin FROM medication WHERE drugname LIKE '%haloperidol lactate%';",
            "gold_answer": "[['inj'], ['intramuscular'], ['intravenou'], ['intravenous'], ['iv'], ['zpyxvend']]",
            "pred_answer": "[['im'], ['inj'], ['intramuscular'], ['intravenou'], ['intravenous'], ['iv'], ['iv push'], ['zpyxvend']]",
            "is_correct": false,
            "reasoning": "The only difference between the ground truth and predicted SQL is in the WHERE clause: the ground truth uses an exact match (drugname = 'haloperidol lactate'), while the predicted SQL uses a LIKE pattern match. This change in condition leads to extra, unintended routes (e.g., 'im', 'iv push'). The error is confined to the WHERE clause and does not alter the overall structure or joins of the query, so it\u2019s a localized mistake.",
            "error_type": "Local: where-clause error"
        }
    },
    "dinsql": {
        "25e1fa3607d0a4f426631379": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "What was the name of the medication that patient 003-10264 was prescribed within the same day after having received a stress ulcer prophylaxis - antacids procedure since 03/2100?",
            "gold_sql": "SELECT T2.drugname FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '003-10264' ) AND treatment.treatmentname = 'stress ulcer prophylaxis - antacids' AND strftime('%Y-%m',treatment.treatmenttime) >= '2100-03' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '003-10264' ) AND strftime('%Y-%m',medication.drugstarttime) >= '2100-03' ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.drugstarttime AND datetime(T1.treatmenttime,'start of day') = datetime(T2.drugstarttime,'start of day')",
            "pred_sql": "SELECT DISTINCT T2.drugname FROM treatment AS T1 JOIN medication AS T2 ON T1.patientunitstayid = T2.patientunitstayid JOIN patient AS T3 ON T1.patientunitstayid = T3.patientunitstayid WHERE T3.uniquepid = '003-10264' AND T1.treatmentname = 'stress ulcer prophylaxis - antacids' AND datetime(T1.treatmenttime) >= datetime('2100-03-01') AND datetime(T1.treatmenttime, 'start of day') = datetime(T2.drugstarttime, 'start of day')",
            "gold_answer": "[['fentanyl citrate inj']]",
            "pred_answer": "[['acetaminophen'], ['bisacodyl supp'], ['fentanyl citrate inj'], ['fentanyl citrate inj'], ['hydralazine inj'], [\"lactated ringer's 1,000 ml bag.\"], [\"lactated ringer's 1,000 ml bag.\"], [\"lactated ringer's 1,000 ml bag.\"], ['magnesium hydroxide'], ['ondansetron inj'], ['ondansetron inj'], ['ondansetron inj']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the necessary condition that the medication start time must be after the treatment time (T1.treatmenttime < T2.drugstarttime). It only enforces that they occur on the same day, so medications given earlier that day are erroneously included. This mistake is confined to the WHERE clause logic and does not change the overall query structure or table relationships.",
            "error_type": "Local: missing time order condition"
        }
    },
    "codes": {
        "a22b861f262fa2427d8449ff": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What are the top three frequent input events this year?",
            "gold_sql": "SELECT T1.celllabel FROM ( SELECT intakeoutput.celllabel, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM intakeoutput WHERE intakeoutput.cellpath LIKE '%intake%' AND datetime(intakeoutput.intakeoutputtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY intakeoutput.celllabel ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT cellpath FROM intakeoutput WHERE cellpath LIKE '%input%' AND intakeoutputtime >= datetime('2100 12 31 23:59:00','start of year') GROUP BY cellpath ORDER BY count(cellpath) DESC LIMIT 3",
            "gold_answer": "[['continuous infusion meds'], ['ns ivf'], ['p.o.']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s mistakes are confined to specific clauses: it uses the wrong column (cellpath instead of celllabel), the wrong LIKE pattern ('%input%' instead of '%intake%'), and an incorrectly formatted datetime condition. These are localized errors in the SELECT and WHERE clauses that can be fixed without restructuring the entire query logic.",
            "error_type": "Local: column reference/value parsing error"
        },
        "d6c3eb4c10881ccf9ced74ae": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Give me the three most frequent procedures performed.",
            "gold_sql": "SELECT T1.treatmentname FROM ( SELECT treatment.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM treatment GROUP BY treatment.treatmentname ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT procedure_name FROM procedure WHERE procedure_name IS NOT NULL GROUP BY procedure_name ORDER BY count(procedure_name) DESC LIMIT 3",
            "gold_answer": "[['chest x-ray'], ['mechanical ventilation'], ['stress ulcer prophylaxis - pantoprazole']]",
            "pred_answer": "no such table: procedure",
            "is_correct": false,
            "reasoning": "The predicted SQL references a table named `procedure` and column `procedure_name`, which do not exist in the given schema. This is a localized issue with table/column references and does not involve the overall query logic or structure.",
            "error_type": "Local: table/column reference error"
        },
        "34d60bc7469406d563e8cc3b": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was patient 010-10081's first value of ionized calcium this month?",
            "gold_sql": "SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '010-10081' ) ) AND lab.labname = 'ionized calcium' AND datetime(lab.labresulttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT DISTINCT lab.labresult FROM lab INNER JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE lab.labname = 'ionized calcium' AND patient.uniquepid = '010-10081' AND strftime('%Y %m', lab.labresulttime) = '2100 10' LIMIT 1",
            "gold_answer": "[['4.88']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails because it misparses \u201cthis month,\u201d using strftime('%Y %m') = '2100 10' (October) instead of filtering December 2100 via the prescribed start\u2010of\u2010month logic. This is a localized mistake in the WHERE clause\u2019s time filter, not a fundamental restructuring of joins or the overall logic.",
            "error_type": "Local: temporal filter error"
        },
        "107f5deba92d185055ac3f20": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many patients were diagnosed with acute respiratory failure within 2 months following the previous diagnosis of pneumonia in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'pneumonia' AND strftime('%Y',diagnosis.diagnosistime) = '2100' ) AS T1 JOIN ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'acute respiratory failure' AND strftime('%Y',diagnosis.diagnosistime) = '2100' ) AS T2 WHERE T1.diagnosistime < T2.diagnosistime AND datetime(T2.diagnosistime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime,'+2 month')",
            "pred_sql": "SELECT count(patient.patientunitstayid) FROM patient INNER JOIN diagnosis ON patient.patientunitstayid = diagnosis.patientunitstayid INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE vitalperiodic.observationtime = ( SELECT diagnosis.diagnosistime FROM patient INNER JOIN diagnosis ON patient.patientunitstayid = diagnosis.patientunitstayid WHERE diagnosis.icd9code = '78552' ORDER BY diagnosis.diagnosistime DESC LIMIT 1 ) + 2 MONTH AND vitalperiodic.observationtime < ( SELECT diagnosis.diagnosistime FROM patient INNER JOIN diagnosis ON patient.patientunitstayid = diagnosis.patientunitstayid WHERE diagnosis.icd9code = '250.41' ORDER BY diagnosis.diagnosistime DESC LIMIT 1 ) + 2 MONTH",
            "gold_answer": "[['107.0']]",
            "pred_answer": "near \"MONTH\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted query completely diverges from the intended logic: it brings in the vitalperiodic table (irrelevant to counting diagnoses), uses ICD9 codes instead of diagnosis names, compares observationtime rather than diagnosistime, and applies invalid \u2018+ 2 MONTH\u2019 syntax. These issues span multiple clauses and fundamentally alter the query structure and meaning, not just a local mistake in a single clause.",
            "error_type": "Global: logical structure error"
        },
        "765941e9ac0c6dd1539c97a8": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When did patient 030-12080 come to the hospital via chest pain center for the first time?",
            "gold_sql": "SELECT patient.hospitaladmittime FROM patient WHERE patient.uniquepid = '030-12080' AND patient.hospitaladmitsource = 'chest pain center' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1",
            "pred_sql": "SELECT patient.hospitaladmittime FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '030-12080'",
            "gold_answer": "[['2100-01-02 15:12:00']]",
            "pred_answer": "[['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00'], ['2100-01-02 15:12:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query structure by introducing an unnecessary JOIN to the vitalperiodic table, omitting the hospitaladmitsource filter ('chest pain center'), excluding the discharge-time check (hospitaldischargetime IS NOT NULL), and failing to order and limit for the first admission. These issues go beyond a simple typo or missing condition in one clause; they alter the overall logic and data relationships, producing repeated rows rather than the single first admission time.",
            "error_type": "Global: incorrect join strategy"
        },
        "73901b4a804f0cdbd67987e9": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What medication was patient 002-11177 prescribed two times this month?",
            "gold_sql": "SELECT T1.drugname FROM ( SELECT medication.drugname, COUNT( medication.drugstarttime ) AS C1 FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-11177' ) ) AND datetime(medication.drugstarttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') GROUP BY medication.drugname ) AS T1 WHERE T1.C1 = 2",
            "pred_sql": "SELECT medication.drugname FROM medication INNER JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '002-11177' AND medication.drugstarttime LIKE '2100-02-%' ORDER BY medication.drugstarttime DESC LIMIT 2",
            "gold_answer": "[['1000 ml  -  potassium chloride in nacl 20-0.9 meq/l-% iv soln'], ['500 ml  -  sodium chloride 0.9 % iv soln'], ['alprazolam 0.25 mg po tabs'], ['morphine inj'], ['nitroglycerin 0.4 mg sl subl']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic: it uses a simple LIKE filter on medication.drugstarttime for \u201c2100-02-%\u201d (wrong month), fails to restrict to the current month (December 2100), omits the GROUP BY/COUNT/HAVING to find drugs prescribed exactly twice, and arbitrarily LIMITs to 2 rows. Fixing this is not a local tweak but requires rebuilding the aggregation and proper time filtering. This is a structural/logic error.",
            "error_type": "Global: incorrect logic"
        },
        "44c293f67498109bcfc02beb": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the organism found in the first microbiology test done on the patient 031-10407's nasopharynx last month?",
            "gold_sql": "SELECT microlab.organism FROM microlab WHERE microlab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10407' ) ) AND microlab.culturesite = 'nasopharynx' AND datetime(microlab.culturetakentime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY microlab.culturetakentime ASC LIMIT 1",
            "pred_sql": "SELECT microlab.organism FROM patient INNER JOIN microlab ON patient.patientunitstayid = microlab.patientunitstayid WHERE patient.uniquepid = '031-10407' AND strftime('%Y-%m-%d',microlab.culturetakentime) LIKE '2015-11-18'",
            "gold_answer": "[['other']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails only in the WHERE clause: it hardcodes a specific date ('2015-11-18') instead of filtering for \u201clast month\u201d and also omits the culturesite = 'nasopharynx' condition. The table relationships and overall structure (JOIN patient-to-microlab, selecting microlab.organism) are correct. These mistakes are confined to the value conditions in the WHERE clause and do not require reworking the query\u2019s joins or logic flow. Therefore, this is a localized WHERE\u2010clause error.",
            "error_type": "Local: value parsing error"
        },
        "c6ba9aee9d82b7f7b53f02c0": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Get me patient 006-101341's total output volume on 01/07/2100.",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-101341' ) ) AND intakeoutput.cellpath LIKE '%output%' AND strftime('%Y-%m-%d',intakeoutput.intakeoutputtime) = '2100-01-07'",
            "pred_sql": "SELECT intakeoutput.cellvalue * intakeoutput.cellvaluenumerical AS totalvol FROM patient INNER JOIN intakeoutput ON patient.patientunitstayid = intakeoutput.patientunitstayid WHERE patient.uniquepid = '006-101341' AND intakeoutput.intakeoutputtime = '2100-01-07' AND intakeoutput.cellpath LIKE '%output%'",
            "gold_answer": "[['5415.0']]",
            "pred_answer": "no such column: intakeoutput.cellvalue",
            "is_correct": false,
            "reasoning": "The predicted SQL misreferences columns inside the SELECT clause\u2014there is no intakeoutput.cellvalue or cellvaluenumerical column (the correct column is cellvaluenumeric). This is a localized column reference error that doesn\u2019t require restructuring joins or overall logic.",
            "error_type": "Local: column reference error"
        },
        "620abef8e30470561bc7876b": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the name of the procedure that was given two times to patient 004-10113 in 08/this year?",
            "gold_sql": "SELECT T1.treatmentname FROM ( SELECT treatment.treatmentname, COUNT( treatment.treatmenttime ) AS C1 FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-10113' ) ) AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',treatment.treatmenttime) = '08' GROUP BY treatment.treatmentname ) AS T1 WHERE T1.C1 = 2",
            "pred_sql": "SELECT DISTINCT diagnosis.diagnosisname FROM patient INNER JOIN diagnosis ON patient.patientunitstayid = diagnosis.patientunitstayid WHERE patient.uniquepid = '004-10113' AND strftime('%m', diagnosis.diagnosistime) = '08' AND diagnosis.diagnosisid IN (1116, 1117)",
            "gold_answer": "[['vascular catheter placement'], ['vasodilating agent - iv - nicardipine']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query completely uses the diagnosis table instead of the treatment table and filters on diagnosis IDs, whereas the intended query must count treatments per patient in a given month and then select those with exactly two occurrences. This is not a minor typo or misplaced condition but a fundamental misselection of the data source and logic. Fixing it would require rewriting the entire query structure, switching tables, joins, and grouping logic, so it is a global error.",
            "error_type": "Global: incorrect-table-and-logic"
        }
    },
    "macsql": {
        "f6ec755f6e8667eca464825c": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Get me patient 006-100471's total ivpb input amount.",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-100471' ) ) AND intakeoutput.celllabel = 'ivpb' AND intakeoutput.cellpath LIKE '%intake%'",
            "pred_sql": "SELECT SUM(`cellvaluenumeric`) AS total_ivpb_input FROM intakeoutput WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '006-100471' ) AND `cellpath` LIKE '%ivpb%'",
            "gold_answer": "[['4865.0']]",
            "pred_answer": "[['700.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL restricts to a single patientunitstayid (from uniquepid) rather than summing across all unit stays within the same health system stay, and it omits the required celllabel = 'ivpb' and intake filter (cellpath LIKE '%intake%'). These omissions change the fundamental logic of the query and require restructuring the filtering and subquery logic, making this a global error.",
            "error_type": "Global: incorrect filtering and join logic"
        }
    },
    "deepseek": {
        "6d83bd4ff59e92870efdc23a": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What number of hours has passed since patient 015-100025's last procedure on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',treatment.treatmenttime) ) FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '015-100025' AND patient.hospitaldischargetime IS NULL ) ) ORDER BY treatment.treatmenttime DESC LIMIT 1",
            "pred_sql": "SELECT ROUND( (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', ( SELECT MAX(treatmenttime) FROM treatment WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '015-100025' AND hospitaldischargetime IS NULL ) )) / 3600 ) AS hours_since_last_procedure ;",
            "gold_answer": "[['15.183']]",
            "pred_answer": "near \"AS\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a misplaced AS clause and mismatched parentheses around the ROUND function, leading to a syntax error near \"AS\". This is a localized syntax mistake in the SELECT expression and does not alter the overall logical structure of the query.",
            "error_type": "Local: syntax error"
        },
        "8b41023860c7ee88b1ce0979": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Tell me the minimum total hospital cost that includes hypertension - with renal failure since 1 year ago.",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.patientunitstayid IN ( SELECT diagnosis.patientunitstayid FROM diagnosis WHERE diagnosis.diagnosisname = 'hypertension - with renal failure' ) ) AND datetime(cost.chargetime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY cost.patienthealthsystemstayid ) AS T1",
            "pred_sql": "WITH relevant_patients AS ( SELECT DISTINCT p.patienthealthsystemstayid FROM patient p JOIN diagnosis d1 ON p.patientunitstayid = d1.patientunitstayid JOIN diagnosis d2 ON p.patientunitstayid = d2.patientunitstayid WHERE d1.diagnosisname = 'hypertension' AND d2.diagnosisname = 'renal failure' AND p.hospitaladmittime >= datetime('2100-12-31 23:59:00', '-1 year') ) SELECT MIN(total_cost) AS min_total_cost FROM ( SELECT p.patienthealthsystemstayid, SUM(DISTINCT c.cost) AS total_cost FROM patient p JOIN cost c ON p.patienthealthsystemstayid = c.patienthealthsystemstayid WHERE c.eventtype IN ('diagnosis', 'treatment', 'lab', 'medication') AND p.patienthealthsystemstayid IN (SELECT patienthealthsystemstayid FROM relevant_patients) GROUP BY p.patienthealthsystemstayid );",
            "gold_answer": "[['933.35']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic: it treats 'hypertension - with renal failure' as two separate diagnoses ('hypertension' AND 'renal failure') instead of one combined diagnosis name. It also filters on patient.hospitaladmittime rather than cost.chargetime, restricts to certain event types, and applies DISTINCT to costs, none of which align with the intended summation over all cost records since one year ago for the exact diagnosis. These changes affect multiple clauses and the overall meaning of the query.",
            "error_type": "Global: logical join and filter error"
        },
        "e77454c3e63ce2ee7641860d": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What was the duration in days of patient 010-10025's first stay in the icu?",
            "gold_sql": "SELECT strftime('%J',patient.unitdischargetime) - strftime('%J',patient.unitadmittime) FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '010-10025' ) AND patient.unitadmittime IS NOT NULL ORDER BY patient.unitadmittime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%s', unitdischargetime) - strftime('%s', unitadmittime)) / (86400) AS duration_days FROM patient WHERE uniquepid = '010-10025' ORDER BY unitadmittime ASC LIMIT 1;",
            "gold_answer": "[['4.454']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted query correctly identifies the patient\u2019s first ICU stay by ordering on unitadmittime and limiting to one record. The error lies solely in how the duration is calculated: using strftime('%s') and integer division by 86400 yields a truncated result of 4.0 days rather than the precise 4.454 days calculated by the ground truth\u2019s Julian day difference. This is a localized mistake in the temporal calculation, not a structural or logical join issue.",
            "error_type": "Local: other-temporal error"
        }
    },
    "llm2sql": {
        "2791f3d7ba24deb3fd9b5d0b": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "On the first hospital visit, when did patient 031-10095 receive the first nasopharynx microbiology test?",
            "gold_sql": "SELECT microlab.culturetakentime FROM microlab WHERE microlab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10095' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) AND microlab.culturesite = 'nasopharynx' ORDER BY microlab.culturetakentime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(T2.culturetakentime) AS first_microbiology_test FROM patient AS T1 INNER JOIN microlab AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.uniquepid = '031-10095' AND T1.hospitaldischargetime IS NOT NULL ORDER BY T1.hospitaladmittime ASC LIMIT 1 AND T2.culturesite = 'nasopharynx'",
            "gold_answer": "[['2100-02-09 15:50:00']]",
            "pred_answer": "no such column: T2.culturesite",
            "is_correct": false,
            "reasoning": "The predicted query misplaces the filter on T2.culturesite after the LIMIT clause, causing SQLite to interpret it as a column reference in the LIMIT/ORDER BY context. This is a localized syntax/column reference error that can be fixed by moving AND T2.culturesite = 'nasopharynx' into the WHERE clause and properly ordering the ORDER BY and LIMIT clauses. The overall query logic and join structure remain intact.",
            "error_type": "Local: table/column reference error"
        },
        "f551c3dec5e7817a5dee59e7": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "Among patients who are prescribed with lactated ringers injection 1,000 ml, what are the top three most frequently prescribed drugs at the same time?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'lactated ringers injection 1,000 ml' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE datetime(T1.drugstarttime) = datetime(T2.drugstarttime) GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT other_med.drugname, COUNT(*) AS prescription_count FROM medication AS lr_med JOIN medication AS other_med ON lr_med.patientunitstayid = other_med.patientunitstayid AND lr_med.drugstarttime <= other_med.drugstoptime AND lr_med.drugstoptime >= other_med.drugstarttime WHERE lr_med.drugname = 'lactated ringers injection 1,000 ml' AND other_med.drugname <> 'lactated ringers injection 1,000 ml' GROUP BY other_med.drugname ORDER BY prescription_count DESC LIMIT 3;",
            "gold_answer": "[['dilaudid'], ['fentanyl'], ['lactated ringers injection 1,000 ml']]",
            "pred_answer": "[['dilaudid', '20.0'], ['fentanyl', '43.0'], ['zofran', '26.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the join condition from matching identical start times (as required to find drugs prescribed at the exact same time) to an interval-overlap condition (lr_med.drugstarttime <= other_med.drugstoptime AND lr_med.drugstoptime >= other_med.drugstarttime). This alters the core logic of how co-prescribed drugs are identified, affecting the overall meaning and results of the query. The error spans the JOIN strategy and requires restructuring beyond a simple fix in a single clause.",
            "error_type": "Global: incorrect join logic"
        }
    },
    "o4-mini": {
        "d202e3cc28a5d8b7e342ebe9": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "What are the five most frequent procedures for patients who are in their 30s?",
            "gold_sql": "SELECT T1.treatmentname FROM ( SELECT treatment.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age BETWEEN 30 AND 39 ) GROUP BY treatment.treatmentname ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "SELECT t.treatmentname FROM treatment t JOIN patient p ON t.patientunitstayid = p.patientunitstayid WHERE CAST(p.age AS INTEGER) BETWEEN 30 AND 39 GROUP BY t.treatmentname ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['antiemetic - serotonin antagonist'], ['chest x-ray'], ['cultures - blood'], ['mechanical ventilation'], ['normal saline administration'], ['stress ulcer prophylaxis - pantoprazole']]",
            "pred_answer": "[['antiemetic - serotonin antagonist'], ['cultures - blood'], ['mechanical ventilation'], ['normal saline administration'], ['stress ulcer prophylaxis - pantoprazole']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses a simple ORDER BY ... LIMIT 5 approach instead of the required DENSE_RANK() method to handle ties properly. This is a mistake in the ranking logic within the ORDER BY/LIMIT clause\u2014it does not involve join conditions, table references, or the overall structure of the query. It's a localized error in how the top N results are selected.",
            "error_type": "Local: ranking error"
        },
        "3ae1a558a87d4de503e658ec": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "How many patients were treated with antidepressant - amitriptyline two times since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, COUNT(*) AS C1 FROM patient WHERE patient.patientunitstayid = ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'antidepressant - amitriptyline' AND strftime('%Y',treatment.treatmenttime) >= '2100' ) GROUP BY patient.uniquepid ) AS T1 WHERE T1.C1 = 2",
            "pred_sql": "SELECT COUNT(*) FROM ( SELECT p.uniquepid FROM patient p JOIN treatment t ON p.patientunitstayid = t.patientunitstayid WHERE t.treatmentname = 'antidepressant - amitriptyline' AND t.treatmenttime >= '2100-01-01' GROUP BY p.uniquepid HAVING COUNT(*) >= 2 ) AS sub;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the HAVING clause: it uses COUNT(*) >= 2 to include patients treated two or more times, whereas the ground truth requires exactly two treatments (COUNT = 2). This is a localized mistake within the HAVING clause and does not affect the overall query structure or table relationships.",
            "error_type": "Local: having-clause error"
        }
    }
}