{
    "deepseek": {
        "7e8eb77494306257f1f762c2": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "How many hours have elapsed since the first time on the current hospital visit that patient 012-10585 was diagnosed with pneumonia?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) ) FROM diagnosis WHERE diagnosis.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '012-10585' AND patient.hospitaldischargetime IS NULL ) ) AND diagnosis.diagnosisname = 'pneumonia' ORDER BY diagnosis.diagnosistime ASC LIMIT 1",
            "pred_sql": "SELECT ROUND((strftime('%s', '2100-12-31 23:59:00') - strftime('%s', (SELECT MIN(d.diagnosistime) FROM diagnosis d JOIN patient p ON d.patientunitstayid = p.patientunitstayid WHERE p.uniquepid = '012-10585' AND d.diagnosisname = 'pneumonia' AND p.hospitaldischargetime IS NULL))) / 3600) AS hours_since_diagnosis;",
            "gold_answer": "[['97.367']]",
            "pred_answer": "[['97.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL follows the same overall logic\u2014finding the first pneumonia diagnosis for the current hospital stay of patient '012-10585' and computing the hours elapsed until the fixed current time. The only substantive difference is that it uses UNIX timestamps and ROUND to return an integer number of hours (97.0) rather than the fractional hours (97.367) produced by the ground\u2010truth\u2019s Julian day calculation. This is a localized mistake in how the time difference is formatted (rounding off the decimal part) rather than a flaw in the query\u2019s structure or joins.",
            "error_type": "Local: value rounding error"
        },
        "66093ba08255a2b508004b34": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Calculate the number of patients who were given sodium phosphate prescription within the same hospital visit following their exploratory laparotomy.",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'exploratory laparotomy' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugstarttime, patient.patienthealthsystemstayid FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'sodium phosphate' ) AS T2 WHERE T1.treatmenttime < T2.drugstarttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid",
            "pred_sql": "SELECT COUNT(DISTINCT p.patientunitstayid) AS num_patients FROM patient p INNER JOIN diagnosis d ON p.patienthealthsystemstayid = d.patienthealthsystemstayid INNER JOIN medication m ON p.patienthealthsystemstayid = m.patienthealthsystemstayid WHERE d.diagnosisname LIKE '%exploratory laparotomy%' AND m.drugname LIKE '%sodium phosphate%' AND m.drugstarttime > d.diagnosistime;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: d.patienthealthsystemstayid",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it refers to a non\u2010existent column d.patienthealthsystemstayid in the diagnosis table. This is a simple column reference error within the JOIN/WHERE clause and does not affect the overall query structure or logic beyond that misreference. Hence it is a local error.",
            "error_type": "Local: table/column reference error"
        },
        "52c3e4197877ebed571489ef": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What is the number of patients who were prescribed pneumococcal polysaccharides since 1 year ago?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid = ( SELECT medication.patientunitstayid FROM medication WHERE medication.drugname = 'pneumococcal polysaccharides' AND datetime(medication.drugstarttime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT p.uniquepid) FROM patient p JOIN medication m ON p.patientunitstayid = m.patientunitstayid WHERE m.drugname = 'pneumococcal polysaccharides' AND m.drugstarttime >= '2099-12-31 23:59:00';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground\u2010truth only in the WHERE clause\u2019s time condition: it hardcodes '2099-12-31 23:59:00' instead of using the required datetime expression datetime('2100-12-31 23:59:00','-1 year') as per the SQL assumptions. This is a localized mistake in the date literal/formatting and does not require reworking the JOINs or overall query structure.",
            "error_type": "Local: incorrect date literal"
        },
        "eda63f3824496fdde6aee946": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "How long has it been in days since the last time 37.5 g tube : dextrose (diabetic use) 40 % po gel was prescribed to patient 016-10029 on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',medication.drugstarttime) ) FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '016-10029' ) AND patient.hospitaldischargetime IS NULL ) AND medication.drugname = '37.5 g tube : dextrose (diabetic use) 40 % po gel' ORDER BY medication.drugstarttime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', T1.drugstarttime)) / 86400 AS days_since_last_prescription FROM patient INNER JOIN medication AS T1 ON patient.patientunitstayid = T1.patientunitstayid WHERE patient.uniquepid = '016-10029' AND patient.hospitaldischargetime IS NULL AND T1.drugname = '37.5 g tube : dextrose (diabetic use) 40 % po gel' ORDER BY T1.drugstarttime DESC LIMIT 1;",
            "gold_answer": "[['6.089']]",
            "pred_answer": "[['6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs only in how it computes the day difference: it uses UNIX-second timestamps divided by 86400, which truncates fractional days to an integer, rather than using Julian day difference (strftime('%J')) as in the ground truth. All other parts of the query (joins, filters, ordering, and limits) correctly target the same patient and drug on the current hospital visit. This is a localized mistake in the time-difference calculation and does not require restructuring the query\u2019s overall logic.",
            "error_type": "Local: other-temporal error"
        },
        "af68cfb7669d83903df2e7b1": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Tell me the total volume of input for patient 033-10044 on the current icu visit.",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '033-10044' ) AND patient.unitdischargetime IS NULL ) AND intakeoutput.cellpath LIKE '%intake%'",
            "pred_sql": "SELECT SUM(DISTINCT io.cellvaluenumeric) AS total_input_volume FROM patient p JOIN intakeoutput io ON p.patientunitstayid = io.patientunitstayid WHERE p.uniquepid = '033-10044' AND p.unitdischargetime IS NULL AND io.cellpath LIKE '%intake%' AND io.cellpath NOT LIKE '%output%' ;",
            "gold_answer": "[['2170.0']]",
            "pred_answer": "[['1970.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates from the ground truth only in the SELECT clause by using SUM(DISTINCT io.cellvaluenumeric) instead of SUM(io.cellvaluenumeric). This change in aggregation is a localized mistake in the SELECT expression: it removes duplicate values before summation, leading to an incorrect total. The JOIN conditions, filtering for the current ICU stay, and the intake filter are all correct. Since the error is confined to the aggregate function in the SELECT clause and does not affect the overall query structure or table relationships, it is a local error.",
            "error_type": "Local: incorrect aggregation in SELECT"
        }
    },
    "o4-mini": {
        "2760d8fb8525ac6cc81c7d8e": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Tell me the lab test that was first given to patient 030-11183 since 06/2100?",
            "gold_sql": "SELECT lab.labname FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '030-11183' ) ) AND strftime('%Y-%m',lab.labresulttime) >= '2100-06' ORDER BY lab.labresulttime ASC LIMIT 1",
            "pred_sql": "WITH filtered_labs AS ( SELECT lab.labname, lab.labresulttime FROM lab JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '030-11183' AND datetime(lab.labresulttime) >= datetime('2100-06-01','start of month') ), first_time AS ( SELECT MIN(labresulttime) AS ft FROM filtered_labs ) SELECT DISTINCT labname FROM filtered_labs WHERE labresulttime = (SELECT ft FROM first_time);",
            "gold_answer": "[['-lymphs']]",
            "pred_answer": "[['-bands'], ['-lymphs'], ['-monos'], ['-polys'], ['hct'], ['hgb'], ['mch'], ['mchc'], ['mcv'], ['platelets x 1000'], ['rbc'], ['rdw'], ['wbc x 1000']]",
            "is_correct": false,
            "reasoning": "The predicted query correctly filters lab records for the given patient since June 2100 and finds the earliest timestamp. However, instead of returning exactly one lab test (LIMIT 1), it returns all lab names recorded at that earliest time by using DISTINCT without a LIMIT. The mistake is localized to the final projection/selection logic; the overall query structure and joins are correct.",
            "error_type": "Local: projection error"
        },
        "aafda5799cef4cdbb1a8c0e0": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "What's the count of current patients?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.hospitaldischargetime IS NULL",
            "pred_sql": "SELECT COUNT(*) AS current_patient_count FROM patient WHERE hospitaldischargetime IS NULL;",
            "gold_answer": "[['23.0']]",
            "pred_answer": "[['30.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only counts rows (COUNT(*)) instead of counting unique patients (COUNT(DISTINCT patient.uniquepid)), thereby overcounting patients with multiple stays. This is a mistake in the SELECT aggregation clause (missing DISTINCT), and it doesn\u2019t require restructuring joins or filters\u2014just adding DISTINCT. Hence, it\u2019s a localized error.",
            "error_type": "Local: missing-distinct aggregation error"
        }
    },
    "dinsql": {
        "814a25457ce2b7f59394f632": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "How many patients underwent anticoagulant administration - conventional heparin therapy within 2 months after the diagnosis with acute coronary syndrome - acute myocardial infarction (no st elevation), this year?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'acute coronary syndrome - acute myocardial infarction (no st elevation)' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'anticoagulant administration - conventional heparin therapy' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 WHERE T1.diagnosistime < T2.treatmenttime AND datetime(T2.treatmenttime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime,'+2 month')",
            "pred_sql": "SELECT COUNT(DISTINCT T1.patientunitstayid) FROM diagnosis AS T1 JOIN medication AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.diagnosisname = 'acute coronary syndrome - acute myocardial infarction (no st elevation)' AND T2.drugname = 'conventional heparin therapy' AND datetime(T2.drugstarttime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime, '+2 months') AND strftime('%Y', T1.diagnosistime) = '2100'",
            "gold_answer": "[['48.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the medication table instead of the treatment table and joins on patientunitstayid rather than counting unique patients (uniquepid). This is not a simple column or syntax mistake but a fundamental change to the query\u2019s logic: it selects from the wrong table, applies filters to drugname instead of treatmentname, and misaligns the intended event timing and patient identifier. Fixing this requires restructuring the core FROM/JOIN clauses and filters, so it\u2019s a global logic error.",
            "error_type": "Global: incorrect table join"
        }
    },
    "codes": {
        "9ebae192531eacc7f1be3ed9": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Find out the number of drugs given to patient 006-112408 last month.",
            "gold_sql": "SELECT COUNT(*) FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-112408' ) ) AND datetime(medication.drugstarttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month')",
            "pred_sql": "SELECT count(DISTINCT medication.drugname) FROM patient INNER JOIN medication ON patient.patientunitstayid = medication.patientunitstayid WHERE patient.uniquepid = '006-112408' AND datetime(medication.drugstarttime) BETWEEN datetime('now','start of month', '-1 month') AND datetime('now','start of month', 'start of day', '-1 month')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only mis\u2010implements the counting and date condition but does not alter the overall query structure or table relationships. Specifically, it uses count(DISTINCT drugname) instead of COUNT(*) and uses datetime('now',\u2026) instead of the required fixed datetime('2100-12-31 23:59:00', \u2026) logic for last month. These are mistakes localized to the SELECT aggregation and the WHERE temporal filter, not structural join or multi\u2010clause logic errors.",
            "error_type": "Local: other-temporal error"
        },
        "562e56ddd403d8f9db6e6844": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Please list the four most common procedures carried out since 1 year ago.",
            "gold_sql": "SELECT T1.treatmentname FROM ( SELECT treatment.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM treatment WHERE datetime(treatment.treatmenttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY treatment.treatmentname ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT treatment.treatmentname FROM patient INNER JOIN treatment ON patient.patientunitstayid = treatment.patientunitstayid WHERE treatment.treatmenttime >= datetime('now', '1 year') GROUP BY treatment.treatmentname ORDER BY count(treatment.treatmentname) DESC LIMIT 4",
            "gold_answer": "[['antiemetic - serotonin antagonist'], ['chest x-ray'], ['mechanical ventilation'], ['stress ulcer prophylaxis - pantoprazole']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s structure is mostly correct (it selects treatment names, groups, orders by count, and limits to 4), but its WHERE clause incorrectly uses datetime('now','1 year') instead of the required fixed current time '2100-12-31 23:59:00' with '-1 year'. This violates the temporal assumption and causes no rows to be returned. This is a localized error in the time filter and does not require restructuring the joins or overall logic of the query.",
            "error_type": "Local: temporal error"
        },
        "427cf46f018ef0bce1e0d80a": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What drug was prescribed to patient 012-1134 within the same day subsequent to their narcotic overdose diagnosis on the last hospital visit?",
            "gold_sql": "SELECT T2.drugname FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '012-1134' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) AND diagnosis.diagnosisname = 'narcotic overdose' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '012-1134' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.drugstarttime AND datetime(T1.diagnosistime,'start of day') = datetime(T2.drugstarttime,'start of day')",
            "pred_sql": "SELECT medication.drugname FROM medication INNER JOIN cost ON medication.patientunitstayid = cost.patientunitstayid INNER JOIN patient ON patient.patientunitstayid = cost.patientunitstayid WHERE medication.patientunitstayid = '012-1134' AND cost.admittime = patient.dischargetime AND cost.diagnosis = 'Narcotic overdose'",
            "gold_answer": "[['0.4 ml syringe : enoxaparin sodium 40 mg/0.4ml sc soln'], ['10 ml vial : calcium gluconate 10 % iv soln'], ['50 ml syringe : dextrose 50 % iv soln'], ['aspirin 81 mg po chew']]",
            "pred_answer": "no such column: cost.admittime",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the cost table instead of the diagnosis and medication tables that contain the necessary diagnosis and prescription times. It references non\u2010existent columns (cost.admittime, cost.diagnosis) and completely misaligns with the intended logic of finding prescriptions after a specific diagnosis on the last hospital visit. Fixing this would require restructuring the query to use diagnosis and medication tables, correct joins, and time comparisons, not just a minor correction in a single clause.",
            "error_type": "Global: incorrect table/join logic"
        },
        "ad04b8b2e4d28dd51a23c781": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Can you tell me about patient 016-10131's age on the first hospital visit?",
            "gold_sql": "SELECT patient.age FROM patient WHERE patient.uniquepid = '016-10131' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1",
            "pred_sql": "SELECT patient.age FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '016-10131' AND patient.hospitaldischargeoffset = 0",
            "gold_answer": "[['51.0']]",
            "pred_answer": "no such column: patient.hospitaldischargeoffset",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN on vitalperiodic which is unnecessary but not intrinsically breaking the query\u2019s logic, and more critically refers to a non-existent column hospitaldischargeoffset instead of hospitaldischargetime. This is a localized column reference error that can be fixed by correcting the column name without restructuring the query.",
            "error_type": "Local: column reference error"
        },
        "2ce97a7fc52bca5094ea71b8": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Gather the total output on last month/05 for patient 016-10197.",
            "gold_sql": "SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '016-10197' ) ) AND intakeoutput.cellpath LIKE '%output%' AND datetime(intakeoutput.intakeoutputtime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',intakeoutput.intakeoutputtime) = '05'",
            "pred_sql": "SELECT sum(intakeoutput.cellvaluenumeric) FROM patient INNER JOIN intakeoutput ON patient.patientunitstayid = intakeoutput.patientunitstayid WHERE patient.uniquepid = '016-10197' AND intakeoutput.cellpath = 'output/urine/output.total' AND intakeoutput.cellvaluedatetime BETWEEN '2100-04-01 00:00:00' AND '2100-04-30 23:59:59'",
            "gold_answer": "[['30.0']]",
            "pred_answer": "no such column: intakeoutput.cellvaluedatetime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non\u2010existent column intakeoutput.cellvaluedatetime instead of the correct intakeoutput.intakeoutputtime. This is a simple column reference mistake localized in the WHERE clause and does not require restructuring the query\u2019s joins or logic.",
            "error_type": "Local: column reference error"
        },
        "f325dc3d2c7b44a43dc1532c": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What is the average yearly number of patient records marked with valvular stenosis - mitral since 1 year ago?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT COUNT( DISTINCT diagnosis.patientunitstayid ) AS C1 FROM diagnosis WHERE diagnosis.diagnosisname = 'valvular stenosis - mitral' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY strftime('%Y',diagnosis.diagnosistime) ) AS T1",
            "pred_sql": "SELECT avg(patient.patienthealthsystemstayid) FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE vitalperiodic.observationtime >= datetime('now','start of year', '-1 year') AND vitalperiodic.diastolicbp >= 60 AND vitalperiodic.systolicbp > 120",
            "gold_answer": "[['2.0']]",
            "pred_answer": "no such column: vitalperiodic.diastolicbp",
            "is_correct": false,
            "reasoning": "The predicted SQL uses non-existent columns vitalperiodic.diastolicbp and vitalperiodic.systolicbp, resulting in a column reference error. This mistake is confined to incorrect column names in the WHERE clause and does not alter the overall query structure or logic flow.",
            "error_type": "Local: column reference error"
        },
        "07dbbde6bb9c8b1e8da9444b": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How much does the bicarbonate lab test typically cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.eventtype = 'lab' AND cost.eventid IN ( SELECT lab.labid FROM lab WHERE lab.labname = 'bicarbonate' )",
            "pred_sql": "SELECT cost.cost FROM lab INNER JOIN cost ON lab.patientunitstayid = cost.patientunitstayid WHERE lab.labname = 'Bicarbonate'",
            "gold_answer": "[['8.13']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins lab and cost on patientunitstayid rather than linking cost.eventid to lab.labid and filtering for eventtype = 'lab'. This incorrect join changes the overall logic of the query and fails to restrict to the bicarbonate lab test cost. Fixing this requires altering the join keys and adding filters, which is a structural change, not just a simple typo or localized fix.",
            "error_type": "Global: join error"
        },
        "6f28bb73040fa4b58ef368b2": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Since 11 months ago, when was the last microbiology test conducted for patient 031-10778?",
            "gold_sql": "SELECT microlab.culturetakentime FROM microlab WHERE microlab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10778' ) ) AND datetime(microlab.culturetakentime) >= datetime('2100-12-31 23:59:00','-11 month') ORDER BY microlab.culturetakentime DESC LIMIT 1",
            "pred_sql": "SELECT strftime('%Y-%m', microlab.culturetakentime) FROM patient INNER JOIN microlab ON patient.patientunitstayid = microlab.patientunitstayid WHERE patient.uniquepid = '031-10778' ORDER BY microlab.culturetakentime DESC LIMIT 1",
            "gold_answer": "[['2100-09-15 16:29:00']]",
            "pred_answer": "[['2100-09']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only joins patient\u2192microlab and orders by culturetakentime, but it never applies the \u201csince 11 months ago\u201d filter in the WHERE clause and even formats the date incorrectly (using strftime('%Y-%m') instead of returning the full timestamp). Both issues are confined to the SELECT and WHERE clauses (i.e., they don\u2019t require rethinking the JOINs or overall structure), so they are localized mistakes that can be fixed by adding the datetime filter and correcting the projection.",
            "error_type": "Local: missing time filter and incorrect date formatting"
        },
        "471462b1ab28191d8570e787": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Give me the five most common output events since 2100.",
            "gold_sql": "SELECT T1.celllabel FROM ( SELECT intakeoutput.celllabel, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM intakeoutput WHERE intakeoutput.cellpath LIKE '%output%' AND strftime('%Y',intakeoutput.intakeoutputtime) >= '2100' GROUP BY intakeoutput.celllabel ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "SELECT intakeoutput.cellpath FROM intakeoutput INNER JOIN patient ON intakeoutput.patientunitstayid = patient.patientunitstayid WHERE patient.unitadmittime < '2100' AND patient.unitadmittime > '1900' AND intakeoutput.cellpath LIKE '%output%' GROUP BY intakeoutput.cellpath ORDER BY count(intakeoutput.cellpath) DESC LIMIT 5",
            "gold_answer": "[['indwelling catheter'], ['stool'], ['urinary catheter output: indwelling/continuous ure'], ['urine'], ['urine catheter']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic and structure: it joins the intakeoutput table to patient on unitadmittime, applies incorrect time filters on patient.unitadmittime instead of filtering intakeoutput.intakeoutputtime by year \u2265 '2100', and selects cellpath rather than celllabel. These errors span multiple clauses (JOIN, WHERE, SELECT) and change the intended meaning of the query, requiring a significant rewrite rather than a local fix.",
            "error_type": "Global: incorrect join and filter logic"
        }
    },
    "macsql": {
        "81269a4127553519e317b8e4": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "When was the last time that patient 006-106828 had per iv flush: wrist r 20 gauge input today?",
            "gold_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-106828' ) ) AND intakeoutput.cellpath LIKE '%intake%' AND intakeoutput.celllabel = 'per iv flush: wrist r 20 gauge' AND datetime(intakeoutput.intakeoutputtime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(`intakeoutputtime`) FROM intakeoutput WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '006-106828' ) AND `cellpath` LIKE '%intake%' AND `celllabel` = 'per iv flush: wrist r 20 gauge' AND datetime(`intakeoutputtime`, 'start of day') = datetime('2100-12-31 23:59:00', 'start of day', '-0 day')",
            "gold_answer": "[['2100-12-31 11:00:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL simplifies the relationship between the patient and intakeoutput tables by directly equating intakeoutput.patientunitstayid to a single patient.patientunitstayid returned by the subquery on uniquepid. The ground truth SQL correctly handles potential multiple stays by joining through patienthealthsystemstayid and using IN, ensuring all relevant unit stays are considered. This mistake alters the fundamental data relationship and logic (it may miss the correct stay that has today's intakeoutput record), so it cannot be fixed by a minor syntax tweak but requires restructuring the join logic.",
            "error_type": "Global: join relationship error"
        },
        "a47c31b12b805a632b215dea": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "What was the diagnosis received by patient 027-102287 on the last hospital visit for the first time?",
            "gold_sql": "SELECT diagnosis.diagnosisname FROM diagnosis WHERE diagnosis.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-102287' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) ORDER BY diagnosis.diagnosistime ASC LIMIT 1",
            "pred_sql": "SELECT `diagnosisname` FROM diagnosis WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '027-102287' AND `hospitaldischargetime` IS NOT NULL ORDER BY `hospitaladmittime` DESC LIMIT 1 )",
            "gold_answer": "[['seizures - status epilepticus']]",
            "pred_answer": "[['encephalopathy - metabolic'], ['encephalopathy - toxic'], ['metabolic acidosis - lactic acidosis'], ['seizures - generalized tonic -clonic'], ['seizures - status epilepticus'], ['sinus tachycardia']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the intermediate step of identifying the hospital-level stay (patienthealthsystemstayid) and instead directly uses a single patientunitstayid, thereby failing to capture all relevant unit stays within the last hospital visit. This is not a simple typo or missing alias but a fundamental change to the way tables are related and the overall logic of the query. Fixing this requires restructuring the query to include the patienthealthsystemstayid subquery and then deriving all patientunitstayid values from it, which is beyond a localized clause adjustment.",
            "error_type": "Global: incorrect join logic"
        }
    },
    "llm2sql": {
        "2185bdc78a815e3114fbcbce": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "What are the top three drugs that are prescribed to patients within 2 months after they are prescribed clopidogrel bisulfate 75 mg po tabs in 2100?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'clopidogrel bisulfate 75 mg po tabs' AND strftime('%Y',medication.drugstarttime) = '2100' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE strftime('%Y',medication.drugstarttime) = '2100' ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.drugstarttime < T2.drugstarttime AND datetime(T2.drugstarttime) BETWEEN datetime(T1.drugstarttime) AND datetime(T1.drugstarttime,'+2 month') GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "WITH clopidogrel_prescriptions AS ( SELECT DISTINCT patientunitstayid, drugstarttime FROM medication WHERE drugname = 'clopidogrel bisulfate 75 mg po tabs' AND strftime('%Y', drugstarttime) = '2100' ), subsequent_drugs AS ( SELECT m.drugname, m.patientunitstayid FROM medication m INNER JOIN clopidogrel_prescriptions c ON m.patientunitstayid = c.patientunitstayid WHERE m.drugstarttime > c.drugstarttime AND m.drugstarttime <= datetime(c.drugstarttime, '+2 months') AND m.drugname != 'clopidogrel bisulfate 75 mg po tabs' ) SELECT drugname FROM subsequent_drugs GROUP BY drugname ORDER BY COUNT(drugname) DESC LIMIT 3;",
            "gold_answer": "[['1000 ml flex cont : sodium chloride 0.9 % iv soln'], ['4 ml vial : furosemide 10 mg/ml ij soln'], ['potassium chloride crys er 20 meq po tbcr']]",
            "pred_answer": "[['4 ml vial : furosemide 10 mg/ml ij soln'], ['famotidine 20 mg po tabs'], ['vancomycin hcl 1000 mg iv solr']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses patientunitstayid instead of patient.uniquepid, causing it to count drug prescriptions per unit stay rather than per unique patient. This changes the intended logic of ranking drugs by the number of patients receiving them within two months after clopidogrel. The error affects the core aggregation and grouping logic of the query, requiring restructuring beyond a single clause.",
            "error_type": "Global: incorrect grouping/aggregation logic"
        }
    }
}