{
    "dinsql": {
        "3cada3b262b36ccaf92ce91b": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "What was the first time that patient 010-10482 had the maximum value of -basos?",
            "gold_sql": "SELECT lab.labresulttime FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '010-10482' ) ) AND lab.labname = '-basos' ORDER BY lab.labresult DESC, lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT labresulttime FROM lab WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '010-10482' ) AND labname = '-basos' ORDER BY CAST(labresult AS REAL) DESC, labresulttime ASC LIMIT 1",
            "gold_answer": "[['2100-05-15 17:04:00']]",
            "pred_answer": "[['2100-05-18 07:39:00']]",
            "is_correct": false,
            "reasoning": "The ground truth SQL first finds the patienthealthsystemstayid for uniquepid '010-10482', then retrieves all related patientunitstayid values to include every ICU stay for that patient. The predicted SQL, however, directly equates patientunitstayid to a single value retrieved by uniquepid, thus ignoring additional stays and changing the dataset over which the max is computed. This is not a minor fix in a single clause but alters the overall table relationship logic.",
            "error_type": "Global: incorrect join relationship"
        },
        "488a44148ca1110d01cc6bf6": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Can i know the 3-month survival rate of patients diagnosed with acute copd exacerbation who were then prescribed methylprednisolone sod suc(pf)?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN (patient.hospitaldischargestatus = 'expired') AND(strftime('%J',patient.hospitaldischargetime) - strftime('%J',T1.diagnosistime)) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'acute copd exacerbation' AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnosis.diagnosistime) >= 365 / 4 GROUP BY patient.uniquepid HAVING MIN(diagnosis.diagnosistime) = diagnosis.diagnosistime ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'methylprednisolone sod suc(pf)' GROUP BY patient.uniquepid HAVING MAX(medication.drugstarttime) = medication.drugstarttime ) AS T2 ON T1.uniquepid = T2.uniquepid JOIN patient ON T1.uniquepid = patient.uniquepid WHERE T1.diagnosistime < T2.drugstarttime GROUP BY patient.uniquepid HAVING MAX(patient.hospitaldischargetime) = patient.hospitaldischargetime )",
            "pred_sql": "WITH DiagnosisPatients AS ( SELECT DISTINCT patientunitstayid, diagnosistime FROM diagnosis WHERE diagnosisname = 'acute copd exacerbation' ), MedicationPatients AS ( SELECT DISTINCT patientunitstayid FROM medication WHERE drugname = 'methylprednisolone sod suc(pf)' ), SurvivalPatients AS ( SELECT DISTINCT T1.patientunitstayid FROM DiagnosisPatients AS T1 INNER JOIN MedicationPatients AS T2 ON T1.patientunitstayid = T2.patientunitstayid INNER JOIN patient AS T3 ON T1.patientunitstayid = T3.patientunitstayid WHERE datetime(T3.hospitaldischargetime) > datetime(T1.diagnosistime, '+91 days') ), TotalPatients AS ( SELECT DISTINCT T1.patientunitstayid FROM DiagnosisPatients AS T1 INNER JOIN MedicationPatients AS T2 ON T1.patientunitstayid = T2.patientunitstayid ) SELECT CAST(COUNT(SurvivalPatients.patientunitstayid) AS FLOAT) / COUNT(TotalPatients.patientunitstayid) AS survival_rate FROM TotalPatients LEFT JOIN SurvivalPatients ON TotalPatients.patientunitstayid = SurvivalPatients.patientunitstayid",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic of the intended query. It does not enforce that the prescription occurs after the first diagnosis (it merely joins on patientunitstayid), it does not use the first diagnosis event, it does not filter patients whose diagnosis occurred at least three months ago relative to the fixed current time, and it misdefines survival by comparing discharge time to diagnosis + 91 days rather than checking death within three months. These issues span multiple clauses and alter the overall structure and meaning of the query, requiring significant restructuring to correct.",
            "error_type": "Global: incorrect join and survival logic"
        },
        "e6422d952e5bd12cfbf2a976": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Can you tell me the minimum total hospital cost if it involves a albumin lab test since 2100?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.patientunitstayid IN ( SELECT lab.patientunitstayid FROM lab WHERE lab.labname = 'albumin' ) ) AND strftime('%Y',cost.chargetime) >= '2100' GROUP BY cost.patienthealthsystemstayid ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) FROM ( SELECT SUM(DISTINCT C.cost) AS total_cost FROM cost AS C JOIN lab AS L ON C.patientunitstayid = L.patientunitstayid JOIN patient AS P ON C.patienthealthsystemstayid = P.patienthealthsystemstayid WHERE L.labname = 'albumin' AND datetime(L.labresulttime) >= datetime('2100-01-01 00:00:00') AND C.eventtype = 'lab' AND P.hospitaldischargetime IS NOT NULL GROUP BY C.patienthealthsystemstayid )",
            "gold_answer": "[['122.78']]",
            "pred_answer": "[['59.74']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of the original: it joins cost to lab directly on patientunitstayid (instead of filtering by patienthealthsystemstayid), restricts costs to only lab event types, sums DISTINCT costs (dropping duplicate costs), filters on labresulttime rather than cost.chargetime, and imposes a hospitaldischargetime IS NOT NULL condition. These changes span multiple clauses and alter the intended meaning and scope of the calculation rather than fixing a localized mistake.",
            "error_type": "Global: flawed query logic"
        }
    },
    "codes": {
        "28eacb920c6c640e5f539aef": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Tell me the difference between the total input and output of patient 002-10122 today.",
            "gold_sql": "SELECT ( SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-10122' ) ) AND intakeoutput.cellpath LIKE '%intake%' AND datetime(intakeoutput.intakeoutputtime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') ) - ( SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '002-10122' ) ) AND intakeoutput.cellpath LIKE '%output%' AND datetime(intakeoutput.intakeoutputtime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-0 day') )",
            "pred_sql": "SELECT sum(CASE WHEN intakeoutput.cellpath LIKE '%input%' THEN intakeoutput.cellvaluenumeric ELSE 0 END) - sum(CASE WHEN intakeoutput.cellpath LIKE '%output%' THEN intakeoutput.cellvaluenumeric ELSE 0 END) AS difference FROM intakeoutput INNER JOIN patient ON intakeoutput.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '002-10122' AND intakeoutput.intakeoutputtime LIKE '2100-05-21%'",
            "gold_answer": "[['-725.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query\u2019s logic (subtracting summed inputs minus outputs) matches the intent, but its WHERE clause is wrong. It uses intakeoutput.intakeoutputtime LIKE '2100-05-21%' instead of the prescribed datetime comparison for today ('2100-12-31'), and even uses '%input%' instead of '%intake%'. These mistakes are confined to specific filter conditions and do not require restructuring the overall query, so this is a localized temporal/filtering error.",
            "error_type": "Local: other-temporal error"
        },
        "0506de2f8e7cd7c9aa03f59d": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When was patient 004-10164's first minimum total bilirubin in 04/this year?",
            "gold_sql": "SELECT lab.labresulttime FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-10164' ) ) AND lab.labname = 'total bilirubin' AND datetime(lab.labresulttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',lab.labresulttime) = '04' ORDER BY lab.labresult ASC, lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT vitalaperiodic.observationtime FROM vitalaperiodic INNER JOIN patient ON vitalaperiodic.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '004-10164' AND strftime('%m', vitalaperiodic.observationtime) = '04' AND vitalaperiodic.vitalaperiodicid IN ( SELECT vitalaperiodic.vitalaperiodicid FROM vitalaperiodic INNER JOIN patient ON vitalaperiodic.patientunitstayid = patient.patientunitstayid WHERE patient.uniquepid = '004-10164' AND strftime('%m', vitalaperiodic.observationtime) = '04' GROUP BY vitalaperiodic.observationtime, vitalaperiodic.systemicmean ORDER BY sum(vitalaperiodic.systemicmean) ASC LIMIT 1 )",
            "gold_answer": "[['2100-04-15 22:50:00']]",
            "pred_answer": "no such table: vitalaperiodic",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent table \"vitalaperiodic\" instead of the correct \"vitalperiodic\". This is a simple table name typo/local reference mistake. It can be fixed by correcting the table name without altering the overall query structure or logic flow. Thus, this is a localized error.",
            "error_type": "Local: table reference error"
        },
        "25b215ec07b541e4460d1bd8": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Calculate the difference between the total input and output volume for patient 005-11459 on the last icu visit.",
            "gold_sql": "SELECT ( SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '005-11459' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime DESC LIMIT 1 ) AND intakeoutput.cellpath LIKE '%intake%' ) - ( SELECT SUM(intakeoutput.cellvaluenumeric) FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '005-11459' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime DESC LIMIT 1 ) AND intakeoutput.cellpath LIKE '%output%' )",
            "pred_sql": "SELECT sum(intakeoutput.cellvaluenumeric) - sum(intakeoutput.cellvaluenumeric) FROM patient INNER JOIN intakeoutput ON patient.patientunitstayid = intakeoutput.patientunitstayid WHERE patient.uniquepid = '005-11459'",
            "gold_answer": "[['675.1']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely abandons the intended logic of the query by: 1) joining patient to intakeoutput without restricting to the last ICU stay (no filter on unitdischargetime IS NOT NULL or ORDER BY patient.unitadmittime DESC LIMIT 1), 2) failing to distinguish intake vs output via cellpath LIKE filters, and 3) subtracting the same SUM(intakeoutput.cellvaluenumeric) from itself. These omissions are not localized syntax mistakes but rather fundamental logical flaws in how the query is structured and how tables are combined to achieve the goal. Fixing this would require restructuring the query to retrieve the correct stay and apply correct filters, not just tweaking a clause.",
            "error_type": "Global: incorrect query logic"
        },
        "afe9c889301128590f41e44b": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "When was the first time that patient 007-10026 had his/her respiratory rate checked?",
            "gold_sql": "SELECT vitalperiodic.observationtime FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '007-10026' ) ) AND vitalperiodic.respiration IS NOT NULL ORDER BY vitalperiodic.observationtime ASC LIMIT 1",
            "pred_sql": "SELECT observationtime FROM vitalperiodic WHERE patientunitstayid = 11026 AND respiration IS NOT NULL ORDER BY observationtime ASC LIMIT 1",
            "gold_answer": "[['2100-04-20 03:04:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL makes an incorrect assumption about the patientunitstayid by hard-coding = 11026 instead of deriving it via the patient.uniquepid = '007-10026'. This is a localized error in the WHERE clause filter, not a fundamental join or structural issue. It can be fixed by replacing the literal with the correct subquery or lookup, without altering the overall query structure.",
            "error_type": "Local: value parsing error"
        }
    },
    "llm2sql": {
        "62dc915b80b2af835e9f281f": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "Since 1 year ago, what are the top four most common specimens tested on patients within 2 months after the diagnosis of peritonitis - spontaneous bacterial?",
            "gold_sql": "SELECT T3.culturesite FROM ( SELECT T2.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'peritonitis - spontaneous bacterial' AND datetime(diagnosis.diagnosistime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT patient.uniquepid, microlab.culturesite, microlab.culturetakentime FROM microlab JOIN patient ON microlab.patientunitstayid = patient.patientunitstayid WHERE datetime(microlab.culturetakentime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.culturetakentime AND datetime(T2.culturetakentime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime,'+2 month') GROUP BY T2.culturesite ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT T2.culturesite, COUNT(*) as NumTests FROM diagnosis AS T1 JOIN microlab AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.diagnosisname = 'peritonitis - spontaneous bacterial' AND datetime(T1.diagnosistime) >= datetime('2100-12-31 23:59:00', '-1 year') AND datetime(T2.culturetakentime) <= datetime(T1.diagnosistime, '+2 months') GROUP BY T2.culturesite ORDER BY NumTests DESC LIMIT 4;",
            "gold_answer": "[['nasopharynx']]",
            "pred_answer": "[['blood, venipuncture', '12.0'], ['nasopharynx', '6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only applies an upper bound on microlab.culturetakentime (<= diagnosis time + 2 months) but omits the lower bound (>= diagnosis time). This mistake is confined to the WHERE clause filters and does not require restructuring joins or the overall query logic. It is a localized error in the condition logic.",
            "error_type": "Local: incorrect WHERE clause filter"
        },
        "ab74a84f2dac454c1858fb2c": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "When did patient 031-10510's first microbiological test occur on the first hospital visit?",
            "gold_sql": "SELECT microlab.culturetakentime FROM microlab WHERE microlab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10510' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) ORDER BY microlab.culturetakentime ASC LIMIT 1",
            "pred_sql": "SELECT T2.culturetakentime FROM patient AS T1 INNER JOIN microlab AS T2 ON T1.patientunitstayid = T2.patientunitstayid WHERE T1.uniquepid = '031-10510' ORDER BY T1.hospitaladmittime ASC LIMIT 1;",
            "gold_answer": "[['2100-05-20 17:15:00']]",
            "pred_answer": "[['2100-05-30 22:27:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins patient and microlab directly and orders only by the patient admission time, without filtering for the first completed hospital visit (hospitaldischargetime IS NOT NULL) or ordering by the test time within that visit. This changes the overall logic of the query\u2014it does not properly identify the first hospital encounter nor the earliest microbiological test within that encounter. Fixing this would require restructuring the query (adding subqueries or additional filters and ordering by microlab.culturetakentime), not just tweaking a single clause.",
            "error_type": "Global: flawed join and ordering logic"
        }
    },
    "deepseek": {
        "6d7e48a3d6e297467b1e7db0": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What was the drug that was prescribed to patient 007-10111 within 2 days after the ercp on the first hospital visit?",
            "gold_sql": "SELECT T2.drugname FROM ( SELECT patient.uniquepid, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '007-10111' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) AND treatment.treatmentname = 'ercp' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '007-10111' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.drugstarttime AND datetime(T2.drugstarttime) BETWEEN datetime(T1.treatmenttime) AND datetime(T1.treatmenttime,'+2 day')",
            "pred_sql": "SELECT M.drugname FROM patient P JOIN treatment T ON P.patientunitstayid = T.patientunitstayid JOIN medication M ON P.patientunitstayid = M.patientunitstayid WHERE P.uniquepid = '007-10111' AND T.treatmentname LIKE '%erpc%' AND M.drugstarttime >= T.treatmenttime AND M.drugstarttime <= datetime(T.treatmenttime, '+2 days') AND P.hospitaladmittime = ( SELECT MIN(hospitaladmittime) FROM patient WHERE uniquepid = '007-10111' );",
            "gold_answer": "[['albuterol'], ['albuterol'], ['aspart insulin'], ['colace'], ['colace'], ['dextrose 50%'], ['dextrose 50%'], ['dextrose 50%'], ['dextrose 50%'], ['dilaudid'], ['dilaudid'], ['dilaudid'], ['dilaudid'], ['dulcolax laxative'], ['fentanyl'], ['fentanyl'], ['glucagen'], ['glucagen'], ['lasix'], ['lasix'], ['lidocaine'], ['lidocaine'], ['milk of magnesia'], ['oxycodone'], ['oxycodone'], ['oxycodone'], ['oxycodone'], ['oxycodone'], ['phenylephrine'], ['phenylephrine'], ['piperacillin-tazobactam'], ['piperacillin-tazobactam'], ['piperacillin-tazobactam'], ['piperacillin-tazobactam'], ['piperacillin-tazobactam'], ['piperacillin-tazobactam'], ['propofol'], ['propofol'], ['rocuronium'], ['rocuronium'], ['succinylcholine'], ['succinylcholine'], ['tylenol'], ['vancomycin'], ['vancomycin'], ['vancomycin'], ['vancomycin'], ['vancomycin'], ['zofran']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally alters the intended logic in several places rather than just fixing a minor typo.  First, it joins treatment and medication on patientunitstayid instead of using the hospital\u2010level patienthealthsystemstayid, changing the scope from a hospital visit to an ICU stay.  Second, it filters the earliest visit by comparing hospitaladmittime only, omitting the hospitaldischargetime IS NOT NULL condition, which breaks the \u201cfirst completed hospital visit\u201d requirement.  Third, it uses a LIKE '%erpc%' filter (misspelling the treatment name) and even misuses the SQLite datetime modifier '+2 days' instead of '+2 day'.  All of these issues require restructuring the joins and filtering logic, so the error is global.",
            "error_type": "Global: join-relationship error"
        },
        "043d8bebfcf620d4a94036f0": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Since patient 022-1011's admission in the icu, how many hours have elapsed?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',patient.unitadmittime) ) FROM patient WHERE patient.uniquepid = '022-1011' AND patient.unitdischargetime IS NULL",
            "pred_sql": "SELECT ROUND( ( strftime('%s', '2100-12-31 23:59:00') - strftime('%s', patient.unitadmittime) ) / 3600 ) AS hours_since_icu_admission FROM patient WHERE uniquepid = '022-1011';",
            "gold_answer": "[['787.333']]",
            "pred_answer": "[['787.0']]",
            "is_correct": false,
            "reasoning": "The predicted query omits the critical WHERE condition \"patient.unitdischargetime IS NULL\" that restricts to the current ICU admission. This missing filter changes the overall logic of the query (it could include past admissions or discharged records), and thus it is not a simple syntax or localized error but a global logic error.",
            "error_type": "Global: missing-current-admission-filter"
        },
        "f4edb21f87ce00914040c235": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "How many hours have passed since the first time patient 004-11678 underwent a procedure on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J',treatment.treatmenttime) ) FROM treatment WHERE treatment.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '004-11678' AND patient.hospitaldischargetime IS NULL ) ) ORDER BY treatment.treatmenttime ASC LIMIT 1",
            "pred_sql": "SELECT ROUND((strftime('%s', '2100-12-31 23:59:00') - strftime('%s', ( SELECT MIN(treatmenttime) FROM treatment WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '004-11678' AND hospitaldischargetime IS NULL ) )) / 3600) ) AS hours_since_first_procedure;",
            "gold_answer": "[['150.55']]",
            "pred_answer": "[['4132832563.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is making a mistake solely in the arithmetic expression inside the SELECT clause \u2013 it divides only the second timestamp by 3600 instead of dividing the overall difference, and misplaces the parentheses. The table references, WHERE filtering and subqueries are otherwise correct for the current hospital visit. This is a localized error in the computation of hours, not a flaw in the overall query structure or joins. Therefore, it is a Local error.",
            "error_type": "Local: temporal arithmetic error"
        }
    },
    "qwen3": {
        "7c73804551fbdd9ef9889a18": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "How many people were given oxygen therapy (> 60%) - >90% two or more times in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.uniquepid ) FROM ( SELECT patient.uniquepid, COUNT(*) AS C1 FROM patient WHERE patient.patientunitstayid = ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'oxygen therapy (> 60%) - >90%' AND strftime('%Y',treatment.treatmenttime) = '2100' ) GROUP BY patient.uniquepid ) AS T1 WHERE T1.C1 >= 2",
            "pred_sql": "SELECT COUNT(*) AS count FROM ( SELECT p.uniquepid FROM vitalperiodic v JOIN patient p ON v.patientunitstayid = p.patientunitstayid WHERE v.sao2 > 90 AND strftime('%Y', v.observationtime) = '2100' GROUP BY p.uniquepid HAVING COUNT(*) >= 2 ) AS subquery;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['1327.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL entirely changes the logic by using the vitalperiodic table and checking soh2 > 90 instead of looking for the specific treatment 'oxygen therapy (> 60%) - >90%' in the treatment table. This is not a minor fix in a single clause but a fundamental change in which table and condition define the event, thus altering the intended meaning of the query. It requires restructuring the query\u2019s core logic, so this is a global error.",
            "error_type": "Global: incorrect table and logic"
        },
        "502d5227ec779976f20ab9a0": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "Could you fetch the top five most common specimens tested in 2100 for me?",
            "gold_sql": "SELECT T1.culturesite FROM ( SELECT microlab.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM microlab WHERE strftime('%Y',microlab.culturetakentime) = '2100' GROUP BY microlab.culturesite ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "SELECT labname FROM lab WHERE strftime('%Y', labresulttime) = '2100' GROUP BY labname ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['blood, venipuncture'], ['nasopharynx'], ['sputum, tracheal specimen'], ['urine, catheter specimen'], ['urine, voided specimen']]",
            "pred_answer": "[['bedside glucose'], ['chloride'], ['glucose'], ['potassium'], ['sodium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the lab table and labname column, whereas the question and ground truth refer to specimen sites in the microlab table (culturesite). This is not a minor syntax or alias issue but a fundamental misuse of the wrong table and column, which changes the entire meaning of the query.",
            "error_type": "Global: wrong table used"
        },
        "bd60c97129317d3f97c71815": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "Can you list the top five most frequent procedures that patients received within the same hospital visit after having systemic glucocorticoid in 2100?",
            "gold_sql": "SELECT T3.treatmentname FROM ( SELECT T2.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'systemic glucocorticoid' AND strftime('%Y',treatment.treatmenttime) = '2100' ) AS T1 JOIN ( SELECT patient.uniquepid, treatment.treatmentname, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE strftime('%Y',treatment.treatmenttime) = '2100' ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.treatmenttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.treatmentname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT t.treatmentname FROM medication m JOIN patient p1 ON m.patientunitstayid = p1.patientunitstayid JOIN patient p2 ON p1.patienthealthsystemstayid = p2.patienthealthsystemstayid JOIN treatment t ON t.patientunitstayid = p2.patientunitstayid WHERE m.drugname LIKE '%glucocorticoid%' AND strftime('%Y', m.drugstarttime) = '2100' AND t.treatmenttime > m.drugstarttime GROUP BY t.treatmentname ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['anticonvulsant - gabapentin'], ['antihyperlipidemic agent - hmg-coa reductase inhibitor'], ['chemotherapy'], ['cultures - sputum'], ['intravenous diuretic - iv furosemide'], ['oxygen therapy (> 60%) - non-rebreather mask'], ['stress ulcer prophylaxis - pantoprazole'], ['systemic glucocorticoid'], ['therapeutic antibacterials - penicillins']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query filters on medication.drugname and joins medication to two patient aliases before finally pulling treatments. The original question asked about \u2018systemic glucocorticoid\u2019 in the treatment table. The predicted SQL\u2019s error is entirely in the FROM/JOIN clause (it uses the wrong table for the initial glucocorticoid event and incorrect patient joins), but the overall structure (grouping, ordering, limit) remains intact. This is a localized table reference/join mistake, not a broader logical restructuring error.",
            "error_type": "Local: incorrect-table/join error"
        },
        "818d2b11247df11000ce8ce0": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What are the top three most frequent specimens ordered for patients after being diagnosed with bacteremia within the same hospital visit this year?",
            "gold_sql": "SELECT T3.culturesite FROM ( SELECT T2.culturesite, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime, patient.patienthealthsystemstayid FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'bacteremia' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, microlab.culturesite, microlab.culturetakentime, patient.patienthealthsystemstayid FROM microlab JOIN patient ON microlab.patientunitstayid = patient.patientunitstayid WHERE datetime(microlab.culturetakentime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.culturetakentime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.culturesite ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "WITH BacteremiaDiagnoses AS ( SELECT patientunitstayid, MIN(diagnosistime) AS first_diagnosistime FROM diagnosis WHERE diagnosisname = 'bacteremia' AND strftime('%Y', diagnosistime) = '2100' GROUP BY patientunitstayid ) SELECT ml.culturesite FROM microlab ml INNER JOIN BacteremiaDiagnoses bd ON ml.patientunitstayid = bd.patientunitstayid AND ml.culturetakentime > bd.first_diagnosistime GROUP BY ml.culturesite ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['blood, venipuncture'], ['csf'], ['sputum, expectorated']]",
            "pred_answer": "[['blood, venipuncture'], ['nasopharynx'], ['sputum, expectorated']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters diagnoses to this year but does not restrict microlab records to this year, nor does it enforce the \u2018same hospital visit\u2019 requirement via patient.patienthealthsystemstayid. Instead it joins on patientunitstayid, which ties to individual ICU/unit stays rather than the overall hospital encounter. This incorrect join key and missing filter change the core logic of the query, leading to different top specimens.",
            "error_type": "Global: incorrect join/key relationship"
        }
    },
    "o4-mini": {
        "f605d9fb2fedb46a1785bdd7": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "What is the average total hospital cost that includes a procedure named folate in 2100?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.patientunitstayid IN ( SELECT treatment.patientunitstayid FROM treatment WHERE treatment.treatmentname = 'folate' ) ) AND strftime('%Y',cost.chargetime) = '2100' GROUP BY cost.patienthealthsystemstayid ) AS T1",
            "pred_sql": "WITH folate_stays AS ( SELECT DISTINCT p.patienthealthsystemstayid FROM lab l JOIN patient p ON l.patientunitstayid = p.patientunitstayid WHERE l.labname LIKE '%folate%' AND strftime('%Y', l.labresulttime) = '2100' ), stay_costs AS ( SELECT c.patienthealthsystemstayid, SUM(c.cost) AS total_cost FROM cost c WHERE c.patienthealthsystemstayid IN (SELECT patienthealthsystemstayid FROM folate_stays) GROUP BY c.patienthealthsystemstayid ) SELECT AVG(total_cost) AS average_total_hospital_cost FROM stay_costs;",
            "gold_answer": "[['7410.78']]",
            "pred_answer": "[['4516.97']]",
            "is_correct": false,
            "reasoning": "The predicted query identifies stays by looking for lab records with labname LIKE '%folate%' in the lab table, whereas the question refers to a procedure named folate, which is stored in the treatment table. This error affects the overall logic of how relevant patient stays are selected and requires restructuring the query to use the treatment table instead of lab. Thus it is not a localized syntax mistake but a fundamental logical error in table usage and joins.",
            "error_type": "Global: join/table selection error"
        },
        "cb3541829199c9ac184de2b0": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "Find out how many patients were admitted to the hospital.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient",
            "pred_sql": "SELECT COUNT(DISTINCT patienthealthsystemstayid) AS num_hospital_admissions FROM patient WHERE hospitaladmittime IS NOT NULL;",
            "gold_answer": "[['1384.0']]",
            "pred_answer": "[['1542.0']]",
            "is_correct": false,
            "reasoning": "The predicted query counts distinct patienthealthsystemstayid (hospital admissions) rather than distinct uniquepid (patients), fundamentally altering the metric and misinterpreting the question. This is not a simple syntax or reference fix but a change in the core logic/aggregation of the query.",
            "error_type": "Global: incorrect aggregation column"
        }
    },
    "macsql": {
        "8c1b4dadcbd7b9dfc08e81bb": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "When did patient 006-102786 have an input for the first time on 12/27/this year?",
            "gold_sql": "SELECT intakeoutput.intakeoutputtime FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '006-102786' ) ) AND intakeoutput.cellpath LIKE '%intake%' AND datetime(intakeoutput.intakeoutputtime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m-%d',intakeoutput.intakeoutputtime) = '12-27' ORDER BY intakeoutput.intakeoutputtime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(`intakeoutputtime`) AS first_intake_time FROM intakeoutput WHERE `patientunitstayid` = ( SELECT `patientunitstayid` FROM patient WHERE `uniquepid` = '006-102786' ) AND datetime(`intakeoutputtime`, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') AND strftime('%m-%d', `intakeoutputtime`) = '12-27' AND `cellpath` LIKE '%intake%'",
            "gold_answer": "[['2100-12-27 00:00:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only filters on a single patientunitstayid returned by \u2018SELECT patientunitstayid FROM patient WHERE uniquepid = \u2026\u2019, whereas the correct query needs to consider all unit stays associated with that patienthealthsystemstayid. This is not a minor syntax mistake but a faulty join/relationship logic that changes which rows are examined, causing the query to return no results. Hence, it\u2019s a global error in the join/relationship.",
            "error_type": "Global: join relationship error"
        },
        "254f0f752d2b8c107c1f3727": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "What was the first time that patient 027-101846 had the minimum amount of o2 sat (%) on the first hospital visit?",
            "gold_sql": "SELECT lab.labresulttime FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '027-101846' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime ASC LIMIT 1 ) ) AND lab.labname = 'o2 sat (%)' ORDER BY lab.labresult ASC, lab.labresulttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(observationtime) AS first_min_sao2_time FROM vitalperiodic WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '027-101846' AND hospitaladmittime = ( SELECT MIN(hospitaladmittime) FROM patient WHERE uniquepid = '027-101846' ) ) AND sao2 = ( SELECT MIN(sao2) FROM vitalperiodic WHERE patientunitstayid = ( SELECT patientunitstayid FROM patient WHERE uniquepid = '027-101846' AND hospitaladmittime = ( SELECT MIN(hospitaladmittime) FROM patient WHERE uniquepid = '027-101846' ) ) )",
            "gold_answer": "[['2100-09-08 16:25:00']]",
            "pred_answer": "[['2100-09-10 19:26:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the vitalperiodic table and its sao2 and observationtime columns, whereas the question and ground truth refer explicitly to the lab table with labname = 'o2 sat (%)' and labresulttime. This is not a minor syntax or column name mistake but a fundamental change in data source and logic, altering the intended meaning of the query.",
            "error_type": "Global: wrong table/field selection"
        }
    }
}