{
    "codes": {
        "0ebd60232cb32fabbf55b709": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many prescriptions for 10 ml vial : calcium chloride 10 % iv soln were made?",
            "gold_sql": "SELECT COUNT(*) FROM medication WHERE medication.drugname = '10 ml vial : calcium chloride 10 % iv soln'",
            "pred_sql": "SELECT sum(CASE WHEN medication.drugname = 'Calcium Chloride 10 % IV Soln' THEN 1 ELSE 0 END) FROM medication INNER JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname LIKE 'Calcium Chloride 10 % IV Soln'",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly filters on the drug name 'Calcium Chloride 10 % IV Soln' using LIKE, instead of the exact value '10 ml vial : calcium chloride 10 % iv soln'. This mismatch in the literal value causes no rows to match, yielding NULL. The error is confined to the WHERE clause\u2019s filter value and does not alter the overall query structure or table relationships.",
            "error_type": "Local: value parsing error"
        },
        "d339235d5a63ad8b045e5ffc": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Can you retrieve the time of patient 031-10198's hospital admission since 1 year ago?",
            "gold_sql": "SELECT patient.hospitaladmittime FROM patient WHERE patient.uniquepid = '031-10198' AND datetime(patient.hospitaladmittime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT patient.hospitaladmittime FROM patient INNER JOIN vitalperiodic ON patient.patientunitstayid = vitalperiodic.patientunitstayid WHERE patient.uniquepid = '031-10198' AND datetime(vitalperiodic.observationtime) >= datetime('2100-12-31', '1 year')",
            "gold_answer": "[['2100-08-12 23:40:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN to vitalperiodic and filters on vitalperiodic.observationtime instead of patient.hospitaladmittime, and also misuses the datetime offset (uses '1 year' instead of '-1 year'). These mistakes alter the fundamental logic and structure of the query, joining an unrelated table and applying the wrong filter column, so the error cannot be fixed by a small tweak in a single clause.",
            "error_type": "Global: incorrect join and filter logic"
        },
        "f85c9633e450cf4188a98ce3": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Get me the top three most common diagnoses.",
            "gold_sql": "SELECT T1.diagnosisname FROM ( SELECT diagnosis.diagnosisname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnosis GROUP BY diagnosis.diagnosisname ) AS T1 WHERE T1.C1 <= 3",
            "pred_sql": "SELECT diagnosis.diagnosisname FROM diagnosis INNER JOIN ( SELECT patientunitstayid, count(diagnosisname) AS num FROM diagnosis GROUP BY patientunitstayid ORDER BY num DESC LIMIT 3 ) AS t3 ON diagnosis.patientunitstayid = t3.patientunitstayid",
            "gold_answer": "[['acute renal failure'], ['acute respiratory failure'], ['pneumonia']]",
            "pred_answer": "[['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['abdominal pain / tenderness'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute pyelonephritis'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - acute tubular necrosis'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to hypovolemia/decreased circulating volume'], ['acute renal failure - due to sepsis'], ['acute renal failure - due to sepsis'], ['acute renal failure - due to sepsis'], ['acute renal failure - due to sepsis'], ['acute renal failure - due to sepsis'], ['acute renal failure - due to sepsis'], ['acute renal failure - due to sepsis'], ['acute renal failure - due to sepsis']]",
            "is_correct": false,
            "reasoning": "The predicted SQL finds the top three patients by number of diagnoses (GROUP BY patientunitstayid) and then joins back to list that patients\u2019 diagnoses, rather than finding the top three diagnoses by frequency (GROUP BY diagnosisname). This mistake spans the aggregation, join, and overall query logic, requiring restructuring the query to group by diagnosis and apply ranking. It is not a simple typo or localized clause fix.",
            "error_type": "Global: join logic error"
        },
        "920217f577771de7e12ed857": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was the name of the organism found in patient 031-10329's last urine, catheter specimen microbiology test?",
            "gold_sql": "SELECT microlab.organism FROM microlab WHERE microlab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10329' ) ) AND microlab.culturesite = 'urine, catheter specimen' ORDER BY microlab.culturetakentime DESC LIMIT 1",
            "pred_sql": "SELECT organism FROM microlab WHERE patientunitstayid = 3110329 AND culturesite = 'urine, catheter specimen' AND culturetakentime = ( SELECT culturetakentime FROM microlab WHERE patientunitstayid = 3110329 AND culturesite = 'urine, catheter specimen' ORDER BY culturetakentime DESC LIMIT 1 )",
            "gold_answer": "[['klebsiella pneumoniae']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL hardcodes patientunitstayid = 3110329 instead of looking up the correct patientunitstayid via the patient.uniquepid field. This is a column/reference mapping error in the WHERE clause and does not require reworking the overall query structure\u2014just correcting the patient identifier join. Therefore, it\u2019s a local error.",
            "error_type": "Local: column reference error"
        },
        "3369b594f521d600670ecb74": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Compute the number of times patient 018-101040 was prescribed naloxone hcl since 1 year ago.",
            "gold_sql": "SELECT COUNT(*) FROM medication WHERE medication.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '018-101040' ) ) AND medication.drugname = 'naloxone hcl' AND datetime(medication.drugstarttime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT count(DISTINCT medication.patientunitstayid) FROM medication INNER JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'naloxone hcl' AND strftime('%Y', medication.drugstarttime) < strftime('%Y', 'now', '-1 year')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL not only fails to filter on the specific patient (missing any condition on patient.uniquepid), it also uses COUNT(DISTINCT patientunitstayid) instead of counting events, applies the wrong date comparison with strftime('%Y') against 'now' (instead of datetime('2100-12-31 23:59:00','-1 year')), and even inverts the date logic (< instead of \u2265). These mistakes span multiple clauses (JOIN, WHERE, aggregation) and fundamentally alter the query\u2019s intent, so it\u2019s a global error.",
            "error_type": "Global: incorrect filter and aggregation logic"
        },
        "1538db043c5c441dc0f2d160": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What was patient 031-10302's first tested specimen in 04/this year?",
            "gold_sql": "SELECT microlab.culturesite FROM microlab WHERE microlab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10302' ) ) AND datetime(microlab.culturetakentime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',microlab.culturetakentime) = '04' ORDER BY microlab.culturetakentime ASC LIMIT 1",
            "pred_sql": "SELECT lab.labname FROM lab INNER JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE strftime('%Y %m', lab.labresulttime) = '2100 04' AND patient.uniquepid = '031-10302' LIMIT 1",
            "gold_answer": "[['blood, venipuncture']]",
            "pred_answer": "[['anion gap']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls from the lab table and selects lab.labname, whereas the question asks for the first tested specimen (microlab.culturesite). This is not a minor column or alias fix but a fundamental choice of the wrong table and field, altering the entire query logic. Therefore it is a Global error.",
            "error_type": "Global: wrong table used"
        },
        "f96e1a80f60671c7b7c3d8a3": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "How many prescriptions were prescribed in 2100 for dextrose 50% vial?",
            "gold_sql": "SELECT COUNT(*) FROM medication WHERE medication.drugname = 'dextrose 50% vial' AND strftime('%Y',medication.drugstarttime) = '2100'",
            "pred_sql": "SELECT count(DISTINCT medication.medicationid) FROM medication INNER JOIN allergy ON medication.patientunitstayid = allergy.patientunitstayid INNER JOIN vitalperiodic ON medication.patientunitstayid = vitalperiodic.patientunitstayid WHERE allergy.drugname = 'dextrose 50% vial' AND medication.drugname = 'dextrose 50% vial'",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces INNER JOINs to the allergy and vitalperiodic tables, filtering by allergy.drugname, which is unrelated to the question about counting prescriptions. This fundamentally alters query logic and table relationships, requiring restructuring the entire query rather than fixing a small clause. Therefore, this is a global error in the join strategy.",
            "error_type": "Global: join error"
        },
        "3a4a57393220eaed3080ce41": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "What did patient 030-10040 receive on the last icu visit as their last input?",
            "gold_sql": "SELECT intakeoutput.celllabel FROM intakeoutput WHERE intakeoutput.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '030-10040' ) AND patient.unitdischargetime IS NOT NULL ORDER BY patient.unitadmittime DESC LIMIT 1 ) AND intakeoutput.cellpath LIKE '%intake%' ORDER BY intakeoutput.intakeoutputtime DESC LIMIT 1",
            "pred_sql": "SELECT intakeoutput.cellpath FROM patient INNER JOIN intakeoutput ON patient.patientunitstayid = intakeoutput.patientunitstayid WHERE patient.patientunitstayid = 3010040 ORDER BY intakeoutput.intakeoutputoffset DESC LIMIT 1",
            "gold_answer": "[['ns']]",
            "pred_answer": "no such column: intakeoutput.intakeoutputoffset",
            "is_correct": false,
            "reasoning": "The predicted query fails to execute because it orders by intakeoutput.intakeoutputoffset, a column that doesn\u2019t exist in the intakeoutput table. That is a simple table/column reference error localized to the ORDER BY clause and can be fixed by using the correct column (e.g., intakeoutputtime). This does not require restructuring the overall logic into subqueries or JOIN conditions, so it is a Local error.",
            "error_type": "Local: table/column reference error"
        },
        "01adde799ca93fbb843aea48": {
            "db_id": "eicu",
            "generator_name": "codes",
            "question": "Is the fio2 value second measured on the last hospital visit in patient 031-10622 greater than the value from the first measurement on the last hospital visit?",
            "gold_sql": "SELECT ( SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10622' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AND lab.labname = 'fio2' ORDER BY lab.labresulttime ASC LIMIT 1 OFFSET 1 ) > ( SELECT lab.labresult FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '031-10622' AND patient.hospitaldischargetime IS NOT NULL ORDER BY patient.hospitaladmittime DESC LIMIT 1 ) ) AND lab.labname = 'fio2' ORDER BY lab.labresulttime ASC LIMIT 1 )",
            "pred_sql": "SELECT CASE WHEN max(CASE WHEN patient.patientunitstayid = 369273 THEN vitalperiodic.fio2 ELSE NULL END) > max(CASE WHEN patient.patientunitstayid = 1224149 THEN vitalperiodic.fio2 ELSE NULL END) THEN 'YES' ELSE 'NO' END FROM vitalperiodic INNER JOIN patient ON vitalperiodic.patientunitstayid = patient.patientunitstayid WHERE patient.patientunitstayid IN (369273, 1224149)",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: vitalperiodic.fio2",
            "is_correct": false,
            "reasoning": "The predicted SQL references a column vitalperiodic.fio2 that does not exist in the vitalperiodic table. This is a simple column reference error, and correcting or changing that reference would not require restructuring the overall query logic (though the logic is also incorrect). This falls under the category of a localized table/column reference error.",
            "error_type": "Local: table/column reference error"
        }
    },
    "dinsql": {
        "c5519431b0d7603182fa4334": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "Find out how many patients were discharged from the hospital since 1 year ago.",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.hospitaldischargetime IS NOT NULL AND datetime(patient.hospitaldischargetime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT COUNT(DISTINCT patienthealthsystemstayid) FROM patient WHERE datetime(hospitaldischargetime) >= datetime('2100-12-31 23:59:00', '-1 year')",
            "gold_answer": "[['1367.0']]",
            "pred_answer": "[['1519.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts DISTINCT patienthealthsystemstayid instead of DISTINCT patient.uniquepid, thus counting stays rather than unique patients. It also omits the explicit IS NOT NULL check, but the main issue is the wrong column reference. This is a localized mistake in the SELECT clause and WHERE filter, and does not affect the overall query structure or logic flow.",
            "error_type": "Local: table/column reference error"
        },
        "ef39f3af3855b7761d5db7c9": {
            "db_id": "eicu",
            "generator_name": "dinsql",
            "question": "What's the count of patients who left the hospital in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.hospitaldischargetime IS NOT NULL AND strftime('%Y',patient.hospitaldischargetime) = '2100'",
            "pred_sql": "SELECT COUNT(DISTINCT patienthealthsystemstayid) FROM patient WHERE strftime('%Y', hospitaldischargetime) = '2100'",
            "gold_answer": "[['1367.0']]",
            "pred_answer": "[['1519.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses COUNT(DISTINCT patienthealthsystemstayid) instead of COUNT(DISTINCT patient.uniquepid). This is a column reference error (wrong column used for counting patients). The overall query structure and logic (filtering by year) remains correct, only the column in the SELECT is incorrect, so it is a Local error.",
            "error_type": "Local: column-reference error"
        }
    },
    "o4-mini": {
        "f0a6c28d4198d7d12dff31f3": {
            "db_id": "eicu",
            "generator_name": "o4-mini",
            "question": "How many patients underwent a other test?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT microlab.patientunitstayid FROM microlab WHERE microlab.culturesite = 'other' )",
            "pred_sql": "SELECT COUNT(DISTINCT p.uniquepid) AS num_patients FROM microlab m JOIN patient p ON m.patientunitstayid = p.patientunitstayid WHERE m.organism = 'other';",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['21.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column in the WHERE clause: it filters on m.organism = 'other' instead of microlab.culturesite = 'other'. The join structure and overall logic remain correct, but the column reference is incorrect, leading to the wrong result.",
            "error_type": "Local: column reference error"
        }
    },
    "llm2sql": {
        "61a5db677312c21b1d6f74a1": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "This year, what are the top five most frequent procedures that patients received within 2 months after being diagnosed with obtundation?",
            "gold_sql": "SELECT T3.treatmentname FROM ( SELECT T2.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, diagnosis.diagnosistime FROM diagnosis JOIN patient ON diagnosis.patientunitstayid = patient.patientunitstayid WHERE diagnosis.diagnosisname = 'obtundation' AND datetime(diagnosis.diagnosistime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, treatment.treatmentname, treatment.treatmenttime FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.diagnosistime < T2.treatmenttime AND datetime(T2.treatmenttime) BETWEEN datetime(T1.diagnosistime) AND datetime(T1.diagnosistime,'+2 month') GROUP BY T2.treatmentname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT treatment.treatmentname, COUNT(treatment.treatmentid) AS procedure_count FROM diagnosis JOIN treatment ON diagnosis.patientunitstayid = treatment.patientunitstayid WHERE diagnosis.diagnosisname = 'obtundation' AND strftime('%Y', diagnosis.diagnosistime) = '2100' AND datetime(treatment.treatmenttime) BETWEEN datetime(diagnosis.diagnosistime) AND datetime(diagnosis.diagnosistime, '+2 months') GROUP BY treatment.treatmentname ORDER BY procedure_count DESC LIMIT 5;",
            "gold_answer": "[['enteral feeds - tube feeding'], ['foley catheter'], ['mechanical ventilation'], ['mechanical ventilation - pressure support'], ['stress ulcer prophylaxis - famotidine'], ['treatment of hypernatremia - free water administration']]",
            "pred_answer": "[['enteral feeds - tube feeding', '120.0'], ['foley catheter', '105.0'], ['mechanical ventilation - pressure support', '98.0'], ['stress ulcer prophylaxis - famotidine', '119.0'], ['treatment of hypernatremia - free water administration', '113.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses a simple ORDER BY ... LIMIT 5 rather than applying a DENSE_RANK() and selecting all treatments whose rank is <= 5, so it will drop tied procedures beyond the fifth position and fail to include all items with equal frequency (as the ground\u2010truth does). This isn\u2019t just a typo or missing column\u2014it undercuts the ranking logic and changes the intended result set. Therefore this is a global error in the overall query logic around ranking.",
            "error_type": "Global: ranking logic error"
        },
        "c4b4ffc15f3431f9c9be0382": {
            "db_id": "eicu",
            "generator_name": "llm2sql",
            "question": "What does a esr lab test typically cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.eventtype = 'lab' AND cost.eventid IN ( SELECT lab.labid FROM lab WHERE lab.labname = 'esr' )",
            "pred_sql": "SELECT DISTINCT cost FROM cost WHERE eventtype = 'lab';",
            "gold_answer": "[['8.88']]",
            "pred_answer": "[['10.03'], ['10.04'], ['10.06'], ['10.13'], ['10.17'], ['10.19'], ['10.26'], ['10.3'], ['10.4'], ['10.42'], ['10.45'], ['10.5'], ['10.57'], ['10.61'], ['10.62'], ['10.7'], ['10.79'], ['10.82'], ['10.98'], ['11.13'], ['11.14'], ['11.15'], ['11.5'], ['11.66'], ['6.23'], ['6.81'], ['6.91'], ['7.05'], ['7.38'], ['7.46'], ['7.55'], ['7.71'], ['7.72'], ['7.73'], ['7.75'], ['7.77'], ['7.82'], ['7.84'], ['7.9'], ['7.98'], ['8.08'], ['8.11'], ['8.13'], ['8.17'], ['8.2'], ['8.22'], ['8.23'], ['8.25'], ['8.27'], ['8.29'], ['8.3'], ['8.31'], ['8.34'], ['8.35'], ['8.37'], ['8.38'], ['8.47'], ['8.48'], ['8.51'], ['8.52'], ['8.59'], ['8.61'], ['8.63'], ['8.65'], ['8.66'], ['8.74'], ['8.75'], ['8.77'], ['8.78'], ['8.86'], ['8.88'], ['8.91'], ['8.92'], ['8.94'], ['8.95'], ['9.0'], ['9.01'], ['9.08'], ['9.11'], ['9.13'], ['9.14'], ['9.16'], ['9.21'], ['9.22'], ['9.24'], ['9.27'], ['9.28'], ['9.29'], ['9.32'], ['9.33'], ['9.37'], ['9.38'], ['9.44'], ['9.45'], ['9.47'], ['9.48'], ['9.5'], ['9.54'], ['9.56'], ['9.57']]",
            "is_correct": false,
            "reasoning": "The predicted query omits the filter that restricts the cost rows to those for the specific 'esr' lab (the subquery on lab.labid). Instead, it returns all lab costs by only checking eventtype='lab'. This is a localized mistake in the WHERE clause (a missing condition) and does not require restructuring the query\u2019s overall logic or joins.",
            "error_type": "Local: missing filter in WHERE clause"
        }
    },
    "deepseek": {
        "0327353ac5ed15c9699d9ea9": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "Is the value of mean blood pressure in patient 022-1011 from last measurement on the current icu visit less than its first measurement on the current icu visit?",
            "gold_sql": "SELECT ( SELECT vitalperiodic.systemicmean FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '022-1011' ) AND patient.unitdischargetime IS NULL ) AND vitalperiodic.systemicmean IS NOT NULL ORDER BY vitalperiodic.observationtime DESC LIMIT 1 ) < ( SELECT vitalperiodic.systemicmean FROM vitalperiodic WHERE vitalperiodic.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '022-1011' ) AND patient.unitdischargetime IS NULL ) AND vitalperiodic.systemicmean IS NOT NULL ORDER BY vitalperiodic.observationtime ASC LIMIT 1 )",
            "pred_sql": "SELECT (SELECT systemicmean FROM vitalperiodic WHERE patientunitstayid = (SELECT patientunitstayid FROM patient WHERE uniquepid = '022-1011' AND hospitaldischargetime IS NULL) ORDER BY observationtime DESC LIMIT 1) < (SELECT systemicmean FROM vitalperiodic WHERE patientunitstayid = (SELECT patientunitstayid FROM patient WHERE uniquepid = '022-1011' AND hospitaldischargetime IS NULL) ORDER BY observationtime ASC LIMIT 1) ;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses hospitaldischargetime to identify the current ICU visit and directly equates patientunitstayid from patient.uniquepid, whereas the ground truth correctly filters on patient.unitdischargetime IS NULL for the ICU visit via patienthealthsystemstayid and unitstay relationships. The predicted query thus misidentifies the visit and uses the wrong join/filter logic, fundamentally altering the intended semantics and resulting in no data. This is not a minor syntax or column naming issue but a core structural/logical error in how tables are joined and visits are identified.",
            "error_type": "Global: incorrect relationship error"
        },
        "59a090e63b9aecaa8ba090db": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "How many patients were given #3: in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT patient.uniquepid ) FROM patient WHERE patient.patientunitstayid IN ( SELECT intakeoutput.patientunitstayid FROM intakeoutput WHERE intakeoutput.celllabel = '#3:' AND intakeoutput.cellpath LIKE '%input%' AND strftime('%Y',intakeoutput.intakeoutputtime) = '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT patientunitstayid) FROM treatment WHERE treatmentid = 3 AND strftime('%Y', treatmenttime) = '2100';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the treatment table with condition treatmentid = 3 instead of querying the intakeoutput table for celllabel '#3:' and filtering on input events in 2100. It also omits the subquery against patientunitstayid and fails to count distinct patient.uniquepid as required. This misuse of the wrong table and columns fundamentally alters the query\u2019s logic and fails to capture the intended data source, so the error is not a minor syntactic mistake but a structural and logical flaw.",
            "error_type": "Global: incorrect table and logic"
        },
        "9349e897c6c87592db2b6b4d": {
            "db_id": "eicu",
            "generator_name": "deepseek",
            "question": "What would be the monthly average weight of patient 013-10332 since 2100?",
            "gold_sql": "SELECT AVG(patient.admissionweight) FROM patient WHERE patient.patienthealthsystemstayid IN ( SELECT patient.patienthealthsystemstayid FROM patient WHERE patient.uniquepid = '013-10332' ) AND patient.admissionweight IS NOT NULL AND strftime('%Y',patient.unitadmittime) >= '2100' GROUP BY strftime('%Y-%m',patient.unitadmittime)",
            "pred_sql": "-- Unfortunately, the database schema does not contain periodic weight measurements beyond admission and discharge weights, so it's not possible to calculate the monthly average weight for the patient. SELECT admissionweight, dischargeweight FROM patient WHERE uniquepid = '013-10332';",
            "gold_answer": "[['74.8']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely fails to implement the required aggregation (AVG) and grouping by month, and it does not filter by the date or compute a monthly average at all. Instead, it merely selects admissionweight and dischargeweight for the patient, which is a fundamentally different query structure and logic. Fixing this would require rewriting the query to include the proper aggregation, date filtering, and grouping clauses, not just a minor localized correction.",
            "error_type": "Global: structural-flaw"
        }
    },
    "qwen3": {
        "2288358aa2f90ce1ea419f03": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "What are the five most frequently ordered lab tests this year for patients of age 40s?",
            "gold_sql": "SELECT T1.labname FROM ( SELECT lab.labname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM lab WHERE lab.patientunitstayid IN ( SELECT patient.patientunitstayid FROM patient WHERE patient.age BETWEEN 40 AND 49 ) AND datetime(lab.labresulttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY lab.labname ) AS T1 WHERE T1.C1 <= 5",
            "pred_sql": "SELECT lab.labname FROM lab JOIN patient ON lab.patientunitstayid = patient.patientunitstayid WHERE patient.age >= '40' AND patient.age <= '49' AND strftime('%Y', lab.labresulttime) = '2100' GROUP BY lab.labname ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['bedside glucose'], ['glucose'], ['hct'], ['hgb'], ['potassium'], ['sodium']]",
            "pred_answer": "[['bedside glucose'], ['glucose'], ['hct'], ['hgb'], ['potassium']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is structurally similar to the ground truth: it correctly joins lab to patient, filters ages 40\u201349, restricts to this year, groups by labname, and orders by COUNT(*) DESC. The only difference is that the ground truth uses DENSE_RANK() to capture ties at the fifth position (including 'sodium'), whereas the predicted query uses LIMIT 5 and thus omits any tied 5th-rank items. This mistake is confined to the top-N ranking logic and does not require altering joins, filters, or the overall query structure.",
            "error_type": "Local: ranking error"
        },
        "f45ca3219aafadeec48edf98": {
            "db_id": "eicu",
            "generator_name": "qwen3",
            "question": "Among those who are prescribed with ultram, what are the three most commonly prescribed drugs at the same time?",
            "gold_sql": "SELECT T3.drugname FROM ( SELECT T2.drugname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid WHERE medication.drugname = 'ultram' ) AS T1 JOIN ( SELECT patient.uniquepid, medication.drugname, medication.drugstarttime FROM medication JOIN patient ON medication.patientunitstayid = patient.patientunitstayid ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE datetime(T1.drugstarttime) = datetime(T2.drugstarttime) GROUP BY T2.drugname ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drugname FROM ( SELECT DISTINCT m.patientunitstayid, m.drugname FROM medication m WHERE m.patientunitstayid IN ( SELECT DISTINCT patientunitstayid FROM medication WHERE drugname LIKE '%ultram%' ) AND m.drugname NOT LIKE '%ultram%' ) GROUP BY drugname ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['milk of magnesia'], ['narcan'], ['norco'], ['ultram'], ['zofran']]",
            "pred_answer": "[['klor-con'], ['lasix'], ['sodium chloride 0.9 % iv : 1000 ml']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the critical join on drugstarttime to ensure co\u2010prescriptions occur \u201cat the same time.\u201d Instead, it simply finds all other drugs prescribed at any time during the patient\u2019s stay and ranks them. This fundamentally changes the query\u2019s logic and output, since it no longer restricts to simultaneous prescriptions of ultram. Fixing this would require restructuring the query to join medication records on matching drugstarttime, not just filtering by patientunitstayid. Hence, the error is not a simple typo or minor clause fix but a core logical/structural flaw in how the tables are combined.",
            "error_type": "Global: join-strategy error"
        }
    },
    "macsql": {
        "9be5c5d50e8d9301923761d5": {
            "db_id": "eicu",
            "generator_name": "macsql",
            "question": "Tell me the top five most frequent procedures that patients are given within the same hospital visit after receiving stress ulcer prophylaxis - esomeprazole this year.",
            "gold_sql": "SELECT T3.treatmentname FROM ( SELECT T2.treatmentname, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT patient.uniquepid, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE treatment.treatmentname = 'stress ulcer prophylaxis - esomeprazole' AND datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT patient.uniquepid, treatment.treatmentname, treatment.treatmenttime, patient.patienthealthsystemstayid FROM treatment JOIN patient ON treatment.patientunitstayid = patient.patientunitstayid WHERE datetime(treatment.treatmenttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.uniquepid = T2.uniquepid WHERE T1.treatmenttime < T2.treatmenttime AND T1.patienthealthsystemstayid = T2.patienthealthsystemstayid GROUP BY T2.treatmentname ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT T2.treatmentname FROM treatment AS T2 JOIN ( SELECT DISTINCT T1.patientunitstayid, T1.drugstarttime FROM medication AS T1 WHERE T1.drugname = 'stress ulcer prophylaxis - esomeprazole' AND datetime(T1.drugstarttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') ) AS T3 ON T2.patientunitstayid = T3.patientunitstayid WHERE T2.treatmenttime > T3.drugstarttime GROUP BY T2.treatmentname ORDER BY COUNT(*) DESC LIMIT 5",
            "gold_answer": "[['analgesics - neuraxial analgesics'], ['bronchodilator - beta-agonist'], ['bronchodilator - nebulized'], ['diagnostic ultrasound of heart - transthoracic echocardiography'], ['electrolyte administration - intravenous'], ['electrolyte administration - magnesium'], ['enteral feeds - tube feeding'], ['exploratory laparotomy'], ['glucose - d50'], ['insulin'], ['nicotine patch'], ['nitroglycerin'], ['occupational therapy consult'], ['physical therapy consult'], ['sedative agent - lorazepam'], ['stress ulcer prophylaxis - esomeprazole'], ['therapeutic antibacterials - empiric antibacterial coverage'], ['therapeutic antibacterials - metronidazole'], ['therapeutic antibacterials - quinolone'], ['vte prophylaxis - compression boots'], ['vte prophylaxis - low molecular weight heparin']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misinterprets the event type by pulling 'stress ulcer prophylaxis \u2013 esomeprazole' from the medication table instead of the treatment table and then joining treatments without the proper year filter. This alters the core logic of which records to consider (medications vs. treatments) and thus yields no results. Fixing this is not a minor syntax change but requires restructuring the query to use the correct table and filters.",
            "error_type": "Global: incorrect table usage"
        }
    }
}