{
    "o4-mini": {
        "eb5300e70154b4714ca808c6": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Is the urea nitrogen level of patient 10004733 last measured on the current hospital visit less than the second to last measurement on the current hospital visit?",
            "gold_sql": "SELECT ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'urea nitrogen' ) ORDER BY labevents.charttime DESC LIMIT 1 ) < ( SELECT labevents.valuenum FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'urea nitrogen' ) ORDER BY labevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "SELECT CASE WHEN (SELECT le1.valuenum FROM admissions a JOIN labevents le1 ON a.hadm_id = le1.hadm_id JOIN d_labitems li1 ON le1.itemid = li1.itemid WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND li1.label = 'urea nitrogen' ORDER BY le1.charttime DESC LIMIT 1 ) < (SELECT le2.valuenum FROM admissions a JOIN labevents le2 ON a.hadm_id = le2.hadm_id JOIN d_labitems li2 ON le2.itemid = li2.itemid WHERE a.subject_id = 10004733 AND a.dischtime IS NULL AND li2.label = 'urea nitrogen' ORDER BY le2.charttime DESC LIMIT 1 OFFSET 1 ) THEN 'true' ELSE 'false' END AS is_urea_decreasing;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['true']]",
            "is_correct": false,
            "reasoning": "Both the ground truth and predicted SQL fetch the latest and second\u2010latest urea nitrogen measurements for the current hospital visit and compare them. The predicted SQL merely wraps the boolean comparison in a CASE expression to return the literals 'true'/'false' rather than relying on SQLite\u2019s numeric Boolean result (1.0/0.0). This is a localized difference in result formatting within the SELECT clause and does not change the overall query logic or table relationships.",
            "error_type": "Local: other-syntax error"
        },
        "adfdb251aaa88b786719e2e8": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "For patients who had bypass coronary artery, one artery from aorta with autologous venous tissue, open approach, what are the most frequent four microbiology tests carried out within the same hospital visit this year?",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'bypass coronary artery, one artery from aorta with autologous venous tissue, open approach' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "SELECT m.test_name FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code JOIN microbiologyevents m ON p.subject_id = m.subject_id AND p.hadm_id = m.hadm_id WHERE d.long_title = 'bypass coronary artery, one artery from aorta with autologous venous tissue, open approach' AND strftime('%Y', m.charttime) = '2100' GROUP BY m.test_name ORDER BY COUNT(*) DESC LIMIT 4;",
            "gold_answer": "[['mrsa screen']]",
            "pred_answer": "[['mrsa screen'], ['staph aureus screen'], ['urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins procedures_icd to microbiologyevents only on subject_id and hadm_id and filters tests by year, but it omits (1) restricting the procedure events to this year, and (2) requiring the microbiology tests to occur after the bypass procedure (the T1.charttime < T2.charttime condition). This missing temporal join condition fundamentally changes the logic and returns tests before the procedure and across any time in the year, rather than only those after the procedure. Fixing it would require restructuring the query\u2019s join logic and adding additional filters, not just a minor local tweak.",
            "error_type": "Global: join logic error"
        }
    },
    "qwen3": {
        "5516832b69c00c4c165c31d1": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Tell me the top three most frequent procedures that patients are given within the same month after receiving insertion of endotracheal tube since 2100.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'insertion of endotracheal tube' ) AND strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE strftime('%Y',procedures_icd.charttime) >= '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH insertion_events AS ( SELECT p.subject_id, p.hadm_id, p.charttime, p.icd_code AS insertion_code FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title = 'insertion of endotracheal tube' AND datetime(p.charttime) >= datetime('2100-01-01') ) SELECT dp.long_title FROM insertion_events i JOIN procedures_icd p ON i.subject_id = p.subject_id AND i.hadm_id = p.hadm_id JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE datetime(p.charttime, 'start of month') = datetime(i.charttime, 'start of month') AND p.icd_code != i.insertion_code GROUP BY dp.long_title ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['arteriography of cerebral arteries'], ['closed [endoscopic] biopsy of bronchus'], ['continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['endovascular (total) embolization or occlusion of head and neck vessels'], ['insertion or replacement of external ventricular drain [evd]'], ['other electroshock therapy'], ['percutaneous [endoscopic] gastrostomy [peg]']]",
            "pred_answer": "[['central venous catheter placement with guidance'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['other electroshock therapy']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly identifies insertion events and joins to subsequent procedures in the same month, groups by procedure name, and takes the top three by count. However, it fails to enforce that the follow\u2010up procedures occur after the insertion event\u2014it only matches on the same month without a p.charttime > i.charttime condition. This mistake is confined to a single WHERE clause condition (a temporal filter) and does not require restructuring the overall query logic or joins.",
            "error_type": "Local: other-temporal error"
        },
        "333d052bb087db30ae9a9518": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Can you get me the top five diagnoses that have the highest 3-month mortality rate?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T4.icd_code FROM ( SELECT T3.icd_code, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C3 FROM ( SELECT T2.icd_code, AVG(C1) AS C2 FROM ( SELECT T1.icd_code, ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T1.charttime) < 365 / 4 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id, diagnoses_icd.icd_code HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id ) AS T2 GROUP BY T2.icd_code ) AS T3 ) AS T4 WHERE T4.C3 <= 5 )",
            "pred_sql": "WITH first_diagnoses AS ( SELECT subject_id, icd_code, MIN(charttime) AS first_charttime FROM diagnoses_icd GROUP BY subject_id, icd_code ) SELECT long_title FROM ( SELECT d.long_title, (SUM(CASE WHEN p.dod IS NOT NULL AND p.dod BETWEEN fd.first_charttime AND date(fd.first_charttime, '+91 days') THEN 1 ELSE 0 END) * 1.0 / COUNT(*)) AS mortality_rate FROM first_diagnoses fd JOIN patients p ON fd.subject_id = p.subject_id JOIN d_icd_diagnoses d ON fd.icd_code = d.icd_code GROUP BY fd.icd_code, d.long_title ) ORDER BY mortality_rate DESC LIMIT 5;",
            "gold_answer": "[['abnormal involuntary movements'], ['acidosis'], ['acquired hypertrophic pyloric stenosis'], ['acute and subacute hepatic failure with coma'], ['acute and subacute hepatic failure without coma'], ['acute kidney failure with lesion of tubular necrosis'], ['acute kidney failure, unspecified'], ['acute respiratory failure'], ['acute respiratory failure, unspecified whether with hypoxia or hypercapnia'], ['alcohol abuse, continuous'], ['alcohol abuse, in remission'], ['alcoholic cirrhosis of liver'], ['alcoholic polyneuropathy'], ['altered mental status'], ['amyotrophic lateral sclerosis'], ['anemia in other chronic diseases classified elsewhere'], ['arthrodesis status'], ['asymptomatic human immunodeficiency virus [hiv] infection status'], ['atherosclerotic heart disease of native coronary artery with other forms of angina pectoris'], ['automatic implantable cardiac defibrillator in situ'], ['benign neoplasm of cerebral meninges'], ['cachexia'], ['chronic airway obstruction, not elsewhere classified'], ['chronic hepatitis c with hepatic coma'], ['chronic hepatitis c without mention of hepatic coma'], ['chronic kidney disease, stage 3 (moderate)'], ['chronic kidney disease, stage iii (moderate)'], ['chronic kidney disease, unspecified'], ['chronic osteomyelitis, site unspecified'], ['chronic systolic heart failure'], ['cirrhosis of liver without mention of alcohol'], ['delirium due to conditions classified elsewhere'], ['dementia, unspecified, without behavioral disturbance'], ['dependence on respirator, status'], ['diabetes mellitus without mention of complication, type ii or unspecified type, uncontrolled'], ['diarrhea'], ['diplopia'], ['do not resuscitate status'], ['early satiety'], ['edema'], ['embolism and thrombosis of other specified artery'], ['encounter for palliative care'], ['epistaxis'], ['esophageal varices without mention of bleeding'], ['glucocorticoid deficiency'], ['hemoperitoneum'], ['hemorrhage of gastrointestinal tract, unspecified'], ['hip joint replacement'], ['hyperosmolality and/or hypernatremia'], ['hyperpotassemia'], ['hypertensive chronic kidney disease with stage 1 through stage 4 chronic kidney disease, or unspecified chronic kidney disease'], ['hypertensive chronic kidney disease, unspecified, with chronic kidney disease stage i through stage iv, or unspecified'], ['hypertrophy (benign) of prostate without urinary obstruction and other lower urinary tract symptom (luts)'], ['hypo-osmolality and hyponatremia'], ['hyposmolality and/or hyponatremia'], ['hypospadias'], ['hypotension, unspecified'], ['hypovolemic shock'], ['ileus, unspecified'], ['infection with microorganisms resistant to penicillins'], ['jaundice, unspecified, not of newborn'], ['malignant neoplasm of body of pancreas'], ['malignant neoplasm of liver, secondary'], ['malignant neoplasm of rectosigmoid junction'], ['metabolic encephalopathy'], ['methicillin susceptible staphylococcus aureus infection as the cause of diseases classified elsewhere'], ['migraine, unspecified, without mention of intractable migraine without mention of status migrainosus'], ['mycosis fungoides, unspecified site, extranodal and solid organ sites'], ['natural blood and blood products causing adverse effects in therapeutic use'], ['neoplasm related pain (acute) (chronic)'], ['nephritis and nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney'], ['nonalcoholic steatohepatitis (nash)'], ['nonspecific elevation of levels of transaminase or lactic acid dehydrogenase [ldh]'], ['old myocardial infarction'], ['oliguria and anuria'], ['other and unspecified coagulation defects'], ['other chronic pulmonary heart diseases'], ['other cirrhosis of liver'], ['other closed fractures of distal end of radius (alone)'], ['other complications due to other vascular device, implant, and graft'], ['other constipation'], ['other convulsions'], ['other dependence on machines, supplemental oxygen'], ['other diseases of lung, not elsewhere classified'], ['other disorders of muscle, ligament, and fascia'], ['other disorders of neurohypophysis'], ['other dysphagia'], ['other fluid overload'], ['other iatrogenic hypotension'], ['other postprocedural shock, initial encounter'], ['other secondary thrombocytopenia'], ['other septicemia due to gram-negative organisms'], ['other specified analgesics and antipyretics causing adverse effects in therapeutic use'], ['other specified bacterial agents as the cause of diseases classified elsewhere'], ['other specified disorders of peritoneum'], ['other specified disorders of stomach and duodenum'], ['other specified forms of hearing loss'], ['other specified pre-operative examination'], ['paralytic ileus'], ['paroxysmal ventricular tachycardia']]",
            "pred_answer": "[['anemia in other chronic diseases classified elsewhere'], ['methicillin susceptible staphylococcus aureus infection as the cause of diseases classified elsewhere'], ['other specified bacterial agents as the cause of diseases classified elsewhere'], ['sepsis due to escherichia coli [e. coli]'], ['unspecified severe protein-calorie malnutrition']]",
            "is_correct": false,
            "reasoning": "The predicted SQL computes a mortality_rate for each diagnosis by dividing the count of deaths within 91 days by the total count of first diagnoses, but it never filters out diagnoses that occurred less than 3 months before the reference date. As a result, recent diagnoses without the full 3-month follow-up window are included in the denominator (and typically have zero deaths), biasing mortality rates downward. This omission is not a minor syntax mistake in one clause but a fundamental flaw in the overall inclusion logic that changes the query\u2019s intended meaning and results.",
            "error_type": "Global: temporal filtering error"
        }
    },
    "deepseek": {
        "fe11b6943e0d6122dbe19246": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What is the patient count for nicardipine iv prescription since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'nicardipine iv' AND strftime('%Y',prescriptions.starttime) >= '2100' )",
            "pred_sql": "SELECT COUNT(DISTINCT subject_id) AS patient_count FROM prescriptions WHERE drug = 'nicardipine' AND route = 'iv' AND starttime >= '2100-01-01 00:00:00';",
            "gold_answer": "[['9.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only looks at the prescriptions table, splits the drug and route into two predicates (drug = 'nicardipine' AND route = 'iv') instead of using the combined drug value 'nicardipine iv', and uses starttime >= '2100-01-01' rather than filtering by year via strftime. These mistakes are all confined to the WHERE clause predicates (incorrect literal and column usage) and do not change the overall query structure or table relationships. Thus, it is a local error in the WHERE clause value parsing.",
            "error_type": "Local: value parsing error"
        },
        "75e305729a5e29fab9e6d3b4": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When did patient 10029484 receive a stool microbiology test for the last time last month?",
            "gold_sql": "SELECT microbiologyevents.charttime FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029484 ) AND microbiologyevents.spec_type_desc = 'stool' AND datetime(microbiologyevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') ORDER BY microbiologyevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT m.charttime AS last_stool_test_time FROM microbiologyevents m INNER JOIN admissions a ON m.hadm_id = a.hadm_id INNER JOIN patients p ON a.subject_id = p.subject_id WHERE p.subject_id = 10029484 AND m.test_name LIKE '%stool microbiology%' AND m.spec_type_desc = 'stool' AND datetime(m.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-1 month') ORDER BY m.charttime DESC LIMIT 1;",
            "gold_answer": "[['2100-11-24 18:43:00']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL adds an extra filter on m.test_name (m.test_name LIKE '%stool microbiology%') that is not present in the ground truth. This erroneous condition in the WHERE clause overly restricts results and leads to no output. The overall query structure (joins, date filter, ordering, limit) remains correct, so the mistake is confined to a specific part of the WHERE clause.",
            "error_type": "Local: incorrect filter condition"
        },
        "6f2bbd3e1afa5cfe430892e3": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "This year, what are the top three most frequently ordered laboratory tests for patients within the same hospital visit after diagnosed with scoliosis [and kyphoscoliosis], idiopathic?",
            "gold_sql": "SELECT d_labitems.label FROM d_labitems WHERE d_labitems.itemid IN ( SELECT T3.itemid FROM ( SELECT T2.itemid, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'scoliosis [and kyphoscoliosis], idiopathic' ) AND datetime(diagnoses_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, labevents.itemid, labevents.charttime, admissions.hadm_id FROM labevents JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.itemid ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH scoliosis_patients AS ( SELECT DISTINCT d_icd.subject_id FROM diagnoses_icd JOIN d_icd_diagnoses d_icd ON diagnoses_icd.icd_code = d_icd.icd_code WHERE d_icd.long_title LIKE '%scoliosis [and kyphoscoliosis], idiopathic%' AND STRFTIME('%Y', diagnoses_icd.charttime) = '2100' ) SELECT T.label, COUNT(*) as order_count FROM labevents JOIN d_labitems T ON labevents.itemid = T.itemid WHERE labevents.subject_id IN (SELECT subject_id FROM scoliosis_patients) AND STRFTIME('%Y', labevents.charttime) = '2100' GROUP BY T.label ORDER BY order_count DESC LIMIT 3;",
            "gold_answer": "[['anion gap'], ['bicarbonate'], ['calcium, total'], ['chloride'], ['creatinine'], ['glucose'], ['hematocrit'], ['hemoglobin'], ['magnesium'], ['mch'], ['mchc'], ['mcv'], ['phosphate'], ['platelet count'], ['pt'], ['rdw'], ['red blood cells'], ['sodium'], ['urea nitrogen'], ['white blood cells']]",
            "pred_answer": "no such column: d_icd.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL's CTE aliases d_icd to the d_icd_diagnoses table, then attempts to select d_icd.subject_id. However, the d_icd_diagnoses table does not have a subject_id column (subject_id exists in diagnoses_icd). This is a simple column reference mistake and can be fixed by selecting diagnoses_icd.subject_id instead of d_icd.subject_id. It does not affect the overall query logic or require restructuring of joins or clauses.",
            "error_type": "Local: table/column reference error"
        },
        "632b38e37273b744559d30cf": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "This year, what are the top five most frequent drugs prescribed to patients within the same month after the prescription of emtricitabine-tenofovir (truvada)?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'emtricitabine-tenofovir (truvada)' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.starttime < T2.starttime AND datetime(T1.starttime,'start of month') = datetime(T2.starttime,'start of month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH truvada_prescriptions AS ( SELECT subject_id, strftime('%Y-%m', starttime) AS prescription_month FROM prescriptions WHERE drug = 'emtricitabine-tenofovir (truvada)' AND strftime('%Y', starttime) = '2100' ) SELECT p.drug, COUNT(*) AS frequency FROM prescriptions p JOIN truvada_prescriptions tp ON p.subject_id = tp.subject_id AND strftime('%Y-%m', p.starttime) = tp.prescription_month WHERE p.drug != 'emtricitabine-tenofovir (truvada)' GROUP BY p.drug ORDER BY frequency DESC LIMIT 5;",
            "gold_answer": "[['acetaminophen'], ['albumin 25% (12.5g / 50ml)'], ['albuterol inhaler'], ['aluminum-magnesium hydrox.-simethicone'], ['amitriptyline'], ['bisacodyl'], ['calcium carbonate'], ['darunavir'], ['docusate sodium (liquid)'], ['emtricitabine-tenofovir (truvada)'], ['fluticasone propionate 110mcg'], ['furosemide'], ['heparin'], ['hydroxyzine'], ['ibuprofen'], ['insulin (regular) for hyperkalemia'], ['ipratropium bromide neb'], ['lactulose'], ['milk of magnesia'], ['nicotine polacrilex'], ['raltegravir'], ['rifaximin'], ['sertraline'], ['sodium chloride 0.9%  flush'], ['sodium polystyrene sulfonate'], ['spironolactone'], ['tiotropium bromide'], ['tramadol (ultram)'], ['trazodone']]",
            "pred_answer": "[['bisacodyl', '8.0'], ['furosemide', '6.0'], ['heparin', '6.0'], ['raltegravir', '6.0'], ['sodium chloride 0.9%  flush', '6.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to enforce the \u201cafter\u201d requirement: it matches any prescriptions in the same month without ensuring p.starttime > truvada prescription starttime. This is a localized mistake in the WHERE/join condition and can be fixed by adding that time comparison, without restructuring the overall query logic.",
            "error_type": "Local: missing time filter"
        }
    },
    "dinsql": {
        "e3757f569282c07ba625db2f": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "After getting diagnosed with hyperlipidemia, unspecified, how many patients were found to have nicotine dependence, cigarettes, uncomplicated within 2 months in 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'hyperlipidemia, unspecified' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'nicotine dependence, cigarettes, uncomplicated' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month')",
            "pred_sql": "WITH initial_diagnosis AS ( SELECT DISTINCT subject_id, MIN(charttime) AS hyperlipidemia_time FROM diagnoses_icd WHERE icd_code = 'icd9|2724' -- hyperlipidemia, unspecified GROUP BY subject_id ), subsequent_diagnosis AS ( SELECT DISTINCT d.subject_id FROM diagnoses_icd d JOIN initial_diagnosis i ON d.subject_id = i.subject_id WHERE d.icd_code = 'icd9|3051' -- nicotine dependence, cigarettes, uncomplicated AND datetime(d.charttime) BETWEEN datetime(i.hyperlipidemia_time) AND datetime(i.hyperlipidemia_time, '+2 months') AND strftime('%Y', d.charttime) = '2100' ) SELECT COUNT(DISTINCT subject_id) AS patient_count FROM subsequent_diagnosis",
            "gold_answer": "[['1.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query logic compared to the ground truth. It uses hard-coded ICD codes instead of looking them up via d_icd_diagnoses, omits the year-2100 filter on the initial hyperlipidemia diagnosis (only applying it to the nicotine diagnosis), and restructures the filtering into CTEs in a way that changes which events are counted. These issues span multiple clauses and change the intended meaning of the question, so the error is not a simple typo or localized syntax mistake but a structural logic error.",
            "error_type": "Global: logical inconsistency"
        },
        "13f87cb21fa6c8cb0f60fd50": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the minimum total hospital cost that includes a procedure named venous catheterization, not elsewhere classified?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT procedures_icd.hadm_id FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'venous catheterization, not elsewhere classified' ) ) GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT MIN(total_cost) AS min_total_cost FROM ( SELECT SUM(cost) AS total_cost FROM cost WHERE hadm_id IN ( SELECT hadm_id FROM procedures_icd WHERE icd_code IN ( SELECT icd_code FROM d_icd_procedures WHERE long_title = 'venous catheterization, not elsewhere classified' ) ) AND event_type = 'procedures_icd' GROUP BY hadm_id )",
            "gold_answer": "[['2891.23']]",
            "pred_answer": "[['23.12']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly adds AND event_type = 'procedures_icd' in the WHERE clause, restricting the sum to only procedure costs. The original intent was to sum all cost.event_type values for admissions that include the specified procedure. This is a localized mistake in the WHERE condition and can be fixed by removing that extra filter, without restructuring the rest of the query.",
            "error_type": "Local: incorrect WHERE filter"
        },
        "db5e5f929e7bbb00f5d16d2b": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Count how many times since 2100 patient 10016150 had left heart cardiac catheterization.",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'left heart cardiac catheterization' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10016150 ) AND strftime('%Y',procedures_icd.charttime) >= '2100'",
            "pred_sql": "SELECT COUNT(*) FROM procedures_icd AS T1 INNER JOIN d_icd_procedures AS T2 ON T1.icd_code = T2.icd_code WHERE T1.subject_id = 10016150 AND T2.long_title = 'left heart cardiac catheterization' AND datetime(T1.charttime) >= datetime('2100-12-31 23:59:00','-0 year')",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s filter on charttime is wrong: it uses datetime(T1.charttime) >= datetime('2100-12-31 23:59:00','-0 year'), which effectively only includes events at or after the very end of 2100. The correct interpretation \u201csince 2100\u201d means from the start of 2100 onward. This mistake is confined to the WHERE clause\u2019s temporal condition and can be fixed by adjusting the date comparison without restructuring the query. Therefore, it is a local error in the temporal condition.",
            "error_type": "Local: other-temporal error"
        },
        "a0d1a694272df31d43d58407": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the length of the first hospital stay in days for patient 10001217?",
            "gold_sql": "SELECT strftime('%J',admissions.dischtime) - strftime('%J',admissions.admittime) FROM admissions WHERE admissions.subject_id = 10001217 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime ASC LIMIT 1",
            "pred_sql": "SELECT ROUND((JULIANDAY(dischtime) - JULIANDAY(admittime)), 0) AS length_of_stay FROM admissions WHERE subject_id = 10001217 AND dischtime IS NOT NULL ORDER BY admittime ASC LIMIT 1",
            "gold_answer": "[['6.794']]",
            "pred_answer": "[['7.0']]",
            "is_correct": false,
            "reasoning": "The only difference is the use of ROUND(...,0) in the predicted SQL, which forces the length of stay to be an integer (7.0) instead of preserving the fractional days (6.794) and rounding to three decimals as required. This is a localized formatting/rounding mistake in the SELECT clause and does not affect joins, table relationships, or overall logic. It can be fixed by removing or adjusting the ROUND precision, so it is a Local error.",
            "error_type": "Local: numeric-rounding error"
        },
        "334b1184502387df7abf77d5": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Is the value of mean blood pressure of patient 10039831 last measured on the current icu visit less than the value second to last measured on the current icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure mean' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) < ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure mean' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 OFFSET 1 )",
            "pred_sql": "WITH RankedBloodPressure AS ( SELECT valuenum AS mean_blood_pressure, DENSE_RANK() OVER (ORDER BY charttime DESC) AS rank FROM chartevents WHERE subject_id = 10039831 AND itemid = (SELECT itemid FROM d_items WHERE label = 'arterial blood pressure mean') AND stay_id = (SELECT stay_id FROM icustays WHERE subject_id = 10039831 AND outtime IS NULL) )",
            "gold_answer": "[['0.0']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL is missing the final SELECT to compare the two ranked blood pressure values and thus does not complete the intended logic. It also misuses DENSE_RANK (not needed per assumptions) and fails to implement the comparison between the latest and second-latest measurements. Fixing this would require adding an entire query block using the CTE and altering the overall structure, not just a small syntactic tweak.",
            "error_type": "Global: structural error"
        }
    },
    "codes": {
        "0d1eedad4af1daa77620607a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the price of a diagnosis for nontoxic uninodular goiter?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'diagnoses_icd' AND cost.event_id IN ( SELECT diagnoses_icd.row_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'nontoxic uninodular goiter' ) )",
            "pred_sql": "SELECT DISTINCT cost.cost FROM diagnoses_icd INNER JOIN cost ON diagnoses_icd.row_id = cost.event_id INNER JOIN d_icd_diagnoses ON cost.diagnosis_code = d_icd_diagnoses.icd_code WHERE d_icd_diagnoses.long_title = 'Nontoxic uninodular goiter'",
            "gold_answer": "[['6.17']]",
            "pred_answer": "no such column: cost.diagnosis_code",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column cost.diagnosis_code, which violates the schema. The overall query structure and logic (joining cost to diagnoses and filtering by long_title) remains intact, but the incorrect column name is a localized mistake.",
            "error_type": "Local: table/column reference error"
        },
        "2b067714ed119dcbd0f6f71a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the name of the procedure that was given two or more times to patient 10020740?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, COUNT( procedures_icd.charttime ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10020740 ) GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 >= 2 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE procedures_icd.subject_id = 10020740 GROUP BY procedures_icd.icd_code HAVING count(procedures_icd.icd_code) > 2",
            "gold_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['insertion of endotracheal tube'], ['other electroshock therapy']]",
            "pred_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['other electroshock therapy']]",
            "is_correct": false,
            "reasoning": "The predicted SQL\u2019s logic error is confined to the HAVING clause: it filters for count > 2 (i.e., three or more occurrences) instead of count >= 2 (two or more). This is a localized mistake in the threshold condition and does not require restructuring the query or altering table joins. It can be corrected by changing >2 to >=2.",
            "error_type": "Local: having-count-threshold error"
        },
        "aafda5799cef4cdbb1a8c0e0": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What's the count of current patients?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.dischtime is null",
            "pred_sql": "SELECT count(subject_id) FROM patients WHERE dod IS NULL",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['81.0']]",
            "is_correct": false,
            "reasoning": "The predicted query counts patients with a NULL dod (alive patients) from the patients table, whereas the question asks for current patients (ongoing admissions) defined by admissions.dischtime IS NULL. The predicted SQL uses the wrong table and criterion, fundamentally altering the query\u2019s logic and intended meaning. This requires changing the query structure and table references, not just a minor fix.",
            "error_type": "Global: incorrect table and logic"
        },
        "35c0173f97865f92a3780f49": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Which are the top four most frequent procedures patients had within the same hospital visit after being diagnosed with pain in left shoulder?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'pain in left shoulder' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 4 )",
            "pred_sql": "SELECT d_icd_procedures.long_title FROM chartevents INNER JOIN procedures_icd ON chartevents.row_id = procedures_icd.row_id INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Left shoulder pain' GROUP BY d_icd_procedures.long_title ORDER BY count(d_icd_procedures.long_title) DESC LIMIT 4",
            "gold_answer": "[['performance of urinary filtration, multiple']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely misrepresents the intended logic by joining chartevents to procedures_icd on row_id (an unrelated relationship), filtering procedures by a diagnosis title rather than finding procedures after a diagnosis in the same visit, and omitting the temporal and admission-based joins and ranking logic. This is not a minor syntax or column reference mistake but a fundamental restructuring of the query that alters its meaning and yields no results.",
            "error_type": "Global: incorrect join relationship"
        }
    },
    "llm2sql": {
        "5a15ddb42e39b4c1050740e5": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What is the label for hand cleansing prior to procedure (picc)?",
            "gold_sql": "SELECT d_items.label FROM d_items WHERE d_items.abbreviation = 'hand cleansing prior to procedure (picc)'",
            "pred_sql": "// No suitable SQL query can be formed for the given request based on the data available in the provided tables.",
            "gold_answer": "[['hand cleansing prior to procedure (picc)'], ['hand cleansing prior to procedure (picc)']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to provide any query at all, effectively omitting the SELECT and FROM clauses needed to retrieve the label. This is a localized mistake in not referencing the appropriate table or columns, rather than a structural or multi-clause logic error. Therefore, it is classified as a Local error.",
            "error_type": "Local: missing-query error"
        }
    },
    "macsql": {
        "1e56241efb64541676d9aab0": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What was the name of the organism found in patient 10016742's first swab microbiology test since 04/2100?",
            "gold_sql": "SELECT microbiologyevents.org_name FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10016742 ) AND microbiologyevents.spec_type_desc = 'swab' AND microbiologyevents.org_name IS NOT NULL AND strftime('%Y-%m',microbiologyevents.charttime) >= '2100-04' ORDER BY microbiologyevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT T1.`org_name` FROM microbiologyevents AS T1 WHERE T1.`subject_id` = 10016742 AND datetime(T1.`charttime`) >= datetime('2100-04-01 00:00:00') ORDER BY T1.`charttime` ASC LIMIT 1",
            "gold_answer": "[['yeast']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict results to swab tests (missing the condition spec_type_desc = 'swab') and does not ensure org_name is not null. These are omissions in the WHERE clause and can be fixed by adding the appropriate filters without restructuring the query logic or joins.",
            "error_type": "Local: missing WHERE filter"
        },
        "8c31e6bff8fb0a1e0a92eaae": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Count the number of patients who were diagnosed with diaphragmatic hernia without mention of obstruction or gangrene but didn't come back to the hospital within 2 months since 1 year ago.",
            "gold_sql": "SELECT ( SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'diaphragmatic hernia without mention of obstruction or gangrene' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 ) - ( SELECT COUNT( DISTINCT T2.subject_id ) FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'diaphragmatic hernia without mention of obstruction or gangrene' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 JOIN admissions ON T2.subject_id = admissions.subject_id WHERE T2.charttime < admissions.admittime AND datetime(admissions.admittime) >= datetime('2100-12-31 23:59:00','-1 year') AND datetime(admissions.admittime) BETWEEN datetime(T2.charttime) AND datetime(T2.charttime,'+2 month') )",
            "pred_sql": "SELECT COUNT(DISTINCT `subject_id`) AS `patient_count` FROM `admissions` WHERE `subject_id` IN ( SELECT DISTINCT `subject_id` FROM `diagnoses_icd` WHERE `icd_code` = ( SELECT `icd_code` FROM `d_icd_diagnoses` WHERE `long_title` = 'diaphragmatic hernia without mention of obstruction or gangrene' ) ) AND `admittime` < datetime('2100-12-31 23:59:00', '-1 year') AND `subject_id` NOT IN ( SELECT DISTINCT `subject_id` FROM `admissions` WHERE `admittime` >= datetime('2100-12-31 23:59:00', '-1 year') AND `admittime` < datetime('2100-12-31 23:59:00', '-1 year', '+2 months') )",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query applies the date filters to the admissions table rather than to the diagnosis timestamp, and it omits any link between the diagnosis date and a subsequent readmission window. It filters for admissions before one year ago instead of diagnoses since one year ago, and then checks for readmissions within two months of that wrong timestamp. This misplacement of time conditions and misassociation of tables fundamentally changes the query\u2019s logic and results, requiring a full rewrite of the date filtering and subquery structure rather than a simple fix in a single clause.",
            "error_type": "Global: incorrect overall filtering and join logic"
        }
    }
}