{
    "macsql": {
        "591138bd0043708115c23cd9": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What proportion of patients with a diagnosis of cirrhosis of liver without mention of alcohol survive over a 6-month period?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN (strftime('%J',patients.dod) - strftime('%J',T1.charttime)) < 365 / 2 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'cirrhosis of liver without mention of alcohol' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 2 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id )",
            "pred_sql": "SELECT AVG(survived) AS survival_proportion FROM ( SELECT T1.subject_id, CASE WHEN T3.dod IS NULL OR T3.dod > DATE(T2.first_diagnosis_date, '+6 months') THEN 1 ELSE 0 END AS survived FROM ( SELECT T1.subject_id, MIN(T1.charttime) AS first_diagnosis_date FROM diagnoses_icd AS T1 JOIN d_icd_diagnoses AS T2 ON T1.icd_code = T2.icd_code WHERE T2.long_title = 'cirrhosis of liver without mention of alcohol' GROUP BY T1.subject_id ) AS T2 LEFT JOIN patients AS T3 ON T2.subject_id = T3.subject_id ) AS survival_data",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: T1.subject_id",
            "is_correct": false,
            "reasoning": "The predicted SQL references an undefined alias T1.subject_id in the outer SELECT, but the subquery is aliased as T2. This is a simple column reference/alias mistake and does not alter the overall query logic or structure. It can be fixed by using the correct alias, so it is a localized error.",
            "error_type": "Local: table/column reference error"
        }
    },
    "codes": {
        "e3bb0f8bf5aadcee103960b1": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Since 223 days ago, what was the average value of the sao2 of patient 10038933?",
            "gold_sql": "SELECT AVG(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10038933 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'o2 saturation pulseoxymetry' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime) >= datetime('2100-12-31 23:59:00','-223 day')",
            "pred_sql": "SELECT avg(outputevents.value) FROM patients INNER JOIN outputevents ON patients.subject_id = outputevents.subject_id WHERE patients.subject_id = 10038933 AND outputevents.itemid IN (50824, 50825) AND outputevents.charttime > (DATETIME('now') - 223)",
            "gold_answer": "[['96.437']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the outputevents table joined to patients, whereas the ground truth correctly uses chartevents (where SpO2 is stored) filtered by stay_id and the appropriate item label. The predicted SQL not only picks the wrong table and columns (outputevents.value instead of chartevents.valuenum) but also misuses the time filter (DATETIME('now') rather than the fixed '2100-12-31 23:59:00') and incorrect item IDs. These mistakes fundamentally change the data source and logic of the query, so fixing them would require restructuring the query beyond a single clause.",
            "error_type": "Global: wrong table usage"
        },
        "ba36ddb9a0893f283433afe3": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10018081 experience the last instance where their diastolic blood pressure was less than 44.0 yesterday?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure diastolic' AND d_items.linksto = 'chartevents' ) AND chartevents.valuenum < 44.0 AND datetime(chartevents.charttime,'start of day') = datetime('2100-12-31 23:59:00','start of day','-1 day') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.charttime FROM chartevents INNER JOIN patients ON chartevents.subject_id = patients.subject_id WHERE patients.subject_id = 10018081 AND chartevents.itemid IN (51, 456) AND patients.dob = ( SELECT min(patients.dob) FROM patients INNER JOIN chartevents ON patients.subject_id = chartevents.subject_id WHERE patients.subject_id = 10018081 AND chartevents.itemid IN (51, 456) AND strftime('%Y %m %d',chartevents.charttime) = strftime('%Y %m %d',admissions.admittime) - 1 AND chartevents.valuenum1 < 44 AND patients.gender = 'f' )",
            "gold_answer": "[['2100-12-30 14:00:00']]",
            "pred_answer": "no such column: admissions.admittime",
            "is_correct": false,
            "reasoning": "The predicted SQL completely breaks the intended query structure: it joins chartevents directly to patients rather than going through admissions \u2192 icustays, omits the stay_id filter, and even references admissions.admittime without including the admissions table. This is not a small fix within a clause but a fundamentally flawed join strategy and overall logic that fails to capture the patient\u2019s ICU stay and the 'yesterday' date filter correctly.",
            "error_type": "Global: incorrect join relationships"
        },
        "404fb4798c41a903136d9468": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate patient 10004720's length of stay in days for the last icu stay.",
            "gold_sql": "SELECT strftime('%J', icustays.outtime)-strftime('%J', icustays.intime) FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1",
            "pred_sql": "SELECT sum(strftime('%J', admissions.dischtime) - strftime('%J', admissions.admittime)) FROM admissions INNER JOIN icustays ON admissions.hadm_id = icustays.hadm_id WHERE admissions.subject_id = 10004720",
            "gold_answer": "[['5.056']]",
            "pred_answer": "[['5.02']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic by using the admissions table instead of icustays, summing all hospital stays rather than computing the length of the last ICU stay. It also omits the ORDER BY ... DESC LIMIT 1 to pick the last stay and does not restrict to non-null ICU outtimes. These are structural flaws that change the intended meaning and require major query restructuring.",
            "error_type": "Global: incorrect table join and aggregation logic"
        },
        "8f1d7f8d06674bc4fcf77de8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How much is the minimum total hospital cost associated with postprocedural fever this year?",
            "gold_sql": "SELECT MIN(T1.C1) FROM ( SELECT SUM(cost.cost) AS C1 FROM cost WHERE cost.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'postprocedural fever' ) ) AND datetime(cost.chargetime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') GROUP BY cost.hadm_id ) AS T1",
            "pred_sql": "SELECT sum(cost.cost) FROM cost INNER JOIN procedures_icd ON cost.event_id = procedures_icd.row_id WHERE procedures_icd.icd_code = '294.5' AND strftime('%Y', cost.chargetime) = '2100'",
            "gold_answer": "[['1125.6']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally misidentifies the event table and join relationship. Instead of filtering costs for the diagnosis of 'postprocedural fever' via diagnoses_icd and its ICD code lookup, it joins cost to procedures_icd and uses a hard-coded code. This alters the core logic of which hospital costs are aggregated and does not even group by admission to compute a minimum. Fixing this requires restructuring the query to join the correct diagnosis table and apply the proper yearly filter and grouping, not just a minor syntax tweak.",
            "error_type": "Global: join error"
        },
        "83a6c418c954d7546628bd66": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the number of patients who were prescribed olanzapine (disintegrating tablet) since 2100?",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT prescriptions.hadm_id FROM prescriptions WHERE prescriptions.drug = 'olanzapine (disintegrating tablet)' AND strftime('%Y',prescriptions.starttime) >= '2100' )",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE drug = 'olanzapine' AND stoptime > '2100-01-01'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to follow the overall intended logic on multiple fronts: 1) It filters on drug = 'olanzapine' instead of the full name 'olanzapine (disintegrating tablet)', violating the drug-name requirement. 2) It uses stoptime > '2100-01-01' instead of selecting prescriptions by starttime in year \u2265 2100. 3) It counts non\u2010distinct subject_id rows directly from prescriptions, rather than counting distinct patients via admissions.hadm_id. 4) It omits the necessary join or subquery through admissions.subject_id. These errors span several clauses (SELECT, FROM, WHERE) and fundamentally change the query\u2019s meaning, so this is a global logic error.",
            "error_type": "Global: logic error"
        },
        "964ce5a64b7651431e5f28ea": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the number of days since patient 10004733 was first diagnosed with other and unspecified alcohol dependence, unspecified on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', diagnoses_icd.charttime) ) FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'other and unspecified alcohol dependence, unspecified' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004733 AND admissions.dischtime IS NULL ) ORDER BY diagnoses_icd.charttime ASC LIMIT 1",
            "pred_sql": "SELECT (strftime('%J', admissions.admittime) - strftime('%J', patients.dob)) - (strftime('%J', admissions.dischtime) - strftime('%J', patients.dob)) AS days FROM admissions INNER JOIN patients ON admissions.subject_id = patients.subject_id WHERE admissions.subject_id = 10004733",
            "gold_answer": "[['9.522']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely ignores the diagnosis and current\u2010visit requirements. It never joins or filters on diagnoses_icd or the specific ICD code, and it uses admissions.admittime, admissions.dischtime, and patients.dob in a nonsensical subtraction that bears no relation to \u201cdays since first diagnosis\u201d on the current visit. This is not a minor typo or wrong column reference but a fundamentally incorrect query structure and logic, so it is a global error.",
            "error_type": "Global: incorrect overall logic"
        },
        "da0ea1dc89bd6b674a9f17f2": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the cost of a diagnosis of adverse effect of sulfonamides, initial encounter?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'diagnoses_icd' AND cost.event_id IN ( SELECT diagnoses_icd.row_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'adverse effect of sulfonamides, initial encounter' ) )",
            "pred_sql": "SELECT cost.cost FROM diagnoses_icd INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code INNER JOIN cost ON diagnoses_icd.row_id = cost.event_id WHERE d_icd_diagnoses.long_title = 'adverse effect of sulfonamides, initial encounter'",
            "gold_answer": "[['6.62']]",
            "pred_answer": "[['13.86'], ['6.62'], ['8.08']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins diagnoses_icd to cost via event_id but fails to restrict cost.event_type = 'diagnoses_icd'. Without that filter, cost rows from other event types whose event_id numerically matches a diagnoses_icd.row_id are also returned, producing extra costs (13.86 and 8.08). Adding the WHERE cost.event_type = 'diagnoses_icd' condition would fix it without changing the overall query structure or joins.",
            "error_type": "Local: missing event_type filter"
        },
        "1a7ff2b0da25e36ad3c72ba2": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How is isosorbide dinitrate typically consumed?",
            "gold_sql": "SELECT DISTINCT prescriptions.route FROM prescriptions WHERE prescriptions.drug = 'isosorbide dinitrate'",
            "pred_sql": "SELECT DISTINCT CASE WHEN d_items.label IS NULL THEN NULL ELSE d_items.label END FROM d_items INNER JOIN inputevents_cv WHERE d_items.itemid = inputevents_cv.itemid AND d_items.label IS NOT NULL",
            "gold_answer": "[['po/ng']]",
            "pred_answer": "no such table: inputevents_cv",
            "is_correct": false,
            "reasoning": "The predicted query references a non-existent table inputevents_cv, which is a table reference error. This mistake is confined to one part of the query and does not change the overall logic or require restructuring, so it is a local error.",
            "error_type": "Local: table reference error"
        },
        "4d1ec5f37505470d5d68c654": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "When did patient 10006580 get input for the last time?",
            "gold_sql": "SELECT inputevents.starttime FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10006580 ) ) ORDER BY inputevents.starttime DESC LIMIT 1",
            "pred_sql": "SELECT min(charttime) FROM chartevents WHERE subject_id = 10006580",
            "gold_answer": "[['2100-08-20 03:55:00']]",
            "pred_answer": "[['2100-08-19 16:23:00']]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong table (chartevents instead of inputevents), applies the wrong aggregation (min instead of max/last), and omits the necessary joins through admissions and icustays. These errors span across multiple clauses and fundamentally change the query\u2019s intent, so they cannot be fixed by a minor local tweak.",
            "error_type": "Global: wrong table and aggregation"
        }
    },
    "dinsql": {
        "4a52128ae4a63ad0dc8d6410": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the top three diagnoses associated with the highest 3-month mortality rate?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T4.icd_code FROM ( SELECT T3.icd_code, DENSE_RANK() OVER ( ORDER BY T3.C2 ASC ) AS C3 FROM ( SELECT T2.icd_code, AVG(C1) AS C2 FROM ( SELECT T1.icd_code, ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T1.charttime) < 365 / 4 THEN 0 ELSE 1 END ) as C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id, diagnoses_icd.icd_code HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN patients ON T1.subject_id = patients.subject_id ) AS T2 GROUP BY T2.icd_code ) AS T3 ) AS T4 WHERE T4.C3 <= 3 )",
            "pred_sql": "WITH diagnosis_mortality AS ( SELECT d.icd_code, COUNT(DISTINCT d.subject_id) AS total_patients, SUM(CASE WHEN p.dod IS NOT NULL AND julianday(p.dod) <= julianday(d.charttime) + 91.25 THEN 1 ELSE 0 END) AS deaths_within_3_months FROM diagnoses_icd d JOIN patients p ON d.subject_id = p.subject_id GROUP BY d.icd_code ), mortality_rate AS ( SELECT icd_code, (CAST(deaths_within_3_months AS FLOAT) / total_patients) AS mortality_rate FROM diagnosis_mortality ) SELECT d_icd_diagnoses.long_title FROM mortality_rate JOIN d_icd_diagnoses ON mortality_rate.icd_code = d_icd_diagnoses.icd_code ORDER BY mortality_rate.mortality_rate DESC LIMIT 3",
            "gold_answer": "[['abnormal involuntary movements'], ['acquired hypertrophic pyloric stenosis'], ['acute and subacute hepatic failure without coma'], ['acute kidney failure, unspecified'], ['acute respiratory failure, unspecified whether with hypoxia or hypercapnia'], ['alcohol abuse, continuous'], ['alcoholic polyneuropathy'], ['amyotrophic lateral sclerosis'], ['anemia in other chronic diseases classified elsewhere'], ['arthrodesis status'], ['atherosclerotic heart disease of native coronary artery with other forms of angina pectoris'], ['automatic implantable cardiac defibrillator in situ'], ['cachexia'], ['chronic airway obstruction, not elsewhere classified'], ['chronic hepatitis c with hepatic coma'], ['chronic kidney disease, stage 3 (moderate)'], ['chronic osteomyelitis, site unspecified'], ['chronic systolic heart failure'], ['delirium due to conditions classified elsewhere'], ['dementia, unspecified, without behavioral disturbance'], ['dependence on respirator, status'], ['diarrhea'], ['diplopia'], ['do not resuscitate status'], ['early satiety'], ['edema'], ['embolism and thrombosis of other specified artery'], ['encounter for palliative care'], ['epistaxis'], ['esophageal varices without mention of bleeding'], ['glucocorticoid deficiency'], ['hemoperitoneum'], ['hemorrhage of gastrointestinal tract, unspecified'], ['hyperosmolality and/or hypernatremia'], ['hyperpotassemia'], ['hypertrophy (benign) of prostate without urinary obstruction and other lower urinary tract symptom (luts)'], ['hypospadias'], ['hypotension, unspecified'], ['hypovolemic shock'], ['ileus, unspecified'], ['infection with microorganisms resistant to penicillins'], ['jaundice, unspecified, not of newborn'], ['malignant neoplasm of body of pancreas'], ['malignant neoplasm of liver, secondary'], ['malignant neoplasm of rectosigmoid junction'], ['metabolic encephalopathy'], ['methicillin susceptible staphylococcus aureus infection as the cause of diseases classified elsewhere'], ['migraine, unspecified, without mention of intractable migraine without mention of status migrainosus'], ['mycosis fungoides, unspecified site, extranodal and solid organ sites'], ['natural blood and blood products causing adverse effects in therapeutic use'], ['neoplasm related pain (acute) (chronic)'], ['nonalcoholic steatohepatitis (nash)'], ['oliguria and anuria'], ['other cirrhosis of liver'], ['other closed fractures of distal end of radius (alone)'], ['other complications due to other vascular device, implant, and graft'], ['other dependence on machines, supplemental oxygen'], ['other diseases of lung, not elsewhere classified'], ['other disorders of muscle, ligament, and fascia'], ['other disorders of neurohypophysis'], ['other dysphagia'], ['other postprocedural shock, initial encounter'], ['other septicemia due to gram-negative organisms'], ['other specified analgesics and antipyretics causing adverse effects in therapeutic use'], ['other specified bacterial agents as the cause of diseases classified elsewhere'], ['other specified disorders of peritoneum'], ['other specified disorders of stomach and duodenum'], ['other specified forms of hearing loss'], ['other specified pre-operative examination'], ['paroxysmal ventricular tachycardia'], ['peritonitis, unspecified'], ['personal history of alcoholism'], ['personal history of malignant neoplasm of tongue'], ['personal history of peptic ulcer disease'], ['personal history of poliomyelitis'], ['personal history of pulmonary embolism'], ['pneumonia due to klebsiella pneumoniae'], ['postprocedural hemorrhage of a circulatory system organ or structure following a cardiac catheterization'], ['pulmonary congestion and hypostasis'], ['regional enteritis of unspecified site'], ['sepsis due to escherichia coli [e. coli]'], ['septic shock'], ['spinal stenosis, lumbar region, without neurogenic claudication'], ['spontaneous bacterial peritonitis'], ['stricture of artery'], ['subarachnoid hemorrhage following injury without mention of open intracranial wound, with no loss of consciousness'], ['tachypnea'], ['toxic encephalopathy'], ['transfusion related acute lung injury (trali)'], ['unspecified atrial flutter'], ['unspecified condition of brain'], ['unspecified deficiency anemia'], ['unspecified fall'], ['unspecified infection due to central venous catheter, initial encounter'], ['unspecified severe protein-calorie malnutrition'], ['unspecified vascular insufficiency of intestine'], ['unspecified viral hepatitis c with hepatic coma'], ['urethral stricture, unspecified'], ['varices of other sites'], ['ventilator associated pneumonia']]",
            "pred_answer": "[['asymptomatic human immunodeficiency virus [hiv] infection status'], ['cirrhosis of liver without mention of alcohol'], ['other dependence on machines, supplemental oxygen']]",
            "is_correct": false,
            "reasoning": "The predicted query fails to apply the key cohort filter and logic used in the ground truth: it does not restrict diagnoses to those occurring at least 3 months before the reference date nor does it use only the first diagnosis per patient. Without excluding recent diagnoses (for which outcome cannot yet be observed) or properly anchoring on the first diagnosis event, the mortality rate calculation is fundamentally different and incorrect. This error spans multiple parts of the logic (WHERE filtering, cohort definition, and aggregation), so it is a global logic error.",
            "error_type": "Global: missing cohort time-filter"
        },
        "98412e460c98a20d61ef2b78": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Show me the difference between patient 10022041's total input and output on the last icu visit.",
            "gold_sql": "SELECT ( SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022041 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) ) - ( SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10022041 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime DESC LIMIT 1 ) )",
            "pred_sql": "WITH LastICUVisit AS ( SELECT stay_id FROM icustays WHERE subject_id = 10022041 ORDER BY intime DESC LIMIT 1 ), TotalInput AS ( SELECT SUM(totalamount) AS total_input FROM inputevents WHERE stay_id = (SELECT stay_id FROM LastICUVisit) ), TotalOutput AS ( SELECT SUM(value) AS total_output FROM outputevents WHERE stay_id = (SELECT stay_id FROM LastICUVisit) ) SELECT (TI.total_input - TO.total_output) AS input_output_difference FROM TotalInput TI, TotalOutput TO",
            "gold_answer": "[['6400.0']]",
            "pred_answer": "near \"TO\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails with a syntax error near \"TO\" because it aliases the TotalOutput table as TO, which is a reserved keyword. This is a localized alias/syntax mistake that can be fixed by choosing a different alias. The overall query structure and logic remain intact, so this is a Local error.",
            "error_type": "Local: syntax error"
        },
        "f63abc2721bee6659fec40e2": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "How many patients received a arterial catheterization after the procedure of enteral infusion of concentrated nutritional substances within the same month?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'enteral infusion of concentrated nutritional substances' ) ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'arterial catheterization' ) ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T1.charttime,'start of month') = datetime(T2.charttime,'start of month')",
            "pred_sql": "SELECT COUNT(DISTINCT T1.subject_id) FROM procedures_icd AS T1 JOIN procedures_icd AS T2 ON T1.subject_id = T2.subject_id AND T1.hadm_id = T2.hadm_id WHERE T1.icd_code = 'icd9|3961' -- arterial catheterization AND T2.icd_code = 'icd9|9604' -- enteral infusion of concentrated nutritional substances AND datetime(T1.charttime, 'start of month') = datetime(T2.charttime, 'start of month') AND T1.charttime > T2.charttime",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['10.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the core logic of the query by adding a hadm_id join and filtering both procedures within the same hospital admission, whereas the ground truth only requires that the two procedures occur for the same patient in the same calendar month. This incorrect join condition fundamentally alters which patients are counted, leading to a drastically different result. Fixing this would require restructuring the join logic, not just a minor tweak in a single clause.",
            "error_type": "Global: join error"
        },
        "20aaeb128f0419f83461f5c9": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What was the name of the specimen test that was last given to patient 10027602 since 12/2100?",
            "gold_sql": "SELECT microbiologyevents.spec_type_desc FROM microbiologyevents WHERE microbiologyevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND strftime('%Y-%m',microbiologyevents.charttime) >= '2100-12' ORDER BY microbiologyevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT test_name FROM microbiologyevents WHERE subject_id = 10027602 AND datetime(charttime) >= datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY charttime DESC LIMIT 1",
            "gold_answer": "[['urine']]",
            "pred_answer": "[['urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs from the ground truth only in the SELECT clause: it returns microbiologyevents.test_name instead of microbiologyevents.spec_type_desc. All other parts (time filter, patient filter, ordering, limit) match the intended logic. This is a localized mistake in selecting the wrong column, not a structural or logical flaw across multiple clauses.",
            "error_type": "Local: column reference error"
        },
        "a9b5787ee9cc74593541e313": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "For patients who underwent diagnostic ultrasound of digestive system , what are the five most common diagnoses within 2 months?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'diagnostic ultrasound of digestive system' ) ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 5 )",
            "pred_sql": "SELECT D.long_title FROM procedures_icd AS P JOIN d_icd_procedures AS DP ON P.icd_code = DP.icd_code JOIN diagnoses_icd AS DI ON P.hadm_id = DI.hadm_id JOIN d_icd_diagnoses AS D ON DI.icd_code = D.icd_code WHERE DP.long_title = 'diagnostic ultrasound of digestive system' AND julianday(DI.charttime) - julianday(P.charttime) <= 60 GROUP BY D.long_title ORDER BY COUNT(DI.icd_code) DESC LIMIT 5",
            "gold_answer": "[['acquired absence of breast and nipple'], ['acute pancreatitis'], ['alcohol abuse, unspecified'], ['anemia, unspecified'], ['cyst and pseudocyst of pancreas'], ['other and unspecified mycoses'], ['other specified diseases of pancreas'], ['personal history of antineoplastic chemotherapy'], ['personal history of malignant neoplasm of breast'], ['pure hypercholesterolemia'], ['secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified'], ['unspecified essential hypertension']]",
            "pred_answer": "[['accidents occurring in residential institution'], ['acute pancreatitis'], ['depressive disorder, not elsewhere classified'], ['long-term (current) use of insulin'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of the question by joining procedures and diagnoses on the same hospital admission (P.hadm_id = DI.hadm_id) instead of tying them by patient (subject_id) and looking at diagnoses up to two months after the procedure. It also fails to enforce that diagnoses occur after the ultrasound (it only checks a \u226460-day difference, allowing diagnoses before the procedure). These errors span multiple clauses (JOIN and WHERE) and alter the overall meaning of the query, so this is not a fixable typo or isolated syntax mistake but a structural/logical flaw.",
            "error_type": "Global: incorrect join and time-window logic"
        }
    },
    "o4-mini": {
        "2c017af839adaa90d6aaa933": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "List the top three most frequent diagnoses that patients are diagnosed with within 2 months after receiving extraction of other tooth.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'extraction of other tooth' ) ) AS T1 JOIN ( SELECT admissions.subject_id, diagnoses_icd.icd_code, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "SELECT dd.long_title AS diagnosis, COUNT(*) AS freq FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code AND dp.long_title = 'extraction of other tooth' JOIN diagnoses_icd di ON di.subject_id = p.subject_id AND di.charttime >= p.charttime AND di.charttime <= datetime(p.charttime, '+2 months') JOIN d_icd_diagnoses dd ON di.icd_code = dd.icd_code GROUP BY dd.long_title ORDER BY freq DESC LIMIT 3;",
            "gold_answer": "[['aortic valve disorders'], ['atrial fibrillation'], ['osteoarthrosis, unspecified whether generalized or localized, site unspecified'], ['paroxysmal ventricular tachycardia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "pred_answer": "[['paroxysmal ventricular tachycardia', '1.0'], ['tobacco use disorder', '1.0'], ['unspecified essential hypertension', '1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL replaces the ground truth\u2019s DENSE_RANK() approach (which handles ties by returning all diagnoses with rank \u2264 3) with a simple ORDER BY ... LIMIT 3. As a result, when there are ties at the cutoff, the prediction arbitrarily truncates the tied diagnoses, returning exactly three rows instead of all diagnoses sharing the top three frequencies. This is not a mere typo or missing table reference but a fundamental logical change in how the top\u2010N is defined, affecting the overall query semantics.",
            "error_type": "Global: ranking logic error"
        },
        "778ad95705de0b45ec4884a7": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Is the value of heart rate in patient 10021666 from last measurement on the first icu visit greater than its first measurement on the first icu visit?",
            "gold_sql": "SELECT ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021666 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime DESC LIMIT 1 ) > ( SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021666 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) ORDER BY chartevents.charttime ASC LIMIT 1 )",
            "pred_sql": "SELECT CASE WHEN (SELECT ce_last.valuenum FROM chartevents ce_last JOIN d_items di_last ON ce_last.itemid = di_last.itemid JOIN icustays ic_last ON ce_last.stay_id = ic_last.stay_id WHERE ic_last.subject_id = 10021666 AND di_last.label = 'heart rate' AND ic_last.intime = ( SELECT MIN(intime) FROM icustays WHERE subject_id = 10021666 ) ORDER BY ce_last.charttime DESC LIMIT 1 ) > (SELECT ce_first.valuenum FROM chartevents ce_first JOIN d_items di_first ON ce_first.itemid = di_first.itemid JOIN icustays ic_first ON ce_first.stay_id = ic_first.stay_id WHERE ic_first.subject_id = 10021666 AND di_first.label = 'heart rate' AND ic_first.intime = ( SELECT MIN(intime) FROM icustays WHERE subject_id = 10021666 ) ORDER BY ce_first.charttime ASC LIMIT 1 ) THEN 'true' ELSE 'false' END AS is_greater;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['false']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the logic for selecting the patient\u2019s first ICU stay by (1) filtering only on icustays.subject_id instead of following the admission\u2192icustays relationship via hadm_id, and (2) omitting the outtime IS NOT NULL check used to identify completed ICU visits. These errors span multiple clauses (JOINs and WHERE) and alter which ICU stay is considered \u201cfirst,\u201d thus requiring restructuring across the query rather than a local fix.",
            "error_type": "Global"
        }
    },
    "deepseek": {
        "19a8fffa7488ae211db659dc": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Please provide the count of patients who had a solution input in 2100.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT icustays.hadm_id FROM icustays WHERE icustays.stay_id IN ( SELECT inputevents.stay_id FROM inputevents WHERE inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'solution' AND d_items.linksto = 'inputevents' ) AND strftime('%Y',inputevents.starttime) = '2100' ) )",
            "pred_sql": "SELECT COUNT(DISTINCT subject_id) AS count FROM inputevents WHERE STRFTIME('%Y', starttime) = '2100';",
            "gold_answer": "[['49.0']]",
            "pred_answer": "[['76.0']]",
            "is_correct": false,
            "reasoning": "The predicted query simply counts distinct subject_id from inputevents in 2100 without restricting to 'solution' inputs or joining through d_items and icustays/admissions. It omits the necessary join to d_items (to filter on label = 'solution' and linksto = 'inputevents') and the layers of mapping from inputevents.stay_id to admissions.subject_id via icustays/admissions. This is not a minor syntactic mistake but a fundamental logical omission affecting multiple parts of the query, altering the intended meaning and resulting dataset.",
            "error_type": "Global: incorrect join/filter logic"
        }
    },
    "llm2sql": {
        "57e1eb9257a000ee96aa3a4e": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "What are the four most frequently prescribed medications for patients in their 20s since 2100?",
            "gold_sql": "SELECT T1.drug FROM ( SELECT prescriptions.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 20 AND 29 ) AND strftime('%Y',prescriptions.starttime) >= '2100' GROUP BY prescriptions.drug ) AS T1 WHERE T1.C1 <= 4",
            "pred_sql": "SELECT P.drug FROM prescriptions AS P JOIN patients AS PT ON P.subject_id = PT.subject_id WHERE (2100 - CAST(SUBSTR(PT.dob,1,4) AS INTEGER)) BETWEEN 20 AND 29 AND datetime(P.starttime) >= datetime('2100-01-01 00:00:00') GROUP BY P.drug ORDER BY COUNT(P.drug) DESC LIMIT 4;",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['insulin'], ['metoprolol tartrate'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the intended query logic in two major ways. First, it computes patient age using patients.dob instead of using the admissions.age column as required by the assumptions, thus joining an unrelated table and misfiltering the cohort. Second, it replaces the DENSE_RANK window function\u2014which preserves ties for the top four most frequent drugs\u2014with a simple ORDER BY ... LIMIT 4, causing it to drop a drug that should be included when there is a tie. These errors span multiple clauses (JOIN, WHERE, SELECT, ranking) and require restructuring the query, so they are Global.",
            "error_type": "Global: incorrect age calculation and ranking strategy"
        }
    },
    "qwen3": {
        "5eef1271ba9f4ff35bc0efa0": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "For patients who are diagnosed with essential (primary) hypertension , what are the top three most common medications that followed afterward within the same hospital visit?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'essential (primary) hypertension' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drug FROM ( SELECT p.drug, COUNT(DISTINCT ha.hadm_id) AS cnt FROM ( SELECT DISTINCT d.hadm_id FROM diagnoses_icd d JOIN d_icd_diagnoses dicd ON d.icd_code = dicd.icd_code WHERE dicd.long_title = 'essential (primary) hypertension' ) AS ha JOIN prescriptions p ON ha.hadm_id = p.hadm_id GROUP BY p.drug ORDER BY cnt DESC LIMIT 3 );",
            "gold_answer": "[['bag'], ['potassium chloride replacement (critical care and oncology)'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['bag'], ['heparin'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the crucial temporal filter (T1.charttime < T2.starttime) that ensures medications occurred after the hypertension diagnosis, and it restructures the query by only joining on hadm_id and counting ALL prescriptions in a visit rather than those following the diagnosis event. This is not a simple typo or localized fix but alters the overall logic and intended meaning of the query, requiring a significant restructuring to reintroduce the time-based join and ranking. Therefore, it is a global error.",
            "error_type": "Global: incorrect join/temporal logic"
        }
    }
}