{
    "codes": {
        "3256b9f34e2964c1987aa36e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What is the 3-month survival rate for chronic kidney disease, stage 3 (moderate) patients who were prescribed tobramycin sulfate after the diagnosis?",
            "gold_sql": "SELECT AVG(C1) FROM ( SELECT ( CASE WHEN strftime('%J',patients.dod) - strftime('%J',T3.charttime) < 365 / 4 THEN 0 ELSE 1 END ) AS C1 FROM ( SELECT T1.subject_id, T1.charttime FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'chronic kidney disease, stage 3 (moderate)' ) AND strftime('%J','2100-12-31 23:59:00') - strftime('%J',diagnoses_icd.charttime) >= 365 / 4 GROUP BY admissions.subject_id HAVING MIN(diagnoses_icd.charttime) = diagnoses_icd.charttime ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'tobramycin sulfate' GROUP BY admissions.subject_id HAVING MAX(prescriptions.starttime) = prescriptions.starttime ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime ) AS T3 JOIN patients ON T3.subject_id = patients.subject_id )",
            "pred_sql": "SELECT cast(sum(CASE WHEN patients.dod IS NULL THEN 1 ELSE 0 END) AS REAL) / count(patients.subject_id) FROM patients INNER JOIN prescriptions ON patients.subject_id = prescriptions.subject_id WHERE patients.dod IS NULL AND prescriptions.drug = 'tobramycin sulfate' AND patients.subject_id IN ( SELECT subject_id FROM diagnoses_icd WHERE icd9_code LIKE '585.3%' OR icd9_code LIKE '585.4%' OR icd9_code LIKE '585.5%' )",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: icd9_code",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non\u2010existent column icd9_code in the diagnoses_icd table (the correct column is icd_code). This is a localized column reference error and does not require restructuring the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "c6bdaeca60ccfe1176848cdb": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "On the current icu visit, what was the maximum value of the systolic blood pressure for patient 10039831?",
            "gold_sql": "SELECT MAX(chartevents.valuenum) FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 ) AND icustays.outtime IS NULL ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'arterial blood pressure systolic' AND d_items.linksto = 'chartevents' )",
            "pred_sql": "SELECT max(chartevents.valuenum) FROM chartevents INNER JOIN icustays ON chartevents.stay_id = icustays.stay_id WHERE icustays.subject_id = 10039831 AND chartevents.itemid IN (211, 220045)",
            "gold_answer": "[['125.0']]",
            "pred_answer": "[['107.0']]",
            "is_correct": false,
            "reasoning": "The predicted query joins chartevents to icustays on stay_id and filters by icustays.subject_id, but it never restricts to the current ICU stay (missing icustays.outtime IS NULL). It also uses hard-coded itemids (211, 220045) instead of looking up the single correct itemid for 'arterial blood pressure systolic' in d_items. These mistakes alter the overall logic and data scope of the query, producing an incorrect result.",
            "error_type": "Global: flawed join and filtering logic"
        },
        "3761294f8cbb1cb4f643e89c": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many patients were given senna within the same hospital visit after receiving a extraction of other tooth procedure?",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'extraction of other tooth' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'senna' ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT count(procedures_icd.subject_id) FROM d_icd_procedures INNER JOIN procedures_icd ON d_icd_procedures.icd_code = procedures_icd.icd_code WHERE d_icd_procedures.long_title = 'extraction of other tooth' AND d_icd_procedures.icd_code = 'icd9|4134'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only counts the number of 'extraction of other tooth' procedures, completely ignoring the requirement to find patients who were given senna afterward in the same hospital visit. It does not join the prescriptions table, lacks the temporal condition (procedure before prescription), and does not restrict to the same hadm_id or use a DISTINCT count on subject_id. These omissions fundamentally break the query\u2019s logic rather than fixable within a single clause.",
            "error_type": "Global: incorrect join and missing temporal logic"
        },
        "73393e923fe780f8a485bb41": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Provide the id list of patients who were diagnosed with multiple sclerosis in 2100.",
            "gold_sql": "SELECT DISTINCT admissions.subject_id FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'multiple sclerosis' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' )",
            "pred_sql": "SELECT diagnoses_icd.subject_id FROM diagnoses_icd INNER JOIN d_icd_diagnoses ON diagnoses_icd.icd_code = d_icd_diagnoses.icd_code WHERE d_icd_diagnoses.long_title = 'multiple sclerosis' AND diagnoses_icd.charttime LIKE '2100%' GROUP BY diagnoses_icd.subject_id HAVING count(diagnoses_icd.subject_id) > 1",
            "gold_answer": "[['10001217.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a GROUP BY and HAVING clause to filter patients with more than one 'multiple sclerosis' diagnosis in 2100, whereas the question only asks for patients diagnosed at least once that year. This misuse of aggregation/HAVING is confined to a specific clause and can be corrected by removing or altering that clause without restructuring the whole query. Therefore, it is a Local error.",
            "error_type": "Local: incorrect-aggregation error"
        },
        "8e6c18649967bdf6b995b323": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How frequently were extraction of other tooth procedures performed in 2100?",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'extraction of other tooth' ) AND strftime('%Y',procedures_icd.charttime) = '2100'",
            "pred_sql": "SELECT count(procedures_icd.hadm_id) FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title = 'Extraction of other tooth' AND strftime('%Y', procedures_icd.charttime) = '2100'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the two queries is the casing of the procedure name literal: the ground truth uses 'extraction of other tooth' (all lowercase) while the predicted SQL uses 'Extraction of other tooth' (capital 'E'). In SQLite, string equality is binary\u2010sensitive by default, so the join predicate fails and returns zero rows. All other parts of the query (JOIN logic, COUNT usage, year filter) are correct. This is a localized issue with the string literal matching, not the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "f1d174e3c9a7f5b7db8c6d51": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What are the top five most frequent medications prescribed within 2 months to the patients in their 60 or above after they are diagnosed with nephritis and nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'nephritis and nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.age >= 60 ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "SELECT prescriptions.drug FROM admissions INNER JOIN patients ON admissions.subject_id = patients.subject_id INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE admissions.admittime < datetime('now', 'start of month') AND admissions.admittime > datetime('now', '-2 months') AND patients.dod IS NOT NULL AND patients.dod > datetime('now', '-2 months') AND patients.gender = 'f' AND patients.age >= 60 AND admissions.admission_type = 'emergency' AND admissions.discharge_location = 'Nephrology' AND admissions.diagnosis = 'nephritis AND nephropathy, not specified as acute or chronic, with other specified pathological lesion in kidney' LIMIT 5",
            "gold_answer": "[['0.9% sodium chloride'], ['5% dextrose'], ['albuterol 0.083% neb soln'], ['albuterol inhaler'], ['bisacodyl'], ['docusate sodium'], ['donepezil'], ['ezetimibe'], ['fleet enema'], ['fluticasone-salmeterol diskus (100/50)'], ['glucagon'], ['heparin'], ['insulin'], ['levetiracetam'], ['lidocaine jelly 2% (urojet)'], ['metoprolol tartrate'], ['mirtazapine'], ['multivitamin iv'], ['neutra-phos'], ['nimodipine'], ['ondansetron'], ['polyethylene glycol'], ['potassium chl 20 meq / 1000 ml ns'], ['potassium chloride (powder)'], ['pravastatin'], ['readi-cat 2 (barium sulfate 2% suspension)'], ['senna'], ['sertraline'], ['sodium chloride 0.9%  flush'], ['torsemide']]",
            "pred_answer": "no such column: patients.age",
            "is_correct": false,
            "reasoning": "The predicted SQL immediately fails with \u201cno such column: patients.age,\u201d which indicates a column reference error (patients.age doesn\u2019t exist in the patients table). This is a localized issue within the WHERE clause and does not require restructuring the overall query logic\u2014just fixing the column reference. Therefore, it is a Local error.",
            "error_type": "Local: table/column reference error"
        },
        "fb64b71e8c133b3f07389d03": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many days has it been since patient 10021487 got their first prescription of sodium polystyrene sulfonate on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', prescriptions.starttime) ) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'sodium polystyrene sulfonate' ORDER BY prescriptions.starttime ASC LIMIT 1",
            "pred_sql": "SELECT julianday(admissions.dischtime) - julianday(admissions.admittime) AS days_since_sodium FROM admissions INNER JOIN prescriptions ON admissions.hadm_id = prescriptions.hadm_id WHERE prescriptions.drug = 'sodium polystyrene sulfonate' AND admissions.subject_id = 10021487",
            "gold_answer": "[['24.416']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to (1) limit to the current hospital visit (no filter admissions.dischtime IS NULL), (2) compute the days since the first prescription (it instead computes dischtime\u2013admittime), and (3) reference the prescription start time and the fixed current date. These are not minor clause fixes but rather a fundamentally incorrect join/filter strategy and date calculation. This error spans multiple clauses and alters the intended logic, so it is a global error.",
            "error_type": "Global: structural logic error"
        },
        "95ef5e98f26a8f7f39540db8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How much does the cholesterol ratio (total/hdl) lab test typically cost?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'labevents' AND cost.event_id IN ( SELECT labevents.row_id FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'cholesterol ratio (total/hdl)' ) )",
            "pred_sql": "SELECT cost.cost FROM d_items INNER JOIN labevents ON d_items.itemid = labevents.itemid INNER JOIN cost ON cost.event_id = labevents.row_id WHERE d_items.label = 'cholesterol ratio (total/hdl)'",
            "gold_answer": "[['8.84']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query joins d_items to labevents and then to cost, but cholesterol ratio is a lab test whose labels are stored in d_labitems, not d_items. This is a fundamental join error that breaks the intended table relationships, causing no rows to be returned. Fixing it requires changing the join strategy and table references, not just a minor tweak in a clause.",
            "error_type": "Global: incorrect join"
        },
        "ce4841cfeae7a3eabc232b3f": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the last height reading for patient 10021487 this month?",
            "gold_sql": "SELECT chartevents.valuenum FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'height (cm)' AND d_items.linksto = 'chartevents' ) AND datetime(chartevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ORDER BY chartevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT chartevents.valuenum FROM patients INNER JOIN chartevents ON patients.subject_id = chartevents.subject_id WHERE patients.subject_id = 10021487 AND patients.gender = 'm' AND chartevents.itemid = 220277 AND strftime('%Y-%m', chartevents.charttime) = '2017-03' AND chartevents.valuenum > 0 ORDER BY chartevents.charttime DESC LIMIT 1",
            "gold_answer": "[['185.0']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query\u2019s logic and structure: it joins patients to chartevents on subject_id rather than filtering by ICU stays and admission IDs, hard-codes an incorrect itemid instead of looking up 'height (cm)' in d_items, filters by an unrelated gender condition, and uses a fixed 2017-03 month instead of the current month rule. These mistakes span multiple clauses and require restructuring the query\u2019s joins, filters, and date handling, making it a Global error.",
            "error_type": "Global: flawed join strategy"
        }
    },
    "deepseek": {
        "337b3c0f7c09a85625de5909": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most commonly received procedures for patients in their 40s since 1 year ago?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 40 AND 49 ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 <= 4 )",
            "pred_sql": "SELECT p.long_title AS procedure_name FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id JOIN d_icd_procedures p ON procedures_icd.icd_code = p.icd_code WHERE admissions.age BETWEEN 40 AND 49 AND procedures_icd.charttime >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY p.long_title ORDER BY COUNT(p.long_title) DESC LIMIT 4;",
            "gold_answer": "[['anastomosis of hepatic duct to gastrointestinal tract'], ['arterial catheterization'], ['bypass coronary artery, one artery from aorta with autologous venous tissue, open approach'], ['bypass coronary artery, one artery from left internal mammary with autologous arterial tissue, open approach'], ['bypass coronary artery, one artery from left internal mammary, open approach'], ['bypass coronary artery, one artery from right internal mammary with autologous arterial tissue, open approach'], ['central venous catheter placement with guidance'], ['closed [endoscopic] biopsy of bronchus'], ['colonoscopy'], ['combined right and left heart angiocardiography'], ['combined right and left heart cardiac catheterization'], ['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['diagnostic ultrasound of abdomen and retroperitoneum'], ['diagnostic ultrasound of heart'], ['dilation of left common iliac vein with intraluminal device, percutaneous approach'], ['dilation of left external iliac vein with intraluminal device, percutaneous approach'], ['drainage of left pleural cavity, percutaneous approach'], ['drainage of left pleural cavity, percutaneous approach, diagnostic'], ['drainage of left upper lobe bronchus, via natural or artificial opening endoscopic, diagnostic'], ['drainage of pericardial cavity with drainage device, percutaneous approach'], ['endoscopic insertion of stent (tube) into bile duct'], ['enteral infusion of concentrated nutritional substances'], ['esophagogastroduodenoscopy [egd] with closed biopsy'], ['excision of cerebral ventricle, open approach'], ['excision of left internal mammary artery, percutaneous endoscopic approach'], ['excision of left upper lobe bronchus, via natural or artificial opening endoscopic, diagnostic'], ['excision of other bile duct'], ['excision of right internal mammary artery, percutaneous endoscopic approach'], ['excision of right saphenous vein, percutaneous endoscopic approach'], ['extirpation of matter from coronary artery, one artery, percutaneous approach'], ['extirpation of matter from left common iliac vein, percutaneous approach'], ['extirpation of matter from left femoral vein, percutaneous approach'], ['fluoroscopy of multiple coronary arteries using low osmolar contrast'], ['fluoroscopy of multiple coronary arteries using other contrast'], ['hemodialysis'], ['insertion of biological graft'], ['insertion of endotracheal airway into trachea, via natural or artificial opening'], ['insertion of endotracheal tube'], ['insertion of infusion device into superior vena cava, percutaneous approach'], ['insertion of intraluminal device into inferior vena cava, percutaneous approach'], ['insertion or replacement of other neurostimulator pulse generator'], ['introduction of nutritional substance into lower gi, via natural or artificial opening'], ['introduction of other therapeutic substance into heart, open approach'], ['introduction of other thrombolytic into peripheral vein, percutaneous approach'], ['laparoscopic cholecystectomy'], ['laparoscopic robotic assisted procedure'], ['measurement of cardiac sampling and pressure, left heart, percutaneous approach'], ['partial hepatectomy'], ['percutaneous abdominal drainage'], ['percutaneous balloon valvuloplasty'], ['performance of cardiac output, continuous'], ['repair of rectocele with graft or prosthesis'], ['respiratory ventilation, greater than 96 consecutive hours'], ['right heart cardiac catheterization'], ['simple excision of other lymphatic structure'], ['thoracentesis']]",
            "pred_answer": "[['continuous invasive mechanical ventilation for less than 96 consecutive hours'], ['enteral infusion of concentrated nutritional substances'], ['extirpation of matter from left common iliac vein, percutaneous approach'], ['performance of cardiac output, continuous']]",
            "is_correct": false,
            "reasoning": "The predicted query filters by age and time correctly and joins the right tables, so the table relationships and WHERE clauses are fine. The only mistake is that it uses GROUP BY + ORDER BY \u2026 LIMIT 4 instead of applying DENSE_RANK() to include all procedures tied for the top four counts. This is a localized error in the ranking/selection logic and does not require restructuring joins or filters.",
            "error_type": "Local: ranking error"
        },
        "a695186175740a8315577534": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "Tell me the total number of patients who were diagnosed with resistance to other specified antimicrobial drug since 1 year ago.",
            "gold_sql": "SELECT COUNT( DISTINCT admissions.subject_id ) FROM admissions WHERE admissions.hadm_id IN ( SELECT diagnoses_icd.hadm_id FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'resistance to other specified antimicrobial drug' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') )",
            "pred_sql": "SELECT COUNT(DISTINCT di.subject_id) FROM diagnoses_icd di JOIN d_icd_diagnoses d ON di.icd_code = d.icd_code WHERE d.long_title = 'Resistance to other specified antimicrobial drug' AND di.charttime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fails because it filters on d.long_title = 'Resistance to other specified antimicrobial drug' (capital \u201cR\u201d), whereas the actual title in d_icd_diagnoses is all lowercase (\u2018resistance to other specified antimicrobial drug\u2019). This literal mismatch causes zero rows. This is a localized error in the WHERE clause\u2019s value comparison and can be fixed by correcting the string case, without restructuring the query.",
            "error_type": "Local: value parsing error"
        },
        "0fd90a367f11fe444686ab3e": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "How long has it been in hours since the last time iso-osmotic dextrose was prescribed to patient 10021487 on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', prescriptions.starttime) ) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) AND prescriptions.drug = 'iso-osmotic dextrose' ORDER BY prescriptions.starttime DESC LIMIT 1",
            "pred_sql": "SELECT (strftime('%s', '2100-12-31 23:59:00') - strftime('%s', (SELECT MAX(p.starttime) FROM prescriptions p JOIN admissions a ON p.hadm_id = a.hadm_id WHERE p.subject_id = '10021487' AND a.dischtime IS NULL AND p.drug LIKE '%iso-osmotic dextrose%'))) / 3600 AS hours_since_last_dose ;",
            "gold_answer": "[['463.983']]",
            "pred_answer": "[['463.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL differs only in how it computes the time difference: it uses strftime('%s') and integer division by 3600, which truncates the fractional hours, instead of the ground truth\u2019s use of Julian days (strftime('%J') difference \u00d724) that preserves the fractional part. All joins, filters, and the overall query structure (focusing on current admission for patient 10021487 and the last \u2018iso\u2010osmotic dextrose\u2019 prescription) remain correct. This miscalculation is confined to the SELECT expression and does not require altering join logic or query flow.",
            "error_type": "Local: other-temporal error"
        },
        "d705db837f97e8a0a6b440f1": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the top four most frequently tested specimens for patients within 2 months after receiving colonoscopy?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'colonoscopy' ) ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH colonoscopy_patients AS ( SELECT p.subject_id, p.hadm_id;",
            "gold_answer": "[['serology/blood']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is incomplete and contains a syntax error near the semicolon after the WITH clause. This is a localized syntax mistake and does not reflect a misunderstanding of the overall query logic or structure.",
            "error_type": "Local: syntax error"
        }
    },
    "qwen3": {
        "6f048eeb50b8e74a15d18d6d": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "Since 09/2100, what was the yearly maximum dose of pre-admission/non-icu intake for patient 10021487?",
            "gold_sql": "SELECT MAX(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 ) ) AND inputevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'pre-admission/non-icu intake' AND d_items.linksto = 'inputevents' ) AND strftime('%Y-%m',inputevents.starttime) >= '2100-09' GROUP BY strftime('%Y',inputevents.starttime)",
            "pred_sql": "SELECT strftime('%Y', p.starttime) AS year, MAX(p.dose_val_rx) AS max_dose FROM prescriptions p WHERE p.subject_id = 10021487 AND p.starttime >= '2100-09-01' AND p.starttime <= '2100-12-31 23:59:00' AND ( (p.hadm_id IS NULL AND EXISTS ( SELECT 1 FROM admissions a WHERE a.subject_id = 10021487 AND p.starttime < a.admittime )) OR (p.hadm_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM icustays i WHERE i.hadm_id = p.hadm_id AND p.starttime >= i.intime AND p.starttime <= COALESCE(i.outtime, '2100-12-31 23:59:00') )) ) GROUP BY year;",
            "gold_answer": "[['1050.0']]",
            "pred_answer": "[['2100.0', '900.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the prescriptions table and dose_val_rx column with complex EXISTS conditions to approximate pre-admission/non-ICU intake, whereas the ground truth correctly uses inputevents.totalamount filtered by the d_items.label 'pre-admission/non-icu intake' and ICU stay logic. This is not a simple column or syntax mistake but a fundamental misuse of the table and logic, altering the intended meaning of the query. Therefore, it is a Global error.",
            "error_type": "Global: incorrect table usage"
        },
        "59d35743b9ec360ca5a5d2db": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What is the average daily number of patients who had secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified in 2100?",
            "gold_sql": "SELECT AVG(T1.C1) FROM ( SELECT COUNT( DISTINCT diagnoses_icd.hadm_id ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified' ) AND strftime('%Y',diagnoses_icd.charttime) = '2100' GROUP BY strftime('%Y-%m-%d',diagnoses_icd.charttime) ) AS T1",
            "pred_sql": "SELECT COUNT(DISTINCT d.subject_id) / 365.0 AS average_daily_count FROM diagnoses_icd d JOIN d_icd_diagnoses i ON d.icd_code = i.icd_code WHERE i.long_title LIKE '%secondary diabetes mellitus%' AND i.long_title NOT LIKE '%complication%' AND i.long_title NOT LIKE '%uncontrolled%' AND i.long_title NOT LIKE '%unspecified%' AND strftime('%Y', d.charttime) = '2100';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of daily averaging by counting distinct patients over the whole year and dividing by 365, instead of grouping counts per day and then averaging those daily counts. It also uses subject_id instead of hadm_id and employs inaccurate LIKE filters rather than matching the exact long_title. These alterations span multiple clauses (JOIN, WHERE, aggregation) and alter the intended computation, requiring restructuring of the query rather than a simple fix in one part.",
            "error_type": "Global: aggregation logic error"
        }
    },
    "llm2sql": {
        "25675cd3569afa3bcdb2d55a": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "Among the patients who are prescribed clonidine patch 0.3 mg/24 hr, what are the three most frequently prescribed drugs at the same time since 1 year ago?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'clonidine patch 0.3 mg/24 hr' AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id AND datetime(prescriptions.starttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "SELECT drug FROM ( SELECT drug, COUNT(*) as frequency FROM prescriptions WHERE hadm_id IN ( SELECT DISTINCT hadm_id FROM prescriptions WHERE drug = 'clonidine patch 0.3 mg/24 hr' AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-1 year') ) AND drug != 'clonidine patch 0.3 mg/24 hr' AND datetime(starttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY drug ORDER BY frequency DESC LIMIT 3 )",
            "gold_answer": "[['clonidine patch 0.3 mg/24 hr']]",
            "pred_answer": "[['iso-osmotic dextrose'], ['olanzapine (disintegrating tablet)'], ['sodium chloride 0.9%  flush']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses hadm_id membership to find co-prescriptions within the same admission rather than matching prescriptions that occurred at the exact same starttime. This violates the core requirement of finding drugs prescribed \"at the same time\" and alters the overall logic of the query, not just a local clause. The error spans multiple parts of the query (subquery selection and grouping) and requires restructuring the join condition to match timestamps, making it a global logical error.",
            "error_type": "Global: incorrect join logic"
        }
    },
    "macsql": {
        "91406b50e1e11128c97d1761": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "What diagnosis did patient 10004457 receive the last time since 1 year ago?",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT diagnoses_icd.icd_code FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004457 ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ORDER BY diagnoses_icd.charttime DESC LIMIT 1 )",
            "pred_sql": "SELECT DISTINCT D.long_title FROM diagnoses_icd AS DI JOIN d_icd_diagnoses AS D ON DI.icd_code = D.icd_code WHERE DI.hadm_id = ( SELECT hadm_id FROM admissions WHERE subject_id = 10004457 AND datetime(admittime) >= datetime('2100-12-31 23:59:00', '-1 year') ORDER BY admittime DESC LIMIT 1 )",
            "gold_answer": "[['personal history of antineoplastic chemotherapy']]",
            "pred_answer": "[['asthma, unspecified type, unspecified'], ['coronary atherosclerosis of native coronary artery'], ['other and unspecified hyperlipidemia'], ['personal history of antineoplastic chemotherapy'], [\"personal history of hodgkin's disease\"], ['personal history of irradiation, presenting hazards to health'], ['personal history of malignant neoplasm of prostate'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by filtering on the admission date rather than the diagnosis time and by selecting the latest admission ID rather than the latest diagnosis event. This error spans multiple clauses (subquery filtering on admittime, join logic, lack of ordering by diagnosis charttime) and alters the intended meaning, resulting in multiple diagnoses rather than the single last diagnosis. Such an error requires restructuring the query logic beyond a single clause fix.",
            "error_type": "Global: flawed filter/join logic"
        },
        "db1bbc0ebe53801e7899373b": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "Calculate the number of patients who were given torsemide prescription within the same hospital visit following their hemodialysis this year.",
            "gold_sql": "SELECT COUNT( DISTINCT T1.subject_id ) FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'hemodialysis' ) AND datetime(procedures_icd.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE prescriptions.drug = 'torsemide' AND datetime(prescriptions.starttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id",
            "pred_sql": "SELECT COUNT(DISTINCT T1.subject_id) AS num_patients FROM ( SELECT DISTINCT T1.subject_id, T1.hadm_id FROM procedures_icd AS T1 WHERE T1.icd_code = 'icd9|3995' AND datetime(T1.charttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') ) AS Hemodialysis INNER JOIN ( SELECT DISTINCT T2.subject_id, T2.hadm_id FROM prescriptions AS T2 WHERE T2.drug LIKE '%torsemide%' AND datetime(T2.starttime, 'start of year') = datetime('2100-12-31 23:59:00', 'start of year', '-0 year') ) AS Torsemide ON Hemodialysis.subject_id = Torsemide.subject_id AND Hemodialysis.hadm_id = Torsemide.hadm_id",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such column: T1.subject_id",
            "is_correct": false,
            "reasoning": "The execution error \u201cno such column: T1.subject_id\u201d indicates that the outer query is referencing an alias T1 which is not defined at that scope. The subquery is aliased as Hemodialysis, so the outer SELECT should reference Hemodialysis.subject_id (or the subquery should be aliased T1). This is a localized column reference mistake and does not affect the overall query logic or structure.",
            "error_type": "Local: table/column reference error"
        }
    },
    "dinsql": {
        "a395c9eb659ffebdeff1c451": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the total cost for patient 10005817's hospital stay since 1 year ago?",
            "gold_sql": "SELECT SUM(cost.cost) FROM cost WHERE cost.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005817 ) AND datetime(cost.chargetime) >= datetime('2100-12-31 23:59:00','-1 year')",
            "pred_sql": "SELECT SUM(DISTINCT C.cost) AS total_cost FROM admissions AS A JOIN cost AS C ON A.hadm_id = C.hadm_id WHERE A.subject_id = 10005817 AND datetime(A.admittime) >= datetime('2100-12-31 23:59:00', '-1 year') AND C.event_type IN ('procedures_icd', 'labevents', 'prescriptions', 'diagnoses_icd')",
            "gold_answer": "[['4840.59']]",
            "pred_answer": "[['1180.45']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the logic of the question in two ways: (1) it filters admissions by admittime in the last year instead of filtering cost records by their chargetime, and (2) it adds an unnecessary event_type filter that excludes some cost entries. These mistakes alter the overall structure and meaning of the query rather than fixing a small localized mistake, so this is a global error.",
            "error_type": "Global: incorrect date filtering and event_type constraint"
        },
        "2c652b6030efee163a6dfaf1": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "Retrieve the top five most frequent diagnoses of patients with age in their 50s since 1 year ago.",
            "gold_sql": "SELECT d_icd_diagnoses.long_title FROM d_icd_diagnoses WHERE d_icd_diagnoses.icd_code IN ( SELECT T1.icd_code FROM ( SELECT diagnoses_icd.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age BETWEEN 50 AND 59 ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') GROUP BY diagnoses_icd.icd_code ) AS T1 WHERE T1.C1 <= 5 )",
            "pred_sql": "SELECT D.long_title FROM diagnoses_icd AS DI JOIN d_icd_diagnoses AS D ON DI.icd_code = D.icd_code JOIN admissions AS A ON DI.hadm_id = A.hadm_id WHERE A.age BETWEEN 50 AND 59 AND datetime(DI.charttime) >= datetime('2100-12-31 23:59:00', '-1 year') GROUP BY D.long_title ORDER BY COUNT(DISTINCT DI.hadm_id) DESC LIMIT 5",
            "gold_answer": "[['accidents occurring in residential institution'], ['acute kidney failure, unspecified'], ['acute pancreatitis'], ['acute respiratory failure'], ['alcohol abuse, unspecified'], ['anemia of other chronic disease'], ['anemia, unspecified'], ['asthma, unspecified type, unspecified'], ['asymptomatic human immunodeficiency virus [hiv] infection status'], ['chronic airway obstruction, not elsewhere classified'], ['cirrhosis of liver without mention of alcohol'], ['colostomy status'], ['cyst and pseudocyst of pancreas'], ['dependence on respirator [ventilator] status'], ['depressive disorder, not elsewhere classified'], ['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['hyposmolality and/or hyponatremia'], ['long term (current) use of insulin'], ['long-term (current) use of insulin'], ['nonspecific elevation of levels of transaminase or lactic acid dehydrogenase [ldh]'], ['obstructive sleep apnea (adult)(pediatric)'], ['other and unspecified hyperlipidemia'], ['other ascites'], ['other iatrogenic hypotension'], ['other specified procedures as the cause of abnormal reaction of patient, or of later complication, without mention of misadventure at time of procedure'], ['patient room in hospital as the place of occurrence of the external cause'], ['personal history of malignant neoplasm of breast'], ['pneumonitis due to inhalation of food or vomitus'], ['portal hypertension'], ['pure hypercholesterolemia'], ['schizoaffective disorder, unspecified'], ['secondary diabetes mellitus without mention of complication, not stated as uncontrolled, or unspecified'], ['thrombocytopenia, unspecified'], ['tobacco use disorder'], ['type 2 diabetes mellitus without complications'], ['unspecified essential hypertension']]",
            "pred_answer": "[['diabetes mellitus without mention of complication, type ii or unspecified type, not stated as uncontrolled'], ['other ascites'], ['pure hypercholesterolemia'], ['tobacco use disorder'], ['unspecified essential hypertension']]",
            "is_correct": false,
            "reasoning": "The predicted SQL matches the ground truth in terms of tables joined, filter conditions (age between 50 and 59, charttime within one year), grouping by diagnosis title, and limiting to five results. The only difference is the aggregation used in ORDER BY: the ground truth ranks diagnoses by total occurrences (COUNT(*)), while the prediction ranks by distinct admissions count (COUNT(DISTINCT DI.hadm_id)). This is a localized mistake in the aggregate function, not a structural or multi-clause logical error.",
            "error_type": "Local: aggregate function error"
        }
    }
}