{
    "qwen3": {
        "8904dd5587873880719ae639": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "List the top five most frequent specimens that patients are tested within the same hospital visit after receiving dilation of left main bronchus with intraluminal device, via natural or artificial opening endoscopic.",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'dilation of left main bronchus with intraluminal device, via natural or artificial opening endoscopic' ) ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH target_procedure AS ( SELECT icd_code FROM d_icd_procedures WHERE long_title = 'Dilation of left main bronchus with intraluminal device, via natural or artificial opening endoscopic' ) SELECT m.spec_type_desc FROM procedures_icd p JOIN target_procedure tp ON p.icd_code = tp.icd_code JOIN microbiologyevents m ON p.hadm_id = m.hadm_id GROUP BY m.spec_type_desc ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['bronchial washings']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the critical WHERE clause enforcing that microbiology events occur after the dilation procedure (T1.charttime < T2.charttime) and fails to correlate on subject, thus joining all specimens in the same admission regardless of timing. This is not a simple typo or minor syntax fix but a fundamental join/logical error that changes the query\u2019s overall meaning and requires restructuring (adding the time filter and proper join). Therefore, it is a Global error.",
            "error_type": "Global: join logic error"
        },
        "a8d68ef0763c469039fcfab5": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "What was the name of the procedure that was performed two or more times last month on patient 10027602?",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T1.icd_code FROM ( SELECT procedures_icd.icd_code, COUNT( procedures_icd.charttime ) AS C1 FROM procedures_icd WHERE procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND datetime(procedures_icd.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') GROUP BY procedures_icd.icd_code ) AS T1 WHERE T1.C1 >= 2 )",
            "pred_sql": "SELECT d.long_title FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE p.subject_id = 10027602 AND strftime('%Y-%m', p.charttime) = strftime('%Y-%m', datetime('2100-12-31', '-1 month')) GROUP BY p.icd_code HAVING COUNT(*) >= 2;",
            "gold_answer": "[['arteriography of cerebral arteries']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the filtering logic: instead of restricting procedures via admissions.hadm_id for patient 10027602 it directly filters on procedures_icd.subject_id. This alters the intended table relationship and overall query logic (it bypasses the admissions join/subquery), leading to incorrect results. Fixing it requires restructuring the query (adding the proper admissions subquery or join), not just tweaking a single clause. Therefore, this is a Global error.",
            "error_type": "Global: incorrect-join-condition"
        },
        "d5e534e863c69bb1b9c84c51": {
            "db_id": "mimic_iv",
            "generator_name": "qwen3",
            "question": "When was the first time patient 10029291 was measured with a heart rate greater than 76.0 on the first icu visit?",
            "gold_sql": "SELECT chartevents.charttime FROM chartevents WHERE chartevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 ) AND icustays.outtime IS NOT NULL ORDER BY icustays.intime ASC LIMIT 1 ) AND chartevents.itemid IN ( SELECT d_items.itemid FROM d_items WHERE d_items.label = 'heart rate' AND d_items.linksto = 'chartevents' ) AND chartevents.valuenum > 76.0 ORDER BY chartevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT MIN(chartevents.charttime) AS first_time FROM chartevents INNER JOIN d_items ON chartevents.itemid = d_items.itemid INNER JOIN ( SELECT stay_id, intime, outtime FROM icustays WHERE subject_id = 10029291 ORDER BY intime ASC LIMIT 1 ) AS first_icu ON chartevents.stay_id = first_icu.stay_id WHERE d_items.label = 'heart rate' AND chartevents.valuenum > 76.0 AND chartevents.charttime BETWEEN first_icu.intime AND first_icu.outtime;",
            "gold_answer": "[['2100-02-25 04:03:00']]",
            "pred_answer": "[['2100-02-25 05:00:00']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to include the icustays.outtime IS NOT NULL filter when selecting the \"first ICU visit.\" Because of this missing WHERE clause condition, it picks the wrong ICU stay and thus returns a later heart\u2010rate measurement (05:00 instead of 04:03). This is a localized mistake in the subquery\u2019s filter, not a restructuring of the overall query logic or multiple clauses.",
            "error_type": "Local: missing-filter error"
        }
    },
    "macsql": {
        "4de248a91a8c0a75290ba3d8": {
            "db_id": "mimic_iv",
            "generator_name": "macsql",
            "question": "How many days have elapsed since the first time patient 10039831 got a sodium lab test on the current hospital visit?",
            "gold_sql": "SELECT 1 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', labevents.charttime) ) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'sodium' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10039831 AND admissions.dischtime IS NULL ) ORDER BY labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT JULIANDAY('2100-12-31') - JULIANDAY( (SELECT MIN(`charttime`) FROM labevents WHERE `subject_id` = 10039831 AND `hadm_id` = ( SELECT `hadm_id` FROM admissions WHERE `subject_id` = 10039831 AND `dischtime` IS NULL ) AND `itemid` = ( SELECT `itemid` FROM d_labitems WHERE `label` = 'sodium' )) ) AS days_elapsed",
            "gold_answer": "[['0.319']]",
            "pred_answer": "[['-0.681']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only differs in the SELECT clause\u2019s date handling: it uses JULIANDAY('2100-12-31') instead of JULIANDAY('2100-12-31 23:59:00') (and uses JULIANDAY rather than strftime('%J')) which leads to the wrong offset and a negative result. The rest of the query structure\u2014filtering by hadm_id, subject_id, and sodium itemid\u2014is correct. Fixing the literal format in the SELECT date expression corrects the result without altering joins, subqueries, or overall logic.",
            "error_type": "Local: value parsing error"
        }
    },
    "codes": {
        "be7acaa3733f17d7c078dd3c": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the total dosage of iso-osmotic sodium chloride that patient 10027602 was prescribed last month?",
            "gold_sql": "SELECT SUM(prescriptions.dose_val_rx) FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10027602 ) AND prescriptions.drug = 'iso-osmotic sodium chloride' AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month')",
            "pred_sql": "SELECT sum(drug) FROM prescriptions WHERE subject_id = 10027602 AND starttime LIKE '2100-11-%'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['12.06']]",
            "is_correct": false,
            "reasoning": "The predicted SQL incorrectly sums the drug column (sum(drug)) instead of the numeric dosage column dose_val_rx, and it also omits the filter for drug = 'iso-osmotic sodium chloride' and misuses subject_id vs hadm_id and starttime filtering. These mistakes are confined to column references and WHERE-clause filters and do not require restructuring the overall query logic.",
            "error_type": "Local: column reference error"
        },
        "c85da658022ccba3515bc2f8": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Count the number of times that patient 10029291 received the performance of urinary filtration, multiple procedure on the last hospital visit.",
            "gold_sql": "SELECT COUNT(*) FROM procedures_icd WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'performance of urinary filtration, multiple' ) AND procedures_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10029291 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 )",
            "pred_sql": "SELECT count(DISTINCT procedures_icd.hadm_id) FROM procedures_icd INNER JOIN d_icd_procedures ON procedures_icd.icd_code = d_icd_procedures.icd_code WHERE d_icd_procedures.long_title LIKE '%urinary filtration%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['3.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict to patient 10029291 and the last hospital visit, uses a LIKE filter instead of an exact title match, and counts distinct hadm_id instead of counting each procedure occurrence. These omissions and changes alter the overall query logic and intended scope, not just a localized clause, requiring restructuring of filters and aggregation.",
            "error_type": "Global: incorrect filtering and aggregation"
        },
        "e3cba1c79b43587fbfdbcdfc": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What was the first time that patient 10003400 had the maximum amount of chloride in 06/this year?",
            "gold_sql": "SELECT labevents.charttime FROM labevents WHERE labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10003400 ) AND labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'chloride' ) AND datetime(labevents.charttime,'start of year') = datetime('2100-12-31 23:59:00','start of year','-0 year') AND strftime('%m',labevents.charttime) = '06' ORDER BY labevents.valuenum DESC, labevents.charttime ASC LIMIT 1",
            "pred_sql": "SELECT min(charttime) FROM chartevents WHERE itemid = 510 AND subject_id = 10003400 AND strftime('%m', charttime) = '06' AND value = ( SELECT max(value) FROM chartevents WHERE itemid = 509 AND subject_id = 10003400 AND strftime('%m', charttime) = '06' )",
            "gold_answer": "[['2100-06-15 05:34:00']]",
            "pred_answer": "no such column: value",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'value' in the chartevents table, causing the execution error 'no such column: value'. This is a localized column reference error and does not require restructuring the overall query logic.",
            "error_type": "Local: table/column reference error"
        },
        "8010b9081130f60c6d4a482e": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "What's the cost of a drug known as octreotide acetate?",
            "gold_sql": "SELECT DISTINCT cost.cost FROM cost WHERE cost.event_type = 'prescriptions' AND cost.event_id IN ( SELECT prescriptions.row_id FROM prescriptions WHERE prescriptions.drug = 'octreotide acetate' )",
            "pred_sql": "SELECT cost.cost FROM cost INNER JOIN prescriptions ON cost.row_id = prescriptions.row_id WHERE prescriptions.drug = 'octreotide acetate'",
            "gold_answer": "[['12.44']]",
            "pred_answer": "[['10.06'], ['10.09'], ['11.03'], ['11.16'], ['11.55'], ['8.6'], ['9.32']]",
            "is_correct": false,
            "reasoning": "The predicted query joins cost.row_id to prescriptions.row_id and omits the event_type filter, whereas the correct logic joins cost.event_id to prescriptions.row_id and restricts cost.event_type = 'prescriptions'. This wrong join key and missing filter fundamentally alters the intended mapping between cost entries and prescription events and requires restructuring the query logic rather than a simple local fix.",
            "error_type": "Global: incorrect join"
        },
        "76ee482fdcd9d445253719e5": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "Calculate the difference between the total input and output volume for patient 10005909 on last month/11.",
            "gold_sql": "SELECT ( SELECT SUM(inputevents.totalamount) FROM inputevents WHERE inputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(inputevents.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',inputevents.starttime) = '11' ) - ( SELECT SUM(outputevents.value) FROM outputevents WHERE outputevents.stay_id IN ( SELECT icustays.stay_id FROM icustays WHERE icustays.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10005909 ) ) AND datetime(outputevents.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-1 month') AND strftime('%d',outputevents.charttime) = '11' )",
            "pred_sql": "SELECT sum(inputevents.totalamount) - sum(outputevents.totalamount) FROM patients INNER JOIN inputevents ON patients.subject_id = inputevents.subject_id INNER JOIN outputevents ON patients.subject_id = outputevents.subject_id WHERE patients.subject_id = 10005909 AND strftime('%Y %m',inputevents.starttime) = '2100 11' AND strftime('%Y %m',outputevents.charttime) = '2100 11'",
            "gold_answer": "[['3210.0']]",
            "pred_answer": "no such column: outputevents.totalamount",
            "is_correct": false,
            "reasoning": "The predicted SQL refers to a non-existent column outputevents.totalamount (the correct column for output volume is outputevents.value). This is a simple column reference error and does not require restructuring the query's logic or joins\u2014just fixing the column name.",
            "error_type": "Local: table/column reference error"
        },
        "ba1809d4eafa379093b710bd": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours has it been since patient 10021487 stayed in the careunit emergency department on the current hospital visit for the first time?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', transfers.intime) ) FROM transfers WHERE transfers.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10021487 AND admissions.dischtime IS NULL ) AND transfers.careunit = 'emergency department' ORDER BY transfers.intime ASC LIMIT 1",
            "pred_sql": "SELECT strftime('%J', transfers.outtime) - strftime('%J', transfers.intime) AS days FROM transfers INNER JOIN admissions ON transfers.hadm_id = admissions.hadm_id WHERE transfers.careunit = 'emergency' AND admissions.subject_id = 10021487",
            "gold_answer": "[['601.033']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query fails to restrict to the current hospital admission (missing admissions.dischtime IS NULL), uses the wrong careunit literal ('emergency' instead of 'emergency department'), computes the difference using transfers.outtime instead of the fixed current time, and omits converting days to hours and ordering/limiting to the first occurrence. These issues span multiple clauses (SELECT expression, JOIN/WHERE filters, and overall query structure) and require restructuring the logic, so this is not a minor typo but a fundamental query logic error.",
            "error_type": "Global: incorrect filter and time calculation logic"
        },
        "ea7da99dbb2c6595e989664a": {
            "db_id": "mimic_iv",
            "generator_name": "codes",
            "question": "How many hours have passed since the last time patient 10018081 received a lab test of sodium on the current hospital visit?",
            "gold_sql": "SELECT 24 * ( strftime('%J','2100-12-31 23:59:00') - strftime('%J', labevents.charttime) ) FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'sodium' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10018081 AND admissions.dischtime IS NULL ) ORDER BY labevents.charttime DESC LIMIT 1",
            "pred_sql": "SELECT round((strftime('%J', labevents.charttime) - strftime('%J', transfers.intime)) * 24) AS hours_since FROM labevents INNER JOIN transfers ON transfers.hadm_id = labevents.hadm_id WHERE labevents.itemid = 50285 AND labevents.subject_id = 10018081",
            "gold_answer": "[['1.867']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses transfers.intime as the reference time instead of the fixed current time ('2100-12-31 23:59:00') and fails to restrict to the current hospital admission (dischtime IS NULL). It also omits ordering/limiting to the last sodium test and misuses the transfers table rather than admissions. These mistakes fundamentally alter the logic and data relationships, requiring restructuring of multiple clauses (JOIN, WHERE, SELECT) to correct, so this is not a localized syntax fix but a core logical error.",
            "error_type": "Global: incorrect join and reference logic"
        }
    },
    "dinsql": {
        "0f494beb8dd694f45a24771a": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What are the three most frequent drugs prescribed to female patients with an age of 40s within 2 months after they are diagnosed with alcohol abuse, unspecified?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'alcohol abuse, unspecified' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id WHERE admissions.subject_id IN ( SELECT patients.subject_id FROM patients WHERE patients.gender = 'f' ) AND admissions.age BETWEEN 40 AND 49 ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T2.starttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 3",
            "pred_sql": "WITH AlcoholAbuseDiagnoses AS ( SELECT DISTINCT subject_id, hadm_id, charttime FROM diagnoses_icd WHERE icd_code = 'icd9|30500' -- Assuming 'icd9|30500' is the code for alcohol abuse, unspecified ), FemalePatientsIn40s AS ( SELECT DISTINCT A.subject_id, A.hadm_id, A.admittime FROM admissions A JOIN patients P ON A.subject_id = P.subject_id WHERE P.gender = 'f' AND A.age BETWEEN 40 AND 49 ), PrescriptionsWithin2Months AS ( SELECT P.drug FROM prescriptions P JOIN AlcoholAbuseDiagnoses D ON P.subject_id = D.subject_id AND P.hadm_id = D.hadm_id JOIN FemalePatientsIn40s F ON P.subject_id = F.subject_id AND P.hadm_id = F.hadm_id WHERE datetime(P.starttime) BETWEEN datetime(D.charttime) AND datetime(D.charttime, '+2 months') ) SELECT drug FROM ( SELECT drug, COUNT(*) AS freq FROM PrescriptionsWithin2Months GROUP BY drug ORDER BY freq DESC LIMIT 3 ) AS TopDrugs",
            "gold_answer": "[['acetaminophen'], ['aluminum-magnesium hydrox.-simethicone'], ['amitriptyline'], ['darunavir'], ['emtricitabine-tenofovir (truvada)'], ['hydroxyzine'], ['ibuprofen'], ['milk of magnesia'], ['nicotine polacrilex'], ['ritonavir'], ['sertraline']]",
            "pred_answer": "incomplete input",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the intended logic by forcing prescriptions and diagnoses to share the same hadm_id (hospital admission) rather than simply matching by subject_id and then applying the 2\u2010month window. This fundamentally alters which prescriptions are considered (only those in the same admission as the diagnosis instead of any admission within 2 months after diagnosis) and thus breaks the overall query logic. This is not a small typo or localized syntax issue but a core join/key error affecting multiple clauses.",
            "error_type": "Global: incorrect join keys"
        },
        "b8eb4f72ed272217689345d4": {
            "db_id": "mimic_iv",
            "generator_name": "dinsql",
            "question": "What is the percentile po2 value of 128.0 among patients of the same age as patient 10018081 on the current hospital visit?",
            "gold_sql": "SELECT DISTINCT T1.C1 FROM ( SELECT labevents.valuenum, PERCENT_RANK() OVER ( ORDER BY labevents.valuenum ) AS C1 FROM labevents WHERE labevents.itemid IN ( SELECT d_labitems.itemid FROM d_labitems WHERE d_labitems.label = 'po2' ) AND labevents.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.age = ( SELECT admissions.age FROM admissions WHERE admissions.subject_id = 10018081 AND admissions.dischtime IS NULL ) ) ) AS T1 WHERE T1.valuenum = 128.0",
            "pred_sql": "WITH same_age_patients AS ( SELECT labevents.valuenum AS po2_value FROM labevents INNER JOIN admissions ON labevents.hadm_id = admissions.hadm_id WHERE admissions.age = (SELECT age FROM admissions WHERE subject_id = 10018081 AND dischtime IS NULL) AND labevents.itemid = (SELECT itemid FROM d_labitems WHERE label = 'po2') AND admissions.dischtime IS NULL ) SELECT ROUND((SUM(CASE WHEN po2_value <= 128.0 THEN 1 ELSE 0 END) * 100.0) / COUNT(*), 2) AS percentile FROM same_age_patients",
            "gold_answer": "[['0.882']]",
            "pred_answer": "[['87.5']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes how the percentile is computed. Instead of using PERCENT_RANK() to return a value between 0 and 1 for the specific measurement (as required), it computes a cumulative distribution percentage (count \u2264 128 / total count * 100). This is a logical error affecting the entire query\u2019s approach to percentile calculation, not a minor syntax or column reference issue.",
            "error_type": "Global: incorrect percentile calculation"
        }
    },
    "llm2sql": {
        "4c81362dc30f4548514eecc8": {
            "db_id": "mimic_iv",
            "generator_name": "llm2sql",
            "question": "List the top five most frequent microbiology tests given to patients after being diagnosed with hypokalemia within the same hospital visit since 1 year ago.",
            "gold_sql": "SELECT T3.test_name FROM ( SELECT T2.test_name, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime, admissions.hadm_id FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'hypokalemia' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.test_name, microbiologyevents.charttime, admissions.hadm_id FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.test_name ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH hypo_diagnoses AS ( SELECT DISTINCT di.subject_id, di.hadm_id, di.charttime FROM diagnoses_icd di JOIN d_icd_diagnoses dicd ON di.icd_code = dicd.icd_code JOIN admissions a ON di.hadm_id = a.hadm_id WHERE dicd.long_title LIKE '%hypokalemia%' AND datetime(di.charttime) >= datetime('2100-12-31', '-1 year') ), tests_after_diagnoses AS ( SELECT me.test_name, me.charttime FROM microbiologyevents me JOIN hypo_diagnoses hd ON me.hadm_id = hd.hadm_id WHERE datetime(me.charttime) > datetime(hd.charttime) ) SELECT test_name FROM tests_after_diagnoses GROUP BY test_name ORDER BY COUNT(*) DESC LIMIT 5;",
            "gold_answer": "[['anaerobic bottle gram stain'], ['blood culture, routine'], ['c. difficile pcr'], ['gram stain'], ['mrsa screen'], ['respiratory culture'], ['urine culture']]",
            "pred_answer": "[['blood culture, routine'], ['gram stain'], ['mrsa screen'], ['respiratory culture'], ['urine culture']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to restrict microbiology events to within the last year (missing the WHERE datetime(me.charttime) >= datetime('2100-12-31 23:59:00','-1 year') filter) and uses LIMIT 5 instead of applying a DENSE_RANK to include ties. Both mistakes are confined to specific clauses (the WHERE filter in the CTE and the ORDER BY/LIMIT vs. DENSE_RANK logic) and can be corrected without altering the overall join structure or logic of the query.",
            "error_type": "Local: filter and ranking clause error"
        }
    },
    "deepseek": {
        "362a8ded48a53e28746e95a6": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was the first time that patient 10004422 was prescribed quetiapine fumarate and olanzapine on the last hospital visit at the same time?",
            "gold_sql": "SELECT T1.starttime FROM ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND prescriptions.drug = 'olanzapine' ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004422 AND admissions.dischtime IS NOT NULL ORDER BY admissions.admittime DESC LIMIT 1 ) AND prescriptions.drug = 'quetiapine fumarate' ) AS T2 ON T1.subject_id = T2.subject_id WHERE datetime(T1.starttime) = datetime(T2.starttime) ORDER BY T1.starttime ASC LIMIT 1",
            "pred_sql": "WITH last_hospital_visit AS ( SELECT hadm_id, admittime, dischtime FROM admissions WHERE subject_id = 10004422 AND dischtime IS NOT NULL ORDER BY admittime DESC LIMIT 1 ), prescriptions_quetiapine AS ( SELECT starttime, stoptime FROM prescriptions WHERE drug = 'quetiapine fumarate' AND hadm_id = (SELECT hadm_id FROM last_hospital_visit) ), prescriptions_olanzapine AS ( SELECT starttime, stoptime FROM prescriptions WHERE drug = 'olanzapine' AND hadm_id = (;",
            "gold_answer": "[['2100-01-23 10:00:00']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is incomplete and contains a syntax error near the semicolon: the CTE for prescriptions_olanzapine ends with \"AND hadm_id = (;\". This is a localized syntax issue that can be fixed by completing the parentheses and CTE definition without altering the overall query logic.",
            "error_type": "Local: syntax error"
        },
        "7f7a2b9981a742290a99b704": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the top five most frequent specimens ordered for patients after being diagnosed with type 2 diabetes mellitus with foot ulcer within 2 months since 1 year ago?",
            "gold_sql": "SELECT T3.spec_type_desc FROM ( SELECT T2.spec_type_desc, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, diagnoses_icd.charttime FROM diagnoses_icd JOIN admissions ON diagnoses_icd.hadm_id = admissions.hadm_id WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'type 2 diabetes mellitus with foot ulcer' ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, microbiologyevents.spec_type_desc, microbiologyevents.charttime FROM microbiologyevents JOIN admissions ON microbiologyevents.hadm_id = admissions.hadm_id WHERE datetime(microbiologyevents.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.spec_type_desc ) AS T3 WHERE T3.C1 <= 5",
            "pred_sql": "WITH diabetes_foot_ulcer_patients AS ( SELECT DISTINCT d1.subject_id FROM diagnoses_icd d1 JOIN d_icd_diagnoses dd1 ON d1.icd_code = dd1.icd_code WHERE dd1.long_title LIKE '%type 2 diabetes mellitus%' AND d1.charttime >= datetime('2100-12-31 23:59:00', '-1 year') AND EXISTS ( SELECT 1 FROM diagnoses_icd d2 JOIN d_icd_diagnoses dd2 ON d2.icd_code = dd2.icd_code WHERE d2.subject_id = d1.subject_id AND dd2.long_title LIKE '%foot ulcer;",
            "gold_answer": "[['blood culture'], ['mrsa screen'], ['sputum'], ['stool'], ['urine']]",
            "pred_answer": "unrecognized token: \"'%foot ulcer;\"",
            "is_correct": false,
            "reasoning": "The predicted SQL is syntactically invalid: it\u2019s truncated after LIKE '%foot ulcer; without closing quotes or parentheses, leading to an 'unrecognized token' error. This is a localized syntax/parse error in the WHERE clause and does not involve the overall structure or logic of the query.",
            "error_type": "Local: syntax error"
        },
        "b1f566e193cdc79472613929": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "When was the last time that patient 10019917 was diagnosed with acute posthemorrhagic anemia since 1 year ago?",
            "gold_sql": "SELECT diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'acute posthemorrhagic anemia' ) AND diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10019917 ) AND datetime(diagnoses_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ORDER BY diagnoses_icd.charttime DESC LIMIT 1",
            "pred_sql": "SELECT MAX(d.charttime) AS last_diagnosis_time FROM diagnoses_icd d JOIN d_icd_diagnoses dd ON d.icd_code = dd.icd_code WHERE d.subject_id = 10019917 AND dd.long_title = 'Acute posthemorrhagic anemia' AND d.charttime >= datetime('2100-12-31 23:59:00', '-1 year');",
            "gold_answer": "[['2100-01-27 23:25:00']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it filters on dd.long_title = 'Acute posthemorrhagic anemia' (capitalized 'A') while the actual title in the database is lowercase 'acute posthemorrhagic anemia'. This mismatch in the string literal prevents any rows from matching. All other aspects of the query structure remain correct, and no global restructuring of joins or logic is needed\u2014only the literal in the WHERE clause must be corrected.",
            "error_type": "Local: value parsing error"
        },
        "8f66cff426954aa52eb928cc": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What drug was prescribed to patient 10004720 within the same day subsequent to their age-related osteoporosis without current pathological fracture diagnosis this month?",
            "gold_sql": "SELECT T2.drug FROM ( SELECT diagnoses_icd.subject_id, diagnoses_icd.charttime FROM diagnoses_icd WHERE diagnoses_icd.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND diagnoses_icd.icd_code = ( SELECT d_icd_diagnoses.icd_code FROM d_icd_diagnoses WHERE d_icd_diagnoses.long_title = 'age-related osteoporosis without current pathological fracture' ) AND datetime(diagnoses_icd.charttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T1 JOIN ( SELECT prescriptions.subject_id, prescriptions.drug, prescriptions.starttime FROM prescriptions WHERE prescriptions.hadm_id IN ( SELECT admissions.hadm_id FROM admissions WHERE admissions.subject_id = 10004720 ) AND datetime(prescriptions.starttime,'start of month') = datetime('2100-12-31 23:59:00','start of month','-0 month') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND datetime(T1.charttime,'start of day') = datetime(T2.starttime,'start of day')",
            "pred_sql": "SELECT DISTINCT p.drug FROM prescriptions p JOIN diagnoses_icd d_icd ON p.hadm_id = d_icd.hadm_id AND p.subject_id = d_icd.subject_id JOIN d_icd_diagnoses diag ON d_icd.icd_code = diag.icd_code WHERE diag.long_title LIKE '%age-related osteoporosis%' AND p.starttime >= datetime(d_icd.charttime, 'start of day') AND p.starttime < datetime(d_icd.charttime, 'start of day', '+1 day') AND datetime(d_icd.charttime, 'start of month') = datetime('2100-12-31 23:59:00', 'start of month', '-0 month') AND NOT EXISTS ( SELECT;",
            "gold_answer": "[['0.9% sodium chloride'], ['bag'], ['bag'], ['bag'], ['famotidine'], ['midazolam'], ['soln']]",
            "pred_answer": "near \";\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL is syntactically incomplete\u2014 it ends abruptly with \"AND NOT EXISTS ( SELECT;\" and has no closing parentheses or subquery, resulting in a SQLite syntax error. This is a localized mistake in the query structure (missing subquery and parentheses) rather than a logical misjoin or overall structural flaw.",
            "error_type": "Local: syntax error"
        },
        "ceb9d85c5fba69ed153192b8": {
            "db_id": "mimic_iv",
            "generator_name": "deepseek",
            "question": "What are the four most commonly prescribed drugs for patients that received percutaneous abdominal drainage within the same hospital visit?",
            "gold_sql": "SELECT T3.drug FROM ( SELECT T2.drug, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime, admissions.hadm_id FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'percutaneous abdominal drainage' ) ) AS T1 JOIN ( SELECT admissions.subject_id, prescriptions.drug, prescriptions.starttime, admissions.hadm_id FROM prescriptions JOIN admissions ON prescriptions.hadm_id = admissions.hadm_id ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.starttime AND T1.hadm_id = T2.hadm_id GROUP BY T2.drug ) AS T3 WHERE T3.C1 <= 4",
            "pred_sql": "WITH drainage_patients AS ( SELECT DISTINCT p.subject_id, p.hadm_id FROM procedures_icd p JOIN d_icd_procedures dp ON p.icd_code = dp.icd_code WHERE dp.long_title LIKE '%percutaneous abdominal drainage%' ) SELECT drug, COUNT(DISTINCT dp.subject_id) as count FROM prescriptions JOIN drainage_patients dp ON prescriptions.subject_id = dp.subject_id AND prescriptions.hadm_id = dp.hadm_id GROUP BY drug ORDER BY count DESC LIMIT 4;",
            "gold_answer": "[['0.9% sodium chloride (mini bag plus)'], ['5% dextrose'], ['furosemide'], ['insulin'], ['sodium chloride 0.9%  flush']]",
            "pred_answer": "[['5% dextrose', '4.0'], ['furosemide', '4.0'], ['insulin', '4.0'], ['sodium chloride 0.9%  flush', '5.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls all prescriptions for patients who had the drainage procedure in the same hospital visit but does not enforce that the prescriptions occurred after the drainage (i.e., it omits the temporal condition T1.charttime < T2.starttime). This omission changes the overall query logic and results, so it is not a simple local fix but a fundamental logical error.",
            "error_type": "Global: missing temporal condition"
        }
    },
    "o4-mini": {
        "853d5d23a05c7c89f691689c": {
            "db_id": "mimic_iv",
            "generator_name": "o4-mini",
            "question": "Fetch the top three most frequent procedures that are administered to patients within 2 months after they had introduction of nutritional substance into lower gi, via natural or artificial opening since 1 year ago.",
            "gold_sql": "SELECT d_icd_procedures.long_title FROM d_icd_procedures WHERE d_icd_procedures.icd_code IN ( SELECT T3.icd_code FROM ( SELECT T2.icd_code, DENSE_RANK() OVER ( ORDER BY COUNT(*) DESC ) AS C1 FROM ( SELECT admissions.subject_id, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE procedures_icd.icd_code = ( SELECT d_icd_procedures.icd_code FROM d_icd_procedures WHERE d_icd_procedures.long_title = 'introduction of nutritional substance into lower gi, via natural or artificial opening' ) AND datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T1 JOIN ( SELECT admissions.subject_id, procedures_icd.icd_code, procedures_icd.charttime FROM procedures_icd JOIN admissions ON procedures_icd.hadm_id = admissions.hadm_id WHERE datetime(procedures_icd.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) AS T2 ON T1.subject_id = T2.subject_id WHERE T1.charttime < T2.charttime AND datetime(T2.charttime) BETWEEN datetime(T1.charttime) AND datetime(T1.charttime,'+2 month') GROUP BY T2.icd_code ) AS T3 WHERE T3.C1 <= 3 )",
            "pred_sql": "WITH intro AS ( SELECT p.subject_id, p.hadm_id, p.charttime AS intro_time, p.row_id AS intro_row FROM procedures_icd p JOIN d_icd_procedures d ON p.icd_code = d.icd_code WHERE d.long_title LIKE '%nutritional substance%' AND d.long_title LIKE '%lower gastrointestinal%' AND datetime(p.charttime) >= datetime('2100-12-31 23:59:00','-1 year') ) SELECT d2.long_title FROM intro i JOIN procedures_icd p2 ON p2.subject_id = i.subject_id AND p2.hadm_id = i.hadm_id JOIN d_icd_procedures d2 ON p2.icd_code = d2.icd_code WHERE datetime(p2.charttime) > datetime(i.intro_time) AND datetime(p2.charttime) <= datetime(i.intro_time, '+2 months') AND p2.row_id <> i.intro_row GROUP BY d2.long_title ORDER BY COUNT(*) DESC LIMIT 3;",
            "gold_answer": "[['drainage of left pleural cavity, percutaneous approach'], ['drainage of left pleural cavity, percutaneous approach, diagnostic'], ['drainage of pericardial cavity with drainage device, percutaneous approach'], ['insertion of endotracheal airway into trachea, via natural or artificial opening'], ['respiratory ventilation, greater than 96 consecutive hours']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a CTE (intro) for the initial procedure but then restricts subsequent procedures to the same hadm_id (hospital admission). The ground truth logic only matches on subject_id (patient-level) within the 2-month window, allowing procedures across different admissions. By joining on both subject_id and hadm_id, the predicted query erroneously excludes valid procedures that occur in a different admission within the two-month period, fundamentally altering the intended result set. This is not a simple typo or localized mistake\u2014it changes the core logic of how procedures are matched to the introduction event.",
            "error_type": "Global: incorrect join logic"
        }
    }
}