{
    "dinsql": {
        "9b7f5c51ee9fdcfbba316fb48e8e6722": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the drug route and drug code of the drug succinylcoline? (Values: Succinylcholine)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Succinylcholine\"",
            "pred_sql": "SELECT ROUTE, FORMULARY_DRUG_CD FROM PRESCRIPTIONS WHERE DRUG = 'Succinylcoline'",
            "gold_answer": "[['SUCC20I', 'IV']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query uses the wrong literal value 'Succinylcoline' instead of the correct 'Succinylcholine'. This is a localized error in the WHERE clause (a value parsing/literal mismatch) and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "844e56db40ebdffe688da456b2860342": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "Provide me the number of patients born in or before 2179 who have left internal jugular vein thrombosis; left arm edema primary disease. (Values: LEFT INTERNAL JUGULAR VEIN THROMBOSIS;LEFT ARM EDEMA, 2179.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"LEFT INTERNAL JUGULAR VEIN THROMBOSIS;LEFT ARM EDEMA\" AND DEMOGRAPHIC.\"DOD_YEAR\" <= \"2179.0\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D INNER JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.DOB_YEAR <= 2179 AND DI.LONG_TITLE = 'LEFT INTERNAL JUGULAR VEIN THROMBOSIS",
            "gold_answer": "[['1.0']]",
            "pred_answer": "unrecognized token: \"'LEFT INTERNAL JUGULAR VEIN THROMBOSIS\"",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to an unclosed string literal in the WHERE clause (`DI.LONG_TITLE = 'LEFT INTERNAL JUGULAR VEIN THROMBOSIS`), leading to a syntax error. This mistake is confined to a specific part of the query and doesn\u2019t require changing the overall structure or logic.",
            "error_type": "Local: syntax error"
        },
        "0039685df6e3097e7f1e2742a86d106a": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients below 51 years of age are diagnosed with preterm nec 2500+g? (Values: Preterm NEC 2500+g)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"51\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Preterm NEC 2500+g\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.AGE < 51 AND diag.LONG_TITLE = 'Preterm NEC 2500+g'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only differs in a specific part of the WHERE clause: it filters on diag.LONG_TITLE instead of diag.SHORT_TITLE. It also uses an additional SUBJECT_ID join condition (which is equivalent given the schema) and omits quotes around the numeric literal, but these do not change the overall query structure or logical flow. The main mistake is selecting the wrong column for the diagnosis filter, a localized column reference error.",
            "error_type": "Local: column reference error"
        }
    },
    "macsql": {
        "a04053b65b4e0a526f0eb2e1b027f139": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the maximum age of male patients diagnosed with primary disease overdose? (Values: M, OVERDOSE)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"OVERDOSE\"",
            "pred_sql": "SELECT MAX(d.AGE) AS max_age FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.HADM_ID = diag.HADM_ID WHERE d.GENDER = 'M' AND d.AGE IS NOT NULL AND diag.LONG_TITLE IS NOT NULL AND (diag.LONG_TITLE LIKE '%overdose%' OR diag.SHORT_TITLE LIKE '%overdose%')",
            "gold_answer": "[['24.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN to the DIAGNOSES table and uses LIKE searches on LONG_TITLE/SHORT_TITLE instead of filtering the DEMOGRAPHIC.DIAGNOSIS column for the exact value 'OVERDOSE'. This changes the entire logic of how 'overdose' diagnoses are identified and adds unnecessary IS NOT NULL checks, resulting in no matching rows. Fixing this would require restructuring the query\u2019s core logic (removing the join and using the correct column).",
            "error_type": "Global: join logic error"
        },
        "0ebff2e801d7f07f735ed115ff6846e8": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "how many patients are below 44 years of age and followed the procedure uni salpingo-oophor? (Values: Oth uni salpingo-oophor)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"44\" AND PROCEDURES.\"SHORT_TITLE\" = \"Oth uni salpingo-oophor\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) AS num_patients FROM DEMOGRAPHIC AS T1 INNER JOIN PROCEDURES AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID` AND T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`AGE` < 44 AND T2.`LONG_TITLE` LIKE '%Oth uni salpingo-oophor%'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL deviates only in the WHERE clause by using PROCEDURES.LONG_TITLE with a LIKE pattern instead of PROCEDURES.SHORT_TITLE = 'Oth uni salpingo-oophor'. This is a localized mistake in column choice and comparison operator. The rest of the query structure\u2014including the JOIN\u2014remains correct and does not alter the overall logic. Therefore, this is a Local error.",
            "error_type": "Local: column/comparison error"
        }
    },
    "deepseek": {
        "ba3064e8b2f13a2448df23529f31de36": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of patients with admission location as transfer from hosp/extram and were admitted before the year 2131? (Values: TRANSFER FROM HOSP/EXTRAM)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"TRANSFER FROM HOSP/EXTRAM\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2131\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE ADMISSION_LOCATION = 'TRANSFER FROM HOSP/EXTRAM' AND YEAR(ADMITTIME) < 2131;",
            "gold_answer": "[['3.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted query fails because it uses the SQLite-unsupported YEAR() function to extract the year from ADMITTIME. This is a local mistake confined to the WHERE clause (an invalid function call), not a deeper logical or structural issue with joins or overall query intent. It can be fixed by replacing YEAR() with the correct SQLite date extraction (e.g., strftime).",
            "error_type": "Local: function reference error"
        },
        "054cab8f7975c46d400a46a17beb5a13": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "which patients had emergency hospital admission? (Values: EMERGENCY)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\"",
            "pred_sql": "SELECT SUBJECT_ID, NAME FROM DEMOGRAPHIC WHERE ADMISSION_TYPE = 'EMERGENCY';",
            "gold_answer": "[['62.0']]",
            "pred_answer": "[['10317.0', 'Wayne Shelton'], ['14755.0', 'Bessie Wood'], ['15061.0', 'Lue White'], ['15898.0', 'Hector Blair'], ['16438.0', 'Sheila Riley'], ['17519.0', 'Shirley Mckenney'], ['17570.0', 'Paul Dunn'], ['18351.0', 'Andrea Capone'], ['18480.0', 'Brian Taylor'], ['19620.0', 'Dawn Cooper'], ['23602.0', 'Ida Cook'], ['23733.0', 'Dominga Garvin'], ['25167.0', 'Cynthia Robinson'], ['26285.0', 'Cynthia Gomez'], ['26746.0', 'Antonio Mccarthy'], ['28588.0', 'Theresa Vedovelli'], ['29541.0', 'Estrella Carroll'], ['29767.0', 'Warren Baldwin'], ['29961.0', 'Jerry Deberry'], ['3369.0', 'Charles Nilsson'], ['3623.0', 'Joseph Dillman'], ['42067.0', 'Kurt Buczek'], ['42820.0', 'Evelyn Lawrence'], ['42963.0', 'Marilyn Norvell'], ['43220.0', 'Thomas Nazario'], ['4333.0', 'Kenneth Koeller'], ['4589.0', 'Tina Henderson'], ['45962.0', 'Jonathan Wiggins'], ['52012.0', 'Jane Dillard'], ['52118.0', 'John Gartman'], ['53707.0', 'Stephanie Suchan'], ['5506.0', 'Gonzalo Wasson'], ['55094.0', 'Lawrence Wibbens'], ['62296.0', 'Morgan Mcgee'], ['64208.0', 'Darlene Martin'], ['65652.0', 'Troy Friedman'], ['65759.0', 'Francis Baseler'], ['66411.0', 'Jennifer Fulgham'], ['67965.0', 'Richard Gundlach'], ['6983.0', 'Edward Castillo'], ['71798.0', 'Edward Schafer'], ['73843.0', 'Claude Chipman'], ['74345.0', 'Miguel Hodges'], ['76508.0', 'Alice Nixon'], ['81254.0', 'Paul Edwards'], ['813.0', 'Travis Hofman'], ['81923.0', 'Rafael Stewart'], ['83678.0', 'Albert Bauer'], ['8440.0', 'Gwendolyn Hager'], ['85673.0', 'Gus Marques'], ['87275.0', 'Josette Orr'], ['8990.0', 'Michelle Medina'], ['91588.0', 'Elizabeth Bateman'], ['9258.0', 'Bryant Johnson'], ['9271.0', 'Lynette Upham'], ['92796.0', 'Tiffany Williams'], ['93033.0', 'Michael Tyndall'], ['94756.0', 'Betty Campbell'], ['9575.0', 'Melinda Miers'], ['98220.0', 'Bruce Harris'], ['990.0', 'Dawn Brill'], ['99936.0', 'Caren Obrien']]",
            "is_correct": false,
            "reasoning": "The predicted SQL matches the correct table and WHERE clause but fails to apply the COUNT DISTINCT aggregation requested by the question. Instead, it selects SUBJECT_ID and NAME directly. This mistake is isolated to the SELECT clause and does not affect joins or the overall query structure, so it\u2019s a local error in the aggregation/SELECT list.",
            "error_type": "Local: incorrect-aggregation"
        },
        "8745cad27000303a093d0dd8dafeeb7b": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many patients are with medicare insurance and admitted before 2187? (Values: Medicare)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Medicare\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2187\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS number_of_patients FROM DEMOGRAPHIC WHERE INSURANCE = 'Medicare' AND EXTRACT(YEAR FROM ADMITTIME) < 2187;",
            "gold_answer": "[['39.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the EXTRACT(YEAR FROM ADMITTIME) function, which is not supported in SQLite and leads to a syntax error. This mistake is localized to the WHERE clause (a function call error), and the overall query structure remains correct. Therefore, it is a local error in the use of a temporal extraction function.",
            "error_type": "Local: other-temporal error"
        },
        "855cf4c4ba2b077fb516522ce28ed18b": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "let me know the drug code and route of administration of lidocaine jelly 2% (urojet). (Values: Lidocaine Jelly 2% (Urojet))",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Lidocaine Jelly 2% (Urojet)\"",
            "pred_sql": "SELECT FORMULARY_DRUG_CD AS \"Drug Code\", ROUTE AS \"Route of Administration\" FROM PRESCRIPTIONS WHERE DRUG = 'Lidocaine Jelly 2% (Urojet)';",
            "gold_answer": "[['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP'], ['LID2J', 'TP']]",
            "pred_answer": "[['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J'], ['LID2J']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to return the ROUTE column even though it\u2019s requested in the SELECT clause. This is a localized mistake in the SELECT clause (omitting or mis\u2010referencing a column), and does not affect joins, filters, or the overall query logic. Hence it is classified as a local error.",
            "error_type": "Local: missing-column-reference"
        },
        "83d2f2439a19c3bac564156d4d7f0f20": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients admitted in urgent care who were born before 2074. (Values: URGENT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"URGENT\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2074\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE ADMISSION_TYPE = 'URGENT' AND YEAR(DOB) < 2074;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the function YEAR(DOB) which is not supported in SQLite, causing a runtime error. The issue is limited to this localized misuse of a date function and does not affect the overall query logic or table relationships. It can be fixed by replacing YEAR(DOB) with the correct column DOB_YEAR or by parsing the date differently, without restructuring the query.",
            "error_type": "Local: other-temporal error"
        }
    },
    "codes": {
        "06221a83812b03cacce79b7404bb3785": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many patients have the drug code pant40i? (Values: PANT40I)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"PANT40I\"",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE formulary_drug_cd = 'PANT40I'",
            "gold_answer": "[['27.0']]",
            "pred_answer": "[['37.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts all rows in the PRESCRIPTIONS table matching the drug code, without using DISTINCT, so duplicated SUBJECT_IDs across multiple prescriptions are counted multiple times. The ground truth uses COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) to count unique patients. This is a localized mistake in the SELECT/aggregate clause (missing DISTINCT) and does not require restructuring joins or overall logic.",
            "error_type": "Local: missing DISTINCT in COUNT"
        },
        "ad0c2ee55406703b73c20f92102a868d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "let me know the number of emergency hospital admission patients who have docusate sodium prescription. (Values: EMERGENCY, Docusate Sodium)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND PRESCRIPTIONS.\"DRUG\" = \"Docusate Sodium\"",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM demographic INNER JOIN prescriptions ON demographic.subject_id = prescriptions.subject_id WHERE demographic.admission_type = 'EMERGENCY' AND prescriptions.drug = 'Docusate Sodium'",
            "gold_answer": "[['35.0']]",
            "pred_answer": "[['49.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses demographic.subject_id = prescriptions.subject_id for the JOIN instead of the correct hadm_id relationship, and it also omits DISTINCT, counting prescription rows rather than unique patients. This incorrect join key and aggregation changes the query\u2019s overall logic and produces the wrong result.",
            "error_type": "Global: incorrect join key"
        },
        "041461cfd563e6b139f3910205ec8e31": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the drug code and route for the drug propafenone hcl? (Values: Propafenone HCl)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\",PRESCRIPTIONS.\"ROUTE\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Propafenone HCl\"",
            "pred_sql": "SELECT drug, route FROM prescriptions WHERE drug = 'Propafenone HCl'",
            "gold_answer": "[['PROP225', 'PO'], ['PROP225', 'PO']]",
            "pred_answer": "[['Propafenone HCl', 'PO'], ['Propafenone HCl', 'PO']]",
            "is_correct": false,
            "reasoning": "The predicted SQL selects the wrong column for the drug code: it uses PRESCRIPTIONS.drug instead of PRESCRIPTIONS.FORMULARY_DRUG_CD. This is a localized mistake in the SELECT clause that does not require changing the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "a67acde1ffdd89fb4002c394789b52e8": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many of the patients admitted to emergency had a lab test for magnesium level? (Values: EMERGENCY, Magnesium)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"EMERGENCY\" AND LAB.\"LABEL\" = \"Magnesium\"",
            "pred_sql": "SELECT count(lab.subject_id) FROM lab INNER JOIN demographic ON lab.hadm_id = demographic.hadm_id WHERE demographic.admission_location = 'EMERGENCY' AND lab.label = 'Magnesium'",
            "gold_answer": "[['60.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses demographic.admission_location = 'EMERGENCY' instead of demographic.admission_type = 'EMERGENCY', so it\u2019s filtering on the wrong column. This is a localized mistake in the WHERE clause column reference and doesn\u2019t alter the overall structure or join logic of the query.",
            "error_type": "Local: table/column reference error"
        },
        "103c53116811183189bbac6240b1c38e": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many married patients died in or before 2165? (Values: MARRIED, 2165.0)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"MARRIED\" AND DEMOGRAPHIC.\"DOD_YEAR\" <= \"2165.0\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE marital_status = 'MARRIED' AND (dod IS NULL OR strftime('%Y', dod) < '2165') AND expire_flag = 1",
            "gold_answer": "[['7.0']]",
            "pred_answer": "[['5.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the death\u2010filtering logic: it uses dod IS NULL OR strftime('%Y', dod) < '2165' plus expire_flag = 1 instead of DOD_YEAR <= 2165. This is not a minor syntax or naming mistake but a fundamental change in how deaths are identified and filtered, altering the intended result.",
            "error_type": "Global: incorrect death filter logic"
        },
        "0c9f6eae00f98d18ac9f50768c7c5a86": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many asian patients had a base drug type? (Values: ASIAN, BASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"ASIAN\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"BASE\"",
            "pred_sql": "SELECT count(prescriptions.subject_id) FROM prescriptions INNER JOIN demographic ON prescriptions.subject_id = demographic.subject_id WHERE demographic.ethnicity = 'ASIAN' AND prescriptions.drug_type = 'BASE'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['2.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL counts all prescription rows rather than unique patients. It omits the DISTINCT on SUBJECT_ID in the COUNT and thus aggregates the wrong values. The join on SUBJECT_ID instead of HADM_ID also broadens the match but the core mistake is in the aggregation clause. This is a localized error in the SELECT aggregation (missing DISTINCT) and does not require restructuring joins or overall logic.",
            "error_type": "Local: missing-distinct"
        }
    },
    "llm2sql": {
        "9c0e1d982d6a14ff908ec2ba2546d3f1": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients of black/cape verdean ethnicity are primary diagnosed with femoral artery thrombosis? (Values: BLACK/CAPE VERDEAN, FEMORAL ARTERY THROMBOSIS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"BLACK/CAPE VERDEAN\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"FEMORAL ARTERY THROMBOSIS\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) nFROM DEMOGRAPHIC d nJOIN DIAGNOSES diag nON d.SUBJECT_ID = diag.SUBJECT_ID nAND d.HADM_ID = diag.HADM_ID nWHERE d.ETHNICITY = 'BLACK/CAPE VERDEAN' nAND diag.LONG_TITLE = 'FEMORAL ARTERY THROMBOSIS';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error near 'DEMOGRAPHIC' due to a missing space between the alias and the FROM keyword ('nFROM' instead of '\\nFROM'). This is a local, easily fixable syntax mistake that does not affect the overall query structure or logic.",
            "error_type": "Local: syntax error"
        },
        "0b265651dd99d7806feaa26008b010e8": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "find out the time of admission and primary disease of patient with patient id 4589.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DEMOGRAPHIC.\"ADMITTIME\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"4589\"",
            "pred_sql": "SELECT T1.ADMITTIME, T2.SHORT_TITLE FROM DEMOGRAPHIC AS T1 JOIN DIAGNOSES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID WHERE T1.SUBJECT_ID = 4589;",
            "gold_answer": "[['CONGESTIVE HEART FAILURE', '2111-12-17 19:59:00']]",
            "pred_answer": "[['2111-12-17 19:59:00', 'Aftrcre traum fx low leg'], ['2111-12-17 19:59:00', 'CHF NOS'], ['2111-12-17 19:59:00', 'Candidias urogenital NEC'], ['2111-12-17 19:59:00', 'Cardiac arrest'], ['2111-12-17 19:59:00', 'Cardiac dysrhythmias NEC'], ['2111-12-17 19:59:00', 'Critical illness myopthy'], ['2111-12-17 19:59:00', 'DMI neuro nt st uncntrld'], ['2111-12-17 19:59:00', 'DMI renl nt st uncntrld'], ['2111-12-17 19:59:00', 'End stage renal disease'], ['2111-12-17 19:59:00', 'Food/vomit pneumonitis'], ['2111-12-17 19:59:00', 'Hemododialysis hypotensn'], ['2111-12-17 19:59:00', 'Hyp kid NOS w cr kid V'], ['2111-12-17 19:59:00', 'Hyperpotassemia'], ['2111-12-17 19:59:00', 'Idio periph neurpthy NOS'], ['2111-12-17 19:59:00', 'Int inf clstrdium dfcile'], ['2111-12-17 19:59:00', 'Long-term use of insulin'], ['2111-12-17 19:59:00', 'Morbid obesity'], ['2111-12-17 19:59:00', 'Other pulmonary insuff'], ['2111-12-17 19:59:00', 'Pressure ulcer, low back'], ['2111-12-17 19:59:00', 'Rhabdomyolysis'], ['2111-12-17 19:59:00', 'Subendo infarct, initial'], ['2111-12-17 19:59:00', 'Ulcer of heel & midfoot'], ['2111-12-17 19:59:00', 'Urin tract infection NOS']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the DEMOGRAPHIC table with DIAGNOSES, returning every diagnosis record for the patient rather than the single primary disease stored in DEMOGRAPHIC.DIAGNOSIS. This changes the fundamental logic and structure of the query, producing multiple rows instead of one. Fixing this requires removing the join and selecting the correct column from DEMOGRAPHIC, not just a small tweak in a clause.",
            "error_type": "Global: flawed join"
        },
        "aafae9c30494946bbd7ac56453a627b2": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients with primary disease overdose were less than 50 years of age? (Values: OVERDOSE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"OVERDOSE\" AND DEMOGRAPHIC.\"AGE\" < \"50\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DIAGNOSES AS D JOIN DEMOGRAPHIC AS DM ON D.SUBJECT_ID = DM.SUBJECT_ID AND D.HADM_ID = DM.HADM_ID WHERE (D.LONG_TITLE LIKE '%OVERDOSE%' OR D.SHORT_TITLE LIKE '%OVERDOSE%') AND DM.AGE < 50",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL changes the overall logic by joining the DIAGNOSES table to DEMOGRAPHIC and filtering on LONG_TITLE/SHORT_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column directly. This alters the intended meaning and structure of the query, requiring a significant rewrite of the query flow (wrong table usage, join logic, and filtering method).",
            "error_type": "Global: incorrect join logic"
        },
        "00648dbdd1cf6863da5597366c79a6f0": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many of the unmarried patients belonged to white ethnic origin? (Values: SINGLE, WHITE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND DEMOGRAPHIC.\"ETHNICITY\" = \"WHITE\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE MARITAL_STATUS = 'SINGLE' AND ETHNICITY LIKE '%WHITE%'",
            "gold_answer": "[['15.0']]",
            "pred_answer": "[['17.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only does COUNT(*) instead of COUNT(DISTINCT SUBJECT_ID), and it uses ETHNICITY LIKE '%WHITE%' instead of an exact equality check. These are mistakes confined to the SELECT aggregation and WHERE condition clauses and do not require reworking the overall query structure or table relationships. Therefore, this is a local error.",
            "error_type": "Local: missing-distinct-and-wrong-comparison"
        }
    }
}