{
    "llm2sql": {
        "a85f7ee77ab7acd707cc19c80b4acf63": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Count the number of patients born before 2071 who had an emergency room hospital admission. (Values: EMERGENCY ROOM ADMIT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"EMERGENCY ROOM ADMIT\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2071\"",
            "pred_sql": "SELECT COUNT(*) FROM DEMOGRAPHIC WHERE DOB_YEAR < 2071 AND ADMISSION_TYPE = 'EMERGENCY ROOM ADMIT';",
            "gold_answer": "[['11.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column for filtering emergency room admissions: it checks ADMISSION_TYPE instead of ADMISSION_LOCATION. It also uses COUNT(*) rather than COUNT(DISTINCT SUBJECT_ID), but the overall query structure remains correct. These are localized mistakes (incorrect column reference and missing DISTINCT) that do not require changing the join logic or query structure.",
            "error_type": "Local: column reference error"
        },
        "0ad552d2835d64ccd539c3214e2b6d7a": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "specify the icd9 code and marital status of patient id 42067",
            "gold_sql": "SELECT DEMOGRAPHIC.\"MARITAL_STATUS\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"42067\"",
            "pred_sql": "SELECT D.ICD9_CODE, DG.MARITAL_STATUS FROM DEMOGRAPHIC DG JOIN DIAGNOSES D ON DG.SUBJECT_ID = D.SUBJECT_ID WHERE DG.SUBJECT_ID = 42067",
            "gold_answer": "[['SINGLE', 'Anemia in neoplastic dis'], ['SINGLE', 'Atrial fibrillation'], ['SINGLE', 'CHF NOS'], ['SINGLE', 'Chr diastolic hrt fail'], ['SINGLE', 'Crbl art ocl NOS w infrc'], ['SINGLE', 'Crnry athrscl natve vssl'], ['SINGLE', 'DMII wo cmp nt st uncntr'], ['SINGLE', 'Dysthymic disorder'], ['SINGLE', 'Encephalopathy NOS'], ['SINGLE', 'Hyperosmolality'], ['SINGLE', 'Intracerebral hemorrhage'], ['SINGLE', 'Malig neo brain NEC'], ['SINGLE', 'Old myocardial infarct'], ['SINGLE', 'Proteus infection NOS'], ['SINGLE', 'Urin tract infection NOS']]",
            "pred_answer": "[['1918.0', 'SINGLE'], ['25000.0', 'SINGLE'], ['2760.0', 'SINGLE'], ['28522.0', 'SINGLE'], ['3004.0', 'SINGLE'], ['34830.0', 'SINGLE'], ['412.0', 'SINGLE'], ['41401.0', 'SINGLE'], ['416.0', 'SINGLE'], ['42731.0', 'SINGLE'], ['4280.0', 'SINGLE'], ['42832.0', 'SINGLE'], ['431.0', 'SINGLE'], ['43491.0', 'SINGLE'], ['5990.0', 'SINGLE']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC to DIAGNOSES on SUBJECT_ID instead of HADM_ID. This isn\u2019t a simple typo or missing column but a fundamentally incorrect relationship between tables that can change the result set (Cartesian\u2010like duplication or filtering differences) and alters the intended logic of matching diagnoses to the correct hospital admission. Fixing it requires changing the join condition at a structural level, so it\u2019s a global error.",
            "error_type": "Global: incorrect join key"
        },
        "8992cac76fa0ec727f4a010671a5ce78": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "tell me the short title of procedure and primary disease of patient steven sepulveda. (Values: Steven Sepulveda)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",PROCEDURES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Steven Sepulveda\"",
            "pred_sql": "SELECT P.SHORT_TITLE AS Procedure_Short_Title, D.SHORT_TITLE AS Primary_Disease_Short_Title nFROM DEMOGRAPHIC DEM nJOIN PROCEDURES P ON DEM.SUBJECT_ID = P.SUBJECT_ID AND DEM.HADM_ID = P.HADM_ID nJOIN DIAGNOSES D ON DEM.SUBJECT_ID = D.SUBJECT_ID AND DEM.HADM_ID = D.HADM_ID nWHERE DEM.NAME = 'Steven Sepulveda';",
            "gold_answer": "[['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', '1 int mam-cor art bypass'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Aortocor bypas-3 cor art'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Dx ultrasound-heart'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Exc/dest hrt lesion open'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Extracorporeal circulat'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT; MYOMECTOMY/SDA', 'Incision of aorta']]",
            "pred_answer": "near \"nFROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a localized syntax mistake: it includes literal \u2018nFROM\u2019, \u2018nJOIN\u2019, and \u2018nWHERE\u2019 tokens instead of proper newline or whitespace separators. This is a parsing error confined to clause formatting and does not change the query\u2019s overall structure or logic. It can be fixed by replacing \u2018nFROM\u2019 with \u2018FROM\u2019 (and similarly for JOIN/WHERE), without altering table relationships or intent.",
            "error_type": "Local: syntax error"
        },
        "a03591944cbd372c579f85a8601246d7": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Provide the number of unmarried patients who have a hematology lab test category. (Values: SINGLE, Hematology)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND LAB.\"CATEGORY\" = \"Hematology\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) nFROM DEMOGRAPHIC D nJOIN LAB L ON D.SUBJECT_ID = L.SUBJECT_ID nWHERE D.MARITAL_STATUS = 'SINGLE' nAND L.CATEGORY = 'Hematology';",
            "gold_answer": "[['26.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to parse due to malformed keywords (\u2018nFROM\u2019, \u2018nJOIN\u2019) indicating a simple syntax/newline insertion mistake. This is a localized error in the query text and does not reflect a misunderstanding of the overall logic or table relationships. Hence it is a local syntax error.",
            "error_type": "Local: syntax error"
        },
        "089a137d1d773bd6f945b3accb8eb318": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many of the patients with item id 50976 remained admitted in hospital for more than 16 days?",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"16\" AND LAB.\"ITEMID\" = \"50976\"",
            "pred_sql": "SELECT COUNT(DISTINCT LAB.SUBJECT_ID, LAB.HADM_ID) nFROM LAB nJOIN DEMOGRAPHIC ON LAB.SUBJECT_ID = DEMOGRAPHIC.SUBJECT_ID AND LAB.HADM_ID = DEMOGRAPHIC.HADM_ID nWHERE LAB.ITEMID = 50976 AND DEMOGRAPHIC.DAYS_STAY > 16;",
            "gold_answer": "[['4.0']]",
            "pred_answer": "near \"LAB\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains a syntax error: it writes \u201cnFROM LAB nJOIN\u2026\u201d and \u201cnWHERE\u2026\u201d without proper spacing or alias declaration, causing \u201cnear 'LAB': syntax error.\u201d This is a localized mistake in the FROM/JOIN clause formatting and does not alter the overall query logic. It can be fixed by correcting the spacing and alias syntax without restructuring the query.",
            "error_type": "Local: syntax error"
        }
    },
    "deepseek": {
        "84d545e92b1a3cabb755523ca1f95bd4": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Show the number of patients who are less than 60 years of age and have the drug code morp2i. (Values: 62, MORP2I)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"62\" AND PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" = \"MORP2I\"",
            "pred_sql": "SELECT COUNT(D.SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC D INNER JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.AGE < 60 AND P.FORMULARY_DRUG_CD = 'MORP2I';",
            "gold_answer": "[['6.0']]",
            "pred_answer": "[['5.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses AGE < 60 instead of AGE < 62 (wrong literal value) and omits DISTINCT in the COUNT, both of which are local mistakes in the WHERE and SELECT clauses. The overall join structure and logic remain intact.",
            "error_type": "Local: value-parsing error"
        },
        "0ef4e32954ca70a4dbc73fedfa9a6ba8": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "count the number of patients on norepinephrine prescription who were born before the year 2109. (Values: NORepinephrine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"DOB_YEAR\" < \"2109\" AND PRESCRIPTIONS.\"DRUG\" = \"NORepinephrine\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE P.DRUG = 'norepinephrine' AND CAST(substr(D.DOB, 1, 4) AS INT) < 2109;",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fails because it uses the wrong literal for the drug name ('norepinephrine' instead of the exact 'NORepinephrine') and it parses DOB instead of using the provided DOB_YEAR column. Both issues are confined to the WHERE clause and involve incorrect literal and column usage, without altering the overall join structure or logic of the query.",
            "error_type": "Local: value parsing error"
        },
        "885d89607152d4e994c2d3ea430b4cde": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the number of divorced patients who were admitted before 2120? (Values: DIVORCED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"DIVORCED\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2120\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) AS NUMBER_OF_DIVORCED_PATIENTS FROM DEMOGRAPHIC WHERE MARITAL_STATUS = 'DIVORCED' AND EXTRACT(YEAR FROM ADMITTIME) < 2120;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the EXTRACT(YEAR FROM ADMITTIME) syntax, which is not supported in SQLite, leading to a syntax error. This mistake is confined to the WHERE clause\u2019s date extraction and does not alter the overall query structure or table relationships.",
            "error_type": "Local: other-temporal error"
        },
        "aa7c9f0f083c93b10c5c1410673a02c2": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients who were admitted before the year 2174 and given the drug cefpodoxime proxetil. (Values: Cefpodoxime Proxetil)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2174\" AND PRESCRIPTIONS.\"DRUG\" = \"Cefpodoxime Proxetil\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE D.ADMITYEAR < 2174 AND P.DRUG = 'cefpodoxime proxetil';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query differs from the ground truth only in the literal values used in the WHERE clause: it uses an unquoted numeric literal for the year (which is fine) and a lowercase string 'cefpodoxime proxetil' instead of the correctly cased 'Cefpodoxime Proxetil'. This mismatch in string casing prevents any rows from matching the filter, but the overall query structure, joins, and logic remain correct. This is a localized error in the value parsing/comparison in the WHERE clause, not a structural or logical join issue.",
            "error_type": "Local: value parsing error"
        }
    },
    "macsql": {
        "ae0a0d761414ab17b03de7bb99fefdf0": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "For patient id 2110, specify name and lab test category",
            "gold_sql": "SELECT DEMOGRAPHIC.\"NAME\",LAB.\"CATEGORY\" FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2110\"",
            "pred_sql": "SELECT (SELECT `NAME` FROM DEMOGRAPHIC WHERE `SUBJECT_ID` = 2110) AS `NAME`, L.`CATEGORY` FROM LAB AS L WHERE L.`SUBJECT_ID` = 2110 GROUP BY L.`CATEGORY`",
            "gold_answer": "[['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology'], ['Chandra Schulman', 'Hematology']]",
            "pred_answer": "[['Chandra Schulman', 'Chemistry'], ['Chandra Schulman', 'Hematology']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally alters the query structure by replacing the intended JOIN with a scalar subquery and adding a GROUP BY on L.CATEGORY. This grouping removes duplicate rows and changes the result set cardinality, which is a global logic change rather than a simple fix in one clause. The error spans multiple components (SELECT, FROM, GROUP BY) and changes the intended meaning of the query.",
            "error_type": "Global: flawed aggregation/join strategy"
        },
        "8c1239d9e940eb0ebcff0e32c2c18d7b": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "Tell me which primary disease the patient Walter Locher is suffering from and whether he is still alive or not.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"EXPIRE_FLAG\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Walter Locher\"",
            "pred_sql": "SELECT `EXPIRE_FLAG` FROM DEMOGRAPHIC WHERE `NAME` = 'Walter Locher'",
            "gold_answer": "[['1.0', 'NEOPLASM OF UNCERTAIN BEHAVIOR OF OTHER LYMPHATIC AND HEMATOPOIETIC TISSUES\\\\BONE MARROW TRANSPLANT']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only selects the EXPIRE_FLAG column and omits the DIAGNOSIS column, whereas the question asks for both the patient\u2019s primary disease (DIAGNOSIS) and whether they are alive (EXPIRE_FLAG). This is a localized mistake in the SELECT clause and does not alter the overall structure or table joins of the query.",
            "error_type": "Local: missing column in SELECT"
        },
        "074e5b4530d549289801849ce3cdde02": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the total number of patients aged below 68 who had abdominal abscess as primary disease  (Values: ABDOMINAL ABSCESS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"ABDOMINAL ABSCESS\" AND DEMOGRAPHIC.\"AGE\" < \"68\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) AS total_patients FROM ( SELECT DISTINCT `SUBJECT_ID` FROM DIAGNOSES WHERE `LONG_TITLE` = 'ABDOMINAL ABSCESS' OR `SHORT_TITLE` = 'ABDOMINAL ABSCESS' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM DEMOGRAPHIC WHERE `AGE` < 68 ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL pulls the disease condition from the DIAGNOSES table (using LONG_TITLE/SHORT_TITLE) and then joins to DEMOGRAPHIC, whereas the question asks for the primary disease recorded directly in DEMOGRAPHIC.DIAGNOSIS. This changes the source of the disease filter and the overall join strategy, fundamentally altering the query logic rather than a small fix in a single clause.",
            "error_type": "Global: join logic error"
        }
    },
    "codes": {
        "060b7c924b230b79b3935a795cfaf37d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "how many of the asian patients had unspecified neutropenia? (Values: ASIAN, Neutropenia, unspecified)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ETHNICITY\" = \"ASIAN\" AND DIAGNOSES.\"LONG_TITLE\" = \"Neutropenia, unspecified\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN lab ON demographic.hadm_id = lab.hadm_id WHERE demographic.ethnicity = 'ASIAN' AND lab.value = 'unspecified' AND lab.label = 'Neutropenia'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such column: lab.value",
            "is_correct": false,
            "reasoning": "The predicted query mistakenly joins the LAB table and references non-existent columns (lab.value and lab.label) instead of using the DIAGNOSES table and its LONG_TITLE column. This is a localized mistake in table/column references and does not require restructuring the overall logic of the query.",
            "error_type": "Local: table/column reference error"
        },
        "a8548cc3da3f7647e4b65fbcb65de4f3": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify the gender and icd9 code for patient  id 7578",
            "gold_sql": "SELECT DEMOGRAPHIC.\"GENDER\",PROCEDURES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"7578\"",
            "pred_sql": "SELECT demographic.gender, diagnoses.icd9_code FROM demographic INNER JOIN diagnoses ON demographic.hadm_id = diagnoses.hadm_id WHERE demographic.subject_id = 7578",
            "gold_answer": "[['F', 'Vaccination NEC']]",
            "pred_answer": "[['F', 'V053'], ['F', 'V290'], ['F', 'V3000']]",
            "is_correct": false,
            "reasoning": "The predicted query joins the DEMOGRAPHIC table with DIAGNOSES and selects ICD9_CODE, whereas the ground truth joins DEMOGRAPHIC with PROCEDURES and selects SHORT_TITLE. This is not a minor syntax or column reference issue but a fundamental change to the tables and the intended join relationship, altering the overall logic of which records are retrieved. Fixing it requires replacing the entire join and target table, not just a column or condition tweak.",
            "error_type": "Global: incorrect table join error"
        },
        "871b48c28ceee54f43ba16f2a5d5dd15": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify the duration of hospital stay and admission location of patient troy friedman (Values: Troy Friedman)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DAYS_STAY\",DEMOGRAPHIC.\"ADMISSION_LOCATION\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Troy Friedman\"",
            "pred_sql": "SELECT demographic.days_stay, demographic.admission_location FROM demographic INNER JOIN procedures ON demographic.hadm_id = procedures.hadm_id WHERE demographic.name = 'Troy Friedman'",
            "gold_answer": "[['1.0', 'CLINIC REFERRAL/PREMATURE']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN with the PROCEDURES table on hadm_id. This join changes the overall logic by filtering out the patient (who has no matching PROCEDURES row), resulting in no output. Fixing it requires removing the join, which is a structural change affecting the entire query logic rather than a simple local tweak. Therefore, this is a global error involving an incorrect join that alters the intended meaning.",
            "error_type": "Global: join error"
        },
        "9126eb002106d4c790c7ca9f1745106d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Out of total number of patients admitted before 2198, how many of them were married. (Values: MARRIED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"MARRIED\" AND DEMOGRAPHIC.\"ADMITYEAR\" < \"2198\"",
            "pred_sql": "SELECT count(hadm_id) FROM demographic WHERE strftime('%Y', admittedtime) < 2198 AND marital_status = 'MARRIED'",
            "gold_answer": "[['34.0']]",
            "pred_answer": "no such column: admittedtime",
            "is_correct": false,
            "reasoning": "The predicted SQL references a non-existent column 'admittedtime' instead of the correct 'ADMITTIME' from the DEMOGRAPHIC table. This is a simple column reference error that can be fixed locally without altering the overall query structure or logic.",
            "error_type": "Local: column reference error"
        },
        "05bac85c2939f9bbec8d57102c7b6156": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "find the minimum days of hospital stay for newborns. (Values: NEWBORN)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"NEWBORN\"",
            "pred_sql": "SELECT min(days_stay) FROM demographic WHERE name = 'NEWBORN'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column in the WHERE clause (filtering on NAME instead of ADMISSION_TYPE). The overall query structure (aggregating MIN on DAYS_STAY) is correct, and only a column reference in the WHERE clause is incorrect.",
            "error_type": "Local: column reference error"
        }
    },
    "dinsql": {
        "8647b4ed3acbcdf65b6a3e4a003a5b5b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the number of patients less than 89 years who are suffering from femoral artery thrombosis primaray disease? (Values: FEMORAL ARTERY THROMBOSIS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"FEMORAL ARTERY THROMBOSIS\" AND DEMOGRAPHIC.\"AGE\" < \"89\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.AGE < 89 AND DI.LONG_TITLE = 'FEMORAL ARTERY THROMBOSIS'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces a JOIN with the DIAGNOSES table and filters on DI.LONG_TITLE, whereas the ground truth uses the DEMOGRAPHIC.DIAGNOSIS column to capture the primary disease. This change affects the overall logic of where the diagnosis is sourced and alters the intended meaning of querying primary diagnoses. It is not a simple typo or localized fix but a structural change in table relationships and data source.",
            "error_type": "Global: incorrect join logic"
        },
        "885fc05dd9e2816d96606e43d38a8e31": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the primary disease of the patient name jerry deberry? (Values: Jerry Deberry)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Jerry Deberry\"",
            "pred_sql": "SELECT T2.LONG_TITLE FROM DEMOGRAPHIC AS T1 INNER JOIN DIAGNOSES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID AND T1.HADM_ID = T2.HADM_ID WHERE T1.NAME = 'Jerry Deberry' LIMIT 1",
            "gold_answer": "[['S/P FALL']]",
            "pred_answer": "[['Abdominal pain, other specified site']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC to DIAGNOSES and selects DIAGNOSES.LONG_TITLE instead of directly retrieving the DEMOGRAPHIC.DIAGNOSIS column. This is not a simple syntax or alias mistake but a fundamental change in data source and query logic, mistakenly using the diagnoses table rather than the primary disease field in DEMOGRAPHIC. Fixing this would require restructuring the query to remove the join and select the correct column.",
            "error_type": "Global: join error"
        }
    },
    "qwen3": {
        "adce510d770aef3a1b9e9014e766798c": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "give me the number of patients who had single internal mammary-coronary artery bypass. (Values: 1 int mam-cor art bypass)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"SHORT_TITLE\" = \"1 int mam-cor art bypass\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PROCEDURES WHERE ICD9_CODE = 352;",
            "gold_answer": "[['5.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column and value in the WHERE clause: it filters on ICD9_CODE = 352 instead of matching SHORT_TITLE = '1 int mam-cor art bypass'. This is a localized mistake within the WHERE clause (a wrong column/value reference) and does not require restructuring the joins or overall query flow.",
            "error_type": "Local: column reference error"
        }
    }
}