{
    "deepseek": {
        "a9d92fdd4552de6119472381096c41d5": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how many patients with drug type base were diagnosed with diaphragmatic hernia? (Values: Diaphragmatic hernia, BASE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Diaphragmatic hernia\" AND PRESCRIPTIONS.\"DRUG_TYPE\" = \"BASE\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D INNER JOIN DIAGNOSES Diag ON D.SUBJECT_ID = Diag.SUBJECT_ID AND D.HADM_ID = Diag.HADM_ID INNER JOIN PRESCRIPTIONS P ON D.SUBJECT_ID = P.SUBJECT_ID AND D.HADM_ID = P.HADM_ID WHERE Diag.LONG_TITLE = 'Diaphragmatic hernia' AND P.DRUG_TYPE = 'BASE';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column for the diagnosis filter: it checks Diag.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE as required by the question. All joins and overall structure remain correct, and only the column reference in the WHERE clause is incorrect. This is a localized error that can be fixed by changing the column name without altering the query\u2019s logic or structure.",
            "error_type": "Local: column reference error"
        },
        "9bd46aa88780c8ffc2597327bfa6d9bd": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "provide the number of patients whose admission type is urgent. (Values: URGENT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"URGENT\"",
            "pred_sql": "error in generating sql: No",
            "gold_answer": "[['1.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL is just the text \u201cerror in generating sql: No\u201d rather than any valid query. This is a localized syntax/formatting failure and does not involve mis\u2010joining tables or breaking overall logic. It\u2019s a missing or invalid SQL snippet, which is a local error.",
            "error_type": "Local: syntax error"
        },
        "0f58d7c3462afbaf1092a9aaca163031": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "specify type of admission of patient edward castillo (Values: Edward Castillo)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Edward Castillo\"",
            "pred_sql": "SELECT ADMISSION_TYPE FROM DEMOGRAPHIC WHERE NAME = 'EDWARD CASTILLO';",
            "gold_answer": "[['EMERGENCY']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted SQL is nearly identical to the ground truth except for the literal value in the WHERE clause: it uses 'EDWARD CASTILLO' instead of the exact 'Edward Castillo'. This mismatch in the string literal causes no rows to match. The issue is isolated to the value in the WHERE clause and does not affect the overall query structure or logic.",
            "error_type": "Local: value parsing error"
        },
        "09e03098584e026254e4f3b57ab3f524": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "tell me the short title and icd9 code of procedure for patient alice nixon. (Values: Alice Nixon)",
            "gold_sql": "SELECT PROCEDURES.\"ICD9_CODE\",PROCEDURES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Alice Nixon\"",
            "pred_sql": "SELECT P.SHORT_TITLE, P.ICD9_CODE FROM PROCEDURES P INNER JOIN DEMOGRAPHIC D ON P.SUBJECT_ID = D.SUBJECT_ID AND P.HADM_ID = D.HADM_ID WHERE D.NAME = 'Alice Nixon';",
            "gold_answer": "[['3893.0', 'Venous cath NEC'], ['5101.0', 'Percutan aspiration gb'], ['5185.0', 'Endosc sphincterotomy']]",
            "pred_answer": "[['Endosc sphincterotomy', '5185.0'], ['Percutan aspiration gb', '5101.0'], ['Venous cath NEC', '3893.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the two queries is the order of the selected columns: the predicted SQL returns SHORT_TITLE then ICD9_CODE, whereas the expected output (and ground truth SQL\u2019s column order) is ICD9_CODE then SHORT_TITLE. The join structure, WHERE condition, and overall logic are correct. This is a localized mistake in the SELECT clause and doesn\u2019t affect query structure or table relationships.",
            "error_type": "Local: select-column-order error"
        }
    },
    "codes": {
        "9e98c86301154d7baf44b1654b51c1f0": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the number of patients with replaced route of drug administration? (Values: REPLACE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"ROUTE\" = \"REPLACE\"",
            "pred_sql": "SELECT count(subject_id) FROM prescriptions WHERE route = 'REPLACE'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['4.0']]",
            "is_correct": false,
            "reasoning": "The predicted query counts all prescription rows with route = 'REPLACE' rather than counting distinct patients. It uses COUNT(subject_id) instead of COUNT(DISTINCT subject_id), leading to an inflated result. This is a localized mistake in the aggregation clause and does not require restructuring joins or the overall query logic.",
            "error_type": "Local: aggregation error"
        },
        "adae5e26acd2cff0c947689a1e07525e": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "Specify the average age of unmarried patients admitted in or after the year 2156 (Values: SINGLE)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND DEMOGRAPHIC.\"ADMITYEAR\" >= \"2156\"",
            "pred_sql": "SELECT avg(age) FROM demographic WHERE marital_status = 'SINGLE' AND admittime LIKE '2156%'",
            "gold_answer": "[['54.538']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong column and filter in the WHERE clause: it filters on admittime with a LIKE pattern instead of comparing ADMITYEAR >= 2156. This mistake is localized to the condition and does not affect the overall structure or logic beyond that filter. Hence, it is a local error in the WHERE clause.",
            "error_type": "Local: WHERE clause filter error"
        },
        "0f06b77b423037f77d9cf551771a846d": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the maximum age of medicaid patients who belong to black/cape verdean ethnic background. (Values: Medicaid, BLACK/CAPE VERDEAN)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"INSURANCE\" = \"Medicaid\" AND DEMOGRAPHIC.\"ETHNICITY\" = \"BLACK/CAPE VERDEAN\"",
            "pred_sql": "SELECT max(demographic.age) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE procedures.icd9_code = 99591 WHERE demographic.insurance = 'Medicaid' AND demographic.ethnicity = 'BLACK/CAPE VERDEAN'",
            "gold_answer": "[['None']]",
            "pred_answer": "near \"WHERE\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an unnecessary INNER JOIN to the PROCEDURES table and even repeats the WHERE keyword, causing a syntax error. Joining an unrelated table fundamentally changes the structure and intended logic of the query (we only need the DEMOGRAPHIC table) and cannot be fixed by a simple local edit. This is a global schema/logic error.",
            "error_type": "Global: incorrect join logic"
        }
    },
    "macsql": {
        "0f85a50055cfb411dd0b95a57d97191f": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "find the minimum number of days of hospital stay for patients born before the year 2200.",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DOB_YEAR\" > \"2200\"",
            "pred_sql": "SELECT MIN(`DAYS_STAY`) AS min_days_stay FROM DEMOGRAPHIC WHERE `DOB_YEAR` < 2200",
            "gold_answer": "[['None']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The only difference between the ground truth and the predicted SQL is the comparison operator in the WHERE clause (ground truth uses > \"2200\", predicted uses < 2200). This is an error confined to the WHERE condition and does not change the overall structure or the tables involved. Such an incorrect comparator is a localized mistake in the condition, so it is a Local error.",
            "error_type": "Local: incorrect comparison operator"
        }
    },
    "llm2sql": {
        "0ef1bd0f34de8289d8b8043c9600dbb9": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Get me the number of unmarried patients who had tonsil and adenoid biopsy. (Values: SINGLE, Tonsil&adenoid biopsy)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"SINGLE\" AND PROCEDURES.\"SHORT_TITLE\" = \"Tonsil&adenoid biopsy\"",
            "pred_sql": "SELECT COUNT(DISTINCT DEMOGRAPHIC.SUBJECT_ID) nFROM DEMOGRAPHIC nJOIN PROCEDURES ON DEMOGRAPHIC.SUBJECT_ID = PROCEDURES.SUBJECT_ID nAND DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID nWHERE DEMOGRAPHIC.MARITAL_STATUS = 'SINGLE' nAND PROCEDURES.LONG_TITLE LIKE '%Tonsil&adenoid biopsy%'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL contains literal 'n' characters before keywords (e.g., 'nFROM', 'nJOIN', 'nWHERE'), causing a syntax error. This is a local, easily fixable formatting/syntax issue that does not alter the overall query logic.",
            "error_type": "Local: syntax error"
        },
        "05c5dbad2999a021a98bc80262a523d1": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "what is the total number of patients who had icd9 code 4601",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"ICD9_CODE\" = \"4601\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE ICD9_CODE = '4601';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL queries the DIAGNOSES table for ICD9_CODE = '4601' instead of the PROCEDURES table as required by the question. This is not a minor syntax or column reference issue, but a fundamental misuse of the wrong table that changes the entire logic of the query. Fixing it would require replacing the table and potentially adding a join, which is beyond a localized correction.",
            "error_type": "Global: wrong table used"
        },
        "a653e009b44f6b042361642164f758e9": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Look for the patients admitted before 2174 that were diagnosed with sepsis. (Values: Sepsis)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2174\" AND DIAGNOSES.\"LONG_TITLE\" = \"Sepsis\"",
            "pred_sql": "SELECT DEMOGRAPHIC.SUBJECT_ID, DEMOGRAPHIC.HADM_ID FROM DEMOGRAPHIC INNER JOIN DIAGNOSES ON DEMOGRAPHIC.SUBJECT_ID = DIAGNOSES.SUBJECT_ID AND DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.ADMITYEAR < 2174 AND DIAGNOSES.LONG_TITLE LIKE '%Sepsis%'",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['18351.0', '136878.0'], ['29541.0', '103957.0'], ['62296.0', '193576.0'], ['67965.0', '145657.0'], ['71798.0', '122790.0'], ['76508.0', '127570.0'], ['83678.0', '172089.0'], ['98220.0', '189416.0']]",
            "is_correct": false,
            "reasoning": "The predicted query fundamentally changes the overall structure and logic: it returns raw SUBJECT_ID and HADM_ID rows instead of counting distinct patients, uses LIKE instead of an exact match, and omits the aggregation entirely. These errors span multiple clauses (SELECT projection, use of aggregation, and WHERE condition) and require rewriting the query logic rather than a localized fix.",
            "error_type": "Global: wrong-aggregation-and-projection"
        },
        "06fdfc5ab31f978981e3b81b323b4d0a": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "find the minimum age of patients who stayed in the hospital for 20 days and died before the year 2186. (Values: 2186.0)",
            "gold_sql": "SELECT MIN ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DAYS_STAY\" = \"20\" AND DEMOGRAPHIC.\"DOD_YEAR\" < \"2186.0\"",
            "pred_sql": "SELECT MIN(AGE) FROM DEMOGRAPHIC WHERE DAYS_STAY >= 20 AND DOD_YEAR < 2186;",
            "gold_answer": "[['67.0']]",
            "pred_answer": "[['59.0']]",
            "is_correct": false,
            "reasoning": "The predicted query only changes the operator in the WHERE clause from DAYS_STAY = 20 to DAYS_STAY >= 20, thereby altering which patients are considered. This is a localized mistake in the condition and does not affect the overall query structure or joins.",
            "error_type": "Local: where-clause condition error"
        },
        "88bd504917d82cccd38ba49d6ab39d9d": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "calculate the number of patients diagnosed with icd9 code 29570 whose death status is unspecified",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DIAGNOSES.\"ICD9_CODE\" = \"29570\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE ICD9_CODE = '29570';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only queries the DIAGNOSES table and counts distinct SUBJECT_IDs with ICD9_CODE = '29570', completely omitting the join to DEMOGRAPHIC and the EXPIRE_FLAG = '0' filter that ensures death status is unspecified. This missing join and missing WHERE condition significantly change the query\u2019s logic and results, affecting multiple clauses and the overall structure.",
            "error_type": "Global: missing death status filter"
        },
        "926a345db323823260da99f203e0010e": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "find the number of patients who are below the age of 41 and lab test category is blood gas. (Values: Blood Gas)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE DEMOGRAPHIC.\"AGE\" < \"41\" AND LAB.\"CATEGORY\" = \"Blood Gas\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) nFROM DEMOGRAPHIC d nJOIN LAB l ON d.SUBJECT_ID = l.SUBJECT_ID nWHERE d.AGE < 41 AND l.CATEGORY = 'Blood Gas';",
            "gold_answer": "[['8.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL has a syntax error: it incorrectly concatenates the alias or newline character \u2018n\u2019 with keywords (nFROM, nJOIN, nWHERE) causing a parsing failure near \u201cDEMOGRAPHIC\u201d. This is a localized formatting/syntax mistake that does not alter the overall query logic or table relationships. It can be fixed by adding the missing spaces/newlines. Therefore it is a Local error.",
            "error_type": "Local: syntax error"
        },
        "00ef3ee46331250704baf2033dad4b10": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients diagnosed under icd9 code 25541 were divorced? (Values: DIVORCED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"DIVORCED\" AND DIAGNOSES.\"ICD9_CODE\" = \"25541\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) nFROM DEMOGRAPHIC AS D nJOIN DIAGNOSES AS DI nON D.SUBJECT_ID = DI.SUBJECT_ID nWHERE DI.ICD9_CODE = '25541' nAND D.MARITAL_STATUS = 'DIVORCED';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"DEMOGRAPHIC\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to parse due to literal 'n' characters inserted before FROM and JOIN, causing a syntax error near \"DEMOGRAPHIC\". This is a localized formatting/syntax mistake in the SELECT/JOIN clause and does not alter the overall query logic or require restructuring of the query.",
            "error_type": "Local: syntax error"
        }
    },
    "qwen3": {
        "93769bf5b268fb4f529260ca8fc17c57": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "how many patients are with death status 0 and with diagnosis chronic diastolic heart failure? (Values: Chronic diastolic heart failure)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"0\" AND DIAGNOSES.\"LONG_TITLE\" = \"Chronic diastolic heart failure\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DEMOGRAPHIC WHERE EXPIRE_FLAG = 0 AND DIAGNOSIS = 'Chronic diastolic heart failure';",
            "gold_answer": "[['3.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fails to join the DIAGNOSES table and instead filters on the DEMOGRAPHIC.DIAGNOSIS column. The correct logic requires an INNER JOIN with DIAGNOSES on HADM_ID and filtering DIAGNOSES.LONG_TITLE. This is not a minor syntax or column name issue but a missing table relationship that fundamentally changes the query\u2019s meaning and results. Such a mistake affects the overall query structure and data associations, so it is a global error.",
            "error_type": "Global: join error"
        }
    },
    "o4-mini": {
        "9599d62908a1ba42d4b04b1abcdbe094": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "How many patients have a diphenhydramine prescription? (Values: DiphenhydrAMINE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"DiphenhydrAMINE\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) AS num_patients FROM PRESCRIPTIONS WHERE UPPER(DRUG) LIKE '%DIPHENHYDRAMINE%';",
            "gold_answer": "[['12.0']]",
            "pred_answer": "[['17.0']]",
            "is_correct": false,
            "reasoning": "The only difference is in the WHERE clause. The ground truth uses an exact match PRESCRIPTIONS.DRUG = \"DiphenhydrAMINE\", whereas the prediction uses UPPER(DRUG) LIKE '%DIPHENHYDRAMINE%', pulling in extra rows. The overall join and counting logic is fine; this is a localized mistake in the filter condition.",
            "error_type": "Local: value parsing error"
        }
    },
    "dinsql": {
        "934102ea4b2c1a74fa970bf8adf637c9": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "count the number of patients whose item id is 51256.",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE LAB.\"ITEMID\" = \"51256\"",
            "pred_sql": "COUNT(DISTINCT SUBJECT_ID) FROM LAB WHERE ITEMID = 51256",
            "gold_answer": "[['74.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the SELECT keyword and so is syntactically invalid (it simply starts with COUNT(\u2026) without a SELECT). This is a minor, localized syntax mistake in the SELECT clause and does not affect the overall query structure or table relationships. It can be fixed by adding the missing SELECT, so it\u2019s a Local error.",
            "error_type": "Local: syntax error"
        },
        "075dfde202da217a38664c06dfb7fc1d": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "what is the maximum number of days of hospital stay for patients with hypertension but not coronary artery disease/cardiac cath? (Values: HYPERTENSION;RULE OUT CORONARY ARTERY DISEASE\\CARDIAC CATH)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"HYPERTENSION;RULE OUT CORONARY ARTERY DISEASE\\CARDIAC CATH\"",
            "pred_sql": "SELECT MAX(D.DAYS_STAY) AS max_days_stay FROM DEMOGRAPHIC D WHERE D.SUBJECT_ID IN ( SELECT DISTINCT SUBJECT_ID FROM DIAGNOSES WHERE LONG_TITLE LIKE '%hypertension%' ) AND D.SUBJECT_ID NOT IN ( SELECT DISTINCT SUBJECT_ID FROM DIAGNOSES WHERE LONG_TITLE LIKE '%coronary artery disease%' OR LONG_TITLE LIKE '%cardiac cath%' )",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['69.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely restructures the logic by querying the DIAGNOSES table and using SUBJECT_ID-level LIKE filters instead of directly filtering the DEMOGRAPHIC.DIAGNOSIS column for the exact composite value. It spans multiple clauses (subqueries on DIAGNOSES, SUBJECT_ID filtering) and fundamentally changes the intended meaning of the query. This requires restructuring the query flow, not just fixing a local mistake.",
            "error_type": "Global: join/logic error"
        },
        "0709ceb7a5a37d731d4c3fb50129fc8b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "tell me the time of admission and primary disease of patient bessie giordano. (Values: Bessie Giordano)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DEMOGRAPHIC.\"ADMITTIME\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Bessie Giordano\"",
            "pred_sql": "SELECT T1.ADMITTIME, T2.LONG_TITLE FROM DEMOGRAPHIC AS T1 INNER JOIN DIAGNOSES AS T2 ON T1.SUBJECT_ID = T2.SUBJECT_ID AND T1.HADM_ID = T2.HADM_ID WHERE T1.NAME = 'Bessie Giordano'",
            "gold_answer": "[['NEWBORN', '2123-02-24 17:12:00']]",
            "pred_answer": "[['2123-02-24 17:12:00', 'Need for prophylactic vaccination and inoculation against viral hepatitis'], ['2123-02-24 17:12:00', 'Observation for suspected genetic or metabolic condition'], ['2123-02-24 17:12:00', 'Observation for suspected infectious condition'], ['2123-02-24 17:12:00', 'Single liveborn, born in hospital, delivered by cesarean section']]",
            "is_correct": false,
            "reasoning": "The user\u2019s question requests the admission time and the primary disease, which in this schema is stored in the DEMOGRAPHIC.DIAGNOSIS column. The ground truth query selects DEMOGRAPHIC.DIAGNOSIS and DEMOGRAPHIC.ADMITTIME directly. The predicted query instead joins to DIAGNOSES and selects DIAGNOSES.LONG_TITLE, yielding multiple rows of secondary diagnoses rather than the single primary diagnosis field. This is not a simple column reference mistake but a fundamentally incorrect join strategy that changes the query\u2019s logic and intended result. It requires restructuring the query to remove the join and select the correct column from DEMOGRAPHIC, so it is a Global error.",
            "error_type": "Global: join error"
        }
    }
}