{
    "llm2sql": {
        "015ae8de2f7597f3308e719345bd99de": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "how many patients are admitted under emergency room and diagnosed with mixed acid-base bal dis? (Values: EMERGENCY ROOM ADMIT, Mixed acid-base bal dis)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"EMERGENCY ROOM ADMIT\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Mixed acid-base bal dis\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC D INNER JOIN DIAGNOSES DI ON D.SUBJECT_ID = DI.SUBJECT_ID AND D.HADM_ID = DI.HADM_ID WHERE D.ADMISSION_TYPE = 'EMERGENCY ROOM ADMIT' AND DI.LONG_TITLE = 'Mixed acid-base bal dis'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the wrong columns in the WHERE clause: it filters on D.ADMISSION_TYPE instead of DEMOGRAPHIC.ADMISSION_LOCATION, and on DI.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE. The overall query structure (JOIN, COUNT DISTINCT) remains correct, so these are isolated column reference mistakes that can be fixed without altering the query\u2019s logic.",
            "error_type": "Local: table/column reference errors"
        },
        "809481545e7a77fa4c823f05ba51741b": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "specify the total number of patients who had icd9 code 4105",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE PROCEDURES.\"ICD9_CODE\" = \"4105\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE ICD9_CODE = '4105';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table instead of the PROCEDURES table and omits the join to DEMOGRAPHIC. This is not a simple typo or missing condition in one clause\u2014it changes the core tables and relationships used in the query, altering the intended logic of counting patients who had the ICD9 procedure code. Such an error affects the overall structure and meaning of the query and requires reworking multiple parts (table selection and joins).",
            "error_type": "Global: incorrect table used"
        },
        "0be089824bb2caf402523cf69d0356a8": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "Tell me the procedure icd9 code and the primary disease for the patient Brian Brock.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",PROCEDURES.\"ICD9_CODE\" FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"NAME\" = \"Brian Brock\"",
            "pred_sql": "SELECT P.ICD9_CODE, D.LONG_TITLE n FROM DEMOGRAPHIC AS DEM n JOIN PROCEDURES AS P ON DEM.SUBJECT_ID = P.SUBJECT_ID AND DEM.HADM_ID = P.HADM_ID n JOIN DIAGNOSES AS D ON DEM.SUBJECT_ID = D.SUBJECT_ID AND DEM.HADM_ID = D.HADM_ID n WHERE DEM.NAME = 'Brian Brock';",
            "gold_answer": "[['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3613.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3615.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3893.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '3961.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '8872.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '9604.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '9671.0'], ['CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT /SDA', '9904.0']]",
            "pred_answer": "near \"n\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL includes stray 'n' tokens after aliases and column selections (e.g., 'DEM n', 'D.LONG_TITLE n'), causing a syntax error near 'n'. This is a localized mistake in aliasing/syntax rather than a logical issue spanning multiple clauses or tables.",
            "error_type": "Local: syntax error"
        }
    },
    "deepseek": {
        "82d3695ec5d890238e93c72e3a314ced": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "how any patients have been given nitroglycerin ointment 2%? (Values: Nitroglycerin Ointment  2%)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Nitroglycerin Ointment  2%\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM PRESCRIPTIONS WHERE DRUG = 'Nitroglycerin Ointment 2%';",
            "gold_answer": "[['4.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses PRESCRIPTIONS.DRUG = 'Nitroglycerin Ointment 2%' (one space) whereas the actual value in the data is 'Nitroglycerin Ointment  2%' (two spaces). This is a mismatch in the literal string used in the WHERE clause. The rest of the query structure (COUNT DISTINCT on SUBJECT_ID in PRESCRIPTIONS) is fine. The error is confined to an incorrect literal value in the WHERE clause and does not affect the overall query logic or table relationships.",
            "error_type": "Local: value parsing error"
        },
        "a098cd2cff8c0c939b8ee19c13e38c43": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Provide the number of patients that died and had a primary disease of ST elevated myocardial infarction/cardiac cath. (Values: 1, ST ELEVATED MYOCARDIAL INFARCTION\\CARDIAC CATH)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"1\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"ST ELEVATED MYOCARDIAL INFARCTION\\CARDIAC CATH\"",
            "pred_sql": "error in generating sql: No",
            "gold_answer": "[['0.0']]",
            "pred_answer": "null",
            "is_correct": false,
            "reasoning": "The predicted SQL failed to produce any valid query (\u2018error in generating sql: No\u2019), indicating a localized generation/syntax issue rather than a flaw in the overall query logic or structure. No tables, joins, or conditions were even attempted, so it\u2019s a fixable, localized syntax/generation error.",
            "error_type": "Local: syntax error"
        }
    },
    "codes": {
        "817fe7ae70c2b5623a475f9d9081d340": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "what is the maximum age of married patients aged 47 years or more? (Values: MARRIED)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"MARITAL_STATUS\" = \"MARRIED\" AND DEMOGRAPHIC.\"AGE\" >= \"47\"",
            "pred_sql": "SELECT max(demographic.age) FROM demographic INNER JOIN prescriptions ON demographic.hadm_id = prescriptions.hadm_id WHERE prescriptions.drug_type = 'BASE' AND demographic.marital_status = 'MARRIED' AND demographic.age >= 47",
            "gold_answer": "[['89.0']]",
            "pred_answer": "[['82.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN with the PRESCRIPTIONS table and filters on prescriptions.drug_type = 'BASE', which is not part of the original question. This join and extra filter changes the overall logic and scope of the query by restricting the population to patients with a specific prescription type. Fixing this would require removing the join and filter entirely, which is more than a localized syntax or reference fix\u2014it alters the fundamental structure and intended meaning of the query.",
            "error_type": "Global: join error"
        }
    },
    "dinsql": {
        "9c185af46be35cab5c7f0bc760c202b5": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many patients have been diagnosed with unspecified pseudomonas infection? (Values: Pseudomonas infect NOS)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Pseudomonas infect NOS\"",
            "pred_sql": "SELECT COUNT(DISTINCT SUBJECT_ID) FROM DIAGNOSES WHERE LONG_TITLE = 'Pseudomonas infect NOS'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL filters on DIAGNOSES.LONG_TITLE instead of DIAGNOSES.SHORT_TITLE. The ground truth uses SHORT_TITLE = 'Pseudomonas infect NOS' to match the exact wording; using LONG_TITLE yields no rows. Everything else (counting distinct SUBJECT_ID and table usage) is structurally fine. This is a localized mistake in the column reference within the WHERE clause.",
            "error_type": "Local: incorrect column reference"
        }
    },
    "macsql": {
        "9ce3e6d4028ec80eb96214edc77b6496": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "How many patients had a lab test for sodium urine and were treated with a base drug? (Values: MAIN, Sodium, Urine)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID INNER JOIN LAB on DEMOGRAPHIC.HADM_ID = LAB.HADM_ID WHERE PRESCRIPTIONS.\"DRUG_TYPE\" = \"MAIN\" AND LAB.\"LABEL\" = \"Sodium, Urine\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM ( SELECT DISTINCT `SUBJECT_ID` FROM LAB WHERE `LABEL` = 'Sodium' AND `FLUID` = 'Urine' ) AS T1 INNER JOIN ( SELECT DISTINCT `SUBJECT_ID` FROM PRESCRIPTIONS WHERE `DRUG_TYPE` = 'BASE' ) AS T2 ON T1.`SUBJECT_ID` = T2.`SUBJECT_ID`",
            "gold_answer": "[['15.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL is structurally similar to the ground truth (it performs two subqueries and then joins on SUBJECT_ID), but it gets the filtering criteria wrong in the WHERE clauses.  Specifically, it uses DRUG_TYPE = 'BASE' instead of 'MAIN' and splits the LAB label into LABEL = 'Sodium' and FLUID = 'Urine' rather than using the single label 'Sodium, Urine'.  These are errors localized to the WHERE conditions (wrong literal values) and do not require changing the overall query structure.",
            "error_type": "Local: value parsing error"
        },
        "86e3e3a08a930e116dd8720746f1458a": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "provide me the primary disease and diagnosis short title of the patient with patient id 9575.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"9575\"",
            "pred_sql": "SELECT `LONG_TITLE`, `SHORT_TITLE` FROM DIAGNOSES WHERE `SUBJECT_ID` = 9575",
            "gold_answer": "[['CONGESTIVE HEART FAILURE', 'Abn involun movement NEC'], ['CONGESTIVE HEART FAILURE', 'Ac on chr diast hrt fail'], ['CONGESTIVE HEART FAILURE', 'Acute kidney failure NOS'], ['CONGESTIVE HEART FAILURE', 'Adv eff benzodiaz tranq'], ['CONGESTIVE HEART FAILURE', 'Anemia NOS'], ['CONGESTIVE HEART FAILURE', 'Atriovent block complete'], ['CONGESTIVE HEART FAILURE', 'CHF NOS'], ['CONGESTIVE HEART FAILURE', 'Cardiac arrest'], ['CONGESTIVE HEART FAILURE', 'Cardiogenic shock'], ['CONGESTIVE HEART FAILURE', 'Chr airway obstruct NEC'], ['CONGESTIVE HEART FAILURE', 'Chr pulmon heart dis NEC'], ['CONGESTIVE HEART FAILURE', 'Constipation NOS'], ['CONGESTIVE HEART FAILURE', 'Depend-supplement oxygen'], ['CONGESTIVE HEART FAILURE', 'Depress psychosis-unspec'], ['CONGESTIVE HEART FAILURE', 'Do not resusctate status'], ['CONGESTIVE HEART FAILURE', 'Encountr palliative care'], ['CONGESTIVE HEART FAILURE', 'Esophageal reflux'], ['CONGESTIVE HEART FAILURE', 'Food/vomit pneumonitis'], ['CONGESTIVE HEART FAILURE', 'Full incontinence-feces'], ['CONGESTIVE HEART FAILURE', 'Hx of breast malignancy'], ['CONGESTIVE HEART FAILURE', 'Hx traumatic fracture'], ['CONGESTIVE HEART FAILURE', 'Hyp ht dis NOS w ht fail'], ['CONGESTIVE HEART FAILURE', 'Hyperosmolality'], ['CONGESTIVE HEART FAILURE', 'Leukocytosis NOS'], ['CONGESTIVE HEART FAILURE', 'Mitral valve disorder'], ['CONGESTIVE HEART FAILURE', 'Osteoporosis NOS'], ['CONGESTIVE HEART FAILURE', 'Other alter consciousnes'], ['CONGESTIVE HEART FAILURE', 'Panic dis w/o agorphobia'], ['CONGESTIVE HEART FAILURE', 'Prim cardiomyopathy NEC'], ['CONGESTIVE HEART FAILURE', 'Schizoaffective dis NOS'], ['CONGESTIVE HEART FAILURE', 'Suicidal ideation'], ['CONGESTIVE HEART FAILURE', 'Thrombocytopenia NOS'], ['CONGESTIVE HEART FAILURE', 'Wheelchair dependence']]",
            "pred_answer": "[['Abnormal involuntary movements', 'Abn involun movement NEC'], ['Acute kidney failure, unspecified', 'Acute kidney failure NOS'], ['Acute on chronic diastolic heart failure', 'Ac on chr diast hrt fail'], ['Anemia, unspecified', 'Anemia NOS'], ['Atrioventricular block, complete', 'Atriovent block complete'], ['Benzodiazepine-based tranquilizers causing adverse effects in therapeutic use', 'Adv eff benzodiaz tranq'], ['Cardiac arrest', 'Cardiac arrest'], ['Cardiogenic shock', 'Cardiogenic shock'], ['Chronic airway obstruction, not elsewhere classified', 'Chr airway obstruct NEC'], ['Congestive heart failure, unspecified', 'CHF NOS'], ['Constipation, unspecified', 'Constipation NOS'], ['Do not resuscitate status', 'Do not resusctate status'], ['Encounter for palliative care', 'Encountr palliative care'], ['Esophageal reflux', 'Esophageal reflux'], ['Full incontinence of feces', 'Full incontinence-feces'], ['Hyperosmolality and/or hypernatremia', 'Hyperosmolality'], ['Leukocytosis, unspecified', 'Leukocytosis NOS'], ['Major depressive affective disorder, single episode, unspecified', 'Depress psychosis-unspec'], ['Mitral valve disorders', 'Mitral valve disorder'], ['Osteoporosis, unspecified', 'Osteoporosis NOS'], ['Other alteration of consciousness', 'Other alter consciousnes'], ['Other chronic pulmonary heart diseases', 'Chr pulmon heart dis NEC'], ['Other dependence on machines, supplemental oxygen', 'Depend-supplement oxygen'], ['Other primary cardiomyopathies', 'Prim cardiomyopathy NEC'], ['Panic disorder without agoraphobia', 'Panic dis w/o agorphobia'], ['Personal history of malignant neoplasm of breast', 'Hx of breast malignancy'], ['Personal history of traumatic fracture', 'Hx traumatic fracture'], ['Pneumonitis due to inhalation of food or vomitus', 'Food/vomit pneumonitis'], ['Schizoaffective disorder, unspecified', 'Schizoaffective dis NOS'], ['Suicidal ideation', 'Suicidal ideation'], ['Thrombocytopenia, unspecified', 'Thrombocytopenia NOS'], ['Unspecified hypertensive heart disease with heart failure', 'Hyp ht dis NOS w ht fail'], ['Wheelchair dependence', 'Wheelchair dependence']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the DEMOGRAPHIC table entirely and fails to retrieve the primary disease field (DEMOGRAPHIC.DIAGNOSIS). Instead, it only selects LONG_TITLE and SHORT_TITLE from DIAGNOSES, thus missing the required join on HADM_ID and the DEMOGRAPHIC.DIAGNOSIS column. This is not a simple typo or localized mistake but a fundamental structural error that changes the intended query logic and output.",
            "error_type": "Global: join error"
        }
    },
    "qwen3": {
        "a923f3ce780d550018dd0df1f1e55570": {
            "db_id": "mimicsql",
            "generator_name": "qwen3",
            "question": "what number of patients were given the drug oxycodone sr (oxycontin)? (Values: Oxycodone SR (OxyconTIN))",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PRESCRIPTIONS on DEMOGRAPHIC.HADM_ID = PRESCRIPTIONS.HADM_ID WHERE PRESCRIPTIONS.\"DRUG\" = \"Oxycodone SR (OxyconTIN)\"",
            "pred_sql": "SELECT COUNT(DISTINCT p.SUBJECT_ID) FROM PRESCRIPTIONS p WHERE p.DRUG = 'Oxycodone SR (Oxycontin)';",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly counts distinct SUBJECT_IDs but uses the wrong drug string literal ('Oxycodone SR (Oxycontin)' instead of the exact 'Oxycodone SR (OxyconTIN)'). This mismatch in the WHERE clause is a localized value parsing error\u2014it doesn\u2019t change the overall query structure or join logic, only the literal value, so it\u2019s a Local error.",
            "error_type": "Local: value parsing error"
        }
    }
}