{
    "codes": {
        "a918a0036b56f9087c0676932fa13024": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "How many patients with an elective admission type were discharged at home health care? (Values: ELECTIVE, HOME HEALTH CARE)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_TYPE\" = \"ELECTIVE\" AND DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"HOME HEALTH CARE\"",
            "pred_sql": "SELECT count(demographic.subject_id) FROM demographic INNER JOIN procedures ON procedures.subject_id = demographic.subject_id WHERE demographic.admission_type = 'ELECTIVE' AND demographic.discharge_location = 'HOME HEALTH CARE'",
            "gold_answer": "[['11.0']]",
            "pred_answer": "[['52.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL introduces an INNER JOIN to the PROCEDURES table, which is not required by the question. This join changes the result set by excluding patients without procedures and by counting the same patient multiple times if they have multiple procedures. The original query only filters on DEMOGRAPHIC and uses COUNT(DISTINCT) to get the correct number of unique patients. The unnecessary join fundamentally alters the logic and requires restructuring (removing the join and adding DISTINCT), so this is a global error.",
            "error_type": "Global: incorrect join"
        },
        "050ed27881bce414cfd05ba83a22a4d4": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "provide the number of emergency room admitted patients who were hospitalized for more than 4 days. (Values: EMERGENCY ROOM ADMIT)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"EMERGENCY ROOM ADMIT\" AND DEMOGRAPHIC.\"DAYS_STAY\" > \"4\"",
            "pred_sql": "SELECT count(subject_id) FROM demographic WHERE days_stay > 4",
            "gold_answer": "[['18.0']]",
            "pred_answer": "[['60.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL only applies days_stay > 4 but omits the filter on ADMISSION_LOCATION = \"EMERGENCY ROOM ADMIT\" (and also doesn\u2019t use DISTINCT). These are localized omissions in the WHERE and SELECT clauses and don\u2019t require restructuring joins or the overall logic. Thus it\u2019s a local error in the WHERE clause.",
            "error_type": "Local: missing filter in WHERE clause"
        },
        "ab3f3d61bd231c11a4f22dce7af58d02": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "tell me the number of patients who were diagnosed with unspecified cerebral artery occlusion with cerebral infarction. (Values: Crbl art ocl NOS w infrc)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DIAGNOSES.\"SHORT_TITLE\" = \"Crbl art ocl NOS w infrc\"",
            "pred_sql": "SELECT count(DISTINCT diagnoses.subject_id) FROM diagnoses INNER JOIN procedures ON diagnoses.subject_id = procedures.subject_id WHERE diagnoses.short_title = 'Crbl art ocl NOS w infrc'",
            "gold_answer": "[['2.0']]",
            "pred_answer": "[['1.0']]",
            "is_correct": false,
            "reasoning": "The predicted query incorrectly joins the DIAGNOSES table with the PROCEDURES table rather than with DEMOGRAPHIC (or simply using DEMOGRAPHIC to count distinct patients). This fundamentally changes the logic by restricting the count to patients who also have procedure records, instead of all patients with the specified diagnosis. Fixing this requires restructuring the join, so it is a global error.",
            "error_type": "Global: incorrect join"
        },
        "0c27eb4d99b5a50b55dcea73a2a9f54f": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "specify icd9 code for patient id 14755",
            "gold_sql": "SELECT PROCEDURES.\"LONG_TITLE\" FROM PROCEDURES WHERE PROCEDURES.\"SUBJECT_ID\" = \"14755\"",
            "pred_sql": "SELECT icd9_code FROM procedures WHERE subject_id = 14755",
            "gold_answer": "[['Arterial catheterization'], ['Closed [endoscopic] biopsy of bronchus'], ['Continuous invasive mechanical ventilation for 96 consecutive hours or more'], ['Enteral infusion of concentrated nutritional substances'], ['Spinal tap'], ['Spinal tap'], ['Venous catheterization, not elsewhere classified']]",
            "pred_answer": "[['331.0'], ['331.0'], ['3324.0'], ['3891.0'], ['3893.0'], ['966.0'], ['9672.0']]",
            "is_correct": false,
            "reasoning": "Both queries filter on the same table (PROCEDURES) and the same SUBJECT_ID, so the join and WHERE logic is correct. The only difference is that the predicted SQL selects the icd9_code column instead of the LONG_TITLE column, which is a mistake confined to the SELECT clause and does not change the overall query structure or logic.",
            "error_type": "Local: wrong-column reference"
        },
        "046dedd9201c79ad138cee000f7e9354": {
            "db_id": "mimicsql",
            "generator_name": "codes",
            "question": "calculate the average age of female patients who were discharged to long term care hospital. (Values: F, LONG TERM CARE HOSPITAL)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"AGE\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"F\" AND DEMOGRAPHIC.\"DISCHARGE_LOCATION\" = \"LONG TERM CARE HOSPITAL\"",
            "pred_sql": "SELECT avg(demographic.age) FROM demographic INNER JOIN procedures ON demographic.subject_id = procedures.subject_id WHERE procedures.icd9_code IN (36428, 36429, 36430) AND demographic.gender = 'F'",
            "gold_answer": "[['0.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL completely changes the query\u2019s logic by introducing an unnecessary JOIN to the PROCEDURES table, filtering on ICD9 codes, and omitting the required discharge location filter. This is not a minor syntax or column error but a fundamental restructuring that no longer matches the question\u2019s intent. The join and filter strategy would need to be redone to correctly target DEMOGRAPHIC.DISCHARGE_LOCATION, so this is a global error.",
            "error_type": "Global: join error"
        }
    },
    "dinsql": {
        "1017466aa786d9466aa74af6bd0700ec": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "provide me the admission type and primary disease of patient travis hofman. (Values: Travis Hofman)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"Travis Hofman\"",
            "pred_sql": "SELECT D.ADMISSION_TYPE, DIAG.SHORT_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS DIAG ON D.SUBJECT_ID = DIAG.SUBJECT_ID AND D.HADM_ID = DIAG.HADM_ID WHERE D.NAME = 'Travis Hofman'",
            "gold_answer": "[['EMERGENCY', 'GASTROINTESTINAL BLEED']]",
            "pred_answer": "[['EMERGENCY', 'AMI inferior wall, init'], ['EMERGENCY', 'Atrial fibrillation'], ['EMERGENCY', 'CHF NOS'], ['EMERGENCY', 'Crnry athrscl natve vssl'], ['EMERGENCY', 'Dvrtclo colon w/o hmrhg'], ['EMERGENCY', 'Gastrointest hemorr NOS'], ['EMERGENCY', 'Malig neo ascend colon'], ['EMERGENCY', 'Pneumonia, organism NOS'], ['EMERGENCY', 'Surg compl-heart']]",
            "is_correct": false,
            "reasoning": "The predicted query joins the DIAGNOSES table and selects DIAGNOSES.SHORT_TITLE, returning all diagnosis records for the patient, instead of using the DEMOGRAPHIC.DIAGNOSIS column (the primary disease). The incorrect table/column reference is a localized mistake in the SELECT and JOIN; it does not require restructuring the overall query logic beyond fixing the referenced table/column.",
            "error_type": "Local: incorrect table/column reference"
        },
        "8769210e7e36fb15bd7f3441ac74704f": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "tell me the admission type and primary disease of subject id 2560.",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"2560\"",
            "pred_sql": "SELECT D.ADMISSION_TYPE, G.LONG_TITLE FROM DEMOGRAPHIC AS D JOIN DIAGNOSES AS G ON D.SUBJECT_ID = G.SUBJECT_ID AND D.HADM_ID = G.HADM_ID WHERE D.SUBJECT_ID = 2560 LIMIT 1",
            "gold_answer": "[['NEWBORN', 'NEWBORN']]",
            "pred_answer": "[['NEWBORN', 'Congenital pigmentary anomalies of skin']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins the DIAGNOSES table and selects LONG_TITLE instead of using the DIAGNOSIS column from the DEMOGRAPHIC table, fundamentally changing the query\u2019s logic. This is not a minor syntax issue but a flawed join strategy that retrieves the wrong field and requires restructuring the query to remove the join and use the correct table/column.",
            "error_type": "Global: incorrect join"
        },
        "a41c0a71e44265cf13545eae050408e0": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "specify the primary disease of james cepeda and where he was admitted (Values: James Cepeda)",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_LOCATION\",DEMOGRAPHIC.\"DIAGNOSIS\" FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"NAME\" = \"James Cepeda\"",
            "pred_sql": "SELECT D2.LONG_TITLE AS Primary_Disease, D1.ADMISSION_LOCATION FROM DEMOGRAPHIC AS D1 JOIN DIAGNOSES AS D2 ON D1.SUBJECT_ID = D2.SUBJECT_ID AND D1.HADM_ID = D2.HADM_ID WHERE D1.NAME = 'James Cepeda' LIMIT 1",
            "gold_answer": "[['PHYS REFERRAL/NORMAL DELI', 'CORONARY ARTERY DISEASE\\\\CORONARY ARTERY BYPASS GRAFT WITH MVR; ? MAZE']]",
            "pred_answer": "[['Atrial fibrillation', 'PHYS REFERRAL/NORMAL DELI']]",
            "is_correct": false,
            "reasoning": "The predicted SQL still retrieves James Cepeda\u2019s admission location and links to a diagnosis table, but it selects DIAGNOSES.LONG_TITLE instead of the DEMOGRAPHIC.DIAGNOSIS column that stores the primary disease. The join condition itself is valid, and the overall intent (getting admission location and a disease field) remains unchanged. Only the column reference for the disease is incorrect, which can be fixed locally without altering the query\u2019s structure or join logic.",
            "error_type": "Local: column reference error"
        },
        "07dd887d78325747ddf8d7f6a4cc952b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "For how long the patients admitted to clinic referral (premature) stayed in the hospital? (Values: CLINIC REFERRAL/PREMATURE)",
            "gold_sql": "SELECT MAX ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"ADMISSION_LOCATION\" = \"CLINIC REFERRAL/PREMATURE\"",
            "pred_sql": "SELECT DAYS_STAY FROM DEMOGRAPHIC WHERE ADMISSION_LOCATION = 'CLINIC REFERRAL/PREMATURE'",
            "gold_answer": "[['30.0']]",
            "pred_answer": "[['0.0'], ['1.0'], ['11.0'], ['11.0'], ['15.0'], ['16.0'], ['2.0'], ['2.0'], ['2.0'], ['20.0'], ['23.0'], ['30.0'], ['4.0'], ['4.0'], ['4.0'], ['4.0'], ['5.0'], ['6.0'], ['7.0'], ['7.0'], ['8.0'], ['9.0']]",
            "is_correct": false,
            "reasoning": "The predicted query selects all DAYS_STAY values instead of computing the maximum as required by the question. The mistake lies solely in the SELECT clause (omitting the MAX aggregation) without altering joins, tables, or overall structure.",
            "error_type": "Local: missing-aggregation"
        },
        "0feb6c94275dae4ee5dc08799c8e771b": {
            "db_id": "mimicsql",
            "generator_name": "dinsql",
            "question": "how many male patients had guillain barre syndrome as their primary disease? (Values: M, GUILLAIN BARRE SYNDROME)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"GUILLAIN BARRE SYNDROME\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.GENDER = 'M' AND diag.LONG_TITLE = 'GUILLAIN BARRE SYNDROME'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the source of the disease information by joining to the DIAGNOSES table and filtering on diag.LONG_TITLE, rather than using the DEMOGRAPHIC.DIAGNOSIS column which represents the primary disease. This alters the overall logic of the query\u2014shifting from primary diagnosis in DEMOGRAPHIC to any occurrence in DIAGNOSES\u2014and thus cannot be fixed by a small local tweak.",
            "error_type": "Global: incorrect table join altering query logic"
        }
    },
    "macsql": {
        "a4411125b5abe68f2d0eb36a12677a73": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "How many male patients had icd9 code 8842? (Values: M)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"GENDER\" = \"M\" AND PROCEDURES.\"ICD9_CODE\" = \"8842\"",
            "pred_sql": "SELECT COUNT(DISTINCT T2.`SUBJECT_ID`) AS male_patient_count FROM DIAGNOSES AS T1 INNER JOIN DEMOGRAPHIC AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`ICD9_CODE` = '8842' AND T2.`GENDER` = 'M'",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL joins DEMOGRAPHIC with the DIAGNOSES table rather than the PROCEDURES table, which changes the entire source of ICD9_CODE data and thus the result. This is not a minor syntax mistake in a clause but a fundamental error in how tables are related, altering the intended logic of the query.",
            "error_type": "Global: join error"
        },
        "0505b2e7a406ecc63693aa9f33c0af26": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "what is the average days of hospital stay of patients whose primary disease is s/p fall? (Values: S/P FALL)",
            "gold_sql": "SELECT AVG ( DEMOGRAPHIC.\"DAYS_STAY\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"DIAGNOSIS\" = \"S/P FALL\"",
            "pred_sql": "SELECT AVG(T2.`DAYS_STAY`) AS Average_Days_Stay FROM DIAGNOSES AS T1 INNER JOIN DEMOGRAPHIC AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE (T1.`LONG_TITLE` LIKE '%S/P FALL%' OR T1.`SHORT_TITLE` LIKE '%S/P FALL%') AND T2.`DAYS_STAY` IS NOT NULL AND T1.`LONG_TITLE` IS NOT NULL AND T1.`SHORT_TITLE` IS NOT NULL AND T1.`HADM_ID` IS NOT NULL AND T2.`HADM_ID` IS NOT NULL;",
            "gold_answer": "[['12.0']]",
            "pred_answer": "[['None']]",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the DIAGNOSES table and a JOIN to filter for \u201cS/P FALL,\u201d but the question asks for the primary disease stored in DEMOGRAPHIC.DIAGNOSIS. By joining to DIAGNOSES and filtering LONG_TITLE/SHORT_TITLE, the query\u2019s fundamental logic is changed and yields no results. Fixing this requires removing the DIAGNOSES table and instead filtering directly on DEMOGRAPHIC.DIAGNOSIS, which is a structural change affecting the whole query rather than a small localized correction.",
            "error_type": "Global: join error"
        },
        "88309c7f731812aac8c0784535361b4e": {
            "db_id": "mimicsql",
            "generator_name": "macsql",
            "question": "count the number of patients who died of pituatary bleed. (Values: 1, PITUITARY BLEED)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"EXPIRE_FLAG\" = \"1\" AND DEMOGRAPHIC.\"DIAGNOSIS\" = \"PITUITARY BLEED\"",
            "pred_sql": "SELECT COUNT(DISTINCT T1.`SUBJECT_ID`) FROM DEMOGRAPHIC AS T1 INNER JOIN ( SELECT DISTINCT `HADM_ID` FROM DIAGNOSES WHERE `LONG_TITLE` LIKE '%pituitary bleed%' OR `SHORT_TITLE` LIKE '%pituitary bleed%' ) AS T2 ON T1.`HADM_ID` = T2.`HADM_ID` WHERE T1.`EXPIRE_FLAG` = 1",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted SQL fundamentally changes the query\u2019s logic by joining the DIAGNOSES table and searching in LONG_TITLE/SHORT_TITLE instead of directly filtering the DEMOGRAPHIC.DIAGNOSIS field as specified. This is not a minor syntax or naming mistake but a structural alteration that affects which records are counted (it uses a different table and join strategy). Thus it is a Global error.",
            "error_type": "Global: incorrect join/table logic"
        }
    },
    "deepseek": {
        "07f753a76f9e4833bde8ba792b3b14c9": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "what is the primary disease and diagnosis of the patient id 8440?",
            "gold_sql": "SELECT DEMOGRAPHIC.\"DIAGNOSIS\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"8440\"",
            "pred_sql": "SELECT DIAGNOSIS AS primary_disease FROM DEMOGRAPHIC WHERE SUBJECT_ID = 8440;",
            "gold_answer": "[['GASTROINTESTINAL BLEED', 'Acute kidney failure NOS'], ['GASTROINTESTINAL BLEED', 'Acute respiratry failure'], ['GASTROINTESTINAL BLEED', 'Atrial fibrillation'], ['GASTROINTESTINAL BLEED', 'CHF NOS'], ['GASTROINTESTINAL BLEED', 'Chr airway obstruct NEC'], ['GASTROINTESTINAL BLEED', 'Chr blood loss anemia'], ['GASTROINTESTINAL BLEED', 'Gastrointest hemorr NOS'], ['GASTROINTESTINAL BLEED', 'Hyperosmolality'], ['GASTROINTESTINAL BLEED', 'Hypertension NOS'], ['GASTROINTESTINAL BLEED', 'Shock w/o trauma NEC'], ['GASTROINTESTINAL BLEED', 'Urin tract infection NOS']]",
            "pred_answer": "[['GASTROINTESTINAL BLEED']]",
            "is_correct": false,
            "reasoning": "The predicted SQL omits the DIAGNOSES table entirely and only selects the primary disease from DEMOGRAPHIC, failing to retrieve the diagnosis information (SHORT_TITLE). This is not a minor syntax or column reference mistake but a structural issue: the necessary JOIN to DIAGNOSES is missing and the query\u2019s logic is fundamentally altered. Fixing it requires adding the join and selecting the second column, which affects the overall query structure.",
            "error_type": "Global: missing join error"
        },
        "a7c671e965437f33b28e21cf6bfe9e89": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "find the number of greek orthodox religious background patients who were born before 2085. (Values: GREEK ORTHODOX)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC WHERE DEMOGRAPHIC.\"RELIGION\" = \"GREEK ORTHODOX\" AND DEMOGRAPHIC.\"DOB_YEAR\" < \"2085\"",
            "pred_sql": "SELECT COUNT(SUBJECT_ID) AS NUMBER_OF_PATIENTS FROM DEMOGRAPHIC WHERE RELIGION = 'GREEK ORTHODOX' AND YEAR(DOB) < 2085;",
            "gold_answer": "[['0.0']]",
            "pred_answer": "no such function: YEAR",
            "is_correct": false,
            "reasoning": "The predicted SQL fails because it uses the YEAR(DOB) function, which does not exist in SQLite. This is a localized error in the WHERE clause\u2019s temporal function usage and does not require restructuring the query logic or joins. It can be fixed by directly comparing the DOB_YEAR column or rewriting the function usage.",
            "error_type": "Local: other-temporal error"
        },
        "a61ac4f658e87123f6f32e8d02272ab4": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "Specify the code forTopiramate (Topamax)",
            "gold_sql": "SELECT PRESCRIPTIONS.\"FORMULARY_DRUG_CD\" FROM PRESCRIPTIONS WHERE PRESCRIPTIONS.\"DRUG\" = \"Topiramate (Topamax)\"",
            "pred_sql": "SELECT DRUG FROM PRESCRIPTIONS WHERE DRUG = 'Topiramate';",
            "gold_answer": "[['TOPI100'], ['TOPI100'], ['TOPI25']]",
            "pred_answer": "[]",
            "is_correct": false,
            "reasoning": "The predicted query only selects the DRUG column instead of the FORMULARY_DRUG_CD column and filters on 'Topiramate' rather than the exact string 'Topiramate (Topamax)'. These are localized mistakes in the SELECT and WHERE clauses (wrong column reference and wrong literal), but the overall query structure remains correct, so this is a local error.",
            "error_type": "Local: value parsing error"
        },
        "86e5e1517891514bdb1d0018f85f9a34": {
            "db_id": "mimicsql",
            "generator_name": "deepseek",
            "question": "How many patients admitted before 2112 are diagnosed with retained plastic fragments? (Values: Retained plastic fragments)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2112\" AND DIAGNOSES.\"LONG_TITLE\" = \"Retained plastic fragments\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM DEMOGRAPHIC DM JOIN DIAGNOSES D ON DM.SUBJECT_ID = D.SUBJECT_ID AND DM.HADM_ID = D.HADM_ID WHERE EXTRACT(YEAR FROM DM.ADMITTIME) < 2112 AND (D.SHORT_TITLE LIKE '%retained plastic fragments%' OR D.LONG_TITLE LIKE '%retained plastic fragments%');",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"FROM\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL fails due to the use of the EXTRACT(YEAR FROM ...) function, which is not supported in SQLite, causing a syntax error at FROM. This error is confined to the WHERE clause\u2019s temporal function and does not require altering the overall query structure or logic (the join and filtering logic remain intact). Thus, it is a local, function/temporal error.",
            "error_type": "Local: other-temporal error"
        }
    },
    "o4-mini": {
        "a7f0f51a1b7298fc9b7c2f8214ee4b19": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "how many patients diagnosed with chronic diastolic heart failure were hospitalized for more than 30 days? (Values: Chr diastolic hrt fail)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"DAYS_STAY\" > \"30\" AND DIAGNOSES.\"SHORT_TITLE\" = \"Chr diastolic hrt fail\"",
            "pred_sql": "SELECT COUNT(DISTINCT d.SUBJECT_ID) AS num_patients FROM DEMOGRAPHIC d JOIN DIAGNOSES dx ON d.SUBJECT_ID = dx.SUBJECT_ID AND d.HADM_ID = dx.HADM_ID WHERE d.DAYS_STAY > 30 AND dx.LONG_TITLE ILIKE '%chronic diastolic heart failure%';",
            "gold_answer": "[['0.0']]",
            "pred_answer": "near \"ILIKE\": syntax error",
            "is_correct": false,
            "reasoning": "The predicted SQL uses the ILIKE operator, which is not supported in SQLite, leading to a syntax error. This mistake is localized to the WHERE clause (an operator/syntax issue) and can be fixed without restructuring the overall query, thus it is a local error.",
            "error_type": "Local: syntax error"
        },
        "92fb353d2314296f863997a8c3222e89": {
            "db_id": "mimicsql",
            "generator_name": "o4-mini",
            "question": "specify the admission type and icd9 code of patient id 91588",
            "gold_sql": "SELECT DEMOGRAPHIC.\"ADMISSION_TYPE\",DIAGNOSES.\"SHORT_TITLE\" FROM DEMOGRAPHIC INNER JOIN DIAGNOSES on DEMOGRAPHIC.HADM_ID = DIAGNOSES.HADM_ID WHERE DEMOGRAPHIC.\"SUBJECT_ID\" = \"91588\"",
            "pred_sql": "SELECT d.ADMISSION_TYPE, diag.ICD9_CODE FROM DEMOGRAPHIC d JOIN DIAGNOSES diag ON d.SUBJECT_ID = diag.SUBJECT_ID AND d.HADM_ID = diag.HADM_ID WHERE d.SUBJECT_ID = 91588;",
            "gold_answer": "[['EMERGENCY', 'Ac on chr syst hrt fail'], ['EMERGENCY', 'CHF NOS'], ['EMERGENCY', 'Chronic sinusitis NOS'], ['EMERGENCY', 'Cyclic neutropenia'], ['EMERGENCY', 'Encephalopathy NOS'], ['EMERGENCY', 'Enterococcus group d'], ['EMERGENCY', 'Fever in other diseases'], ['EMERGENCY', 'Hypothyroidism NOS'], ['EMERGENCY', 'Myelodysplastic synd NOS'], ['EMERGENCY', 'Oth lymp unsp xtrndl org'], ['EMERGENCY', 'Pericardial disease NOS'], ['EMERGENCY', 'Shock NOS'], ['EMERGENCY', 'Thrombocytopenia NOS'], ['EMERGENCY', 'Trnspl status-bne marrow'], ['EMERGENCY', 'Urin tract infection NOS']]",
            "pred_answer": "[['EMERGENCY', '20280.0'], ['EMERGENCY', '23875.0'], ['EMERGENCY', '2449.0'], ['EMERGENCY', '2875.0'], ['EMERGENCY', '28802.0'], ['EMERGENCY', '34830.0'], ['EMERGENCY', '4104.0'], ['EMERGENCY', '4239.0'], ['EMERGENCY', '4280.0'], ['EMERGENCY', '42823.0'], ['EMERGENCY', '4739.0'], ['EMERGENCY', '5990.0'], ['EMERGENCY', '78061.0'], ['EMERGENCY', '78550.0'], ['EMERGENCY', 'V4281']]",
            "is_correct": false,
            "reasoning": "The predicted SQL correctly joins DEMOGRAPHIC and DIAGNOSES on SUBJECT_ID and HADM_ID and filters by the correct patient ID. The only mistake is in the SELECT clause: it returns ICD9_CODE instead of the required SHORT_TITLE. This is a localized error in selecting the wrong column, without affecting the overall query structure or logic.",
            "error_type": "Local: column reference error"
        }
    },
    "llm2sql": {
        "9fbf9e43a2cf665e8cf12a1fddb6f556": {
            "db_id": "mimicsql",
            "generator_name": "llm2sql",
            "question": "specify the number of patients who were admitted in the year less that 2187 and had  (aorto)coronary bypass of one coronary artery (Values: (Aorto)coronary bypass of one coronary artery)",
            "gold_sql": "SELECT COUNT ( DISTINCT DEMOGRAPHIC.\"SUBJECT_ID\" ) FROM DEMOGRAPHIC INNER JOIN PROCEDURES on DEMOGRAPHIC.HADM_ID = PROCEDURES.HADM_ID WHERE DEMOGRAPHIC.\"ADMITYEAR\" < \"2187\" AND PROCEDURES.\"LONG_TITLE\" = \"(Aorto)coronary bypass of one coronary artery\"",
            "pred_sql": "SELECT COUNT(DISTINCT D.SUBJECT_ID) FROM PROCEDURES P INNER JOIN DEMOGRAPHIC D ON P.SUBJECT_ID = D.SUBJECT_ID AND P.HADM_ID = D.HADM_ID WHERE P.LONG_TITLE = '(aorto)coronary bypass of one coronary artery' AND D.ADMITYEAR < 2187;",
            "gold_answer": "[['1.0']]",
            "pred_answer": "[['0.0']]",
            "is_correct": false,
            "reasoning": "The predicted query uses a lowercase string '(aorto)coronary bypass of one coronary artery' in the WHERE clause, whereas the actual LONG_TITLE value in the data is '(Aorto)coronary bypass of one coronary artery' (with an uppercase 'A'). SQLite string comparisons are case-sensitive by default, so this mismatch causes the predicted query to return zero rows. The overall query structure, joins, and logic are correct; only the literal value is incorrectly formatted.",
            "error_type": "Local: value parsing error"
        }
    }
}