{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Workspace Information:\n",
    "\n",
    "Workspace Namespace\n",
    "aou-rw-37d4bab0\n",
    "\n",
    "Dataset\n",
    "All of Us Controlled Tier Dataset v8\n",
    "\n",
    "Creation Date\n",
    "Wed Feb 05 2025\n",
    "\n",
    "Last Updated\n",
    "Wed Feb 05 2025"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "code_folding": []
   },
   "outputs": [],
   "source": [
    "import pandas\n",
    "import os\n",
    "\n",
    "dataset_condition_sql = \"\"\"\n",
    "    SELECT\n",
    "        c_occurrence.person_id,\n",
    "        c_occurrence.condition_start_datetime,\n",
    "        c_type.concept_name as condition_type_concept_name,\n",
    "        visit.concept_name as visit_occurrence_concept_name,\n",
    "        c_status.concept_name as condition_status_concept_name \n",
    "    FROM\n",
    "        ( SELECT\n",
    "            * \n",
    "        FROM\n",
    "            `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".condition_occurrence` c_occurrence \n",
    "        WHERE\n",
    "            (\n",
    "                condition_source_concept_id IN (SELECT\n",
    "                    DISTINCT c.concept_id \n",
    "                FROM\n",
    "                    `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_criteria` c \n",
    "                JOIN\n",
    "                    (SELECT\n",
    "                        CAST(cr.id as string) AS id       \n",
    "                    FROM\n",
    "                        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_criteria` cr       \n",
    "                    WHERE\n",
    "                        concept_id IN (702953)       \n",
    "                        AND full_text LIKE '%_rank1]%'      ) a \n",
    "                        ON (c.path LIKE CONCAT('%.', a.id, '.%') \n",
    "                        OR c.path LIKE CONCAT('%.', a.id) \n",
    "                        OR c.path LIKE CONCAT(a.id, '.%') \n",
    "                        OR c.path = a.id) \n",
    "                WHERE\n",
    "                    is_standard = 0 \n",
    "                    AND is_selectable = 1)\n",
    "            )  \n",
    "            AND (\n",
    "                c_occurrence.PERSON_ID IN (SELECT\n",
    "                    distinct person_id  \n",
    "                FROM\n",
    "                    `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_person` cb_search_person  \n",
    "                WHERE\n",
    "                    cb_search_person.person_id IN (SELECT\n",
    "                        criteria.person_id \n",
    "                    FROM\n",
    "                        (SELECT\n",
    "                            DISTINCT person_id, entry_date, concept_id \n",
    "                        FROM\n",
    "                            `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_all_events` \n",
    "                        WHERE\n",
    "                            (concept_id IN (702953) \n",
    "                            AND is_standard = 0 )) criteria ) )\n",
    "            )) c_occurrence \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` c_type \n",
    "            ON c_occurrence.condition_type_concept_id = c_type.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".visit_occurrence` v \n",
    "            ON c_occurrence.visit_occurrence_id = v.visit_occurrence_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` visit \n",
    "            ON v.visit_concept_id = visit.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` c_status \n",
    "            ON c_occurrence.condition_status_concept_id = c_status.concept_id\"\"\"\n",
    "\n",
    "dataset_condition_df = pandas.read_gbq(\n",
    "    dataset_condition_sql,\n",
    "    dialect=\"standard\",\n",
    "    use_bqstorage_api=(\"BIGQUERY_STORAGE_API_ENABLED\" in os.environ),\n",
    "    progress_bar_type=\"tqdm_notebook\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "hospitalization_indicators = [\n",
    "    'Inpatient Visit', \n",
    "    'Inpatient Hospital', \n",
    "    'Emergency Room and Inpatient Visit'\n",
    "]\n",
    "\n",
    "valid_indicators = ['Outpatient Visit', 'Telehealth', 'Emergency Room Visit',\n",
    "       'Office Visit', 'Emergency Room and Inpatient Visit', 'Inpatient Visit',\n",
    "       'Laboratory Visit', 'Non-hospital institution Visit',\n",
    "       'Ambulatory Radiology Clinic / Center', 'Observation Room',\n",
    "       'Ambulatory Infusion Therapy Clinic / Center',\n",
    "       'Ambulatory Rehabilitation Visit', 'Home Visit', 'Inpatient Hospital',\n",
    "       'Pharmacy', 'Outpatient Hospital']\n",
    "\n",
    "# Create a new column 'inpatient' where the condition is met\n",
    "dataset_condition_df = dataset_condition_df[dataset_condition_df['visit_occurrence_concept_name'].isin(valid_indicators)]\n",
    "dataset_condition_df['inpatient'] = np.where(\n",
    "    dataset_condition_df['visit_occurrence_concept_name'].isin(hospitalization_indicators),\n",
    "    1,  # Value if condition is True\n",
    "    0   # Value if condition is False\n",
    ")\n",
    "dataset_condition_df = dataset_condition_df.drop(columns = ['visit_occurrence_concept_name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "condition_status_mapping = {\n",
    "    \"None\": np.nan,\n",
    "    \"No matching concept\": np.nan,\n",
    "    \"Final diagnosis (discharge)\": \"Inpatient\",\n",
    "    \"Primary diagnosis\": \"Primary\",\n",
    "    \"Emergency use of U07.1 | COVID-19\": np.nan,\n",
    "    \"Secondary diagnosis\": \"Secondary\",\n",
    "    \"Suggested billing diagnosis\": \"Billing\",\n",
    "    \"Primary discharge diagnosis\": \"Primary\",\n",
    "    \"Admission diagnosis\": \"Inpatient\",\n",
    "    \"Preliminary diagnosis\": \"Primary\",\n",
    "    \"Discharge diagnosis\": \"Inpatient\",\n",
    "    \"Secondary discharge diagnosis\": \"Secondary\",\n",
    "    \"Confirmed diagnosis\": \"Primary\",\n",
    "    \"Established diagnosis\": \"Primary\",\n",
    "    \"Problem resolved\": np.nan,\n",
    "}\n",
    "dataset_condition_df[\"condition_status_concept_name\"] = dataset_condition_df[\"condition_status_concept_name\"].map(condition_status_mapping)\n",
    "# Define mapping\n",
    "mapping = {\n",
    "    \"Primary Condition\": \"Primary\",\n",
    "    \"Primary diagnosis\": \"Primary\",\n",
    "    \"Secondary Condition\": \"Secondary\",\n",
    "    \"Secondary diagnosis\": \"Secondary\",\n",
    "    \"EHR billing record\": \"EHR\",\n",
    "    \"EHR encounter record\": \"EHR\",\n",
    "    \"EHR\": \"EHR\",\n",
    "    \"EHR billing diagnosis\": \"EHR\",\n",
    "    \"EHR encounter diagnosis\": \"EHR\",\n",
    "    \"EHR problem list\": \"EHR\",\n",
    "    \"EHR problem list entry\": \"EHR\",\n",
    "    \"EHR discharge record\": \"EHR\",\n",
    "    \"EHR Chief Complaint\": \"EHR\",\n",
    "    \"Observation recorded from EHR\": \"EHR\",\n",
    "    \"Outpatient header - 1st position\": \"Outpatient\",\n",
    "    \"Outpatient header - 2nd position\": \"Outpatient\",\n",
    "    \"Claim\": \"Claims\",\n",
    "    \"Inpatient claim\": \"Inpatient\",\n",
    "    \"Inpatient header - 1st position\": \"Inpatient\",\n",
    "    \"Inpatient header - 2nd position\": \"Inpatient\",\n",
    "    \"Inpatient header - 3rd position\": \"Inpatient\",\n",
    "    \"Inpatient header - 4th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 5th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 6th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 8th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 11th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 13th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 15th position\": \"Inpatient\",\n",
    "    \"Admission diagnosis\": \"Inpatient\",\n",
    "    \"Discharge diagnosis\": \"Inpatient\",\n",
    "    \"Condition tested for by diagnostic procedure\": \"Secondary\"\n",
    "}\n",
    "dataset_condition_df[\"condition_type_concept_name\"] = dataset_condition_df[\"condition_type_concept_name\"].map(mapping)\n",
    "dataset_condition_df['EHR'] = np.where((dataset_condition_df['condition_type_concept_name'] == 'EHR'), 1, 0)\n",
    "dataset_condition_df['Claims'] = np.where((dataset_condition_df['condition_type_concept_name'] == 'Claims') | (dataset_condition_df['condition_status_concept_name'] == 'Billing'), 1, 0)\n",
    "dataset_condition_df['Primary'] = np.where((dataset_condition_df['condition_type_concept_name'] == 'Primary') | (dataset_condition_df['condition_status_concept_name'] == 'Primary'), 1, 0)\n",
    "dataset_condition_df['Secondary'] = np.where((dataset_condition_df['condition_type_concept_name'] == 'Secondary') | (dataset_condition_df['condition_status_concept_name'] == 'Secondary'), 1, 0)\n",
    "dataset_condition_df[dataset_condition_df['condition_type_concept_name'] == 'Inpatient']['inpatient'] = 1\n",
    "dataset_condition_df[dataset_condition_df['condition_status_concept_name'] == 'Inpatient']['inpatient'] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas\n",
    "import os\n",
    "\n",
    "dataset_observation_sql = \"\"\"\n",
    "    SELECT\n",
    "        observation.person_id,\n",
    "        observation.observation_datetime,\n",
    "        o_type.concept_name as observation_type_concept_name,\n",
    "        o_qualifier.concept_name as qualifier_concept_name,\n",
    "        o_visit.concept_name as visit_occurrence_concept_name\n",
    "    FROM\n",
    "        ( SELECT\n",
    "            * \n",
    "        FROM\n",
    "            `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".observation` observation \n",
    "        WHERE\n",
    "            (\n",
    "                observation_source_concept_id IN (702953)\n",
    "            )  \n",
    "            AND (\n",
    "                observation.PERSON_ID IN (SELECT\n",
    "                    distinct person_id  \n",
    "                FROM\n",
    "                    `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_person` cb_search_person  \n",
    "                WHERE\n",
    "                    cb_search_person.person_id IN (SELECT\n",
    "                        criteria.person_id \n",
    "                    FROM\n",
    "                        (SELECT\n",
    "                            DISTINCT person_id, entry_date, concept_id \n",
    "                        FROM\n",
    "                            `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_all_events` \n",
    "                        WHERE\n",
    "                            (concept_id IN (702953) \n",
    "                            AND is_standard = 0 )) criteria ) )\n",
    "            )) observation \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` o_standard_concept \n",
    "            ON observation.observation_concept_id = o_standard_concept.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` o_type \n",
    "            ON observation.observation_type_concept_id = o_type.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` o_qualifier \n",
    "            ON observation.qualifier_concept_id = o_qualifier.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".visit_occurrence` v \n",
    "            ON observation.visit_occurrence_id = v.visit_occurrence_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` o_visit \n",
    "            ON v.visit_concept_id = o_visit.concept_id\"\"\"\n",
    "\n",
    "dataset_observation_df = pandas.read_gbq(\n",
    "    dataset_observation_sql,\n",
    "    dialect=\"standard\",\n",
    "    use_bqstorage_api=(\"BIGQUERY_STORAGE_API_ENABLED\" in os.environ),\n",
    "    progress_bar_type=\"tqdm_notebook\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "hospitalization_indicators = [\n",
    "    'Inpatient Visit', \n",
    "    'Inpatient Hospital', \n",
    "    'Emergency Room and Inpatient Visit'\n",
    "]\n",
    "\n",
    "# Create a new column 'inpatient' where the condition is met\n",
    "dataset_observation_df['inpatient'] = np.where(\n",
    "    dataset_observation_df['visit_occurrence_concept_name'].isin(hospitalization_indicators),\n",
    "    1,  # Value if condition is True\n",
    "    0   # Value if condition is False\n",
    ")\n",
    "dataset_observation_df = dataset_observation_df.drop(columns = ['visit_occurrence_concept_name'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "condition_status_mapping = {\n",
    "    \"Principal diagnosis\": \"Primary\",\n",
    "    \"Clinical diagnosis\": \"Primary\",\n",
    "    \"Secondary diagnosis\": \"Secondary\",\n",
    "    \"Suggested billing diagnosis\": \"Billing\"\n",
    "}\n",
    "dataset_observation_df[\"qualifier_concept_name\"] = dataset_observation_df[\"qualifier_concept_name\"].map(condition_status_mapping)\n",
    "# Define mapping\n",
    "mapping = {\n",
    "    \"Primary Condition\": \"Primary\",\n",
    "    \"Primary diagnosis\": \"Primary\",\n",
    "    \"Secondary Condition\": \"Secondary\",\n",
    "    \"Secondary diagnosis\": \"Secondary\",\n",
    "    \"EHR billing record\": \"EHR\",\n",
    "    \"EHR encounter record\": \"EHR\",\n",
    "    \"EHR\": \"EHR\",\n",
    "    \"EHR billing diagnosis\": \"EHR\",\n",
    "    \"EHR encounter diagnosis\": \"EHR\",\n",
    "    \"EHR problem list\": \"EHR\",\n",
    "    \"EHR problem list entry\": \"EHR\",\n",
    "    \"EHR discharge record\": \"EHR\",\n",
    "    \"EHR Chief Complaint\": \"EHR\",\n",
    "    \"Observation recorded from EHR\": \"EHR\",\n",
    "    \"Outpatient header - 1st position\": \"Outpatient\",\n",
    "    \"Outpatient header - 2nd position\": \"Outpatient\",\n",
    "    \"Claim\": \"Claims\",\n",
    "    \"Inpatient claim\": \"Inpatient\",\n",
    "    \"Inpatient header - 1st position\": \"Inpatient\",\n",
    "    \"Inpatient header - 2nd position\": \"Inpatient\",\n",
    "    \"Inpatient header - 3rd position\": \"Inpatient\",\n",
    "    \"Inpatient header - 4th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 5th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 6th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 8th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 11th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 13th position\": \"Inpatient\",\n",
    "    \"Inpatient header - 15th position\": \"Inpatient\",\n",
    "    \"Admission diagnosis\": \"Inpatient\",\n",
    "    \"Discharge diagnosis\": \"Inpatient\",\n",
    "    \"Condition tested for by diagnostic procedure\": \"Secondary\"\n",
    "}\n",
    "dataset_observation_df[\"observation_type_concept_name\"] = dataset_observation_df[\"observation_type_concept_name\"].map(mapping)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_observation_df['EHR'] = np.where((dataset_observation_df['observation_type_concept_name'] == 'EHR'), 1, 0)\n",
    "dataset_observation_df['Claims'] = np.where((dataset_observation_df['observation_type_concept_name'] == 'Claims') | (dataset_observation_df['qualifier_concept_name'] == 'Billing'), 1, 0)\n",
    "dataset_observation_df['Primary'] = np.where((dataset_observation_df['observation_type_concept_name'] == 'Primary') | (dataset_observation_df['qualifier_concept_name'] == 'Primary'), 1, 0)\n",
    "dataset_observation_df['Secondary'] = np.where((dataset_observation_df['observation_type_concept_name'] == 'Secondary') | (dataset_observation_df['qualifier_concept_name'] == 'Secondary'), 1, 0)\n",
    "for i in ['EHR', 'Claims', 'Primary', 'Secondary']:\n",
    "    print(i, dataset_observation_df[i].sum())\n",
    "dataset_observation_df[dataset_observation_df['observation_type_concept_name'] == 'Inpatient']['inpatient'] = 1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas\n",
    "import os\n",
    "\n",
    "dataset_person_sql = \"\"\"\n",
    "    SELECT\n",
    "        person.person_id,\n",
    "        p_gender_concept.concept_name as gender,\n",
    "        person.birth_datetime as date_of_birth,\n",
    "        p_race_concept.concept_name as race,\n",
    "        p_ethnicity_concept.concept_name as ethnicity,\n",
    "        p_sex_at_birth_concept.concept_name as sex_at_birth \n",
    "    FROM\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".person` person \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` p_gender_concept \n",
    "            ON person.gender_concept_id = p_gender_concept.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` p_race_concept \n",
    "            ON person.race_concept_id = p_race_concept.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` p_ethnicity_concept \n",
    "            ON person.ethnicity_concept_id = p_ethnicity_concept.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` p_sex_at_birth_concept \n",
    "            ON person.sex_at_birth_concept_id = p_sex_at_birth_concept.concept_id  \n",
    "    WHERE\n",
    "        person.PERSON_ID IN (SELECT\n",
    "            distinct person_id  \n",
    "        FROM\n",
    "            `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_person` cb_search_person  \n",
    "        WHERE\n",
    "            cb_search_person.person_id IN (SELECT\n",
    "                criteria.person_id \n",
    "            FROM\n",
    "                (SELECT\n",
    "                    DISTINCT person_id, entry_date, concept_id \n",
    "                FROM\n",
    "                    `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_all_events` \n",
    "                WHERE\n",
    "                    (concept_id IN (702953) \n",
    "                    AND is_standard = 0 )) criteria ) )\"\"\"\n",
    "\n",
    "dataset_person_df = pandas.read_gbq(\n",
    "    dataset_person_sql,\n",
    "    dialect=\"standard\",\n",
    "    use_bqstorage_api=(\"BIGQUERY_STORAGE_API_ENABLED\" in os.environ),\n",
    "    progress_bar_type=\"tqdm_notebook\")\n",
    "\n",
    "dataset_person_df.head(5)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "event_df1 = dataset_observation_df[['person_id', 'observation_datetime', 'EHR', 'Claims', 'Primary', 'Secondary', 'inpatient']]\n",
    "event_df2 = dataset_condition_df[['person_id', 'condition_start_datetime', 'EHR', 'Claims', 'Primary', 'Secondary', 'inpatient']]\n",
    "event_df2.columns = event_df1.columns\n",
    "event_df = pd.concat([event_df1, event_df2], ignore_index=True)\n",
    "aggregated_df = event_df.groupby('person_id', as_index=False).agg({\n",
    "    'observation_datetime': 'min',  # earliest observation_datetime\n",
    "    'EHR': 'max',              # largest inpatient flag\n",
    "    'Claims': 'max',              # largest inpatient flag\n",
    "    'Primary': 'max',              # largest inpatient flag\n",
    "    'Secondary': 'max',              # largest inpatient flag\n",
    "    'inpatient': 'max'\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "covid_details_df = pd.merge(dataset_person_df, aggregated_df, on='person_id', how='inner')\n",
    "covid_details_df['observation_datetime'] = pd.to_datetime(covid_details_df['observation_datetime'])\n",
    "covid_details_df['date_of_birth'] = pd.to_datetime(covid_details_df['date_of_birth'])\n",
    "covid_details_df['age'] = covid_details_df.apply(lambda row: (row['observation_datetime'].year - row['date_of_birth'].year) - ((row['observation_datetime'].month, row['observation_datetime'].day) < (row['date_of_birth'].month, row['date_of_birth'].day)), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This query represents dataset \"Covid and Comorbidities\" for domain \"condition\" and was generated for All of Us Registered Tier Dataset v7\n",
    "dataset_condition_sql = \"\"\"\n",
    "    SELECT\n",
    "        c_occurrence.person_id,\n",
    "        c_occurrence.condition_concept_id,\n",
    "        c_standard_concept.concept_name as standard_concept_name,\n",
    "        c_standard_concept.concept_code as standard_concept_code,\n",
    "        c_standard_concept.vocabulary_id as standard_vocabulary,\n",
    "        c_occurrence.condition_start_datetime,\n",
    "        c_occurrence.condition_end_datetime,\n",
    "        c_occurrence.condition_type_concept_id,\n",
    "        c_type.concept_name as condition_type_concept_name,\n",
    "        c_occurrence.stop_reason,\n",
    "        c_occurrence.visit_occurrence_id,\n",
    "        visit.concept_name as visit_occurrence_concept_name,\n",
    "        c_occurrence.condition_source_value,\n",
    "        c_occurrence.condition_source_concept_id,\n",
    "        c_source_concept.concept_name as source_concept_name,\n",
    "        c_source_concept.concept_code as source_concept_code,\n",
    "        c_source_concept.vocabulary_id as source_vocabulary,\n",
    "        c_occurrence.condition_status_source_value,\n",
    "        c_occurrence.condition_status_concept_id,\n",
    "        c_status.concept_name as condition_status_concept_name \n",
    "    FROM\n",
    "        ( SELECT\n",
    "            * \n",
    "        FROM\n",
    "            `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".condition_occurrence` c_occurrence \n",
    "        WHERE\n",
    "            (\n",
    "                condition_source_concept_id IN (SELECT\n",
    "                    DISTINCT c.concept_id \n",
    "                FROM\n",
    "                    `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_criteria` c \n",
    "                JOIN\n",
    "                    (SELECT\n",
    "                        CAST(cr.id as string) AS id       \n",
    "                    FROM\n",
    "                        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_criteria` cr       \n",
    "                    WHERE\n",
    "                        concept_id IN (1326492, 1326493, 1326601, 1326602, 1326603, 1326604, 1326605, 1326606, 1326607, 1326608, 1326609, 1567956, 1567958, 1567959, 1567960, 1567964, 1567965, 1567966, 1567969, 1567971, 1569133, 1569134, 1569135, 1569145, 1569178, 1569179, 1569180, 1569487, 1569488, 1569489, 1569490, 1569491, 35206881, 35206882, 35207668, 35207702, 35207703, 35207704, 35207705, 35207706, 35207792, 35207793, 35208023, 35208024, 35208025, 37200198, 37200199, 37200200, 37200201, 37200202, 37200203, 37200204, 37200205, 37200206, 37200207, 37200208, 37200209, 37200210, 37200211, 37200212, 37200213, 37200214, 37200215, 37200216, 37200217, 37200218, 37200219, 37200220, 37200221, 37200222, 37200223, 37200224, 37200225, 37200227, 37200228, 37200229, 37200230, 37200232, 37200233, 37200234, 37200235, 37200237, 37200238, 37200239, 37200240, 37200242, 37200243, 37200244, 37200245, 37200246, 37200247, 37200248, 37200249, 37200251, 37200252, 37200253, 37200254, 45533019, 45533020,\n",
    " 45533021, 45533022, 45533023, 45533439, 45533456, 45533457, 45537961, 45537962, 45538373, 45538374, 45538375, 45538377, 45542738, 45543167, 45543168, 45543182, 45543269, 45543270, 45547625, 45547626, 45547627, 45548010, 45548011, 45548012, 45548013, 45548022, 45548116, 45548117, 45552385, 45552386, 45557112, 45557113, 45557538, 45557539, 45557626, 45561949, 45562343, 45562344, 45562355, 45562457, 45566731, 45567167, 45567168, 45567180, 45567181, 45567265, 45567266, 45572168, 45572169, 45572170, 45572171, 45576443, 45576866, 45576868, 45576878, 45576951, 45576952, 45581352, 45581353, 45581354, 45581355, 45581766, 45581860, 45586139, 45586140, 45586572, 45586573, 45586574, 45586575, 45586587, 45586588, 45586674, 45586675, 45591027, 45591029, 45591030, 45591031, 45591456, 45591458, 45591459, 45591460, 45591461, 45591469, 45591558, 45591559, 45595797, 45595798, 45595799, 45596197, 45596198, 45596199, 45600641, 45600642, 45601024, 45601026, 45601027, 45601028, 45601038, 45601133, 45601134,\n",
    " 45605401, 45605402, 45605403, 45605404, 45605405, 45605784, 45605785, 45605786, 45605787, 45605788, 1572195, 1568062, 1571486, 1567893, 1572190, 1569515, 1572193)       \n",
    "                        AND full_text LIKE '%_rank1]%'      ) a \n",
    "                        ON (c.path LIKE CONCAT('%.', a.id, '.%') \n",
    "                        OR c.path LIKE CONCAT('%.', a.id) \n",
    "                        OR c.path LIKE CONCAT(a.id, '.%') \n",
    "                        OR c.path = a.id) \n",
    "                WHERE\n",
    "                    is_standard = 0 \n",
    "                    AND is_selectable = 1)\n",
    "            )  \n",
    "            AND (\n",
    "                c_occurrence.PERSON_ID IN (SELECT\n",
    "                    distinct person_id  \n",
    "                FROM\n",
    "                    `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_person` cb_search_person  \n",
    "                WHERE\n",
    "                    cb_search_person.person_id IN (SELECT\n",
    "                        criteria.person_id \n",
    "                    FROM\n",
    "                        (SELECT\n",
    "                            DISTINCT person_id, entry_date, concept_id \n",
    "                        FROM\n",
    "                            `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".cb_search_all_events` \n",
    "                        WHERE\n",
    "                            (concept_id IN (702953) \n",
    "                            AND is_standard = 0 )) criteria ) )\n",
    "            )) c_occurrence \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` c_standard_concept \n",
    "            ON c_occurrence.condition_concept_id = c_standard_concept.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` c_type \n",
    "            ON c_occurrence.condition_type_concept_id = c_type.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".visit_occurrence` v \n",
    "            ON c_occurrence.visit_occurrence_id = v.visit_occurrence_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` visit \n",
    "            ON v.visit_concept_id = visit.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` c_source_concept \n",
    "            ON c_occurrence.condition_source_concept_id = c_source_concept.concept_id \n",
    "    LEFT JOIN\n",
    "        `\"\"\" + os.environ[\"WORKSPACE_CDR\"] + \"\"\".concept` c_status \n",
    "            ON c_occurrence.condition_status_concept_id = c_status.concept_id\"\"\"\n",
    "\n",
    "dataset_condition_df = pandas.read_gbq(\n",
    "    dataset_condition_sql,\n",
    "    dialect=\"standard\",\n",
    "    use_bqstorage_api=(\"BIGQUERY_STORAGE_API_ENABLED\" in os.environ),\n",
    "    progress_bar_type=\"tqdm_notebook\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_condition_df['source_concept_code'] = dataset_condition_df['source_concept_code'].str[:3]\n",
    "full_data_df = pd.merge(covid_details_df['person_id'], dataset_condition_df[['person_id', 'source_concept_code']], on='person_id', how='left')\n",
    "comorbidities_pivot = full_data_df.pivot_table(index='person_id', columns='source_concept_code', aggfunc='size', fill_value=0)\n",
    "comorbidities_pivot = (comorbidities_pivot > 0).astype(int)\n",
    "final_df = pd.merge(covid_details_df, comorbidities_pivot, on='person_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "final_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "final_df[['person_id', 'age', 'gender', 'race', 'ethnicity', 'observation_datetime', 'EHR', 'Claims', 'Primary',\n",
    "       'Secondary', 'E03', 'E11', 'E78', 'I10', 'I25', 'I50', 'J44', 'J45', 'J96', 'N18', 'R06', 'R07', 'R09', 'inpatient']].to_csv('Covid and Comorbidities.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
