{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a04a3372-fe64-49a7-920b-41eba9f8e7fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
    "%pylab inline\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import os\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "06a4a1f8-9675-4547-9c03-0562e3339c37",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import STAGATE_pyG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1c1c5adf-a0a3-4624-a6c9-aa63e608c547",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Output root: the embedding cells further down write their results under this path.\n",
    "savedir = '/stor/usr/sgenetmp/'\n",
    "# Folder containing the .h5ad inputs that datalist points at.\n",
    "basedir = '/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/'\n",
    "\n",
    "# Dataset names = .h5ad file names without the extension, skipping any name that contains '_'.\n",
    "# Filtering on the suffix keeps other directory entries from being chopped into\n",
    "# meaningless names, which a blind x[:-5] would do.\n",
    "h5ad_suffix = '.h5ad'\n",
    "alldataname = [\n",
    "    fname[:-len(h5ad_suffix)]\n",
    "    for fname in sorted(os.listdir(basedir))\n",
    "    if fname.endswith(h5ad_suffix) and '_' not in fname\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cffe7616-672c-4fe1-9d82-2ed690c1a056",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the Zhuang-ABCA-2 raw .h5ad file into memory.\n",
    "raw_h5ad_path = '/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/Zhuang-ABCA-2-raw.h5ad'\n",
    "rawadata = sc.read_h5ad(raw_h5ad_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6aa39f87-2f1a-44f0-ac15-7da950217882",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every annotation CSV is read from the same folder.\n",
    "anno_dir = '/nfs/public/usr/MERFISH2023/Annotation'\n",
    "\n",
    "# Cluster tables: both are keyed by cluster_alias and joined column-wise.\n",
    "annotationtable = pd.read_csv(\n",
    "    f'{anno_dir}/cluster_to_cluster_annotation_membership_pivoted.csv'\n",
    ").set_index('cluster_alias')\n",
    "annotationcolor = pd.read_csv(\n",
    "    f'{anno_dir}/cluster_to_cluster_annotation_membership_color.csv'\n",
    ").set_index('cluster_alias')\n",
    "annotation = pd.concat([annotationtable, annotationcolor], axis=1)\n",
    "\n",
    "# Parcellation tables: the first CSV column is the index; joined column-wise.\n",
    "regiontable = pd.read_csv(\n",
    "    f'{anno_dir}/parcellation_to_parcellation_term_membership_name.csv',\n",
    "    index_col=0,\n",
    ")\n",
    "regioncolor = pd.read_csv(\n",
    "    f'{anno_dir}/parcellation_to_parcellation_term_membership_color.csv',\n",
    "    index_col=0,\n",
    ")\n",
    "regionanno = pd.concat([regiontable, regioncolor], axis=1)\n",
    "\n",
    "# Coordinates table; the embedding cells look up its parcellation_index column per cell.\n",
    "ccfv1 = pd.read_csv(f'{anno_dir}/A2ccf_coordinates.csv', index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6491a7a0-3025-4f16-801b-70585e9cc096",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Input .h5ad files, numbered 001 to 073 with three-digit zero padding.\n",
    "# The numbers in skipped_numbers have no entry in the list.\n",
    "file_prefix = '/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.'\n",
    "skipped_numbers = {24, 29, 38, 43, 64, 68, 69}\n",
    "datalist = [\n",
    "    f'{file_prefix}{number:03d}.h5ad'\n",
    "    for number in range(1, 74)\n",
    "    if number not in skipped_numbers\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9c7b410-be6e-4532-a5d7-14678bd21418",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Train STAGATE on every file in datalist with a spatial-network radius of 0.05\n",
    "# and save one embedding file per dataset.\n",
    "rad_cutoff = 0.05\n",
    "out_tag = 'stagater005'  # output file-name prefix used for this radius\n",
    "\n",
    "# Results go under the notebook-level savedir; make sure the folder exists\n",
    "# before the first write instead of failing after a full training run.\n",
    "outdir = os.path.join(savedir, 'results', 'embedding', 'mouse2')\n",
    "os.makedirs(outdir, exist_ok=True)\n",
    "\n",
    "# The device does not change between files, so resolve it once.\n",
    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "for name in datalist:\n",
    "    datasetname = name.split('/')[-1].split('.h5ad')[0]\n",
    "    adatahvg = sc.read_h5ad(name)\n",
    "    print(datasetname,name)\n",
    "\n",
    "    # Keep only cells that have a row in ccfv1; skip the file when none do.\n",
    "    setidx = adatahvg.obs.index[adatahvg.obs.index.isin(ccfv1.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(f'{datasetname} no ccf')\n",
    "        continue\n",
    "    adatahvg = adatahvg[setidx].copy()\n",
    "    adatahvg.obs['parcellation_index']=ccfv1.loc[adatahvg.obs.index,'parcellation_index']\n",
    "\n",
    "    # Drop parcellation indices 0 and 987 in one pass. The .copy() makes the subset\n",
    "    # a real AnnData, so the .obs assignments below do not write into a view.\n",
    "    keep = ~adatahvg.obs['parcellation_index'].isin([0, 987])\n",
    "    adatahvg = adatahvg[keep].copy()\n",
    "    if adatahvg.n_obs==0:\n",
    "        # Nothing left to build a spatial network from; skip like the no-ccf case.\n",
    "        print(f'{datasetname} no cells left after parcellation filter')\n",
    "        continue\n",
    "\n",
    "    # Look up region and cluster annotations per cell, re-index both lookups to\n",
    "    # the cell index, then append them to obs in the same column order as before.\n",
    "    regioncols = regionanno.loc[adatahvg.obs.parcellation_index.values,:]\n",
    "    regioncols.index = adatahvg.obs.index\n",
    "    clustercols = annotation.loc[adatahvg.obs.cluster_alias.values,:]\n",
    "    clustercols.index = adatahvg.obs.index\n",
    "    adatahvg.obs = pd.concat([adatahvg.obs, regioncols, clustercols],axis=1)\n",
    "\n",
    "    # region is the structure value as a string, except that every cell whose\n",
    "    # division is Isocortex is labelled 'Isocortex'.\n",
    "    adatahvg.obs['region']=adatahvg.obs['structure'].astype(str)\n",
    "    adatahvg.obs.loc[adatahvg.obs['division']=='Isocortex','region']='Isocortex'\n",
    "\n",
    "    STAGATE_pyG.Cal_Spatial_Net(adatahvg, rad_cutoff=rad_cutoff)\n",
    "    STAGATE_pyG.Stats_Spatial_Net(adatahvg)\n",
    "\n",
    "    adatahvg = STAGATE_pyG.train_STAGATE(adatahvg, device=device)\n",
    "\n",
    "    # Save the STAGATE embedding as the data matrix, with obs and the spatial coordinates.\n",
    "    stagateadata = sc.AnnData(adatahvg.obsm['STAGATE'],obs=adatahvg.obs)\n",
    "    stagateadata.obsm['spatial']=adatahvg.obsm['spatial']\n",
    "    stagateadata.write_h5ad(os.path.join(outdir, f'{out_tag}_{datasetname}.h5ad'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a043ff9a-e3d3-46db-969e-ba837ce865c5",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Train STAGATE on every file in datalist with a spatial-network radius of 0.1\n",
    "# and save one embedding file per dataset.\n",
    "rad_cutoff = 0.1\n",
    "out_tag = 'stagater01'  # output file-name prefix used for this radius\n",
    "\n",
    "# Results go under the notebook-level savedir; make sure the folder exists\n",
    "# before the first write instead of failing after a full training run.\n",
    "outdir = os.path.join(savedir, 'results', 'embedding', 'mouse2')\n",
    "os.makedirs(outdir, exist_ok=True)\n",
    "\n",
    "# The device does not change between files, so resolve it once.\n",
    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "for name in datalist:\n",
    "    datasetname = name.split('/')[-1].split('.h5ad')[0]\n",
    "    adatahvg = sc.read_h5ad(name)\n",
    "    print(datasetname,name)\n",
    "\n",
    "    # Keep only cells that have a row in ccfv1; skip the file when none do.\n",
    "    setidx = adatahvg.obs.index[adatahvg.obs.index.isin(ccfv1.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(f'{datasetname} no ccf')\n",
    "        continue\n",
    "    adatahvg = adatahvg[setidx].copy()\n",
    "    adatahvg.obs['parcellation_index']=ccfv1.loc[adatahvg.obs.index,'parcellation_index']\n",
    "\n",
    "    # Drop parcellation indices 0 and 987 in one pass. The .copy() makes the subset\n",
    "    # a real AnnData, so the .obs assignments below do not write into a view.\n",
    "    keep = ~adatahvg.obs['parcellation_index'].isin([0, 987])\n",
    "    adatahvg = adatahvg[keep].copy()\n",
    "    if adatahvg.n_obs==0:\n",
    "        # Nothing left to build a spatial network from; skip like the no-ccf case.\n",
    "        print(f'{datasetname} no cells left after parcellation filter')\n",
    "        continue\n",
    "\n",
    "    # Look up region and cluster annotations per cell, re-index both lookups to\n",
    "    # the cell index, then append them to obs in the same column order as before.\n",
    "    regioncols = regionanno.loc[adatahvg.obs.parcellation_index.values,:]\n",
    "    regioncols.index = adatahvg.obs.index\n",
    "    clustercols = annotation.loc[adatahvg.obs.cluster_alias.values,:]\n",
    "    clustercols.index = adatahvg.obs.index\n",
    "    adatahvg.obs = pd.concat([adatahvg.obs, regioncols, clustercols],axis=1)\n",
    "\n",
    "    # region is the structure value as a string, except that every cell whose\n",
    "    # division is Isocortex is labelled 'Isocortex'.\n",
    "    adatahvg.obs['region']=adatahvg.obs['structure'].astype(str)\n",
    "    adatahvg.obs.loc[adatahvg.obs['division']=='Isocortex','region']='Isocortex'\n",
    "\n",
    "    STAGATE_pyG.Cal_Spatial_Net(adatahvg, rad_cutoff=rad_cutoff)\n",
    "    STAGATE_pyG.Stats_Spatial_Net(adatahvg)\n",
    "\n",
    "    adatahvg = STAGATE_pyG.train_STAGATE(adatahvg, device=device)\n",
    "\n",
    "    # Save the STAGATE embedding as the data matrix, with obs and the spatial coordinates.\n",
    "    stagateadata = sc.AnnData(adatahvg.obsm['STAGATE'],obs=adatahvg.obs)\n",
    "    stagateadata.obsm['spatial']=adatahvg.obsm['spatial']\n",
    "    stagateadata.write_h5ad(os.path.join(outdir, f'{out_tag}_{datasetname}.h5ad'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0392b9d-5aa0-4563-a126-728a981f95d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train STAGATE on every file in datalist with a spatial-network radius of 0.2\n",
    "# and save one embedding file per dataset.\n",
    "#\n",
    "# NOTE(review): the radius here is 0.2 but the output tag says 'r03', whereas the\n",
    "# other two embedding cells pair 0.05 with 'stagater005' and 0.1 with 'stagater01'.\n",
    "# One of the two values is probably a typo (radius 0.3, or tag 'stagater02').\n",
    "# Both are kept exactly as found so existing outputs keep their names - confirm\n",
    "# the intended value and change only the line that is wrong.\n",
    "rad_cutoff = 0.2\n",
    "out_tag = 'stagater03'\n",
    "\n",
    "# Results go under the notebook-level savedir; make sure the folder exists\n",
    "# before the first write instead of failing after a full training run.\n",
    "outdir = os.path.join(savedir, 'results', 'embedding', 'mouse2')\n",
    "os.makedirs(outdir, exist_ok=True)\n",
    "\n",
    "# The device does not change between files, so resolve it once.\n",
    "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n",
    "\n",
    "for name in datalist:\n",
    "    datasetname = name.split('/')[-1].split('.h5ad')[0]\n",
    "    adatahvg = sc.read_h5ad(name)\n",
    "    print(datasetname,name)\n",
    "\n",
    "    # Keep only cells that have a row in ccfv1; skip the file when none do.\n",
    "    setidx = adatahvg.obs.index[adatahvg.obs.index.isin(ccfv1.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(f'{datasetname} no ccf')\n",
    "        continue\n",
    "    adatahvg = adatahvg[setidx].copy()\n",
    "    adatahvg.obs['parcellation_index']=ccfv1.loc[adatahvg.obs.index,'parcellation_index']\n",
    "\n",
    "    # Drop parcellation indices 0 and 987 in one pass. The .copy() makes the subset\n",
    "    # a real AnnData, so the .obs assignments below do not write into a view.\n",
    "    keep = ~adatahvg.obs['parcellation_index'].isin([0, 987])\n",
    "    adatahvg = adatahvg[keep].copy()\n",
    "    if adatahvg.n_obs==0:\n",
    "        # Nothing left to build a spatial network from; skip like the no-ccf case.\n",
    "        print(f'{datasetname} no cells left after parcellation filter')\n",
    "        continue\n",
    "\n",
    "    # Look up region and cluster annotations per cell, re-index both lookups to\n",
    "    # the cell index, then append them to obs in the same column order as before.\n",
    "    regioncols = regionanno.loc[adatahvg.obs.parcellation_index.values,:]\n",
    "    regioncols.index = adatahvg.obs.index\n",
    "    clustercols = annotation.loc[adatahvg.obs.cluster_alias.values,:]\n",
    "    clustercols.index = adatahvg.obs.index\n",
    "    adatahvg.obs = pd.concat([adatahvg.obs, regioncols, clustercols],axis=1)\n",
    "\n",
    "    # region is the structure value as a string, except that every cell whose\n",
    "    # division is Isocortex is labelled 'Isocortex'.\n",
    "    adatahvg.obs['region']=adatahvg.obs['structure'].astype(str)\n",
    "    adatahvg.obs.loc[adatahvg.obs['division']=='Isocortex','region']='Isocortex'\n",
    "\n",
    "    STAGATE_pyG.Cal_Spatial_Net(adatahvg, rad_cutoff=rad_cutoff)\n",
    "    STAGATE_pyG.Stats_Spatial_Net(adatahvg)\n",
    "\n",
    "    adatahvg = STAGATE_pyG.train_STAGATE(adatahvg, device=device)\n",
    "\n",
    "    # Save the STAGATE embedding as the data matrix, with obs and the spatial coordinates.\n",
    "    stagateadata = sc.AnnData(adatahvg.obsm['STAGATE'],obs=adatahvg.obs)\n",
    "    stagateadata.obsm['spatial']=adatahvg.obsm['spatial']\n",
    "    stagateadata.write_h5ad(os.path.join(outdir, f'{out_tag}_{datasetname}.h5ad'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75e70154-af99-4fa5-bcdf-3489689ad02f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nichecompass",
   "language": "python",
   "name": "nichecompass"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
