{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6322dd02-cc06-452f-ae6e-6bc8464959c7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Restrict CUDA to physical GPU 2. This is set before torch is imported so the\n",
    "# restriction is already in the environment when CUDA gets initialised;\n",
    "# the single visible GPU is then addressed as 'cuda:0'.\n",
    "os.environ['CUDA_VISIBLE_DEVICES']='2'\n",
    "\n",
    "import multiprocessing as mp\n",
    "\n",
    "import pandas as pd\n",
    "import scanpy as sc\n",
    "import torch\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "aec2770b-c907-412f-904d-1b919e638ab8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-13T07:22:18.098895Z",
     "iopub.status.busy": "2024-05-13T07:22:18.098304Z",
     "iopub.status.idle": "2024-05-13T07:22:18.588492Z",
     "shell.execute_reply": "2024-05-13T07:22:18.587578Z",
     "shell.execute_reply.started": "2024-05-13T07:22:18.098873Z"
    }
   },
   "outputs": [],
   "source": [
    "from GraphST import GraphST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "262695de-5eba-45ef-87f5-9b99895a04d0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-13T07:22:18.589903Z",
     "iopub.status.busy": "2024-05-13T07:22:18.589475Z",
     "iopub.status.idle": "2024-05-13T07:22:19.815815Z",
     "shell.execute_reply": "2024-05-13T07:22:19.814423Z",
     "shell.execute_reply.started": "2024-05-13T07:22:18.589881Z"
    }
   },
   "outputs": [],
   "source": [
    "# Compute device for GraphST: the first visible CUDA GPU when one is available,\n",
    "# otherwise the CPU (the package's default). A GPU is recommended.\n",
    "if torch.cuda.is_available():\n",
    "    device = torch.device('cuda:0')\n",
    "else:\n",
    "    device = torch.device('cpu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "73102268-d67d-490e-b836-b1ab0427e068",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-13T07:22:19.819974Z",
     "iopub.status.busy": "2024-05-13T07:22:19.819035Z",
     "iopub.status.idle": "2024-05-13T07:22:21.729950Z",
     "shell.execute_reply": "2024-05-13T07:22:21.728975Z",
     "shell.execute_reply.started": "2024-05-13T07:22:19.819922Z"
    }
   },
   "outputs": [],
   "source": [
    "# Load slice Zhuang-ABCA-2.030 and the annotation lookup tables.\n",
    "adatahvg = sc.read_h5ad('/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.030.h5ad')\n",
    "\n",
    "# Cluster annotation: membership names and colours, both indexed by cluster_alias.\n",
    "annotationtable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_pivoted.csv')\n",
    "annotationtable = annotationtable.set_index('cluster_alias')\n",
    "annotationcolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_color.csv')\n",
    "annotationcolor = annotationcolor.set_index('cluster_alias')\n",
    "annotation = pd.concat([annotationtable,annotationcolor],axis=1)\n",
    "\n",
    "# Region annotation: names and colours, indexed by the first CSV column.\n",
    "regiontable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_name.csv',index_col=0)\n",
    "regioncolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_color.csv',index_col=0)\n",
    "regionanno = pd.concat([regiontable,regioncolor],axis=1)\n",
    "\n",
    "# Per-cell table that supplies parcellation_index.\n",
    "ccfv1 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "\n",
    "# filter: keep only cells that have a row in ccfv1\n",
    "setidx = adatahvg.obs.index[adatahvg.obs.index.isin(ccfv1.index)]\n",
    "adatahvg = adatahvg[setidx].copy()\n",
    "adatahvg.obs['parcellation_index']=ccfv1.loc[adatahvg.obs.index,'parcellation_index']\n",
    "\n",
    "# Drop parcellation_index 0 and 987 with a single mask and materialise the result.\n",
    "# Subsetting an AnnData gives a view, and the .obs assignments below would otherwise\n",
    "# force anndata to copy that view implicitly.\n",
    "# NOTE(review): the meaning of codes 0 and 987 is not documented here - confirm.\n",
    "parcellation = adatahvg.obs['parcellation_index']\n",
    "adatahvg = adatahvg[(parcellation != 0) & (parcellation != 987)].copy()\n",
    "\n",
    "# Attach the region columns, looked up by parcellation_index and re-indexed to the cells.\n",
    "query = regionanno.loc[adatahvg.obs.parcellation_index.values,:]\n",
    "query.index = adatahvg.obs.index\n",
    "adatahvg.obs = pd.concat([adatahvg.obs, query],axis=1)\n",
    "\n",
    "# Attach the cluster columns, looked up by cluster_alias and re-indexed to the cells.\n",
    "query = annotation.loc[adatahvg.obs.cluster_alias.values,:]\n",
    "query.index = adatahvg.obs.index\n",
    "adatahvg.obs = pd.concat([adatahvg.obs, query],axis=1)\n",
    "\n",
    "\n",
    "def _color_map(level):\n",
    "    \"\"\"Return {label: colour} from the `level` and `<level>_color` columns of adatahvg.obs.\"\"\"\n",
    "    return dict(adatahvg.obs[[level, f'{level}_color']].drop_duplicates().values)\n",
    "\n",
    "\n",
    "subclass_color = _color_map('subclass')\n",
    "class_color = _color_map('class')\n",
    "supertypecolor = _color_map('supertype')\n",
    "category_color = _color_map('category')\n",
    "division_color = _color_map('division')\n",
    "structure_color = _color_map('structure')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7f6d87c-cce4-42aa-9cf8-b863dd5c7751",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-11T13:20:16.020538Z",
     "iopub.status.busy": "2024-05-11T13:20:16.019636Z",
     "iopub.status.idle": "2024-05-11T13:25:29.333933Z",
     "shell.execute_reply": "2024-05-11T13:25:29.332878Z",
     "shell.execute_reply.started": "2024-05-11T13:20:16.020472Z"
    }
   },
   "outputs": [],
   "source": [
    "# Build a GraphST model on the annotated slice, placed on the selected device.\n",
    "model = GraphST.GraphST(adatahvg, device=device)\n",
    "\n",
    "# Train it; the object returned by train() replaces adatahvg and is what gets saved next.\n",
    "adatahvg = model.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "09ebf356-5238-4131-a12a-d0fd6987d52a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-11T13:28:07.658831Z",
     "iopub.status.busy": "2024-05-11T13:28:07.658013Z",
     "iopub.status.idle": "2024-05-11T13:28:22.172993Z",
     "shell.execute_reply": "2024-05-11T13:28:22.171947Z",
     "shell.execute_reply.started": "2024-05-11T13:28:07.658763Z"
    }
   },
   "outputs": [],
   "source": [
    "# Save the trained slice. The output directory is created first so the write cannot\n",
    "# fail on a missing folder after training has already finished.\n",
    "out_path = '/data1/usr/results/embedding/graphst_Zhuang-ABCA-2.030.h5ad'\n",
    "os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "adatahvg.write_h5ad(out_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b1f11acd-8b49-4f4e-83b7-68cf8d98c10f",
   "metadata": {},
   "source": [
    "# Loop: run GraphST on every slice in `datalist`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "c046b5bd-e900-4bb5-80ad-7d13bd81c9a3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-13T07:22:21.731305Z",
     "iopub.status.busy": "2024-05-13T07:22:21.730947Z",
     "iopub.status.idle": "2024-05-13T07:22:22.983984Z",
     "shell.execute_reply": "2024-05-13T07:22:22.983077Z",
     "shell.execute_reply.started": "2024-05-13T07:22:21.731286Z"
    }
   },
   "outputs": [],
   "source": [
    "# Lookup tables for the per-slice loop.\n",
    "# Cluster annotation: names and colours share the cluster_alias index.\n",
    "annotationtable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_pivoted.csv').set_index('cluster_alias')\n",
    "annotationcolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_color.csv').set_index('cluster_alias')\n",
    "annotation = pd.concat([annotationtable, annotationcolor], axis=1)\n",
    "\n",
    "# Region annotation: the first CSV column becomes the index of both tables.\n",
    "regiontable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_name.csv', index_col=0)\n",
    "regioncolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_color.csv', index_col=0)\n",
    "regionanno = pd.concat([regiontable, regioncolor], axis=1)\n",
    "\n",
    "# Per-cell table; the loop reads its parcellation_index column.\n",
    "ccfv1 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv', index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8d7c60b4-809f-4807-85e0-9e179ca46ef2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-13T07:22:22.985422Z",
     "iopub.status.busy": "2024-05-13T07:22:22.985100Z",
     "iopub.status.idle": "2024-05-13T07:22:22.992319Z",
     "shell.execute_reply": "2024-05-13T07:22:22.991525Z",
     "shell.execute_reply.started": "2024-05-13T07:22:22.985402Z"
    }
   },
   "outputs": [],
   "source": [
    "# Input slices for the loop, built from one directory plus the slice numbers.\n",
    "slice_dir = '/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration'\n",
    "\n",
    "# Slices 001-023, 025-028, 030 and 031 were commented out of the original list\n",
    "# (presumably handled in an earlier run - confirm before adding them back).\n",
    "slice_ids = [\n",
    "    32, 33, 34, 35, 36, 37,\n",
    "    39, 40, 41, 42,\n",
    "    44, 45, 46, 47, 48, 49, 50, 51, 52, 53,\n",
    "    54, 55, 56, 57, 58, 59, 60, 61, 62, 63,\n",
    "    65, 66, 67,\n",
    "    70, 71, 72, 73,\n",
    "]\n",
    "\n",
    "# File names carry the slice number zero-padded to three digits, e.g. Zhuang-ABCA-2.032.h5ad\n",
    "datalist = [f'{slice_dir}/Zhuang-ABCA-2.{slice_id:03d}.h5ad' for slice_id in slice_ids]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6b32f6c-1ee4-4d33-86bd-02d8b1156a5a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-12T15:14:10.251055Z",
     "iopub.status.busy": "2024-05-12T15:14:10.250166Z",
     "iopub.status.idle": "2024-05-12T15:30:56.323462Z",
     "shell.execute_reply": "2024-05-12T15:30:56.322627Z",
     "shell.execute_reply.started": "2024-05-12T15:14:10.250992Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def embed_slice(name, out_dir):\n",
    "    \"\"\"Annotate one MERFISH slice, train GraphST on it and write the result.\n",
    "\n",
    "    Reads the notebook-level tables ccfv1, regionanno and annotation and the\n",
    "    torch device defined in earlier cells.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    name : str\n",
    "        Path of the input .h5ad file.\n",
    "    out_dir : str\n",
    "        Directory that receives graphst_<datasetname>.h5ad; created if missing.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str or None\n",
    "        Path of the written file, or None when no cell of the slice survives\n",
    "        the filters and the slice is skipped.\n",
    "    \"\"\"\n",
    "    datasetname = name.split('/')[-1].split('.h5ad')[0]\n",
    "    adata = sc.read_h5ad(name)\n",
    "    print(datasetname,name)\n",
    "\n",
    "    # filter: keep only cells that have a row in ccfv1\n",
    "    setidx = adata.obs.index[adata.obs.index.isin(ccfv1.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(f'{datasetname} no ccf')\n",
    "        return None\n",
    "    adata = adata[setidx].copy()\n",
    "    adata.obs['parcellation_index'] = ccfv1.loc[adata.obs.index,'parcellation_index']\n",
    "\n",
    "    # Drop parcellation_index 0 and 987 with one mask, then copy: a subset AnnData is a\n",
    "    # view, and assigning .obs on a view makes anndata copy it implicitly.\n",
    "    # NOTE(review): the meaning of codes 0 and 987 is not documented here - confirm.\n",
    "    parcellation = adata.obs['parcellation_index']\n",
    "    keep = (parcellation != 0) & (parcellation != 987)\n",
    "    if not keep.any():\n",
    "        # Nothing left to embed; skip instead of failing inside GraphST.\n",
    "        print(f'{datasetname} no cells left after parcellation filter')\n",
    "        return None\n",
    "    adata = adata[keep].copy()\n",
    "\n",
    "    # Attach region columns (by parcellation_index), then cluster columns (by cluster_alias),\n",
    "    # each re-indexed to the cell index so the column-wise concat lines up.\n",
    "    query = regionanno.loc[adata.obs.parcellation_index.values,:]\n",
    "    query.index = adata.obs.index\n",
    "    adata.obs = pd.concat([adata.obs, query],axis=1)\n",
    "    query = annotation.loc[adata.obs.cluster_alias.values,:]\n",
    "    query.index = adata.obs.index\n",
    "    adata.obs = pd.concat([adata.obs, query],axis=1)\n",
    "\n",
    "    # region = structure name, except that every Isocortex cell is labelled 'Isocortex'\n",
    "    adata.obs['region'] = adata.obs['structure'].astype(str)\n",
    "    adata.obs.loc[adata.obs['division']=='Isocortex','region'] = 'Isocortex'\n",
    "\n",
    "    # define and train the model; the object returned by train() is what gets saved\n",
    "    model = GraphST.GraphST(adata, device=device)\n",
    "    adata = model.train()\n",
    "\n",
    "    # Make sure the target directory exists so a finished training run is not lost\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    out_path = os.path.join(out_dir, f'graphst_{datasetname}.h5ad')\n",
    "    adata.write_h5ad(out_path)\n",
    "    return out_path\n",
    "\n",
    "\n",
    "for name in datalist:\n",
    "    embed_slice(name, '/data1/usr/results/embedding/mouse2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4085479-ff78-4c05-89b5-7087297661e1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-13T06:34:03.242082Z",
     "iopub.status.busy": "2024-05-13T06:34:03.241254Z",
     "iopub.status.idle": "2024-05-13T06:53:23.026961Z",
     "shell.execute_reply": "2024-05-13T06:53:23.025733Z",
     "shell.execute_reply.started": "2024-05-13T06:34:03.242018Z"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# NOTE(review): this cell runs the same per-slice pipeline as the preceding loop cell and\n",
    "# writes to the same directory, so running both retrains every slice and overwrites the files.\n",
    "for name in datalist:\n",
    "    datasetname = name.split('/')[-1].split('.h5ad')[0]\n",
    "    adatahvg = sc.read_h5ad(name)\n",
    "    print(datasetname,name)\n",
    "    # filter: keep only cells that have a row in ccfv1\n",
    "    setidx = adatahvg.obs.index[adatahvg.obs.index.isin(ccfv1.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(f'{datasetname} no ccf')\n",
    "        continue\n",
    "    adatahvg = adatahvg[setidx].copy()\n",
    "    adatahvg.obs['parcellation_index']=ccfv1.loc[adatahvg.obs.index,'parcellation_index']\n",
    "    # Drop parcellation_index 0 and 987 with one mask, then copy: a subset AnnData is a\n",
    "    # view, and assigning .obs on a view makes anndata copy it implicitly.\n",
    "    parcellation = adatahvg.obs['parcellation_index']\n",
    "    keep = (parcellation != 0) & (parcellation != 987)\n",
    "    if not keep.any():\n",
    "        # Nothing left to embed; skip instead of failing inside GraphST.\n",
    "        print(f'{datasetname} no cells left after parcellation filter')\n",
    "        continue\n",
    "    adatahvg = adatahvg[keep].copy()\n",
    "    # Attach region columns (by parcellation_index), then cluster columns (by cluster_alias)\n",
    "    query = regionanno.loc[adatahvg.obs.parcellation_index.values,:]\n",
    "    query.index = adatahvg.obs.index\n",
    "    adatahvg.obs = pd.concat([adatahvg.obs, query],axis=1)\n",
    "    query = annotation.loc[adatahvg.obs.cluster_alias.values,:]\n",
    "    query.index = adatahvg.obs.index\n",
    "    adatahvg.obs = pd.concat([adatahvg.obs, query],axis=1)\n",
    "    # region = structure name, except that every Isocortex cell is labelled 'Isocortex'\n",
    "    adatahvg.obs['region']=adatahvg.obs['structure'].astype(str)\n",
    "    adatahvg.obs.loc[adatahvg.obs['division']=='Isocortex','region']='Isocortex'\n",
    "    # define model\n",
    "    model = GraphST.GraphST(adatahvg, device=device)\n",
    "    # train model\n",
    "    adatahvg = model.train()\n",
    "    # Create the output directory first so a finished training run is not lost\n",
    "    out_path = f'/data1/usr/results/embedding/mouse2/graphst_{datasetname}.h5ad'\n",
    "    os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "    adatahvg.write_h5ad(out_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47ba44ef-c7ba-4f2b-8bae-658beb09ec7f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-05-13T07:22:25.861378Z",
     "iopub.status.busy": "2024-05-13T07:22:25.860546Z",
     "iopub.status.idle": "2024-05-13T08:54:04.805154Z",
     "shell.execute_reply": "2024-05-13T08:54:04.804006Z",
     "shell.execute_reply.started": "2024-05-13T07:22:25.861317Z"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Per-slice GraphST run whose results are written directly under /data2/usr.\n",
    "for name in datalist:\n",
    "    datasetname = name.split('/')[-1].split('.h5ad')[0]\n",
    "    adatahvg = sc.read_h5ad(name)\n",
    "    print(datasetname,name)\n",
    "    # filter: keep only cells that have a row in ccfv1\n",
    "    setidx = adatahvg.obs.index[adatahvg.obs.index.isin(ccfv1.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(f'{datasetname} no ccf')\n",
    "        continue\n",
    "    adatahvg = adatahvg[setidx].copy()\n",
    "    adatahvg.obs['parcellation_index']=ccfv1.loc[adatahvg.obs.index,'parcellation_index']\n",
    "    # Drop parcellation_index 0 and 987 with one mask, then copy: a subset AnnData is a\n",
    "    # view, and assigning .obs on a view makes anndata copy it implicitly.\n",
    "    parcellation = adatahvg.obs['parcellation_index']\n",
    "    keep = (parcellation != 0) & (parcellation != 987)\n",
    "    if not keep.any():\n",
    "        # Nothing left to embed; skip instead of failing inside GraphST.\n",
    "        print(f'{datasetname} no cells left after parcellation filter')\n",
    "        continue\n",
    "    adatahvg = adatahvg[keep].copy()\n",
    "    # Attach region columns (by parcellation_index), then cluster columns (by cluster_alias)\n",
    "    query = regionanno.loc[adatahvg.obs.parcellation_index.values,:]\n",
    "    query.index = adatahvg.obs.index\n",
    "    adatahvg.obs = pd.concat([adatahvg.obs, query],axis=1)\n",
    "    query = annotation.loc[adatahvg.obs.cluster_alias.values,:]\n",
    "    query.index = adatahvg.obs.index\n",
    "    adatahvg.obs = pd.concat([adatahvg.obs, query],axis=1)\n",
    "    # region = structure name, except that every Isocortex cell is labelled 'Isocortex'\n",
    "    adatahvg.obs['region']=adatahvg.obs['structure'].astype(str)\n",
    "    adatahvg.obs.loc[adatahvg.obs['division']=='Isocortex','region']='Isocortex'\n",
    "    # define model\n",
    "    model = GraphST.GraphST(adatahvg, device=device)\n",
    "    # train model\n",
    "    adatahvg = model.train()\n",
    "    # Create the output directory first so a finished training run is not lost\n",
    "    out_path = f'/data2/usr/graphst_{datasetname}.h5ad'\n",
    "    os.makedirs(os.path.dirname(out_path), exist_ok=True)\n",
    "    adatahvg.write_h5ad(out_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pt2",
   "language": "python",
   "name": "pt2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
