{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1343b71e-4fe5-46da-835e-f6027f95a998",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:32:08.172903Z",
     "iopub.status.busy": "2024-06-04T13:32:08.172227Z",
     "iopub.status.idle": "2024-06-04T13:32:18.451831Z",
     "shell.execute_reply": "2024-06-04T13:32:18.450540Z",
     "shell.execute_reply.started": "2024-06-04T13:32:08.172838Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import tempfile\n",
    "import anndata\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import scanpy as sc\n",
    "import scvi\n",
    "import seaborn as sns\n",
    "import torch\n",
    "scvi.settings.seed = 0\n",
    "print(\"Last run with scvi-tools version:\", scvi.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "39ae5f1e-3b7c-4d05-bf10-2cedfacd9cec",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:32:18.463124Z",
     "iopub.status.busy": "2024-06-04T13:32:18.462631Z",
     "iopub.status.idle": "2024-06-04T13:32:18.491796Z",
     "shell.execute_reply": "2024-06-04T13:32:18.490521Z",
     "shell.execute_reply.started": "2024-06-04T13:32:18.463076Z"
    }
   },
   "outputs": [],
   "source": [
    "# Plot defaults: 6x6 scanpy figures without a frame, seaborn's default theme.\n",
    "sc.set_figure_params(figsize=(6, 6), frameon=False)\n",
    "sns.set_theme()\n",
    "torch.set_float32_matmul_precision(\"high\")\n",
    "# Same directory that the model save/load cells below write to and read from.\n",
    "save_dir = '/data1/usr/results/annotation/scANVI/'\n",
    "\n",
    "# Inline figures: white background, retina format.\n",
    "%config InlineBackend.print_figure_kwargs={\"facecolor\": \"w\"}\n",
    "%config InlineBackend.figure_format=\"retina\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "22d800d6-0bfc-4f36-8180-338c309236fe",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:32:18.495114Z",
     "iopub.status.busy": "2024-06-04T13:32:18.494597Z",
     "iopub.status.idle": "2024-06-04T13:32:18.515708Z",
     "shell.execute_reply": "2024-06-04T13:32:18.514456Z",
     "shell.execute_reply.started": "2024-06-04T13:32:18.495067Z"
    }
   },
   "outputs": [],
   "source": [
    "data = [\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.001.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.002.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.003.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.004.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.005.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.006.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.007.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.008.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.009.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.010.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.011.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.012.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.013.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.014.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.015.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.016.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.017.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.018.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.019.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.020.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.021.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.022.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.023.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.024.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.025.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.026.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.027.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.028.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.029.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.030.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.031.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.032.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.033.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.034.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.035.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.036.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.037.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.038.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.039.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.040.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.041.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.042.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.043.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.044.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.045.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.046.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.047.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.048.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.049.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.050.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.051.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.052.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.053.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.054.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.055.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.056.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.057.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.058.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.059.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.060.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.061.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.062.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.063.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.064.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.065.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.066.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.067.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.068.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.069.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.070.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.071.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.072.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.073.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.074.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.076.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.077.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.078.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.079.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.080.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.081.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.082.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.083.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.084.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.085.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.086.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.087.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.088.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.089.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.090.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.091.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.092.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.093.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.095.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.096.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.097.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.098.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.099.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.100.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.101.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.102.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.103.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.104.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.105.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.106.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.107.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.108.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.109.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.110.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.111.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.113.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.114.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.115.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.116.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.117.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.118.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.119.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.120.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.121.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.122.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.123.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.124.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.125.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.126.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.127.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.128.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.129.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.130.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.131.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.132.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.133.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.134.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.135.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.136.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.137.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.138.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.139.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.140.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.141.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.142.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.143.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.144.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.145.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.146.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.147.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.148.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.149.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.150.h5ad',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "eeea4fec-6928-4fd0-8621-7d353b498129",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:32:45.078153Z",
     "iopub.status.busy": "2024-06-04T13:32:45.077423Z",
     "iopub.status.idle": "2024-06-04T13:34:22.642379Z",
     "shell.execute_reply": "2024-06-04T13:34:22.640584Z",
     "shell.execute_reply.started": "2024-06-04T13:32:45.078094Z"
    }
   },
   "outputs": [],
   "source": [
    "# Load every reference section; the file path is the dict key and therefore\n",
    "# becomes the per-cell label written by sc.concat below.\n",
    "adataall={}\n",
    "for name in data:\n",
    "    adata = sc.read_h5ad(name)\n",
    "    adataall[name]=adata\n",
    "\n",
    "# Stack all sections; obs['source'] records which file each cell came from.\n",
    "adatamerge = sc.concat(adataall,label='source')\n",
    "\n",
    "# Cluster-level annotation: names and colors, both indexed by cluster_alias.\n",
    "annotationtable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_pivoted.csv')\n",
    "annotationtable = annotationtable.set_index('cluster_alias')\n",
    "annotationcolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_color.csv')\n",
    "annotationcolor = annotationcolor.set_index('cluster_alias')\n",
    "annotation = pd.concat([annotationtable,annotationcolor],axis=1)\n",
    "\n",
    "# Parcellation-level annotation: names and colors, indexed by the first CSV column.\n",
    "regiontable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_name.csv',index_col=0)\n",
    "regioncolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_color.csv',index_col=0)\n",
    "\n",
    "regionanno = pd.concat([regiontable,regioncolor],axis=1)\n",
    "\n",
    "ccfv1 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A1ccf_coordinates.csv',index_col=0)\n",
    "\n",
    "#filter\n",
    "# Keep only cells that have a row in the coordinate table.\n",
    "setidx = adatamerge.obs.index[adatamerge.obs.index.isin(ccfv1.index)]\n",
    "adatamerge = adatamerge[setidx].copy()\n",
    "adatamerge.obs['parcellation_index']=ccfv1.loc[adatamerge.obs.index,'parcellation_index']\n",
    "# Drop parcellation indices 0 and 987 with a single mask and materialise the\n",
    "# subset: obs is reassigned below, and doing that on a view makes anndata copy\n",
    "# implicitly and emit an ImplicitModificationWarning.\n",
    "keep = ~adatamerge.obs['parcellation_index'].isin([0, 987])\n",
    "adatamerge = adatamerge[keep.to_numpy()].copy()\n",
    "\n",
    "# Attach the region annotation (looked up by parcellation_index) to each cell.\n",
    "query = regionanno.loc[adatamerge.obs.parcellation_index.values,:]\n",
    "query.index = adatamerge.obs.index\n",
    "adatamerge.obs = pd.concat([adatamerge.obs, query],axis=1)\n",
    "# Attach the cluster annotation (looked up by cluster_alias) to each cell.\n",
    "query = annotation.loc[adatamerge.obs.cluster_alias.values,:]\n",
    "query.index = adatamerge.obs.index\n",
    "adatamerge.obs = pd.concat([adatamerge.obs, query],axis=1)\n",
    "\n",
    "# One {annotation value: color} dict per annotation level.\n",
    "subclass_color = dict(adatamerge.obs[['subclass','subclass_color']].drop_duplicates().values)\n",
    "class_color = dict(adatamerge.obs[['class','class_color']].drop_duplicates().values)\n",
    "supertypecolor = dict(adatamerge.obs[['supertype','supertype_color']].drop_duplicates().values)\n",
    "category_color = dict(adatamerge.obs[['category','category_color']].drop_duplicates().values)\n",
    "division_color = dict(adatamerge.obs[['division','division_color']].drop_duplicates().values)\n",
    "structure_color = dict(adatamerge.obs[['structure','structure_color']].drop_duplicates().values)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "16066de0-61f1-496e-a64c-42be206827f4",
   "metadata": {},
   "source": [
    "# Structure-level annotation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "595f0088-ff31-42a8-992d-42522c77a27d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:35:38.079424Z",
     "iopub.status.busy": "2024-06-04T13:35:38.079119Z",
     "iopub.status.idle": "2024-06-04T13:35:38.416746Z",
     "shell.execute_reply": "2024-06-04T13:35:38.415753Z",
     "shell.execute_reply.started": "2024-06-04T13:35:38.079401Z"
    }
   },
   "outputs": [],
   "source": [
    "# Register adatamerge for scVI: model the 'counts' layer, with obs['source']\n",
    "# (the originating file, set by sc.concat) as the batch key.\n",
    "scvi.model.SCVI.setup_anndata(adatamerge, batch_key=\"source\", layer=\"counts\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6a61c67-1c95-4807-9b8a-f2b434fb4e0f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:36:04.770098Z",
     "iopub.status.busy": "2024-06-04T13:36:04.769393Z",
     "iopub.status.idle": "2024-06-04T13:43:36.993795Z",
     "shell.execute_reply": "2024-06-04T13:43:36.993230Z",
     "shell.execute_reply.started": "2024-06-04T13:36:04.770051Z"
    }
   },
   "outputs": [],
   "source": [
    "# Options for the reference scVI network, collected in one place.\n",
    "scvi_ref_options = {\n",
    "    \"use_layer_norm\": \"both\",\n",
    "    \"use_batch_norm\": \"none\",\n",
    "    \"encode_covariates\": True,\n",
    "    \"dropout_rate\": 0.2,\n",
    "    \"n_layers\": 2,\n",
    "}\n",
    "scvi_ref = scvi.model.SCVI(adatamerge, **scvi_ref_options)\n",
    "# No arguments: training runs with the library's defaults.\n",
    "scvi_ref.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f4712521-205f-490c-b341-990544f7369a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:43:36.995420Z",
     "iopub.status.busy": "2024-06-04T13:43:36.995140Z",
     "iopub.status.idle": "2024-06-04T13:43:37.139206Z",
     "shell.execute_reply": "2024-06-04T13:43:37.138618Z",
     "shell.execute_reply.started": "2024-06-04T13:43:36.995397Z"
    }
   },
   "outputs": [],
   "source": [
    "# Persist the reference scVI model under save_dir (defined in the config cell)\n",
    "# with the 'scvi' prefix, replacing any earlier save.\n",
    "scvi_ref.save(save_dir, prefix='scvi', overwrite=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "9b117d8e-dd18-49cb-a2fb-0b61b6a53353",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:43:37.140225Z",
     "iopub.status.busy": "2024-06-04T13:43:37.139967Z",
     "iopub.status.idle": "2024-06-04T13:43:45.971362Z",
     "shell.execute_reply": "2024-06-04T13:43:45.970774Z",
     "shell.execute_reply.started": "2024-06-04T13:43:37.140206Z"
    }
   },
   "outputs": [],
   "source": [
    "# Build a scANVI model from the trained scVI reference, using obs['structure']\n",
    "# as the label column and \"Unknown\" as the marker for unlabeled cells.\n",
    "scanvi_model = scvi.model.SCANVI.from_scvi_model(\n",
    "    scvi_ref,\n",
    "    adata=adatamerge,\n",
    "    unlabeled_category=\"Unknown\",\n",
    "    labels_key='structure',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef6a3494-5f68-4fc9-a2d2-fe4ea53a20e0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:43:45.973047Z",
     "iopub.status.busy": "2024-06-04T13:43:45.972642Z",
     "iopub.status.idle": "2024-06-04T15:58:16.809532Z",
     "shell.execute_reply": "2024-06-04T15:58:16.808875Z",
     "shell.execute_reply.started": "2024-06-04T13:43:45.973026Z"
    }
   },
   "outputs": [],
   "source": [
    "# Train the structure-level scANVI model for at most 10 epochs.\n",
    "scanvi_model.train(max_epochs=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "97dd17ae-d139-4b28-885d-651b99db4e0a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T15:58:16.810893Z",
     "iopub.status.busy": "2024-06-04T15:58:16.810593Z",
     "iopub.status.idle": "2024-06-04T15:58:16.972104Z",
     "shell.execute_reply": "2024-06-04T15:58:16.971494Z",
     "shell.execute_reply.started": "2024-06-04T15:58:16.810871Z"
    }
   },
   "outputs": [],
   "source": [
    "# Persist the structure-level scANVI model under save_dir (defined in the\n",
    "# config cell) with the 'structure' prefix, replacing any earlier save.\n",
    "scanvi_model.save(save_dir, prefix='structure', overwrite=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "39c18327-234d-4a4c-aa74-d30e7ce2aac7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:54:52.808276Z",
     "iopub.status.busy": "2024-06-05T01:54:52.807443Z",
     "iopub.status.idle": "2024-06-05T01:55:03.509204Z",
     "shell.execute_reply": "2024-06-05T01:55:03.507965Z",
     "shell.execute_reply.started": "2024-06-05T01:54:52.808215Z"
    }
   },
   "outputs": [],
   "source": [
    "# Reload the structure-level scANVI model from save_dir (defined in the config\n",
    "# cell) against the reference AnnData, on the GPU.\n",
    "scanvi_model = scvi.model.SCANVI.load(\n",
    "    save_dir,\n",
    "    prefix='structure',\n",
    "    adata=adatamerge,\n",
    "    accelerator='gpu',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "97525e29-b3b3-464c-983f-0f593d275fe1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:55:16.569853Z",
     "iopub.status.busy": "2024-06-05T01:55:16.569062Z",
     "iopub.status.idle": "2024-06-05T01:55:17.304184Z",
     "shell.execute_reply": "2024-06-05T01:55:17.303182Z",
     "shell.execute_reply.started": "2024-06-05T01:55:16.569791Z"
    }
   },
   "outputs": [],
   "source": [
    "test = sc.read_h5ad('/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.030.h5ad')\n",
    "# exp2(X) - 1 inverts a log2(x + 1) transform.\n",
    "# NOTE(review): assumes test.X is a dense array - confirm.\n",
    "cntmtx = np.exp2(test.X)-1\n",
    "# Per cell, the smallest positive value is used as the size of a single count.\n",
    "min_nonzero_values = np.where(cntmtx > 0, cntmtx, np.inf).min(axis=1, keepdims=True)\n",
    "# Cells without any positive entry get a divisor of 1 so their counts stay 0.\n",
    "# The previous np.divide(..., where=...) call had no out= array, which leaves\n",
    "# the unselected entries uninitialised.\n",
    "min_nonzero_values[np.isinf(min_nonzero_values)] = 1\n",
    "# Round before casting: astype(int) truncates, so a ratio that comes out as\n",
    "# 2.9999998 because of float error would otherwise be stored as 2.\n",
    "result = np.rint(cntmtx / min_nonzero_values)\n",
    "test.layers['counts']=result.astype(int)\n",
    "# Batch label of the query cells (the reference uses file paths here).\n",
    "test.obs['source']='test'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "56b2a6ec-4201-48f7-b9f2-f3da1a069620",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:55:17.305715Z",
     "iopub.status.busy": "2024-06-05T01:55:17.305401Z",
     "iopub.status.idle": "2024-06-05T01:55:19.122104Z",
     "shell.execute_reply": "2024-06-05T01:55:19.121043Z",
     "shell.execute_reply.started": "2024-06-05T01:55:17.305695Z"
    }
   },
   "outputs": [],
   "source": [
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "#filter\n",
    "# Keep only cells that have a row in the coordinate table.\n",
    "setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "test = test[setidx].copy()\n",
    "test.obs['parcellation_index']=ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "# Drop parcellation indices 0 and 987 with a single mask and materialise the\n",
    "# subset: obs is reassigned below, and doing that on a view makes anndata copy\n",
    "# implicitly and emit an ImplicitModificationWarning.\n",
    "keep = ~test.obs['parcellation_index'].isin([0, 987])\n",
    "test = test[keep.to_numpy()].copy()\n",
    "\n",
    "# Attach the region annotation (looked up by parcellation_index) to each cell.\n",
    "query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "query.index = test.obs.index\n",
    "test.obs = pd.concat([test.obs, query],axis=1)\n",
    "# Attach the cluster annotation (looked up by cluster_alias) to each cell.\n",
    "query = annotation.loc[test.obs.cluster_alias.values,:]\n",
    "query.index = test.obs.index\n",
    "test.obs = pd.concat([test.obs, query],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e2028bf-faa9-4e2f-8ec5-a5bb9dfbf321",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:55:20.058713Z",
     "iopub.status.busy": "2024-06-05T01:55:20.058335Z",
     "iopub.status.idle": "2024-06-05T01:55:20.073276Z",
     "shell.execute_reply": "2024-06-05T01:55:20.072576Z",
     "shell.execute_reply.started": "2024-06-05T01:55:20.058692Z"
    }
   },
   "outputs": [],
   "source": [
    "# Prepare `test` as a query for the reference model. The return value is not\n",
    "# used, so `test` is expected to be adjusted in place.\n",
    "scvi.model.SCANVI.prepare_query_anndata(test, scanvi_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "801e6249-b7aa-4b73-834a-6fdd9a765542",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:55:22.105431Z",
     "iopub.status.busy": "2024-06-05T01:55:22.104722Z",
     "iopub.status.idle": "2024-06-05T01:55:22.327837Z",
     "shell.execute_reply": "2024-06-05T01:55:22.326919Z",
     "shell.execute_reply.started": "2024-06-05T01:55:22.105373Z"
    }
   },
   "outputs": [],
   "source": [
    "# Create the query model for `test` from the trained reference scANVI model.\n",
    "# NOTE(review): test.obs['structure'] already holds the annotated labels here\n",
    "# (joined from regionanno in the annotation cell above). If scANVI uses them\n",
    "# while training the query model, the classification report further down is\n",
    "# not a held-out evaluation - confirm this is intended.\n",
    "scanvi_query = scvi.model.SCANVI.load_query_data(test, scanvi_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a8766eb-4440-47bc-aaa6-992b3bdd10d3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:55:23.756139Z",
     "iopub.status.busy": "2024-06-05T01:55:23.755795Z",
     "iopub.status.idle": "2024-06-05T02:10:01.602862Z",
     "shell.execute_reply": "2024-06-05T02:10:01.601945Z",
     "shell.execute_reply.started": "2024-06-05T01:55:23.756118Z"
    }
   },
   "outputs": [],
   "source": [
    "# Train the query model: at most 100 epochs, weight decay disabled,\n",
    "# validation every 10 epochs.\n",
    "scanvi_query.train(\n",
    "    max_epochs=100,\n",
    "    plan_kwargs={\"weight_decay\": 0.0},\n",
    "    check_val_every_n_epoch=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "6fb04778-cf75-4278-af96-2beaf0de965a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T02:10:01.604715Z",
     "iopub.status.busy": "2024-06-05T02:10:01.604336Z",
     "iopub.status.idle": "2024-06-05T02:10:02.325477Z",
     "shell.execute_reply": "2024-06-05T02:10:02.324526Z",
     "shell.execute_reply.started": "2024-06-05T02:10:01.604691Z"
    }
   },
   "outputs": [],
   "source": [
    "# Store the query model's predicted label for every cell of `test`.\n",
    "test.obs['scANVI_predictions'] = scanvi_query.predict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "2865235d-c4e2-4015-a9a0-7ab5b8f9dd93",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T02:10:02.326741Z",
     "iopub.status.busy": "2024-06-05T02:10:02.326420Z",
     "iopub.status.idle": "2024-06-05T02:10:02.330744Z",
     "shell.execute_reply": "2024-06-05T02:10:02.329927Z",
     "shell.execute_reply.started": "2024-06-05T02:10:02.326722Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ad38cb7-e91f-4b97-9822-85aa2de14b41",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T02:10:02.332217Z",
     "iopub.status.busy": "2024-06-05T02:10:02.331940Z",
     "iopub.status.idle": "2024-06-05T02:10:03.257720Z",
     "shell.execute_reply": "2024-06-05T02:10:03.256997Z",
     "shell.execute_reply.started": "2024-06-05T02:10:02.332193Z"
    }
   },
   "outputs": [],
   "source": [
    "# Per-class metrics: annotated 'structure' (truth) vs. scANVI prediction.\n",
    "print(classification_report(test.obs['structure'],test.obs['scANVI_predictions']))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a921ff7-211d-4c52-9ffd-6a4969843928",
   "metadata": {},
   "source": [
    "# Division-level annotation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c40fe699-8615-42bb-9542-de928d363fbd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T02:13:21.895648Z",
     "iopub.status.busy": "2024-06-05T02:13:21.895243Z",
     "iopub.status.idle": "2024-06-05T02:13:30.872194Z",
     "shell.execute_reply": "2024-06-05T02:13:30.871554Z",
     "shell.execute_reply.started": "2024-06-05T02:13:21.895620Z"
    }
   },
   "outputs": [],
   "source": [
    "# Reload the reference scVI model (prefix 'scvi') from save_dir (defined in\n",
    "# the config cell) against the reference AnnData, on the GPU.\n",
    "scvi_ref = scvi.model.SCVI.load(\n",
    "    save_dir,\n",
    "    prefix='scvi',\n",
    "    adata=adatamerge,\n",
    "    accelerator='gpu',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "f8153ce1-dba5-4166-8446-a3c585379b33",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T02:13:30.873664Z",
     "iopub.status.busy": "2024-06-05T02:13:30.873324Z",
     "iopub.status.idle": "2024-06-05T02:13:37.387459Z",
     "shell.execute_reply": "2024-06-05T02:13:37.386634Z",
     "shell.execute_reply.started": "2024-06-05T02:13:30.873643Z"
    }
   },
   "outputs": [],
   "source": [
    "# Build a scANVI model from the scVI reference, this time using obs['division']\n",
    "# as the label column and \"Unknown\" as the marker for unlabeled cells.\n",
    "scanvi_model = scvi.model.SCANVI.from_scvi_model(\n",
    "    scvi_ref,\n",
    "    adata=adatamerge,\n",
    "    unlabeled_category=\"Unknown\",\n",
    "    labels_key='division',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "246c1ff8-229d-4cf4-ba36-46029811080b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T02:13:37.388941Z",
     "iopub.status.busy": "2024-06-05T02:13:37.388560Z",
     "iopub.status.idle": "2024-06-05T03:24:47.069171Z",
     "shell.execute_reply": "2024-06-05T03:24:47.068603Z",
     "shell.execute_reply.started": "2024-06-05T02:13:37.388918Z"
    }
   },
   "outputs": [],
   "source": [
    "# Train the division-level scANVI model for at most 10 epochs.\n",
    "scanvi_model.train(max_epochs=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "7c0e3493-38a5-4db3-8009-acf67c087037",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T03:24:47.071097Z",
     "iopub.status.busy": "2024-06-05T03:24:47.070707Z",
     "iopub.status.idle": "2024-06-05T03:24:47.237668Z",
     "shell.execute_reply": "2024-06-05T03:24:47.236974Z",
     "shell.execute_reply.started": "2024-06-05T03:24:47.071074Z"
    }
   },
   "outputs": [],
   "source": [
    "# Persist the division-level scANVI model under save_dir (defined in the\n",
    "# config cell) with the 'division' prefix, replacing any earlier save.\n",
    "scanvi_model.save(save_dir, prefix='division', overwrite=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4ce3130-b460-4827-a564-a2616fa8742a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T03:24:47.238829Z",
     "iopub.status.busy": "2024-06-05T03:24:47.238519Z",
     "iopub.status.idle": "2024-06-05T03:24:57.763887Z",
     "shell.execute_reply": "2024-06-05T03:24:57.762652Z",
     "shell.execute_reply.started": "2024-06-05T03:24:47.238809Z"
    }
   },
   "outputs": [],
   "source": [
    "# Reload the division-level scANVI model from save_dir (defined in the config\n",
    "# cell) against the reference AnnData, on the GPU.\n",
    "scanvi_model = scvi.model.SCANVI.load(\n",
    "    save_dir,\n",
    "    prefix='division',\n",
    "    adata=adatamerge,\n",
    "    accelerator='gpu',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "951f957b-8c7e-4cb2-bb12-6efa736ad492",
   "metadata": {},
   "outputs": [],
   "source": [
    "test = sc.read_h5ad('/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.030.h5ad')\n",
    "# exp2(X) - 1 inverts a log2(x + 1) transform.\n",
    "# NOTE(review): assumes test.X is a dense array - confirm.\n",
    "cntmtx = np.exp2(test.X)-1\n",
    "# Per cell, the smallest positive value is used as the size of a single count.\n",
    "min_nonzero_values = np.where(cntmtx > 0, cntmtx, np.inf).min(axis=1, keepdims=True)\n",
    "# Cells without any positive entry get a divisor of 1 so their counts stay 0.\n",
    "# The previous np.divide(..., where=...) call had no out= array, which leaves\n",
    "# the unselected entries uninitialised.\n",
    "min_nonzero_values[np.isinf(min_nonzero_values)] = 1\n",
    "# Round before casting: astype(int) truncates, so a ratio that comes out as\n",
    "# 2.9999998 because of float error would otherwise be stored as 2.\n",
    "result = np.rint(cntmtx / min_nonzero_values)\n",
    "test.layers['counts']=result.astype(int)\n",
    "# Attach the cluster annotation (looked up by cluster_alias) to each cell.\n",
    "query = annotation.loc[test.obs.cluster_alias.values,:]\n",
    "query.index = test.obs.index\n",
    "test.obs = pd.concat([test.obs, query],axis=1)\n",
    "# Batch label of the query cells (the reference uses file paths here).\n",
    "test.obs['source']='new'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "758942b4-f4fb-43ee-ba98-12dd14277df1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare `test` as a query for the division model (return value unused, so\n",
    "# `test` is expected to be adjusted in place), then create the query model.\n",
    "scvi.model.SCANVI.prepare_query_anndata(test, scanvi_model)\n",
    "scanvi_query = scvi.model.SCANVI.load_query_data(test, scanvi_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d5cca63-8e53-40ea-953c-b1f3e38aeed8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the query model: at most 20 epochs, weight decay disabled,\n",
    "# validation every 10 epochs.\n",
    "scanvi_query.train(\n",
    "    max_epochs=20,\n",
    "    plan_kwargs={\"weight_decay\": 0.0},\n",
    "    check_val_every_n_epoch=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3fd9f0b-7446-4a36-9c48-73053d908b4e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the query model's predicted label for every cell of `test`.\n",
    "test.obs['scANVI_predictions'] = scanvi_query.predict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1db69516-c01a-43b7-9513-c3b99f3dde86",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d85d3fc-83fc-4983-b1eb-46d72be71ede",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# The query model in this section comes from the scANVI model trained with\n",
    "# labels_key='division', so its predictions are divisions. Score them against\n",
    "# division labels; 'supertype' is a different label vocabulary.\n",
    "# The truth is looked up here with the same rules as the structure section:\n",
    "# the cell must have a row in ccfa2, and parcellation indices 0 and 987 are\n",
    "# excluded.\n",
    "# NOTE(review): assumes regionanno carries the 'division' column - confirm.\n",
    "scored_cells = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "parcellation = ccfa2.loc[scored_cells, 'parcellation_index']\n",
    "parcellation = parcellation[~parcellation.isin([0, 987])]\n",
    "division_true = regionanno.loc[parcellation.values, 'division'].values\n",
    "division_pred = test.obs.loc[parcellation.index, 'scANVI_predictions']\n",
    "print(classification_report(division_true, division_pred))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04dfe398-61c0-4550-8b9c-2e4097dc68bf",
   "metadata": {},
   "source": [
    "# Predict Loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "0d1eccc0-70be-430f-9aea-a7e2477138e2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T08:52:06.534293Z",
     "iopub.status.busy": "2024-06-05T08:52:06.533337Z",
     "iopub.status.idle": "2024-06-05T08:52:06.548851Z",
     "shell.execute_reply": "2024-06-05T08:52:06.547869Z",
     "shell.execute_reply.started": "2024-06-05T08:52:06.534234Z"
    }
   },
   "outputs": [],
   "source": [
    "datalist = ['/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.001.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.002.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.003.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.004.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.005.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.006.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.007.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.008.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.009.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.010.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.011.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.012.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.013.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.014.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.015.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.016.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.017.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.018.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.019.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.020.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.021.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.022.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.023.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.025.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.026.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.027.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.028.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.030.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.031.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.032.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.033.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.034.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.035.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.036.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.037.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.039.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.040.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.041.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.042.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.044.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.045.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.046.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.047.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.048.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.049.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.050.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.051.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.052.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.053.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.054.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.055.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.056.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.057.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.058.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.059.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.060.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.061.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.062.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.063.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.065.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.066.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.067.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.070.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.071.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.072.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.073.h5ad']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a1ac72a-c0be-42a3-b454-7da453f7bd08",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T08:52:08.224653Z",
     "iopub.status.busy": "2024-06-05T08:52:08.224395Z",
     "iopub.status.idle": "2024-06-05T08:52:18.887701Z",
     "shell.execute_reply": "2024-06-05T08:52:18.886698Z",
     "shell.execute_reply.started": "2024-06-05T08:52:08.224635Z"
    }
   },
   "outputs": [],
   "source": [
    "scanvi_model = scvi.model.SCANVI.load('/data1/usr/results/annotation/scANVI/',prefix='structure',adata=adatamerge,accelerator='gpu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3bed73f1-a885-4b16-ba19-fa2eecbdde6a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T08:55:36.062057Z",
     "iopub.status.busy": "2024-06-05T08:55:36.061324Z",
     "iopub.status.idle": "2024-06-05T09:48:20.327511Z",
     "shell.execute_reply": "2024-06-05T09:48:20.326848Z",
     "shell.execute_reply.started": "2024-06-05T08:55:36.061997Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "# dataname = datalist[0]\n",
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "for dataname in datalist:\n",
    "    dname = dataname.split('Sgeneration/')[-1]\n",
    "    savepath = f'/data1/usr/results/annotation/scANVI/mouse2_structure/{dname[:-5]}.csv'\n",
    "    # if os.path.exists(savepath):\n",
    "    #     print(f'{dataname} exist')\n",
    "    #     continue\n",
    "    test = sc.read_h5ad(dataname)\n",
    "\n",
    "    #filter\n",
    "    setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(dname)\n",
    "        continue\n",
    "    test = test[setidx].copy()\n",
    "    test.obs['parcellation_index']=ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "    test = test[test.obs['parcellation_index'] !=0]\n",
    "    test = test[test.obs['parcellation_index'] !=987]\n",
    "    query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "    query.index = test.obs.index\n",
    "    test.obs = pd.concat([test.obs, query],axis=1)\n",
    "\n",
    "    cntmtx = np.exp2(test.X)-1\n",
    "    min_nonzero_values = np.ma.masked_equal(cntmtx, 0).min(axis=1, keepdims=True)\n",
    "    result = np.divide(cntmtx, min_nonzero_values, where=min_nonzero_values != 0)\n",
    "    test.layers['counts']=result.data.astype(int)\n",
    "    \n",
    "    test.obs['source']='new'\n",
    "    test.obs_names_make_unique()\n",
    "    scvi.model.SCANVI.prepare_query_anndata(test, scanvi_model)\n",
    "    scanvi_query = scvi.model.SCANVI.load_query_data(test, scanvi_model)\n",
    "\n",
    "    scanvi_query.train(\n",
    "        max_epochs=10,\n",
    "        plan_kwargs={\"weight_decay\": 0.0},\n",
    "    )\n",
    "    test.obs['scANVI_predictions'] = scanvi_query.predict()\n",
    "    structuredf = pd.DataFrame(classification_report(test.obs['structure'],test.obs['scANVI_predictions'],output_dict=True)).T    \n",
    "    structuredf.to_csv(savepath)\n",
    "    scanvi_query.predict(soft=True).to_csv(f'/data1/usr/results/annotation/scANVI/mouse2_structure/{dname[:-5]}_pred.csv')\n",
    "    print(savepath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a742b2e9-e599-488f-af1a-77c0c3e1667e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T09:48:20.329332Z",
     "iopub.status.busy": "2024-06-05T09:48:20.328918Z",
     "iopub.status.idle": "2024-06-05T09:48:31.319104Z",
     "shell.execute_reply": "2024-06-05T09:48:31.318256Z",
     "shell.execute_reply.started": "2024-06-05T09:48:20.329309Z"
    }
   },
   "outputs": [],
   "source": [
    "scanvi_model = scvi.model.SCANVI.load('/data1/usr/results/annotation/scANVI/',prefix='division',adata=adatamerge,accelerator='gpu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6d41a55-9007-4211-b555-9069f0fe40a2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T09:48:31.320613Z",
     "iopub.status.busy": "2024-06-05T09:48:31.320242Z",
     "iopub.status.idle": "2024-06-05T10:17:08.824190Z",
     "shell.execute_reply": "2024-06-05T10:17:08.823165Z",
     "shell.execute_reply.started": "2024-06-05T09:48:31.320588Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "# dataname = datalist[0]\n",
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "for dataname in datalist:\n",
    "    dname = dataname.split('Sgeneration/')[-1]\n",
    "    savepath = f'/data1/usr/results/annotation/scANVI/mouse2_division/{dname[:-5]}.csv'\n",
    "    # if os.path.exists(savepath):\n",
    "    #     print(f'{dataname} exist')\n",
    "    #     continue\n",
    "    test = sc.read_h5ad(dataname)\n",
    "\n",
    "    #filter\n",
    "    setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(dname)\n",
    "        continue\n",
    "    test = test[setidx].copy()\n",
    "    test.obs['parcellation_index']=ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "    test = test[test.obs['parcellation_index'] !=0]\n",
    "    test = test[test.obs['parcellation_index'] !=987]\n",
    "    query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "    query.index = test.obs.index\n",
    "    test.obs = pd.concat([test.obs, query],axis=1)\n",
    "\n",
    "    cntmtx = np.exp2(test.X)-1\n",
    "    min_nonzero_values = np.ma.masked_equal(cntmtx, 0).min(axis=1, keepdims=True)\n",
    "    result = np.divide(cntmtx, min_nonzero_values, where=min_nonzero_values != 0)\n",
    "    test.layers['counts']=result.data.astype(int)\n",
    "    \n",
    "    test.obs['source']='new'\n",
    "    test.obs_names_make_unique()\n",
    "    scvi.model.SCANVI.prepare_query_anndata(test, scanvi_model)\n",
    "    scanvi_query = scvi.model.SCANVI.load_query_data(test, scanvi_model)\n",
    "\n",
    "    scanvi_query.train(\n",
    "        max_epochs=10,\n",
    "        plan_kwargs={\"weight_decay\": 0.0},\n",
    "    )\n",
    "    test.obs['scANVI_predictions'] = scanvi_query.predict()\n",
    "    divisiondf = pd.DataFrame(classification_report(test.obs['division'],test.obs['scANVI_predictions'],output_dict=True)).T    \n",
    "    divisiondf.to_csv(savepath)\n",
    "    scanvi_query.predict(soft=True).to_csv(f'/data1/usr/results/annotation/scANVI/mouse2_division/{dname[:-5]}_pred.csv')\n",
    "    print(savepath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "07fc47c2-f849-46ad-b23a-aedd91cc3ac9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T11:36:08.319411Z",
     "iopub.status.busy": "2024-06-05T11:36:08.318186Z",
     "iopub.status.idle": "2024-06-05T11:37:41.562366Z",
     "shell.execute_reply": "2024-06-05T11:37:41.561226Z",
     "shell.execute_reply.started": "2024-06-05T11:36:08.319340Z"
    }
   },
   "outputs": [],
   "source": [
    "data = [\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.001.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.002.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.003.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.004.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.005.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.006.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.007.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.008.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.009.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.010.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.011.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.012.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.013.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.014.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.015.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.016.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.017.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.018.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.019.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.020.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.021.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.022.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.023.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.024.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.025.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.026.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.027.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.028.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.029.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.030.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.031.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.032.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.033.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.034.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.035.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.036.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.037.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.038.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.039.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.040.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.041.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.042.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.043.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.044.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.045.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.046.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.047.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.048.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.049.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.050.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.051.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.052.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.053.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.054.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.055.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.056.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.057.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.058.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.059.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.060.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.061.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.062.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.063.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.064.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.065.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.066.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.067.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.068.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.069.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.070.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.071.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.072.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.073.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.074.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.076.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.077.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.078.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.079.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.080.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.081.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.082.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.083.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.084.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.085.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.086.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.087.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.088.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.089.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.090.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.091.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.092.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.093.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.095.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.096.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.097.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.098.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.099.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.100.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.101.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.102.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.103.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.104.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.105.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.106.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.107.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.108.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.109.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.110.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.111.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.113.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.114.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.115.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.116.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.117.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.118.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.119.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.120.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.121.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.122.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.123.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.124.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.125.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.126.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.127.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.128.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.129.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.130.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.131.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.132.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.133.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.134.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.135.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.136.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.137.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.138.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.139.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.140.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.141.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.142.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.143.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.144.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.145.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.146.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.147.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.148.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.149.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-1.150.h5ad',\n",
    "]\n",
    "\n",
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A1ccf_coordinates.csv',index_col=0)\n",
    "for dataname in data:\n",
    "    dname = dataname.split('Sgeneration/MERFISH/')[-1]\n",
    "    savepath = f'/data1/usr/results/annotation/traindata_structure/{dname[:-5]}.h5ad'\n",
    "    test = sc.read_h5ad(dataname)\n",
    "\n",
    "    #filter\n",
    "    setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(dname)\n",
    "        continue\n",
    "    test = test[setidx].copy()\n",
    "    test.obs['parcellation_index']=ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "    test = test[test.obs['parcellation_index'] !=0]\n",
    "    test = test[test.obs['parcellation_index'] !=987]\n",
    "    query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "    query.index = test.obs.index\n",
    "    test.obs = pd.concat([test.obs, query],axis=1)\n",
    "    test.write_h5ad(savepath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4e727b68-47e0-433e-a90c-2fd064868496",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T11:40:16.737786Z",
     "iopub.status.busy": "2024-06-05T11:40:16.736969Z",
     "iopub.status.idle": "2024-06-05T11:40:16.836247Z",
     "shell.execute_reply": "2024-06-05T11:40:16.835143Z",
     "shell.execute_reply.started": "2024-06-05T11:40:16.737731Z"
    }
   },
   "outputs": [],
   "source": [
    "for x in sorted(os.listdir('/data1/usr/results/annotation/traindata_structure/')):\n",
    "    print('- /data1/usr/results/annotation/traindata_structure/'+x)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "niche",
   "language": "python",
   "name": "niche"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
