{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1343b71e-4fe5-46da-835e-f6027f95a998",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:22:05.419743Z",
     "iopub.status.busy": "2024-06-04T13:22:05.419327Z",
     "iopub.status.idle": "2024-06-04T13:22:09.303115Z",
     "shell.execute_reply": "2024-06-04T13:22:09.302159Z",
     "shell.execute_reply.started": "2024-06-04T13:22:05.419717Z"
    }
   },
   "outputs": [],
   "source": [
    "%pylab inline\n",
    "import celltypist\n",
    "import pandas as pd\n",
    "import scanpy as sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "22d800d6-0bfc-4f36-8180-338c309236fe",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:22:09.305107Z",
     "iopub.status.busy": "2024-06-04T13:22:09.304541Z",
     "iopub.status.idle": "2024-06-04T13:22:09.315752Z",
     "shell.execute_reply": "2024-06-04T13:22:09.314780Z",
     "shell.execute_reply.started": "2024-06-04T13:22:09.305083Z"
    }
   },
   "outputs": [],
   "source": [
    "# Paths of the Zhuang-ABCA-1 sections that are read and merged for training.\n",
    "# Section numbers run from 001 to 150; 075, 094 and 112 have no entry in the list.\n",
    "MERFISH_DIR = '/data2/usr/Sgeneration/MERFISH'\n",
    "SKIPPED_SECTIONS = {75, 94, 112}\n",
    "\n",
    "data = [\n",
    "    f'{MERFISH_DIR}/Zhuang-ABCA-1.{section:03d}.h5ad'\n",
    "    for section in range(1, 151)\n",
    "    if section not in SKIPPED_SECTIONS\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0c3af05e-7cdb-4e43-9d70-6ad144ca9cd6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:25:22.795372Z",
     "iopub.status.busy": "2024-06-04T13:25:22.794541Z",
     "iopub.status.idle": "2024-06-04T13:25:43.339460Z",
     "shell.execute_reply": "2024-06-04T13:25:43.338093Z",
     "shell.execute_reply.started": "2024-06-04T13:25:22.795307Z"
    }
   },
   "outputs": [],
   "source": [
    "# Read every section listed in `data` into a dict keyed by the path it was read from.\n",
    "adataall={}\n",
    "for name in data:\n",
    "    adata = sc.read_h5ad(name)\n",
    "    adataall[name]=adata\n",
    "\n",
    "# Concatenate all sections into one object; label='source' requests a per-cell column\n",
    "# holding the dict key (here: the file path) of the section each cell came from.\n",
    "adatamerge = sc.concat(adataall,label='source')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "4b554c9b-f3da-4c60-a7c3-4841c7fa9edb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:26:20.284655Z",
     "iopub.status.busy": "2024-06-04T13:26:20.283894Z",
     "iopub.status.idle": "2024-06-04T13:26:35.740591Z",
     "shell.execute_reply": "2024-06-04T13:26:35.738368Z",
     "shell.execute_reply.started": "2024-06-04T13:26:20.284594Z"
    }
   },
   "outputs": [],
   "source": [
    "# Cluster annotation: the label table and the colour table are both indexed by\n",
    "# cluster_alias and joined column-wise.\n",
    "annotationtable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_pivoted.csv').set_index('cluster_alias')\n",
    "annotationcolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/cluster_to_cluster_annotation_membership_color.csv').set_index('cluster_alias')\n",
    "annotation = pd.concat([annotationtable,annotationcolor],axis=1)\n",
    "\n",
    "# Region (parcellation) annotation: names and colours, indexed by the first CSV column.\n",
    "regiontable = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_name.csv',index_col=0)\n",
    "regioncolor = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/parcellation_to_parcellation_term_membership_color.csv',index_col=0)\n",
    "regionanno = pd.concat([regiontable,regioncolor],axis=1)\n",
    "\n",
    "# Per-cell CCF table; only its parcellation_index column is used below.\n",
    "ccfv1 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A1ccf_coordinates.csv',index_col=0)\n",
    "\n",
    "# Filter 1: keep only cells that have a row in the CCF table, then attach their parcellation index.\n",
    "setidx = adatamerge.obs.index[adatamerge.obs.index.isin(ccfv1.index)]\n",
    "adatamerge = adatamerge[setidx].copy()\n",
    "adatamerge.obs['parcellation_index'] = ccfv1.loc[adatamerge.obs.index,'parcellation_index']\n",
    "\n",
    "# Filter 2: drop cells whose parcellation index is 0 or 987.\n",
    "# NOTE(review): the meaning of 0 and 987 is not documented here - presumably unassigned or\n",
    "# unwanted regions; confirm against the parcellation table.\n",
    "# One mask plus an explicit copy leaves adatamerge as a real object instead of a view of a\n",
    "# view, so the .obs assignment in the next cell does not have to materialise it implicitly.\n",
    "excluded_parcellations = [0, 987]\n",
    "keep = ~adatamerge.obs['parcellation_index'].isin(excluded_parcellations)\n",
    "adatamerge = adatamerge[keep].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "58aaeef5-6df8-4352-965c-1b21660fc2ea",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:26:35.744177Z",
     "iopub.status.busy": "2024-06-04T13:26:35.743713Z",
     "iopub.status.idle": "2024-06-04T13:26:48.134050Z",
     "shell.execute_reply": "2024-06-04T13:26:48.133109Z",
     "shell.execute_reply.started": "2024-06-04T13:26:35.744143Z"
    }
   },
   "outputs": [],
   "source": [
    "def label_to_color(obs, level):\n",
    "    \"\"\"Return a dict mapping each label in obs[level] to its value in obs[level + '_color'].\"\"\"\n",
    "    return dict(obs[[level, level + '_color']].drop_duplicates().values)\n",
    "\n",
    "\n",
    "# Region annotation: one regionanno row per cell, looked up by parcellation_index and\n",
    "# re-indexed to the cell names so it can be appended column-wise.\n",
    "# Columns that already exist in obs are dropped first, so re-running this cell replaces\n",
    "# them instead of appending duplicate columns (which would break the dict() calls below).\n",
    "query = regionanno.loc[adatamerge.obs.parcellation_index.values,:]\n",
    "query.index = adatamerge.obs.index\n",
    "adatamerge.obs = pd.concat([adatamerge.obs.drop(columns=query.columns, errors='ignore'), query],axis=1)\n",
    "\n",
    "# Cluster annotation: same pattern, looked up by cluster_alias.\n",
    "query = annotation.loc[adatamerge.obs.cluster_alias.values,:]\n",
    "query.index = adatamerge.obs.index\n",
    "adatamerge.obs = pd.concat([adatamerge.obs.drop(columns=query.columns, errors='ignore'), query],axis=1)\n",
    "\n",
    "# Label -> colour lookups for each annotation level (used as plotting palettes later).\n",
    "subclass_color = label_to_color(adatamerge.obs, 'subclass')\n",
    "class_color = label_to_color(adatamerge.obs, 'class')\n",
    "supertypecolor = label_to_color(adatamerge.obs, 'supertype')\n",
    "category_color = label_to_color(adatamerge.obs, 'category')\n",
    "division_color = label_to_color(adatamerge.obs, 'division')\n",
    "structure_color = label_to_color(adatamerge.obs, 'structure')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "8cf42d1e-9d64-4d3a-83f4-74a1151e6cbd",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:27:15.789413Z",
     "iopub.status.busy": "2024-06-04T13:27:15.788645Z",
     "iopub.status.idle": "2024-06-04T13:27:30.186125Z",
     "shell.execute_reply": "2024-06-04T13:27:30.185138Z",
     "shell.execute_reply.started": "2024-06-04T13:27:15.789353Z"
    }
   },
   "outputs": [],
   "source": [
    "# Work on a copy so adatamerge keeps its original expression matrix.\n",
    "adatamerge1e4 = adatamerge.copy()\n",
    "\n",
    "# 2**X - 1 inverts a log2(x + 1) transform (assumes X is stored that way - TODO confirm).\n",
    "# The result is divided by 208, scaled by 1e4 and stored as log1p values.\n",
    "# NOTE(review): the constant 208 is not explained in this notebook - confirm what it normalises by.\n",
    "normexp = (np.exp2(adatamerge.X) - 1) / 208 * 1e4\n",
    "adatamerge1e4.X = np.log1p(normexp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "628eb32f-93c1-4ee4-890c-4102d1430b77",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:28:25.315795Z",
     "iopub.status.busy": "2024-06-04T13:28:25.315102Z",
     "iopub.status.idle": "2024-06-04T13:28:25.457578Z",
     "shell.execute_reply": "2024-06-04T13:28:25.456771Z",
     "shell.execute_reply.started": "2024-06-04T13:28:25.315737Z"
    }
   },
   "outputs": [],
   "source": [
    "# Number of distinct 'structure' labels; a missing value, if present, counts as one label.\n",
    "adatamerge1e4.obs['structure'].nunique(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7956aae-5223-4e8e-a281-b69a527223ac",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T13:28:38.198998Z",
     "iopub.status.busy": "2024-06-04T13:28:38.198249Z",
     "iopub.status.idle": "2024-06-04T15:38:01.099280Z",
     "shell.execute_reply": "2024-06-04T15:38:01.098285Z",
     "shell.execute_reply.started": "2024-06-04T13:28:38.198936Z"
    }
   },
   "outputs": [],
   "source": [
    "# Train a CellTypist model on the renormalised data with obs['structure'] as the label.\n",
    "# Long-running: the recorded execution of this cell took roughly two hours.\n",
    "new_model = celltypist.train(\n",
    "    adatamerge1e4,\n",
    "    labels='structure',\n",
    "    n_jobs=100,\n",
    "    feature_selection=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "595f0088-ff31-42a8-992d-42522c77a27d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T15:38:01.101420Z",
     "iopub.status.busy": "2024-06-04T15:38:01.100981Z",
     "iopub.status.idle": "2024-06-04T15:38:01.111482Z",
     "shell.execute_reply": "2024-06-04T15:38:01.110560Z",
     "shell.execute_reply.started": "2024-06-04T15:38:01.101394Z"
    }
   },
   "outputs": [],
   "source": [
    "# Save the current new_model to disk. It is the structure model only when the cells are\n",
    "# run in order, because new_model is rebound by the division training cell further down.\n",
    "new_model.write('/data1/usr/results/annotation/celltypist/ms1model_structure.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "42c14484-75b3-4beb-9861-81769722a7d3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T15:38:01.112936Z",
     "iopub.status.busy": "2024-06-04T15:38:01.112598Z",
     "iopub.status.idle": "2024-06-04T16:33:17.237937Z",
     "shell.execute_reply": "2024-06-04T16:33:17.236168Z",
     "shell.execute_reply.started": "2024-06-04T15:38:01.112912Z"
    }
   },
   "outputs": [],
   "source": [
    "# Train a second CellTypist model on the same data, this time with obs['division'] as the label.\n",
    "# This rebinds new_model, replacing the structure model held in memory.\n",
    "new_model = celltypist.train(\n",
    "    adatamerge1e4,\n",
    "    labels='division',\n",
    "    n_jobs=100,\n",
    "    feature_selection=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "ea2609f3-4065-44c2-ba34-8326c893618b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-04T16:33:17.241124Z",
     "iopub.status.busy": "2024-06-04T16:33:17.240583Z",
     "iopub.status.idle": "2024-06-04T16:33:17.248828Z",
     "shell.execute_reply": "2024-06-04T16:33:17.247243Z",
     "shell.execute_reply.started": "2024-06-04T16:33:17.241098Z"
    }
   },
   "outputs": [],
   "source": [
    "# Save the current new_model to disk; it is the division model when the cells are run in order.\n",
    "new_model.write('/data1/usr/results/annotation/celltypist/ms1model_division.pkl')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fdad0c9e-b8b9-47f2-9987-91ddc18045bd",
   "metadata": {},
   "source": [
    "# structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "57389db8-291d-423f-8d5d-566fffb4aadc",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:32:56.048936Z",
     "iopub.status.busy": "2024-06-05T01:32:56.048162Z",
     "iopub.status.idle": "2024-06-05T01:32:56.058636Z",
     "shell.execute_reply": "2024-06-05T01:32:56.057821Z",
     "shell.execute_reply.started": "2024-06-05T01:32:56.048872Z"
    }
   },
   "outputs": [],
   "source": [
    "from celltypist import models\n",
    "# Load the structure model saved above from disk; this rebinds new_model.\n",
    "new_model = models.Model.load('/data1/usr/results/annotation/celltypist/ms1model_structure.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "b4e36242-61bc-4b36-bd25-19e6099fa776",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:34:19.294917Z",
     "iopub.status.busy": "2024-06-05T01:34:19.294105Z",
     "iopub.status.idle": "2024-06-05T01:34:21.269124Z",
     "shell.execute_reply": "2024-06-05T01:34:21.268187Z",
     "shell.execute_reply.started": "2024-06-05T01:34:19.294854Z"
    }
   },
   "outputs": [],
   "source": [
    "# Held-out section from the second dataset (Zhuang-ABCA-2) and its per-cell CCF table.\n",
    "test = sc.read_h5ad('/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.030.h5ad')\n",
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "\n",
    "# Filter 1: keep only cells that have a row in the CCF table, then attach their parcellation index.\n",
    "setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "test = test[setidx].copy()\n",
    "test.obs['parcellation_index'] = ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "\n",
    "# Filter 2: drop parcellation indices 0 and 987, the same exclusion applied to the training data.\n",
    "# One mask plus an explicit copy keeps test a real object rather than a view of a view,\n",
    "# so the .obs assignment below does not have to materialise it implicitly.\n",
    "keep = ~test.obs['parcellation_index'].isin([0, 987])\n",
    "test = test[keep].copy()\n",
    "\n",
    "# Attach the region annotation (ground-truth labels and colours) by parcellation index.\n",
    "query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "query.index = test.obs.index\n",
    "test.obs = pd.concat([test.obs, query],axis=1)\n",
    "\n",
    "# Same renormalisation as the training data: (2**X - 1) / 208 * 1e4, stored as log1p.\n",
    "normexp = (np.exp2(test.X)-1)/208*1e4\n",
    "test1e4 = test.copy()\n",
    "test1e4.X = np.log1p(normexp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fb04778-cf75-4278-af96-2beaf0de965a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:34:31.672732Z",
     "iopub.status.busy": "2024-06-05T01:34:31.672417Z",
     "iopub.status.idle": "2024-06-05T01:34:32.337337Z",
     "shell.execute_reply": "2024-06-05T01:34:32.336824Z",
     "shell.execute_reply.started": "2024-06-05T01:34:31.672710Z"
    }
   },
   "outputs": [],
   "source": [
    "# Predict a label for every cell of the held-out section with the loaded model;\n",
    "# majority voting is switched off.\n",
    "predictions = celltypist.annotate(\n",
    "    test1e4,\n",
    "    model=new_model,\n",
    "    majority_voting=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "2865235d-c4e2-4015-a9a0-7ab5b8f9dd93",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:34:33.951233Z",
     "iopub.status.busy": "2024-06-05T01:34:33.950910Z",
     "iopub.status.idle": "2024-06-05T01:34:33.954479Z",
     "shell.execute_reply": "2024-06-05T01:34:33.953815Z",
     "shell.execute_reply.started": "2024-06-05T01:34:33.951211Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ad38cb7-e91f-4b97-9822-85aa2de14b41",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:35:11.772635Z",
     "iopub.status.busy": "2024-06-05T01:35:11.772282Z",
     "iopub.status.idle": "2024-06-05T01:35:12.644337Z",
     "shell.execute_reply": "2024-06-05T01:35:12.643625Z",
     "shell.execute_reply.started": "2024-06-05T01:35:11.772613Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Per-label precision / recall / F1 of the predicted structure against the annotated structure.\n",
    "print(\n",
    "    classification_report(\n",
    "        y_true=test1e4.obs['structure'],\n",
    "        y_pred=predictions.predicted_labels['predicted_labels'],\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "90d9f08f-1132-4dff-b3f8-b0e890b45e2a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:36:15.063344Z",
     "iopub.status.busy": "2024-06-05T01:36:15.062557Z",
     "iopub.status.idle": "2024-06-05T01:36:15.071396Z",
     "shell.execute_reply": "2024-06-05T01:36:15.069852Z",
     "shell.execute_reply.started": "2024-06-05T01:36:15.063279Z"
    }
   },
   "outputs": [],
   "source": [
    "# Keep the predicted label next to the annotation so both can be plotted from obs.\n",
    "test1e4.obs['prediction'] = predictions.predicted_labels['predicted_labels']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd8b6f34-af90-4333-8f77-d568a2be01b3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:39:59.363019Z",
     "iopub.status.busy": "2024-06-05T01:39:59.362242Z",
     "iopub.status.idle": "2024-06-05T01:40:02.902435Z",
     "shell.execute_reply": "2024-06-05T01:40:02.901540Z",
     "shell.execute_reply.started": "2024-06-05T01:39:59.362957Z"
    }
   },
   "outputs": [],
   "source": [
    "# Spatial map of the annotated structure, coloured with the palette built from the training data.\n",
    "sc.pl.spatial(\n",
    "    test1e4,\n",
    "    color=['structure'],\n",
    "    spot_size=0.03,\n",
    "    palette=structure_color,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6afc4379-8079-4091-97ae-471befbfbbc7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:39:32.963939Z",
     "iopub.status.busy": "2024-06-05T01:39:32.963548Z",
     "iopub.status.idle": "2024-06-05T01:39:39.322611Z",
     "shell.execute_reply": "2024-06-05T01:39:39.321763Z",
     "shell.execute_reply.started": "2024-06-05T01:39:32.963912Z"
    }
   },
   "outputs": [],
   "source": [
    "# Spatial map of the predicted structure, drawn with the same palette as the annotation plot.\n",
    "sc.pl.spatial(\n",
    "    test1e4,\n",
    "    color='prediction',\n",
    "    spot_size=0.03,\n",
    "    palette=structure_color,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c199ccc3-9c3b-466a-9773-f07d41646121",
   "metadata": {},
   "source": [
    "# division"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "227cd67b-d630-44d8-a61e-76c34c165a3e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:40:45.874243Z",
     "iopub.status.busy": "2024-06-05T01:40:45.873547Z",
     "iopub.status.idle": "2024-06-05T01:40:45.882014Z",
     "shell.execute_reply": "2024-06-05T01:40:45.880671Z",
     "shell.execute_reply.started": "2024-06-05T01:40:45.874182Z"
    }
   },
   "outputs": [],
   "source": [
    "from celltypist import models\n",
    "# Load the division model saved above from disk; this rebinds new_model,\n",
    "# replacing the structure model used in the previous section.\n",
    "new_model = models.Model.load('/data1/usr/results/annotation/celltypist/ms1model_division.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "4c4bee3f-348d-4556-a26e-197e80d44be9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:40:53.968441Z",
     "iopub.status.busy": "2024-06-05T01:40:53.967727Z",
     "iopub.status.idle": "2024-06-05T01:40:55.886444Z",
     "shell.execute_reply": "2024-06-05T01:40:55.885491Z",
     "shell.execute_reply.started": "2024-06-05T01:40:53.968379Z"
    }
   },
   "outputs": [],
   "source": [
    "# Rebuild the held-out section from disk so test1e4 has no 'prediction' column left over\n",
    "# from the structure evaluation.\n",
    "test = sc.read_h5ad('/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.030.h5ad')\n",
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "\n",
    "# Filter 1: keep only cells that have a row in the CCF table, then attach their parcellation index.\n",
    "setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "test = test[setidx].copy()\n",
    "test.obs['parcellation_index'] = ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "\n",
    "# Filter 2: drop parcellation indices 0 and 987, the same exclusion applied to the training data.\n",
    "# One mask plus an explicit copy keeps test a real object rather than a view of a view,\n",
    "# so the .obs assignment below does not have to materialise it implicitly.\n",
    "keep = ~test.obs['parcellation_index'].isin([0, 987])\n",
    "test = test[keep].copy()\n",
    "\n",
    "# Attach the region annotation (ground-truth labels and colours) by parcellation index.\n",
    "query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "query.index = test.obs.index\n",
    "test.obs = pd.concat([test.obs, query],axis=1)\n",
    "\n",
    "# Same renormalisation as the training data: (2**X - 1) / 208 * 1e4, stored as log1p.\n",
    "normexp = (np.exp2(test.X)-1)/208*1e4\n",
    "test1e4 = test.copy()\n",
    "test1e4.X = np.log1p(normexp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "186b27c3-8f2a-4caf-9375-5b5ed023fb94",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:41:03.335353Z",
     "iopub.status.busy": "2024-06-05T01:41:03.335042Z",
     "iopub.status.idle": "2024-06-05T01:41:03.703176Z",
     "shell.execute_reply": "2024-06-05T01:41:03.702546Z",
     "shell.execute_reply.started": "2024-06-05T01:41:03.335329Z"
    }
   },
   "outputs": [],
   "source": [
    "# Predict a label for every cell of the held-out section with the loaded model;\n",
    "# majority voting is switched off.\n",
    "predictions = celltypist.annotate(\n",
    "    test1e4,\n",
    "    model=new_model,\n",
    "    majority_voting=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "8c847958-582a-47a6-8d08-54cc3a48dd95",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:41:07.674352Z",
     "iopub.status.busy": "2024-06-05T01:41:07.674034Z",
     "iopub.status.idle": "2024-06-05T01:41:07.678134Z",
     "shell.execute_reply": "2024-06-05T01:41:07.677283Z",
     "shell.execute_reply.started": "2024-06-05T01:41:07.674329Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0c5227ed-b724-4ecb-a56c-9a4e83554d4d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:41:24.513830Z",
     "iopub.status.busy": "2024-06-05T01:41:24.513055Z",
     "iopub.status.idle": "2024-06-05T01:41:25.302631Z",
     "shell.execute_reply": "2024-06-05T01:41:25.301753Z",
     "shell.execute_reply.started": "2024-06-05T01:41:24.513766Z"
    }
   },
   "outputs": [],
   "source": [
    "# Per-label precision / recall / F1 of the predicted division against the annotated division.\n",
    "print(\n",
    "    classification_report(\n",
    "        y_true=test1e4.obs['division'],\n",
    "        y_pred=predictions.predicted_labels['predicted_labels'],\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30a00263-b85d-4b1f-844e-4156d7921ec0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:42:03.535091Z",
     "iopub.status.busy": "2024-06-05T01:42:03.534794Z",
     "iopub.status.idle": "2024-06-05T01:42:06.304271Z",
     "shell.execute_reply": "2024-06-05T01:42:06.303367Z",
     "shell.execute_reply.started": "2024-06-05T01:42:03.535068Z"
    }
   },
   "outputs": [],
   "source": [
    "# Spatial map of the annotated division, coloured with the palette built from the training data.\n",
    "sc.pl.spatial(\n",
    "    test1e4,\n",
    "    color=['division'],\n",
    "    spot_size=0.03,\n",
    "    palette=division_color,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ea1bd3d-d8bc-40ef-bfef-ad4aa861277c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:42:21.578487Z",
     "iopub.status.busy": "2024-06-05T01:42:21.577732Z",
     "iopub.status.idle": "2024-06-05T01:42:24.332137Z",
     "shell.execute_reply": "2024-06-05T01:42:24.331247Z",
     "shell.execute_reply.started": "2024-06-05T01:42:21.578423Z"
    }
   },
   "outputs": [],
   "source": [
    "# Store the predicted division in obs, then map it with the same palette as the annotation plot.\n",
    "test1e4.obs['prediction'] = predictions.predicted_labels['predicted_labels']\n",
    "sc.pl.spatial(\n",
    "    test1e4,\n",
    "    color='prediction',\n",
    "    spot_size=0.03,\n",
    "    palette=division_color,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d1ab108b-3547-43bf-b4f5-abd827d88c17",
   "metadata": {},
   "source": [
    "# Mouse2Loop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "3009b35a-8815-4288-a976-bf9de9dfa374",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T01:57:59.904046Z",
     "iopub.status.busy": "2024-06-05T01:57:59.903200Z",
     "iopub.status.idle": "2024-06-05T01:57:59.918039Z",
     "shell.execute_reply": "2024-06-05T01:57:59.916965Z",
     "shell.execute_reply.started": "2024-06-05T01:57:59.903980Z"
    }
   },
   "outputs": [],
   "source": [
    "datalist = ['/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.001.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.002.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.003.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.004.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.005.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.006.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.007.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.008.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.009.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.010.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.011.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.012.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.013.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.014.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.015.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.016.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.017.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.018.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.019.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.020.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.021.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.022.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.023.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.025.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.026.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.027.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.028.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.030.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.031.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.032.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.033.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.034.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.035.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.036.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.037.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.039.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.040.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.041.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.042.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.044.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.045.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.046.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.047.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.048.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.049.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.050.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.051.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.052.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.053.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.054.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.055.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.056.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.057.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.058.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.059.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.060.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.061.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.062.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.063.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.065.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.066.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.067.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.070.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.071.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.072.h5ad',\n",
    "'/data2/usr/Sgeneration/MERFISH/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.073.h5ad']"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25ff90da-903d-4a4b-8391-22ff46e75008",
   "metadata": {},
   "source": [
    "## structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "ee60d5d5-3aab-4668-a3a0-4b05a7436fa5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T09:02:06.620773Z",
     "iopub.status.busy": "2024-06-05T09:02:06.619945Z",
     "iopub.status.idle": "2024-06-05T09:02:06.629287Z",
     "shell.execute_reply": "2024-06-05T09:02:06.627994Z",
     "shell.execute_reply.started": "2024-06-05T09:02:06.620707Z"
    }
   },
   "outputs": [],
   "source": [
    "# Bind the CellTypist model stored in ms1model_structure.pkl to `new_model`.\n",
    "# NOTE(review): the division section rebinds the same name `new_model` to a different\n",
    "# model file, so the active model depends on which of the two load cells ran last.\n",
    "# NOTE(review): the model file is a .pkl; only load it from a trusted location.\n",
    "from celltypist import models\n",
    "new_model = models.Model.load('/data1/usr/results/annotation/celltypist/ms1model_structure.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6b36bd32-8b21-4196-bc98-901cabcf7511",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T09:02:07.694838Z",
     "iopub.status.busy": "2024-06-05T09:02:07.693736Z",
     "iopub.status.idle": "2024-06-05T09:03:22.781078Z",
     "shell.execute_reply": "2024-06-05T09:03:22.780362Z",
     "shell.execute_reply.started": "2024-06-05T09:02:07.694776Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from celltypist import models\n",
    "from sklearn.metrics import classification_report\n",
    "\n",
    "# Score the structure-level CellTypist model on every section listed in `datalist`.\n",
    "# For each section this writes a classification report (CSV) and the predicted\n",
    "# labels (.npy) into `structure_outdir`.\n",
    "\n",
    "# Load the model here under its own name instead of reading the shared `new_model`:\n",
    "# the division section rebinds `new_model`, so relying on it makes the result depend\n",
    "# on the order in which cells were run.\n",
    "# NOTE(review): the model file is a .pkl; only load it from a trusted location.\n",
    "structure_model = models.Model.load('/data1/usr/results/annotation/celltypist/ms1model_structure.pkl')\n",
    "structure_outdir = '/data1/usr/results/annotation/celltypist/mouse2_structure'\n",
    "\n",
    "# parcellation_index values removed before scoring.\n",
    "# NOTE(review): presumably 0 means unassigned and 987 is a region excluded on purpose -- confirm.\n",
    "excluded_parcellations = [0, 987]\n",
    "\n",
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "for dataname in datalist:\n",
    "    test = sc.read_h5ad(dataname)\n",
    "    dname = dataname.split('Sgeneration/')[-1]\n",
    "\n",
    "    # Keep only cells whose obs index is present in the CCF coordinate table.\n",
    "    setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(dname)\n",
    "        continue\n",
    "    test = test[setidx].copy()\n",
    "    test.obs['parcellation_index']=ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "\n",
    "    # Apply both exclusions with a single mask and materialise the subset, so the\n",
    "    # `.obs` assignment below is not performed on an AnnData view.\n",
    "    keep = ~test.obs['parcellation_index'].isin(excluded_parcellations)\n",
    "    if not keep.any():\n",
    "        print(f'{dname}: no cells left after parcellation filter')\n",
    "        continue\n",
    "    test = test[keep].copy()\n",
    "\n",
    "    # Attach the region annotation row that matches each cell's parcellation_index.\n",
    "    query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "    query.index = test.obs.index\n",
    "    test.obs = pd.concat([test.obs, query],axis=1)\n",
    "\n",
    "    # Re-scale the expression values and store them as log1p.\n",
    "    # NOTE(review): assumes X holds log2(x+1) values and that 1e4/208 rescales them to the\n",
    "    # 10,000-count normalisation CellTypist expects -- confirm where 208 comes from.\n",
    "    normexp = (np.exp2(test.X)-1)/208*1e4\n",
    "    test1e4 = test.copy()\n",
    "    test1e4.X = np.log1p(normexp)\n",
    "\n",
    "    predictions = celltypist.annotate(test1e4, model = structure_model, majority_voting = False)\n",
    "    predicted = predictions.predicted_labels.predicted_labels\n",
    "    structuredf = pd.DataFrame(classification_report(test1e4.obs['structure'],predicted,output_dict=True)).T\n",
    "    savepath = f'{structure_outdir}/{dname[:-5]}.csv'\n",
    "    structuredf.to_csv(savepath)\n",
    "    np.save(f'{structure_outdir}/{dname[:-5]}_pred.npy',np.array(predicted.values))\n",
    "    print(savepath)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8b369b0a-4775-4579-b406-debad46218cf",
   "metadata": {},
   "source": [
    "## division"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "0f427458-bb0b-42cf-87b3-b38ea31c4567",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T02:12:24.525441Z",
     "iopub.status.busy": "2024-06-05T02:12:24.524669Z",
     "iopub.status.idle": "2024-06-05T02:12:24.532964Z",
     "shell.execute_reply": "2024-06-05T02:12:24.531440Z",
     "shell.execute_reply.started": "2024-06-05T02:12:24.525381Z"
    }
   },
   "outputs": [],
   "source": [
    "# Bind the CellTypist model stored in ms1model_division.pkl to `new_model`.\n",
    "# NOTE(review): the structure section binds the same name `new_model` to a different\n",
    "# model file, so the active model depends on which of the two load cells ran last.\n",
    "# NOTE(review): the model file is a .pkl; only load it from a trusted location.\n",
    "from celltypist import models\n",
    "new_model = models.Model.load('/data1/usr/results/annotation/celltypist/ms1model_division.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98a7c928-fe98-4d43-83d2-955ba6f28d77",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-05T08:59:16.275827Z",
     "iopub.status.busy": "2024-06-05T08:59:16.274982Z",
     "iopub.status.idle": "2024-06-05T09:00:21.756411Z",
     "shell.execute_reply": "2024-06-05T09:00:21.755272Z",
     "shell.execute_reply.started": "2024-06-05T08:59:16.275765Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "from celltypist import models\n",
    "from sklearn.metrics import classification_report\n",
    "\n",
    "# Score the division-level CellTypist model on every section listed in `datalist`.\n",
    "# For each section this writes a classification report (CSV) and the predicted\n",
    "# labels (.npy) into `division_outdir`.\n",
    "\n",
    "# Load the model here under its own name instead of reading the shared `new_model`:\n",
    "# the structure section binds `new_model` to a different model, so relying on it makes\n",
    "# the result depend on the order in which cells were run.\n",
    "# NOTE(review): the model file is a .pkl; only load it from a trusted location.\n",
    "division_model = models.Model.load('/data1/usr/results/annotation/celltypist/ms1model_division.pkl')\n",
    "division_outdir = '/data1/usr/results/annotation/celltypist/mouse2_division'\n",
    "\n",
    "# parcellation_index values removed before scoring.\n",
    "# NOTE(review): presumably 0 means unassigned and 987 is a region excluded on purpose -- confirm.\n",
    "excluded_parcellations = [0, 987]\n",
    "\n",
    "ccfa2 = pd.read_csv('/data2/usr/Sgeneration/MERFISH/Annotation/A2ccf_coordinates.csv',index_col=0)\n",
    "for dataname in datalist:\n",
    "    test = sc.read_h5ad(dataname)\n",
    "    dname = dataname.split('Sgeneration/')[-1]\n",
    "\n",
    "    # Keep only cells whose obs index is present in the CCF coordinate table.\n",
    "    setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "    if len(setidx)==0:\n",
    "        print(dname)\n",
    "        continue\n",
    "    test = test[setidx].copy()\n",
    "    test.obs['parcellation_index']=ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "\n",
    "    # Apply both exclusions with a single mask and materialise the subset, so the\n",
    "    # `.obs` assignment below is not performed on an AnnData view.\n",
    "    keep = ~test.obs['parcellation_index'].isin(excluded_parcellations)\n",
    "    if not keep.any():\n",
    "        print(f'{dname}: no cells left after parcellation filter')\n",
    "        continue\n",
    "    test = test[keep].copy()\n",
    "\n",
    "    # Attach the region annotation row that matches each cell's parcellation_index.\n",
    "    query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "    query.index = test.obs.index\n",
    "    test.obs = pd.concat([test.obs, query],axis=1)\n",
    "\n",
    "    # Re-scale the expression values and store them as log1p.\n",
    "    # NOTE(review): assumes X holds log2(x+1) values and that 1e4/208 rescales them to the\n",
    "    # 10,000-count normalisation CellTypist expects -- confirm where 208 comes from.\n",
    "    normexp = (np.exp2(test.X)-1)/208*1e4\n",
    "    test1e4 = test.copy()\n",
    "    test1e4.X = np.log1p(normexp)\n",
    "\n",
    "    predictions = celltypist.annotate(test1e4, model = division_model, majority_voting = False)\n",
    "    predicted = predictions.predicted_labels.predicted_labels\n",
    "    divisiondf = pd.DataFrame(classification_report(test1e4.obs['division'],predicted,output_dict=True)).T\n",
    "    savepath = f'{division_outdir}/{dname[:-5]}.csv'\n",
    "    divisiondf.to_csv(savepath)\n",
    "    np.save(f'{division_outdir}/{dname[:-5]}_pred.npy',np.array(predicted.values))\n",
    "    print(savepath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f0cb779-c7ad-412b-af1c-58c044135507",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7e81abd-aef5-4868-85fc-ed304775c9d2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "niche",
   "language": "python",
   "name": "niche"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
