{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ceeb97d4-ed30-4bd5-850c-c6c8d4292b34",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:58:30.909453Z",
     "iopub.status.busy": "2024-09-15T05:58:30.908908Z",
     "iopub.status.idle": "2024-09-15T05:58:34.247517Z",
     "shell.execute_reply": "2024-09-15T05:58:34.245672Z",
     "shell.execute_reply.started": "2024-09-15T05:58:30.909411Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
    "%pylab inline\n",
    "import scanpy as sc\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "import scipy"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "86881c80-5e95-4f3e-ae7e-1a83d6f7c101",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fc7c805c-ac29-4f29-802c-bb8227f50f1b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:58:34.248992Z",
     "iopub.status.busy": "2024-09-15T05:58:34.248491Z",
     "iopub.status.idle": "2024-09-15T05:58:34.258067Z",
     "shell.execute_reply": "2024-09-15T05:58:34.256583Z",
     "shell.execute_reply.started": "2024-09-15T05:58:34.248974Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Paths of the Zhuang-ABCA-2 slice files evaluated in this notebook.\n",
    "# Slice ids run from 001 to 073; the seven ids in the exclusion set are not part of the list.\n",
    "datalist = [\n",
    "    f'/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.{slice_id:03d}.h5ad'\n",
    "    for slice_id in range(1, 74)\n",
    "    if slice_id not in {24, 29, 38, 43, 64, 68, 69}\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "825f39fd-d426-4d0d-8e7b-ba5ad1527d7c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:41:38.787233Z",
     "iopub.status.busy": "2024-06-30T09:41:38.786510Z",
     "iopub.status.idle": "2024-06-30T09:45:20.682636Z",
     "shell.execute_reply": "2024-06-30T09:45:20.681897Z",
     "shell.execute_reply.started": "2024-06-30T09:41:38.787184Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Annotation tables: the name and colour tables of the parcellation terms are joined\n",
    "# column-wise; ccfa2 supplies the parcellation_index of each cell.\n",
    "regiontable = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/parcellation_to_parcellation_term_membership_name.csv', index_col=0)\n",
    "regioncolor = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/parcellation_to_parcellation_term_membership_color.csv', index_col=0)\n",
    "regionanno = pd.concat([regiontable, regioncolor], axis=1)\n",
    "ccfa2 = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/A2ccf_coordinates.csv', index_col=0)\n",
    "\n",
    "# Directory with the per-slice predictions of each method ('model' and 'modelmax' share one).\n",
    "prefixpath = dict(\n",
    "    scANVI='/stor/usr/sgenetmp/results/annotation/scANVI/mouse2_structure/',\n",
    "    celltypist='/stor/usr/sgenetmp/results/annotation/celltypist/mouse2_structure/',\n",
    "    model='/stor/usr/sgenetmp/results/annotation/model/mouse2_structure/',\n",
    "    modelmax='/stor/usr/sgenetmp/results/annotation/model/mouse2_structure/',\n",
    ")\n",
    "# Label array that the argmax indices of the model outputs are looked up in.\n",
    "strlabel = np.load('/nfs/public/usr/MERFISH2023/Annotation/strstructure.npy')\n",
    "\n",
    "# allf1[slice name] -> {'gt': ground truth, <method>: predicted labels}\n",
    "allf1 = {}\n",
    "for dataname in datalist:\n",
    "    dname = dataname.split('Sgeneration/')[-1][:-5]  # file name without the '.h5ad' suffix\n",
    "    adata = sc.read_h5ad(f'/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/{dname}.h5ad')\n",
    "\n",
    "    # Keep only the cells listed in ccfa2; a slice without any is printed and skipped.\n",
    "    test = adata\n",
    "    in_ccf = test.obs.index.isin(ccfa2.index)\n",
    "    setidx = test.obs.index[in_ccf]\n",
    "    if not len(setidx):\n",
    "        print(dname)\n",
    "        continue\n",
    "    test = test[setidx].copy()\n",
    "    test.obs['parcellation_index'] = ccfa2.loc[test.obs.index, 'parcellation_index']\n",
    "    # Cells with parcellation_index 0 or 987 are removed\n",
    "    # (NOTE(review): presumably unassigned labels - confirm).\n",
    "    for dropped in (0, 987):\n",
    "        test = test[test.obs['parcellation_index'] != dropped]\n",
    "    # Look up the annotation row of every remaining cell and re-key it by cell id.\n",
    "    query = regionanno.loc[test.obs.parcellation_index.values, :]\n",
    "    query.index = test.obs.index\n",
    "    test.obs = pd.concat([test.obs, query], axis=1)\n",
    "    adata = test\n",
    "\n",
    "    allf1[dname] = {'gt': adata.obs['structure']}\n",
    "\n",
    "    # Our model, mean- and max-pooled: argmax along axis 1 of the saved array -> label.\n",
    "    for k, pooled in (('model', '_emb_meanpool.npy'), ('modelmax', '_emb_maxpool.npy')):\n",
    "        results = np.load(prefixpath[k] + dname + pooled)\n",
    "        allf1[dname][k] = strlabel[results.argmax(1)]\n",
    "\n",
    "    # CellTypist: the saved array is stored as-is.\n",
    "    k = 'celltypist'\n",
    "    results = np.load(prefixpath[k] + dname + '_pred.npy', allow_pickle=True)\n",
    "    allf1[dname][k] = results\n",
    "\n",
    "    # scANVI: name of the column holding the row-wise maximum.\n",
    "    k = 'scANVI'\n",
    "    results = pd.read_csv(prefixpath[k] + dname + '_pred.csv', index_col=0)\n",
    "    allf1[dname][k] = results.idxmax(1).values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "12d8b6b9-70b0-454e-ab74-397f9ad66114",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:45:20.688737Z",
     "iopub.status.busy": "2024-06-30T09:45:20.688454Z",
     "iopub.status.idle": "2024-06-30T09:45:20.712197Z",
     "shell.execute_reply": "2024-06-30T09:45:20.711704Z",
     "shell.execute_reply.started": "2024-06-30T09:45:20.688717Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "c99348fd-1534-4942-ba7d-18cdb9de309b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:45:20.713518Z",
     "iopub.status.busy": "2024-06-30T09:45:20.713216Z",
     "iopub.status.idle": "2024-06-30T09:45:20.757024Z",
     "shell.execute_reply": "2024-06-30T09:45:20.756435Z",
     "shell.execute_reply.started": "2024-06-30T09:45:20.713499Z"
    }
   },
   "outputs": [],
   "source": [
    "import colorbm as cbm\n",
    "import seaborn as sns\n",
    "\n",
    "# Use the 'npg' palette from colorbm as the seaborn default colour cycle.\n",
    "cpal = cbm.pal('npg').as_hex.copy()\n",
    "sns.set_palette(sns.color_palette(cpal))\n",
    "\n",
    "# Font type 42 for PDF/PS output and Arial as the figure font.\n",
    "rcParams.update({\n",
    "    'pdf.fonttype': 42,\n",
    "    'ps.fonttype': 42,\n",
    "    'font.family': 'Arial',\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "0dbe8974-cd86-4633-9d21-6dde4d668d25",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:47:18.264755Z",
     "iopub.status.busy": "2024-06-30T09:47:18.263481Z",
     "iopub.status.idle": "2024-06-30T09:47:36.895815Z",
     "shell.execute_reply": "2024-06-30T09:47:36.895075Z",
     "shell.execute_reply.started": "2024-06-30T09:47:18.264699Z"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Macro-averaged F1 of every method on every slice; rows = slices, columns = methods.\n",
    "f1df = {}\n",
    "for dname, preds in allf1.items():\n",
    "    truth = preds['gt'].values\n",
    "    f1df[dname] = {k: f1_score(truth, preds[k], average='macro') for k in prefixpath}\n",
    "f1df = pd.DataFrame(f1df).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "fc34f191-9d84-4352-8c20-4eeba406e0b0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:55:07.117223Z",
     "iopub.status.busy": "2024-06-30T09:55:07.116100Z",
     "iopub.status.idle": "2024-06-30T09:55:07.124303Z",
     "shell.execute_reply": "2024-06-30T09:55:07.123596Z",
     "shell.execute_reply.started": "2024-06-30T09:55:07.117175Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Give the methods their display names and put our two variants first.\n",
    "# The rename is keyed on the original column names instead of assigning a positional list:\n",
    "# a positional assignment silently attaches scores to the wrong method when the column\n",
    "# order differs, which is exactly what happens when this cell is run a second time\n",
    "# (the columns are already reordered by then). With rename, a re-run is a no-op.\n",
    "f1df = f1df.rename(columns={\n",
    "    'model': 'Ours_mean',\n",
    "    'modelmax': 'Ours_max',\n",
    "    'celltypist': 'CellTypist',\n",
    "})\n",
    "f1df = f1df.loc[:, ['Ours_mean', 'Ours_max', 'scANVI', 'CellTypist']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "155f0f27-9255-4ee2-9f2d-bff8c08d2935",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:55:08.318445Z",
     "iopub.status.busy": "2024-06-30T09:55:08.317509Z",
     "iopub.status.idle": "2024-06-30T09:55:08.325768Z",
     "shell.execute_reply": "2024-06-30T09:55:08.325002Z",
     "shell.execute_reply.started": "2024-06-30T09:55:08.318398Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Save the per-slice macro-F1 table (rows = slices, columns = methods) as CSV.\n",
    "f1df.to_csv('/stor/usr/sgenetmp/results/annotation/mouse2_region_marcof1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f2aba19-7033-4d91-b6cb-5e604528dce9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:59:25.401320Z",
     "iopub.status.busy": "2024-09-15T05:59:25.400752Z",
     "iopub.status.idle": "2024-09-15T05:59:27.295949Z",
     "shell.execute_reply": "2024-09-15T05:59:27.293895Z",
     "shell.execute_reply.started": "2024-09-15T05:59:25.401277Z"
    }
   },
   "outputs": [],
   "source": [
    "ls /stor/usr/sgenetmp/results/annotation/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "95e55fa1-ce29-43ce-bd4e-55eec5d0b01c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:59:36.055529Z",
     "iopub.status.busy": "2024-09-15T05:59:36.054922Z",
     "iopub.status.idle": "2024-09-15T05:59:36.305882Z",
     "shell.execute_reply": "2024-09-15T05:59:36.304029Z",
     "shell.execute_reply.started": "2024-09-15T05:59:36.055469Z"
    }
   },
   "outputs": [],
   "source": [
    "# Reload the macro-F1 table from the file this notebook's export cell writes\n",
    "# ('mouse2_region_marcof1.csv'). The path used before, 'mouse2_marcof1.csv', is not\n",
    "# written anywhere in the visible notebook, so a fresh run would not read back its own results.\n",
    "f1df = pd.read_csv('/stor/usr/sgenetmp/results/annotation/mouse2_region_marcof1.csv',index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5f407fda-7554-4066-8082-9d2b3815356b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:59:47.515655Z",
     "iopub.status.busy": "2024-09-15T05:59:47.515140Z",
     "iopub.status.idle": "2024-09-15T05:59:47.530958Z",
     "shell.execute_reply": "2024-09-15T05:59:47.528903Z",
     "shell.execute_reply.started": "2024-09-15T05:59:47.515615Z"
    }
   },
   "outputs": [],
   "source": [
    "# Mean score of each method across slices\n",
    "f1df.mean(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2213bb3c-ca1c-4fa3-bbb4-ab80afb6aecb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:59:55.707698Z",
     "iopub.status.busy": "2024-09-15T05:59:55.707163Z",
     "iopub.status.idle": "2024-09-15T05:59:55.722244Z",
     "shell.execute_reply": "2024-09-15T05:59:55.720591Z",
     "shell.execute_reply.started": "2024-09-15T05:59:55.707655Z"
    }
   },
   "outputs": [],
   "source": [
    "# Standard deviation of each method's score across slices\n",
    "f1df.std(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61c3fc81-d5d7-4242-8eff-5bc06306df51",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:06:41.327457Z",
     "iopub.status.busy": "2024-06-30T12:06:41.327115Z",
     "iopub.status.idle": "2024-06-30T12:06:41.841891Z",
     "shell.execute_reply": "2024-06-30T12:06:41.841441Z",
     "shell.execute_reply.started": "2024-06-30T12:06:41.327434Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "figsize(6,4)\n",
    "# Long format: one row per (slice, method) score, with the slice name kept as a column.\n",
    "pltdf = f1df.melt(ignore_index=False)\n",
    "pltdf['dataset'] = pltdf.index\n",
    "xy = dict(data=pltdf, x='variable', y='value')\n",
    "\n",
    "# Distribution per method, with the individual slices drawn on top.\n",
    "sns.boxplot(hue='variable', **xy)\n",
    "sns.stripplot(hue='variable', size=5, jitter=True, edgecolor='black', linewidth=0.2, **xy)\n",
    "# One faint gray line per slice connecting its scores across methods.\n",
    "palette = dict.fromkeys(pltdf['dataset'].unique(), 'gray')\n",
    "sns.lineplot(hue='dataset', palette=palette, alpha=0.2, legend=False, **xy)\n",
    "\n",
    "plt.xlabel('')\n",
    "plt.ylabel('Macro F1', fontsize=18)\n",
    "plt.title('Region', fontsize=21)\n",
    "plt.xticks(fontsize=14, rotation=30);\n",
    "plt.yticks(fontsize=14);\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichepredict/Region_macrof1.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81d60a86-c4ca-490e-9809-1a46f50237bb",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Macro-averaged precision per slice and method, restricted to cells whose\n",
    "# ground-truth class has more than 5 cells in that slice.\n",
    "f1df = {}\n",
    "for dname, preds in allf1.items():\n",
    "    truth = preds['gt']\n",
    "    counts = truth.value_counts()\n",
    "    mask = truth.isin(counts.index[counts.values > 5]).values\n",
    "    f1df[dname] = {\n",
    "        k: precision_score(truth.values[mask], preds[k][mask], average='macro')\n",
    "        for k in prefixpath\n",
    "    }\n",
    "f1df = pd.DataFrame(f1df).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a76fd6f6-0942-4b90-ac3c-0180b6efe9bc",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Long format: one row per (slice, method) score, with the slice name kept as a column.\n",
    "pltdf = f1df.melt(ignore_index=False)\n",
    "pltdf['dataset'] = pltdf.index\n",
    "xy = dict(data=pltdf, x='variable', y='value')\n",
    "\n",
    "sns.boxplot(hue='variable', **xy)\n",
    "sns.stripplot(hue='variable', size=5, jitter=True, edgecolor='black', linewidth=0.2, **xy)\n",
    "# One faint gray line per slice connecting its scores across methods.\n",
    "palette = dict.fromkeys(pltdf['dataset'].unique(), 'gray')\n",
    "sns.lineplot(hue='dataset', palette=palette, alpha=0.2, legend=False, **xy)\n",
    "\n",
    "plt.xlabel('Methods')\n",
    "plt.ylabel('Precision')\n",
    "plt.title('MERFISH Mouse2 structure')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "2bdf7ecc-0dfb-4298-9933-e65cd7831517",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:50:33.523755Z",
     "iopub.status.busy": "2024-06-30T09:50:33.523561Z",
     "iopub.status.idle": "2024-06-30T09:50:36.067217Z",
     "shell.execute_reply": "2024-06-30T09:50:36.066739Z",
     "shell.execute_reply.started": "2024-06-30T09:50:33.523741Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Rebuild the annotated AnnData of one slice: the entry at position 30 of allf1.\n",
    "dname = list(allf1.keys())[30]\n",
    "adata = sc.read_h5ad(f'/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/{dname}.h5ad')\n",
    "test = adata\n",
    "# Cells that are also listed in ccfa2; the slice name is printed when there are none.\n",
    "setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "if not len(setidx):\n",
    "    print(dname)\n",
    "test = test[setidx].copy()\n",
    "test.obs['parcellation_index'] = ccfa2.loc[test.obs.index, 'parcellation_index']\n",
    "# Cells with parcellation_index 0 or 987 are removed\n",
    "# (NOTE(review): presumably unassigned labels - confirm).\n",
    "for dropped in (0, 987):\n",
    "    test = test[test.obs['parcellation_index'] != dropped]\n",
    "query = regionanno.loc[test.obs.parcellation_index.values, :]\n",
    "query.index = test.obs.index\n",
    "test.obs = pd.concat([test.obs, query], axis=1)\n",
    "adata = test\n",
    "\n",
    "# Store ground truth and each method's labels as obs columns, and gather every label that occurs.\n",
    "uniquek = []\n",
    "for k, labels in allf1[dname].items():\n",
    "    adata.obs[k] = labels\n",
    "    uniquek.append(np.unique(labels))\n",
    "uniquek = np.unique(np.concatenate(uniquek))\n",
    "\n",
    "# Label -> colour lookups for the three annotation levels.\n",
    "category_color, division_color, structure_color = (\n",
    "    dict(regionanno[[level, level + '_color']].drop_duplicates().values)\n",
    "    for level in ('category', 'division', 'structure')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfd54bc2-d7d0-4046-a955-659234d54fea",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-08T09:17:47.491170Z",
     "iopub.status.busy": "2024-06-08T09:17:47.490533Z",
     "iopub.status.idle": "2024-06-08T09:17:57.352470Z",
     "shell.execute_reply": "2024-06-08T09:17:57.351490Z",
     "shell.execute_reply.started": "2024-06-08T09:17:47.491121Z"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import matplotlib.lines as mlines\n",
    "\n",
    "# Macro-F1 of the displayed slice, computed here from its stored predictions.\n",
    "# The titles used to read `f1df`, but that name is reassigned by several cells\n",
    "# (macro-F1, the CSV reload with renamed columns, macro-precision), so the number shown\n",
    "# depended on which cell ran last and could raise KeyError on the renamed columns.\n",
    "slice_f1 = {\n",
    "    k: f1_score(allf1[dname]['gt'].values, allf1[dname][k], average='macro')\n",
    "    for k in ('modelmax', 'celltypist', 'scANVI')\n",
    "}\n",
    "\n",
    "# (obs column, panel title), listed in row-major panel order.\n",
    "panels = [\n",
    "    ('gt', 'GroundTruth'),\n",
    "    ('modelmax', 'Ours {:.2f}'.format(slice_f1['modelmax'])),\n",
    "    ('celltypist', 'CellTypist {:.2f}'.format(slice_f1['celltypist'])),\n",
    "    ('scANVI', 'scANVI {:.2f}'.format(slice_f1['scANVI'])),\n",
    "]\n",
    "\n",
    "fig, axes = plt.subplots(2, 2, figsize=(8, 8))\n",
    "for ax, (column, title) in zip(axes.flat, panels):\n",
    "    sc.pl.spatial(adata, color=column, palette=structure_color, spot_size=0.03, ax=ax, show=False, legend_loc=None, title=title)\n",
    "\n",
    "# Shared legend with circle markers: one entry per label found in uniquek.\n",
    "handles = [\n",
    "    mlines.Line2D([], [], color=structure_color[key], marker='o', linestyle='None', markersize=5, label=key)\n",
    "    for key in uniquek.tolist()\n",
    "]\n",
    "# The legend is anchored at the right edge of the figure (x=1) and laid out in 5 columns.\n",
    "fig.legend(handles=handles, labels=[h.get_label() for h in handles], loc='center left', bbox_to_anchor=(1, 0.5), title='Legend', ncol=5)\n",
    "plt.subplots_adjust(right=1)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4339e05b-f08c-4b34-a52d-da864134e734",
   "metadata": {},
   "source": [
    "# Division"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "845bb100-c101-4472-b73d-52491e13ad54",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:07:12.472757Z",
     "iopub.status.busy": "2024-06-30T12:07:12.471350Z",
     "iopub.status.idle": "2024-06-30T12:07:12.483825Z",
     "shell.execute_reply": "2024-06-30T12:07:12.483202Z",
     "shell.execute_reply.started": "2024-06-30T12:07:12.472706Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Paths of the Zhuang-ABCA-2 slice files evaluated in the Division section.\n",
    "# Slice ids run from 001 to 073; the seven ids in the exclusion set are not part of the list.\n",
    "datalist = [\n",
    "    f'/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/Zhuang-ABCA-2.{slice_id:03d}.h5ad'\n",
    "    for slice_id in range(1, 74)\n",
    "    if slice_id not in {24, 29, 38, 43, 64, 68, 69}\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f19b3a8-775b-42a1-a6bf-9e7b4992373d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:34:51.823531Z",
     "iopub.status.busy": "2024-06-30T12:34:51.822971Z",
     "iopub.status.idle": "2024-06-30T12:36:12.199899Z",
     "shell.execute_reply": "2024-06-30T12:36:12.198901Z",
     "shell.execute_reply.started": "2024-06-30T12:34:51.823484Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Annotation tables: the name and colour tables of the parcellation terms are joined\n",
    "# column-wise; ccfa2 supplies the parcellation_index of each cell.\n",
    "regiontable = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/parcellation_to_parcellation_term_membership_name.csv', index_col=0)\n",
    "regioncolor = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/parcellation_to_parcellation_term_membership_color.csv', index_col=0)\n",
    "regionanno = pd.concat([regiontable, regioncolor], axis=1)\n",
    "ccfa2 = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/A2ccf_coordinates.csv', index_col=0)\n",
    "\n",
    "# Directory with the per-slice predictions of each method ('model' and 'modelmax' share one).\n",
    "prefixpath = dict(\n",
    "    scANVI='/stor/usr/sgenetmp/results/annotation/scANVI/mouse2_division/',\n",
    "    celltypist='/stor/usr/sgenetmp/results/annotation/celltypist/mouse2_division/',\n",
    "    model='/stor/usr/sgenetmp/results/annotation/model/mouse2_division/',\n",
    "    modelmax='/stor/usr/sgenetmp/results/annotation/model/mouse2_division/',\n",
    ")\n",
    "# Label array that the argmax indices of the model outputs are looked up in.\n",
    "strlabel = np.load('/nfs/public/usr/MERFISH2023/Annotation/strdivision.npy')\n",
    "\n",
    "# allf1[slice name] -> {'gt': ground truth, <method>: predicted labels}\n",
    "allf1 = {}\n",
    "for dataname in datalist:\n",
    "    dname = dataname.split('Sgeneration/')[-1][:-5]  # file name without the '.h5ad' suffix\n",
    "    adata = sc.read_h5ad(f'/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/{dname}.h5ad')\n",
    "\n",
    "    # Keep only the cells listed in ccfa2; a slice without any is printed and skipped.\n",
    "    test = adata\n",
    "    in_ccf = test.obs.index.isin(ccfa2.index)\n",
    "    setidx = test.obs.index[in_ccf]\n",
    "    if not len(setidx):\n",
    "        print(dname)\n",
    "        continue\n",
    "    test = test[setidx].copy()\n",
    "    test.obs['parcellation_index'] = ccfa2.loc[test.obs.index, 'parcellation_index']\n",
    "    # Cells with parcellation_index 0 or 987 are removed\n",
    "    # (NOTE(review): presumably unassigned labels - confirm).\n",
    "    for dropped in (0, 987):\n",
    "        test = test[test.obs['parcellation_index'] != dropped]\n",
    "    # Look up the annotation row of every remaining cell and re-key it by cell id.\n",
    "    query = regionanno.loc[test.obs.parcellation_index.values, :]\n",
    "    query.index = test.obs.index\n",
    "    test.obs = pd.concat([test.obs, query], axis=1)\n",
    "    adata = test\n",
    "\n",
    "    allf1[dname] = {'gt': adata.obs['division']}\n",
    "\n",
    "    # Our model, mean- and max-pooled: argmax along axis 1 of the saved array -> label.\n",
    "    for k, pooled in (('model', '_emb_meanpool.npy'), ('modelmax', '_emb_maxpool.npy')):\n",
    "        results = np.load(prefixpath[k] + dname + pooled)\n",
    "        allf1[dname][k] = strlabel[results.argmax(1)]\n",
    "\n",
    "    # CellTypist: the saved array is stored as-is.\n",
    "    k = 'celltypist'\n",
    "    results = np.load(prefixpath[k] + dname + '_pred.npy', allow_pickle=True)\n",
    "    allf1[dname][k] = results\n",
    "\n",
    "    # scANVI: name of the column holding the row-wise maximum.\n",
    "    k = 'scANVI'\n",
    "    results = pd.read_csv(prefixpath[k] + dname + '_pred.csv', index_col=0)\n",
    "    allf1[dname][k] = results.idxmax(1).values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "1e7a74cd-2f54-44af-8be1-dbe08861fcbb",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:37:40.167448Z",
     "iopub.status.busy": "2024-06-30T12:37:40.166848Z",
     "iopub.status.idle": "2024-06-30T12:37:40.176139Z",
     "shell.execute_reply": "2024-06-30T12:37:40.174793Z",
     "shell.execute_reply.started": "2024-06-30T12:37:40.167392Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "68e3f6bc-b734-41ae-94a2-90dde70c2bbf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:37:41.700007Z",
     "iopub.status.busy": "2024-06-30T12:37:41.699441Z",
     "iopub.status.idle": "2024-06-30T12:37:41.704117Z",
     "shell.execute_reply": "2024-06-30T12:37:41.703603Z",
     "shell.execute_reply.started": "2024-06-30T12:37:41.699985Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import colorbm as cbm\n",
    "import seaborn as sns\n",
    "cpal = cbm.pal('npg').as_hex.copy()\n",
    "sns.set_palette(sns.color_palette(cpal))\n",
    "rcParams['pdf.fonttype'] = 42\n",
    "rcParams['ps.fonttype'] = 42\n",
    "rcParams['font.family'] = 'Arial'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "294dc6ef-ba75-481c-8ab5-c39880a6e4c1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:37:43.865302Z",
     "iopub.status.busy": "2024-06-30T12:37:43.864749Z",
     "iopub.status.idle": "2024-06-30T12:37:59.288651Z",
     "shell.execute_reply": "2024-06-30T12:37:59.287874Z",
     "shell.execute_reply.started": "2024-06-30T12:37:43.865255Z"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "f1df = {}\n",
    "for dname in allf1.keys():\n",
    "    f1df[dname]={}\n",
    "    for k in prefixpath.keys():\n",
    "        f1df[dname][k]=f1_score(allf1[dname]['gt'].values,allf1[dname][k],average='macro')\n",
    "f1df = pd.DataFrame(f1df).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "aeb12ba2-032c-42e2-9084-05e1787b1bc8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:37:59.289641Z",
     "iopub.status.busy": "2024-06-30T12:37:59.289517Z",
     "iopub.status.idle": "2024-06-30T12:37:59.293994Z",
     "shell.execute_reply": "2024-06-30T12:37:59.293321Z",
     "shell.execute_reply.started": "2024-06-30T12:37:59.289630Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "f1df.columns = ['scANVI','CellTypist','Ours_mean','Ours_max']\n",
    "f1df = f1df.loc[:,['Ours_mean','Ours_max','scANVI','CellTypist']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "faf9e528-00ed-4c60-a83d-e526bfe96d1b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:37:59.294981Z",
     "iopub.status.busy": "2024-06-30T12:37:59.294798Z",
     "iopub.status.idle": "2024-06-30T12:37:59.324272Z",
     "shell.execute_reply": "2024-06-30T12:37:59.323358Z",
     "shell.execute_reply.started": "2024-06-30T12:37:59.294964Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "f1df.to_csv('/stor/usr/sgenetmp/results/annotation/mouse2_division_marcof1.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8a5e8a68-ba3f-4c8a-94a5-e746822f3837",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T06:02:27.345641Z",
     "iopub.status.busy": "2024-09-15T06:02:27.345067Z",
     "iopub.status.idle": "2024-09-15T06:02:27.711584Z",
     "shell.execute_reply": "2024-09-15T06:02:27.709825Z",
     "shell.execute_reply.started": "2024-09-15T06:02:27.345601Z"
    }
   },
   "outputs": [],
   "source": [
    "f1df = pd.read_csv('/stor/usr/sgenetmp/results/annotation/mouse2_division_marcof1.csv',index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "53912916-a803-4f19-a66d-35cf0180e88b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T06:02:31.878028Z",
     "iopub.status.busy": "2024-09-15T06:02:31.877477Z",
     "iopub.status.idle": "2024-09-15T06:02:31.892283Z",
     "shell.execute_reply": "2024-09-15T06:02:31.890337Z",
     "shell.execute_reply.started": "2024-09-15T06:02:31.877988Z"
    }
   },
   "outputs": [],
   "source": [
    "f1df.mean(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02181479-b944-4d5c-a99c-2294db52ec64",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T06:02:36.126526Z",
     "iopub.status.busy": "2024-09-15T06:02:36.126015Z",
     "iopub.status.idle": "2024-09-15T06:02:36.140934Z",
     "shell.execute_reply": "2024-09-15T06:02:36.139156Z",
     "shell.execute_reply.started": "2024-09-15T06:02:36.126487Z"
    }
   },
   "outputs": [],
   "source": [
    "f1df.std(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "927ee625-e49d-4569-84cf-29c94a06b718",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:38:23.605925Z",
     "iopub.status.busy": "2024-06-30T12:38:23.604903Z",
     "iopub.status.idle": "2024-06-30T12:38:24.201898Z",
     "shell.execute_reply": "2024-06-30T12:38:24.201138Z",
     "shell.execute_reply.started": "2024-06-30T12:38:23.605873Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "figsize(6,4)\n",
    "pltdf = f1df.melt(ignore_index=False)\n",
    "pltdf['dataset']=pltdf.index\n",
    "sns.boxplot(data=pltdf,x='variable',y='value',hue='variable')\n",
    "sns.stripplot(data = pltdf,x='variable',y='value',size=5,jitter=True,hue='variable',edgecolor='black',linewidth=0.2)\n",
    "palette = {variable: \"gray\" for variable in pltdf['dataset'].unique()}\n",
    "sns.lineplot(data = pltdf,x='variable',y='value',hue='dataset',legend=False,palette=palette, alpha=0.2)\n",
    "plt.xlabel('')\n",
    "plt.ylabel('Macro F1',fontsize=18)\n",
    "plt.title('Division',fontsize=21)\n",
    "plt.xticks(fontsize=14,rotation=30);\n",
    "plt.yticks(fontsize=14);\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichepredict/division_macrof1.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b261364f-21cb-41e0-ad59-7e0c828c4acd",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "f1df = {}\n",
    "for dname in allf1.keys():\n",
    "    f1df[dname]={}\n",
    "    vcts = allf1[dname]['gt'].value_counts()\n",
    "    for k in prefixpath.keys():\n",
    "        f1df[dname][k]=precision_score(allf1[dname]['gt'].values,allf1[dname][k],average='weighted')\n",
    "f1df = pd.DataFrame(f1df).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3a8ead2-b985-4ab8-a0ff-302f9ed931a0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "pltdf = f1df.melt(ignore_index=False)\n",
    "pltdf['dataset']=pltdf.index\n",
    "sns.boxplot(data=pltdf,x='variable',y='value',hue='variable')\n",
    "sns.stripplot(data = pltdf,x='variable',y='value',size=5,jitter=True,hue='variable',edgecolor='black',linewidth=0.2)\n",
    "palette = {variable: \"gray\" for variable in pltdf['dataset'].unique()}\n",
    "sns.lineplot(data = pltdf,x='variable',y='value',hue='dataset',legend=False,palette=palette, alpha=0.2)\n",
    "plt.xlabel('Methods')\n",
    "plt.ylabel('Precision')\n",
    "# plt.ylim(0,1)\n",
    "plt.title('MERFISH Mouse2 division')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1dde5a1b-b111-44c7-9b50-b5c71049ff61",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "f1df = {}\n",
    "for dname in allf1.keys():\n",
    "    f1df[dname]={}\n",
    "    vcts = allf1[dname]['gt'].value_counts()\n",
    "    for k in prefixpath.keys():\n",
    "        f1df[dname][k]=recall_score(allf1[dname]['gt'].values,allf1[dname][k],average='weighted')\n",
    "f1df = pd.DataFrame(f1df).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a57b433-d05d-4442-8165-9200a98d315d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "pltdf = f1df.melt(ignore_index=False)\n",
    "pltdf['dataset']=pltdf.index\n",
    "sns.boxplot(data=pltdf,x='variable',y='value',hue='variable')\n",
    "sns.stripplot(data = pltdf,x='variable',y='value',size=5,jitter=True,hue='variable',edgecolor='black',linewidth=0.2)\n",
    "palette = {variable: \"gray\" for variable in pltdf['dataset'].unique()}\n",
    "sns.lineplot(data = pltdf,x='variable',y='value',hue='dataset',legend=False,palette=palette, alpha=0.2)\n",
    "plt.xlabel('Methods')\n",
    "plt.ylabel('Recall')\n",
    "# plt.ylim(0,1)\n",
    "plt.title('MERFISH Mouse2 division')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "51c0710f-7dd6-47aa-938f-9fee6432af58",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-09T02:13:35.779178Z",
     "iopub.status.busy": "2024-06-09T02:13:35.778460Z",
     "iopub.status.idle": "2024-06-09T02:13:51.632000Z",
     "shell.execute_reply": "2024-06-09T02:13:51.630969Z",
     "shell.execute_reply.started": "2024-06-09T02:13:35.779125Z"
    }
   },
   "outputs": [],
   "source": [
    "f1df = {}\n",
    "for dname in allf1.keys():\n",
    "    f1df[dname]={}\n",
    "    vcts = allf1[dname]['gt'].value_counts()\n",
    "    for k in prefixpath.keys():\n",
    "        f1df[dname][k]=f1_score(allf1[dname]['gt'].values,allf1[dname][k],average='weighted',labels=vcts.index)\n",
    "f1df = pd.DataFrame(f1df).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "8757e868-6a59-433d-929a-5f9afc7267a2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:42:03.003863Z",
     "iopub.status.busy": "2024-06-30T12:42:03.003305Z",
     "iopub.status.idle": "2024-06-30T12:42:04.828760Z",
     "shell.execute_reply": "2024-06-30T12:42:04.827233Z",
     "shell.execute_reply.started": "2024-06-30T12:42:03.003818Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "dname = list(allf1.keys())[30]\n",
    "adata = sc.read_h5ad(f'/nfs/public/usr/MERFISH2023/Zhuang-ABCA-2/processed/Sgeneration/{dname}.h5ad')\n",
    "test = adata\n",
    "#filter\n",
    "setidx = test.obs.index[test.obs.index.isin(ccfa2.index)]\n",
    "if len(setidx)==0:\n",
    "    print(dname)\n",
    "test = test[setidx].copy()\n",
    "test.obs['parcellation_index']=ccfa2.loc[test.obs.index,'parcellation_index']\n",
    "test = test[test.obs['parcellation_index'] !=0]\n",
    "test = test[test.obs['parcellation_index'] !=987]\n",
    "query = regionanno.loc[test.obs.parcellation_index.values,:]\n",
    "query.index = test.obs.index\n",
    "test.obs = pd.concat([test.obs, query],axis=1)\n",
    "adata = test\n",
    "\n",
    "uniquek = []\n",
    "for k in allf1[dname].keys():\n",
    "    adata.obs[k]=allf1[dname][k]\n",
    "    uniquek.append(np.unique(allf1[dname][k]))\n",
    "uniquek = np.unique(np.concatenate(uniquek))\n",
    "division_color = dict(regionanno[['division','division_color']].drop_duplicates().values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "902b836e-0799-4131-83ef-036fd7a59229",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:45:27.555858Z",
     "iopub.status.busy": "2024-06-30T12:45:27.554651Z",
     "iopub.status.idle": "2024-06-30T12:45:27.576087Z",
     "shell.execute_reply": "2024-06-30T12:45:27.575117Z",
     "shell.execute_reply.started": "2024-06-30T12:45:27.555809Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "uniquek = np.unique(allf1[dname]['gt'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9e6e554-3cf6-470d-b3fa-29dfd9408dc1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T12:58:48.219441Z",
     "iopub.status.busy": "2024-06-30T12:58:48.218503Z",
     "iopub.status.idle": "2024-06-30T12:58:48.226030Z",
     "shell.execute_reply": "2024-06-30T12:58:48.225287Z",
     "shell.execute_reply.started": "2024-06-30T12:58:48.219392Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "dname"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50a9a28b-2ee4-41d6-b307-3c4578be6f1f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T13:04:43.783539Z",
     "iopub.status.busy": "2024-06-30T13:04:43.782978Z",
     "iopub.status.idle": "2024-06-30T13:04:51.184953Z",
     "shell.execute_reply": "2024-06-30T13:04:51.183849Z",
     "shell.execute_reply.started": "2024-06-30T13:04:43.783489Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import matplotlib.lines as mlines\n",
    "# Plot\n",
    "fig, axes = plt.subplots(1, 4, figsize=(15, 4))  # Increased figure size for clarity\n",
    "\n",
    "# Plotting spatial data\n",
    "sc.pl.spatial(adata, color='gt', palette=division_color, spot_size=0.03, ax=axes[0], show=False, legend_loc=None,title=f'Ground Truth')\n",
    "sc.pl.spatial(adata, color='modelmax', palette=division_color, spot_size=0.03, ax=axes[1], show=False, legend_loc=None,title=f\"Ours_max ({f1df.loc[dname]['Ours_max']:.2f})\")\n",
    "sc.pl.spatial(adata, color='celltypist', palette=division_color, spot_size=0.03, ax=axes[2], show=False, legend_loc=None,title=f\"CellTypist ({f1df.loc[dname]['CellTypist']:.2f})\")\n",
    "sc.pl.spatial(adata, color='scANVI', palette=division_color, spot_size=0.03, ax=axes[3], show=False, legend_loc=None,title=f\"scANVI ({f1df.loc[dname]['scANVI']:.2f})\")\n",
    "for ax in axes:\n",
    "    ax.set_xlabel('')  # Remove x-axis label\n",
    "    ax.set_ylabel('')  # Remove y-axis label\n",
    "# Manually create legend with circle markers\n",
    "handles = [mlines.Line2D([], [], color=division_color[key], marker='o', linestyle='None', markersize=5, label=key) \n",
    "           for key in uniquek.tolist()]\n",
    "\n",
    "# Adjust the layout and display the legend with multiple columns\n",
    "fig.legend(handles=handles, labels=[h.get_label() for h in handles], loc='lower center', bbox_to_anchor=(0.5, -0.1), title='Legend', ncol=1)\n",
    "plt.tight_layout()\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichepredict/showcase36.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f92b6f70-5704-4dee-bc85-ae62f8f3191b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-07-01T03:38:14.926284Z",
     "iopub.status.busy": "2024-07-01T03:38:14.925186Z",
     "iopub.status.idle": "2024-07-01T03:38:17.655949Z",
     "shell.execute_reply": "2024-07-01T03:38:17.655393Z",
     "shell.execute_reply.started": "2024-07-01T03:38:14.926213Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "sc.pl.spatial(adata, color='gt', palette=division_color, spot_size=0.03, groups=['Cortical subplate','Olfactory areas'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1aeb4f11-c956-4154-bc65-163a1b134077",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "bgi",
   "language": "python",
   "name": "bgi"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
