{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b9a58d4b-82a6-4510-847d-199439c1aaf1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T03:59:45.791355Z",
     "iopub.status.busy": "2024-09-15T03:59:45.790871Z",
     "iopub.status.idle": "2024-09-15T03:59:45.840744Z",
     "shell.execute_reply": "2024-09-15T03:59:45.839266Z",
     "shell.execute_reply.started": "2024-09-15T03:59:45.791315Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05d6bc33-bc01-4afe-aca1-8d333a84f2b5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T03:59:46.198192Z",
     "iopub.status.busy": "2024-09-15T03:59:46.197654Z",
     "iopub.status.idle": "2024-09-15T03:59:49.476120Z",
     "shell.execute_reply": "2024-09-15T03:59:49.474225Z",
     "shell.execute_reply.started": "2024-09-15T03:59:46.198148Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
    "%pylab inline\n",
    "import scanpy as sc\n",
    "import pandas as pd\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "385808e8-1d54-41f0-9b58-15380ff50ec2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T03:59:49.735875Z",
     "iopub.status.busy": "2024-09-15T03:59:49.734511Z",
     "iopub.status.idle": "2024-09-15T03:59:49.779110Z",
     "shell.execute_reply": "2024-09-15T03:59:49.777139Z",
     "shell.execute_reply.started": "2024-09-15T03:59:49.735826Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def cluster_k_leiden(embadata,n_cluster,max_steps=50,this_min=0,this_max=10):\n",
    "    this_step = 0\n",
    "    print('reference cluster number',n_cluster)\n",
    "    while this_step < max_steps:\n",
    "        this_resolution = this_min + ((this_max-this_min)/2)\n",
    "        sc.tl.leiden(embadata,resolution=this_resolution,random_state=42)\n",
    "        this_clusters = embadata.obs['leiden'].nunique()\n",
    "        if this_clusters > n_cluster:\n",
    "            this_max = this_resolution\n",
    "        elif this_clusters < n_cluster:\n",
    "            this_min = this_resolution\n",
    "        else:break\n",
    "        this_step+=1\n",
    "    if this_step==max_steps:\n",
    "        print('Cannot find the number of clusters')\n",
    "        print('Use resolution',this_resolution)\n",
    "    else:\n",
    "        print('use resolution',this_resolution)\n",
    "     # leiden\n",
    "    sc.tl.leiden(embadata,resolution=this_resolution,random_state=42,key_added=f'cluster_{n_cluster}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b59bb102-4a69-48fe-9635-7118d493d32f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T03:59:51.354326Z",
     "iopub.status.busy": "2024-09-15T03:59:51.353731Z",
     "iopub.status.idle": "2024-09-15T03:59:54.142109Z",
     "shell.execute_reply": "2024-09-15T03:59:54.140560Z",
     "shell.execute_reply.started": "2024-09-15T03:59:51.354279Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "annotationtable = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/cluster_to_cluster_annotation_membership_pivoted.csv')\n",
    "annotationtable = annotationtable.set_index('cluster_alias')\n",
    "annotationcolor = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/cluster_to_cluster_annotation_membership_color.csv')\n",
    "annotationcolor = annotationcolor.set_index('cluster_alias')\n",
    "annotation = pd.concat([annotationtable,annotationcolor],axis=1)\n",
    "regiontable = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/parcellation_to_parcellation_term_membership_name.csv',index_col=0)\n",
    "regioncolor = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/parcellation_to_parcellation_term_membership_color.csv',index_col=0)\n",
    "regionanno = pd.concat([regiontable,regioncolor],axis=1)\n",
    "ccfv1 = pd.read_csv('/nfs/public/usr/MERFISH2023/Annotation/A2ccf_coordinates.csv',index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6eb54ae-bc8c-4fd0-b878-dd1758ee711e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T03:59:54.221836Z",
     "iopub.status.busy": "2024-09-15T03:59:54.221180Z",
     "iopub.status.idle": "2024-09-15T03:59:55.174155Z",
     "shell.execute_reply": "2024-09-15T03:59:55.172437Z",
     "shell.execute_reply.started": "2024-09-15T03:59:54.221756Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "files = sorted(os.listdir('/stor/usr/sgenetmp/results/embedding/mouse2/'))\n",
    "dnamelist = [x.split('_')[1][:-5] for x in files if x.startswith('raw')]\n",
    "print(dnamelist)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bc471ef8-1610-415a-acb5-b1fcb98c723f",
   "metadata": {
    "tags": []
   },
   "source": [
    "# Metric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15cb9a42-ff01-4be4-9dd0-9723c5d89f12",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "resultdf = pd.read_csv('/stor/usr/sgenetmp/results/embedding/results_AMI_mouse2_0625.csv',index_col=0)\n",
    "resultdfari = pd.read_csv('/stor/usr/sgenetmp/results/embedding/results_ARI_mouse2_0625.csv',index_col=0)\n",
    "resultdf = resultdf.drop_duplicates()\n",
    "resultdfari = resultdfari.drop_duplicates()\n",
    "results = {}\n",
    "resultsari = {}\n",
    "modelnames = {'Raw':'raw','Model':'adataemb','ModelFt':'adataemb_finetune',\n",
    "              'GraphST':'graphstemb','NicheCompass':'nichecompass','STAGATE':'stagater005'}\n",
    "from sklearn.metrics.cluster import adjusted_mutual_info_score, adjusted_rand_score\n",
    "for datasetname in dnamelist:\n",
    "    if datasetname in results.keys():\n",
    "        print(f'{datasetname} in results')\n",
    "        continue\n",
    "    print(datasetname)\n",
    "    alladata = {}\n",
    "    rawadata= sc.read_h5ad(f'/stor/usr/sgenetmp/results/embedding/mouse2/raw_{datasetname}.h5ad')\n",
    "    region_num = len(rawadata.obs['region'].unique())\n",
    "    division_num = len(rawadata.obs['division'].unique())\n",
    "    \n",
    "    for mname in modelnames.keys():\n",
    "        if modelnames[mname].startswith('adataemb'): \n",
    "            regionlist = [0.1,0.2,0.3]\n",
    "            if mname=='ModelFt':\n",
    "                regionlist +=[0.4,0.45]\n",
    "            for region in regionlist:\n",
    "                if (datasetname in resultdf.columns) and (f'division_{mname}_R{region}' in resultdf.index):\n",
    "                    if not pd.isna(resultdf.loc[f'division_{mname}_R{region}',datasetname]):\n",
    "                        print(f'{datasetname} & division_{mname}_R{region} in the result')\n",
    "                        continue\n",
    "                modeladata = sc.read_h5ad(f'/stor/usr/sgenetmp/results/embedding/mouse2/{modelnames[mname]}_{region}_{datasetname}.h5ad')\n",
    "                alladata[f'{mname}_R{region}']=modeladata\n",
    "        elif modelnames[mname].startswith('mean'): \n",
    "            regionlist = [0.1,0.2,0.3]\n",
    "            for region in regionlist:\n",
    "                if (datasetname in resultdf.columns) and (f'division_{mname}_R{region}' in resultdf.index):\n",
    "                    if not pd.isna(resultdf.loc[f'division_{mname}_R{region}',datasetname]):\n",
    "                        print(f'{datasetname} & division_{mname}_R{region} in the result')\n",
    "                        continue\n",
    "                modeladata = sc.read_h5ad(f'/stor/usr/sgenetmp/results/embedding/mouse2/{modelnames[mname]}_{region}_{datasetname}.h5ad')\n",
    "                alladata[f'{mname}_R{region}']=modeladata                \n",
    "        else:\n",
    "            if (datasetname in resultdf.columns) and (f'division_{mname}' in resultdf.index):\n",
    "                if not pd.isna(resultdf.loc[f'division_{mname}',datasetname]):\n",
    "                    print(f'{datasetname} & division_{mname} in the result')\n",
    "                    continue\n",
    "            alladata[mname]= sc.read_h5ad(f'/stor/usr/sgenetmp/results/embedding/mouse2/{modelnames[mname]}_{datasetname}.h5ad')\n",
    "                \n",
    "    results[datasetname]={}\n",
    "    resultsari[datasetname]={}\n",
    "    for k in alladata.keys():\n",
    "        if not f'cluster_{region_num}' in alladata[k].obs:\n",
    "            sc.pp.pca(alladata[k])\n",
    "            sc.pp.neighbors(alladata[k])\n",
    "            cluster_k_leiden(alladata[k],region_num)\n",
    "            cluster_k_leiden(alladata[k],division_num)\n",
    "        ami1 = adjusted_mutual_info_score(rawadata.obs['region'].values.tolist(),alladata[k].obs[f'cluster_{region_num}'].values.tolist())\n",
    "        ami2 = adjusted_mutual_info_score(rawadata.obs['division'].values.tolist(),alladata[k].obs[f'cluster_{division_num}'].values.tolist())\n",
    "        results[datasetname][f'region_{k}'] = ami1\n",
    "        results[datasetname][f'division_{k}'] = ami2\n",
    "        \n",
    "        ari1 = adjusted_rand_score(rawadata.obs['region'].values.tolist(),alladata[k].obs[f'cluster_{region_num}'].values.tolist())\n",
    "        ari2 = adjusted_rand_score(rawadata.obs['division'].values.tolist(),alladata[k].obs[f'cluster_{division_num}'].values.tolist())\n",
    "        resultsari[datasetname][f'region_{k}'] = ari1\n",
    "        resultsari[datasetname][f'division_{k}'] = ari2\n",
    "        print(k,ami1,ami2,ari1,ari2)\n",
    "newresultdf = pd.concat([resultdf,pd.DataFrame(results)])\n",
    "newresultdfari = pd.concat([resultdfari,pd.DataFrame(resultsari)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "df95cbfc-9fda-426a-81f3-45ef603b8334",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:00:42.289030Z",
     "iopub.status.busy": "2024-09-15T04:00:42.288454Z",
     "iopub.status.idle": "2024-09-15T04:00:43.258809Z",
     "shell.execute_reply": "2024-09-15T04:00:43.257236Z",
     "shell.execute_reply.started": "2024-09-15T04:00:42.288979Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import colorbm as cbm\n",
    "import seaborn as sns\n",
    "cpal = cbm.pal('npg').as_hex.copy()\n",
    "sns.set_palette(sns.color_palette(cpal))\n",
    "rcParams['pdf.fonttype'] = 42\n",
    "rcParams['ps.fonttype'] = 42\n",
    "rcParams['font.family'] = 'Arial'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e4eaac33-00b9-442c-a695-65926aff5c42",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:00:46.112209Z",
     "iopub.status.busy": "2024-09-15T04:00:46.111626Z",
     "iopub.status.idle": "2024-09-15T04:00:46.886071Z",
     "shell.execute_reply": "2024-09-15T04:00:46.884198Z",
     "shell.execute_reply.started": "2024-09-15T04:00:46.112160Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "resultdf = pd.read_csv('/stor/usr/sgenetmp/results/embedding/results_AMI_mouse2_0625.csv',index_col=0)\n",
    "pltdf = resultdf.melt(ignore_index=False)\n",
    "pltdf['model']=pltdf.index.str.split('_').str[1:].str.join('_')\n",
    "pltdf['class']=pltdf.index.str.split('_').str[0]\n",
    "pltdf.index = np.arange(pltdf.shape[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6f982c1b-dfcf-4a56-a82a-cd47861db03e",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Comparison"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c5df0f91-0b6d-4152-bc42-6ac207852ba8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:00:55.715991Z",
     "iopub.status.busy": "2024-09-15T04:00:55.715462Z",
     "iopub.status.idle": "2024-09-15T04:00:55.763098Z",
     "shell.execute_reply": "2024-09-15T04:00:55.761082Z",
     "shell.execute_reply.started": "2024-09-15T04:00:55.715948Z"
    }
   },
   "outputs": [],
   "source": [
    "rcParams['font.family'] = 'Arial'\n",
    "modelorder = ['ModelFt_R0.3','Model_R0.3' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "mask = (pltdf['class']=='region')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('ModelFt_R0.3','Ours_Finetune')\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('Model_R0.3','Ours')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "319597ba-f871-41c9-8ebe-6d2b1f1972e1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:01:34.217041Z",
     "iopub.status.busy": "2024-09-15T04:01:34.216495Z",
     "iopub.status.idle": "2024-09-15T04:01:34.265126Z",
     "shell.execute_reply": "2024-09-15T04:01:34.263309Z",
     "shell.execute_reply.started": "2024-09-15T04:01:34.217001Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f09ace9c-e550-4218-bfd4-b6d57ba2aecc",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:01:39.614107Z",
     "iopub.status.busy": "2024-09-15T04:01:39.613568Z",
     "iopub.status.idle": "2024-09-15T04:01:39.660557Z",
     "shell.execute_reply": "2024-09-15T04:01:39.658444Z",
     "shell.execute_reply.started": "2024-09-15T04:01:39.614064Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "140744c5-02dc-469e-acd0-ab1c2ccf603c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:32:47.180226Z",
     "iopub.status.busy": "2024-06-30T09:32:47.180096Z",
     "iopub.status.idle": "2024-06-30T09:32:49.316418Z",
     "shell.execute_reply": "2024-06-30T09:32:49.315762Z",
     "shell.execute_reply.started": "2024-06-30T09:32:47.180213Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "rcParams['font.family'] = 'Arial'\n",
    "modelorder = ['ModelFt_R0.3','Model_R0.3' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "mask = (pltdf['class']=='region')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('ModelFt_R0.3','Ours_Finetune')\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('Model_R0.3','Ours')\n",
    "\n",
    "figsize(6,4)\n",
    "modelorder = ['Ours_Finetune','Ours' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Region',fontsize=15);\n",
    "plt.ylabel('AMI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12,color='black');\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_region.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "fbd90907-421a-4092-be0b-49955cb58587",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:23:54.092246Z",
     "iopub.status.busy": "2024-09-15T04:23:54.091679Z",
     "iopub.status.idle": "2024-09-15T04:23:54.146528Z",
     "shell.execute_reply": "2024-09-15T04:23:54.144525Z",
     "shell.execute_reply.started": "2024-09-15T04:23:54.092206Z"
    }
   },
   "outputs": [],
   "source": [
    "modelorder = ['ModelFt_R0.3','Model_R0.3' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "\n",
    "mask = (pltdf['class']=='division')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('ModelFt_R0.3','Ours_Finetune')\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('Model_R0.3','Ours')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7780e75-a10a-45de-8431-39f5d774b0b6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:24:04.319059Z",
     "iopub.status.busy": "2024-09-15T04:24:04.318517Z",
     "iopub.status.idle": "2024-09-15T04:24:04.366745Z",
     "shell.execute_reply": "2024-09-15T04:24:04.365075Z",
     "shell.execute_reply.started": "2024-09-15T04:24:04.319019Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a81a7b88-2422-4b29-808a-7c8154391485",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T04:24:12.682900Z",
     "iopub.status.busy": "2024-09-15T04:24:12.682393Z",
     "iopub.status.idle": "2024-09-15T04:24:12.728629Z",
     "shell.execute_reply": "2024-09-15T04:24:12.726782Z",
     "shell.execute_reply.started": "2024-09-15T04:24:12.682862Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df71e9aa-0ffd-40f0-9c85-665026c67f9d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:32:49.317439Z",
     "iopub.status.busy": "2024-06-30T09:32:49.317088Z",
     "iopub.status.idle": "2024-06-30T09:32:49.897675Z",
     "shell.execute_reply": "2024-06-30T09:32:49.897006Z",
     "shell.execute_reply.started": "2024-06-30T09:32:49.317424Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "modelorder = ['ModelFt_R0.3','Model_R0.3' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "\n",
    "mask = (pltdf['class']=='division')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('ModelFt_R0.3','Ours_Finetune')\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('Model_R0.3','Ours')\n",
    "\n",
    "modelorder = ['Ours_Finetune','Ours' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Division',fontsize=15);\n",
    "plt.ylabel('AMI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12);\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_division.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "id": "b331b348-b3f7-4b6c-8bff-e8289a7f1469",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:32:49.899048Z",
     "iopub.status.busy": "2024-06-30T09:32:49.898873Z",
     "iopub.status.idle": "2024-06-30T09:32:49.945096Z",
     "shell.execute_reply": "2024-06-30T09:32:49.944497Z",
     "shell.execute_reply.started": "2024-06-30T09:32:49.899036Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "resultdf = pd.read_csv('/stor/usr/sgenetmp/results/embedding/results_ARI_mouse2_0625.csv',index_col=0)\n",
    "pltdf = resultdf.melt(ignore_index=False)\n",
    "pltdf['model']=pltdf.index.str.split('_').str[1:].str.join('_')\n",
    "pltdf['class']=pltdf.index.str.split('_').str[0]\n",
    "pltdf.index = np.arange(pltdf.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5796abd-862a-42aa-8492-a34eb0076e35",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:32:49.945680Z",
     "iopub.status.busy": "2024-06-30T09:32:49.945558Z",
     "iopub.status.idle": "2024-06-30T09:32:50.532041Z",
     "shell.execute_reply": "2024-06-30T09:32:50.531405Z",
     "shell.execute_reply.started": "2024-06-30T09:32:49.945668Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "modelorder = ['ModelFt_R0.3','Model_R0.3' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "mask = (pltdf['class']=='region')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('ModelFt_R0.3','Ours_Finetune')\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('Model_R0.3','Ours')\n",
    "\n",
    "modelorder = ['Ours_Finetune','Ours' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Region',fontsize=15);\n",
    "plt.ylabel('ARI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12,color='black');\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_ari_region.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2fea268b-6eb4-4047-8a70-a6640544c0c1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T09:32:50.532805Z",
     "iopub.status.busy": "2024-06-30T09:32:50.532680Z",
     "iopub.status.idle": "2024-06-30T09:32:51.098335Z",
     "shell.execute_reply": "2024-06-30T09:32:51.097680Z",
     "shell.execute_reply.started": "2024-06-30T09:32:50.532793Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "modelorder = ['ModelFt_R0.3','Model_R0.3' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "\n",
    "mask = (pltdf['class']=='division')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('ModelFt_R0.3','Ours_Finetune')\n",
    "tmpdf.loc[:,'model'] = tmpdf.loc[:,'model'].str.replace('Model_R0.3','Ours')\n",
    "\n",
    "modelorder = ['Ours_Finetune','Ours' ,'NicheCompass', 'STAGATE','GraphST', 'Raw']\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Division',fontsize=15);\n",
    "plt.ylabel('AMI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12);\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_ari_division.pdf')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "47539122-f656-40f3-9a7f-7956d9b17d1a",
   "metadata": {},
   "source": [
    "## Region size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "4d4fdbe6-3619-44e6-896c-31c1e37e199d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:10:07.297668Z",
     "iopub.status.busy": "2024-09-15T05:10:07.297130Z",
     "iopub.status.idle": "2024-09-15T05:10:07.368977Z",
     "shell.execute_reply": "2024-09-15T05:10:07.367700Z",
     "shell.execute_reply.started": "2024-09-15T05:10:07.297628Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "resultdf = pd.read_csv('/stor/usr/sgenetmp/results/embedding/results_AMI_mouse2_0625.csv',index_col=0)\n",
    "pltdf = resultdf.melt(ignore_index=False)\n",
    "pltdf['model']=pltdf.index.str.split('_').str[1:].str.join('_')\n",
    "pltdf['class']=pltdf.index.str.split('_').str[0]\n",
    "pltdf.index = np.arange(pltdf.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "cc085919-3fa5-414b-9aef-22ae848f3e29",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:10:21.407023Z",
     "iopub.status.busy": "2024-09-15T05:10:21.406480Z",
     "iopub.status.idle": "2024-09-15T05:10:21.451784Z",
     "shell.execute_reply": "2024-09-15T05:10:21.450075Z",
     "shell.execute_reply.started": "2024-09-15T05:10:21.406983Z"
    }
   },
   "outputs": [],
   "source": [
    "modelorder = ['Model_R0.1', 'Model_R0.2', 'Model_R0.3', 'ModelFt_R0.1',\n",
    "       'ModelFt_R0.2', 'ModelFt_R0.3', 'ModelFt_R0.4', 'ModelFt_R0.45']\n",
    "mask = (pltdf['class']=='region')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eae0af3b-289e-42e5-8f64-adf8d5ca575a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:10:22.557338Z",
     "iopub.status.busy": "2024-09-15T05:10:22.556809Z",
     "iopub.status.idle": "2024-09-15T05:10:22.599272Z",
     "shell.execute_reply": "2024-09-15T05:10:22.598748Z",
     "shell.execute_reply.started": "2024-09-15T05:10:22.557299Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b8b30758-c482-46a4-8b5f-f8e91912aa83",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:10:23.393581Z",
     "iopub.status.busy": "2024-09-15T05:10:23.393087Z",
     "iopub.status.idle": "2024-09-15T05:10:23.437296Z",
     "shell.execute_reply": "2024-09-15T05:10:23.435768Z",
     "shell.execute_reply.started": "2024-09-15T05:10:23.393544Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "be458cfc-2093-4995-a008-9a2630a5f50a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:16:28.787983Z",
     "iopub.status.busy": "2024-09-15T05:16:28.787413Z",
     "iopub.status.idle": "2024-09-15T05:16:28.834528Z",
     "shell.execute_reply": "2024-09-15T05:16:28.833143Z",
     "shell.execute_reply.started": "2024-09-15T05:16:28.787941Z"
    }
   },
   "outputs": [],
   "source": [
    "modelorder = ['Model_R0.1', 'Model_R0.2', 'Model_R0.3', 'ModelFt_R0.1',\n",
    "       'ModelFt_R0.2', 'ModelFt_R0.3', 'ModelFt_R0.4', 'ModelFt_R0.45']\n",
    "mask = (pltdf['class']=='division')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f313c54-3ea4-4529-9da1-a76366c6cde9",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:16:29.427077Z",
     "iopub.status.busy": "2024-09-15T05:16:29.426575Z",
     "iopub.status.idle": "2024-09-15T05:16:29.473923Z",
     "shell.execute_reply": "2024-09-15T05:16:29.472692Z",
     "shell.execute_reply.started": "2024-09-15T05:16:29.427038Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b9b544e-2289-4461-8457-feb6e1b42e83",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:16:29.973094Z",
     "iopub.status.busy": "2024-09-15T05:16:29.972320Z",
     "iopub.status.idle": "2024-09-15T05:16:30.016415Z",
     "shell.execute_reply": "2024-09-15T05:16:30.014641Z",
     "shell.execute_reply.started": "2024-09-15T05:16:29.973053Z"
    }
   },
   "outputs": [],
   "source": [
    "tmpdf.groupby('model')['value'].std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "572983ec-4e02-4f4d-8b42-afea22d501bf",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-27T13:55:31.791813Z",
     "iopub.status.busy": "2024-06-27T13:55:31.791276Z",
     "iopub.status.idle": "2024-06-27T13:55:32.570967Z",
     "shell.execute_reply": "2024-06-27T13:55:32.569757Z",
     "shell.execute_reply.started": "2024-06-27T13:55:31.791767Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "modelorder = ['Model_R0.1', 'Model_R0.2', 'Model_R0.3', 'ModelFt_R0.1',\n",
    "       'ModelFt_R0.2', 'ModelFt_R0.3', 'ModelFt_R0.4', 'ModelFt_R0.45']\n",
    "mask = (pltdf['class']=='region')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "figsize(6,4)\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Region',fontsize=15);\n",
    "plt.ylabel('AMI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12,color='black');\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.axvline(x=3.5, color='black', linestyle='--')\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_ami_regiongradient.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd7ff450-d9b6-4751-b46d-8f2b405cbf9a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-27T13:55:35.347209Z",
     "iopub.status.busy": "2024-06-27T13:55:35.346026Z",
     "iopub.status.idle": "2024-06-27T13:55:36.137757Z",
     "shell.execute_reply": "2024-06-27T13:55:36.137127Z",
     "shell.execute_reply.started": "2024-06-27T13:55:35.347142Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "modelorder = ['Model_R0.1', 'Model_R0.2', 'Model_R0.3', 'ModelFt_R0.1',\n",
    "       'ModelFt_R0.2', 'ModelFt_R0.3', 'ModelFt_R0.4', 'ModelFt_R0.45']\n",
    "mask = (pltdf['class']=='division')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Division',fontsize=15);\n",
    "plt.ylabel('AMI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12,color='black');\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.axvline(x=3.5, color='black', linestyle='--')\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_ami_divisiongradient.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "4ffe42c0-740d-4660-82dc-81ae92f8d46c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-15T05:06:15.840729Z",
     "iopub.status.busy": "2024-09-15T05:06:15.840169Z",
     "iopub.status.idle": "2024-09-15T05:06:16.178794Z",
     "shell.execute_reply": "2024-09-15T05:06:16.176852Z",
     "shell.execute_reply.started": "2024-09-15T05:06:15.840685Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "resultdf = pd.read_csv('/stor/usr/sgenetmp/results/embedding/results_ARI_mouse2_0625.csv',index_col=0)\n",
    "pltdf = resultdf.melt(ignore_index=False)\n",
    "pltdf['model']=pltdf.index.str.split('_').str[1:].str.join('_')\n",
    "pltdf['class']=pltdf.index.str.split('_').str[0]\n",
    "pltdf.index = np.arange(pltdf.shape[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "81217833-120b-4dbb-8083-c4bdd3711ea8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-27T13:55:40.686170Z",
     "iopub.status.busy": "2024-06-27T13:55:40.685011Z",
     "iopub.status.idle": "2024-06-27T13:55:41.259476Z",
     "shell.execute_reply": "2024-06-27T13:55:41.258979Z",
     "shell.execute_reply.started": "2024-06-27T13:55:40.686104Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "modelorder = ['Model_R0.1', 'Model_R0.2', 'Model_R0.3', 'ModelFt_R0.1',\n",
    "       'ModelFt_R0.2', 'ModelFt_R0.3', 'ModelFt_R0.4', 'ModelFt_R0.45']\n",
    "mask = (pltdf['class']=='region')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Region',fontsize=15);\n",
    "plt.ylabel('ARI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12,color='black');\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.axvline(x=3.5, color='black', linestyle='--')\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_ari_regiongradient.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "353618f6-5ac1-4647-a2b9-ce1f22cb75ec",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-27T13:55:42.935331Z",
     "iopub.status.busy": "2024-06-27T13:55:42.934645Z",
     "iopub.status.idle": "2024-06-27T13:55:43.493360Z",
     "shell.execute_reply": "2024-06-27T13:55:43.492709Z",
     "shell.execute_reply.started": "2024-06-27T13:55:42.935287Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "modelorder = ['Model_R0.1', 'Model_R0.2', 'Model_R0.3', 'ModelFt_R0.1',\n",
    "       'ModelFt_R0.2', 'ModelFt_R0.3', 'ModelFt_R0.4', 'ModelFt_R0.45']\n",
    "mask = (pltdf['class']=='division')&(pltdf['model'].isin(modelorder))\n",
    "tmpdf = pltdf[mask]\n",
    "sns.boxplot(data = tmpdf,x='model',y='value',hue='model',saturation=0.7,order=modelorder,hue_order=modelorder)\n",
    "sns.stripplot(data = tmpdf,x='model',y='value',size=5,jitter=True,hue='model',edgecolor='black',linewidth=0.2,order=modelorder,hue_order=modelorder)\n",
    "palette = {variable: \"gray\" for variable in pltdf['variable'].unique()}\n",
    "sns.lineplot(data = tmpdf,x='model',y='value',hue='variable',legend=False,palette=palette, alpha=0.2)\n",
    "plt.title('Division',fontsize=15);\n",
    "plt.ylabel('ARI',fontsize=14);\n",
    "plt.xticks(rotation=30,fontsize=12,color='black');\n",
    "plt.yticks(fontsize=11);\n",
    "plt.xlabel('');\n",
    "plt.tight_layout()\n",
    "plt.axvline(x=3.5, color='black', linestyle='--')\n",
    "plt.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Clustering_ari_divisiongradient.pdf')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d87b18fc-6b2d-4a45-9101-5c98649e9408",
   "metadata": {},
   "source": [
    "# ShowCase"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "30c677bf-9ec0-4e2a-9abc-062634d71dbc",
   "metadata": {},
   "source": [
    "## region"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "4ffed23d-6f7e-45e2-9d20-16a49e5330d2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-20T05:19:25.066666Z",
     "iopub.status.busy": "2024-09-20T05:19:25.066114Z",
     "iopub.status.idle": "2024-09-20T05:19:26.327336Z",
     "shell.execute_reply": "2024-09-20T05:19:26.325510Z",
     "shell.execute_reply.started": "2024-09-20T05:19:25.066627Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "resultdf = pd.read_csv('/stor/usr/sgenetmp/results/embedding/results_AMI_mouse2_0625.csv',index_col=0)\n",
    "modelorder = ['region_ModelFt_R0.1','region_ModelFt_R0.2', 'region_ModelFt_R0.3', 'region_ModelFt_R0.4', 'region_ModelFt_R0.45']\n",
    "\n",
    "fltresults = resultdf.loc[modelorder,:]\n",
    "\n",
    "fltresults['size']=np.array([0.1,0.2,0.3,0.4,0.45])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "cf0f00bf-93b0-4b24-8047-77776850d136",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-19T04:03:15.895148Z",
     "iopub.status.busy": "2024-09-19T04:03:15.894611Z",
     "iopub.status.idle": "2024-09-19T04:03:15.942206Z",
     "shell.execute_reply": "2024-09-19T04:03:15.940416Z",
     "shell.execute_reply.started": "2024-09-19T04:03:15.895107Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "fltresults.loc['mean',:] = fltresults.mean(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e007431-b56c-45d4-bbe3-f5be6ed5d6d0",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-19T04:03:17.478363Z",
     "iopub.status.busy": "2024-09-19T04:03:17.477848Z",
     "iopub.status.idle": "2024-09-19T04:03:17.533259Z",
     "shell.execute_reply": "2024-09-19T04:03:17.531451Z",
     "shell.execute_reply.started": "2024-09-19T04:03:17.478324Z"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "fltresults.T.sort_values('mean',ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "3c76731d-23f1-4dfa-aada-243dd633dd71",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T07:59:04.116720Z",
     "iopub.status.busy": "2024-06-30T07:59:04.115475Z",
     "iopub.status.idle": "2024-06-30T07:59:04.165396Z",
     "shell.execute_reply": "2024-06-30T07:59:04.164850Z",
     "shell.execute_reply.started": "2024-06-30T07:59:04.116667Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "corrdf = {}\n",
    "for x in fltresults.columns:\n",
    "    if x =='size':\n",
    "        continue\n",
    "    corrdf[x] = {}\n",
    "    corrdf[x]['Pearson'] = fltresults[x].corr(fltresults['size'])\n",
    "    corrdf[x]['Slope'] = corrdf[x]['Pearson'] * (fltresults[x].std() / fltresults['size'].std())\n",
    "corrdf = pd.DataFrame(corrdf).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "59b3812e-9263-406f-bb6d-1797c701cb25",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T07:59:04.484832Z",
     "iopub.status.busy": "2024-06-30T07:59:04.484635Z",
     "iopub.status.idle": "2024-06-30T07:59:04.508052Z",
     "shell.execute_reply": "2024-06-30T07:59:04.507368Z",
     "shell.execute_reply.started": "2024-06-30T07:59:04.484801Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "corrdf = pd.concat([corrdf,fltresults.T],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0bb4c074-8abc-4f77-83d7-5b2282de4980",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-06-30T07:59:04.851387Z",
     "iopub.status.busy": "2024-06-30T07:59:04.850573Z",
     "iopub.status.idle": "2024-06-30T07:59:04.884811Z",
     "shell.execute_reply": "2024-06-30T07:59:04.884166Z",
     "shell.execute_reply.started": "2024-06-30T07:59:04.851340Z"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "corrdf.sort_values('Slope',ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "3399e489-f721-49a1-a8d0-ae17e3555934",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-19T04:03:37.208584Z",
     "iopub.status.busy": "2024-09-19T04:03:37.208047Z",
     "iopub.status.idle": "2024-09-19T04:57:29.211011Z",
     "shell.execute_reply": "2024-09-19T04:57:29.208983Z",
     "shell.execute_reply.started": "2024-09-19T04:03:37.208544Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "modelnames = {'Raw':'raw','Model':'adataemb','ModelFt':'adataemb_finetune',\n",
    "              'GraphST':'graphstemb','NicheCompass':'nichecompass','STAGATE':'stagater005'}\n",
    "for datasetname in ['Zhuang-ABCA-2.036']:\n",
    "    alladata = {}\n",
    "    for mname in modelnames.keys():\n",
    "        if modelnames[mname].startswith('adataemb'): \n",
    "            regionlist = [0.1,0.2,0.3]\n",
    "            if mname=='ModelFt':\n",
    "                regionlist +=[0.4,0.45]\n",
    "            for region in regionlist:\n",
    "                modeladata = sc.read_h5ad(f'/stor/usr/sgenetmp/results/embedding/mouse2/{modelnames[mname]}_{region}_{datasetname}.h5ad')\n",
    "                alladata[f'{mname}_R{region}']=modeladata\n",
    "        elif modelnames[mname].startswith('mean'): \n",
    "            regionlist = [0.1,0.2,0.3]\n",
    "            for region in regionlist:\n",
    "                modeladata = sc.read_h5ad(f'/stor/usr/sgenetmp/results/embedding/mouse2/{modelnames[mname]}_{region}_{datasetname}.h5ad')\n",
    "                alladata[f'{mname}_R{region}']=modeladata                \n",
    "        else:\n",
    "            alladata[mname]= sc.read_h5ad(f'/stor/usr/sgenetmp/results/embedding/mouse2/{modelnames[mname]}_{datasetname}.h5ad')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "fac6f8eb-d9c8-452a-90d1-fbb6eed0178a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-19T04:57:29.212505Z",
     "iopub.status.busy": "2024-09-19T04:57:29.212282Z",
     "iopub.status.idle": "2024-09-19T04:57:29.248378Z",
     "shell.execute_reply": "2024-09-19T04:57:29.246715Z",
     "shell.execute_reply.started": "2024-09-19T04:57:29.212485Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "mlist = ['Raw', 'Model_R0.1', 'Model_R0.2', 'Model_R0.3', 'ModelFt_R0.1', 'ModelFt_R0.2', 'ModelFt_R0.3', 'ModelFt_R0.4', 'ModelFt_R0.45', 'GraphST', 'NicheCompass', 'STAGATE']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9407c1ad-d85e-4dc3-b288-efc2c8442639",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-20T05:22:07.074286Z",
     "iopub.status.busy": "2024-09-20T05:22:07.073679Z",
     "iopub.status.idle": "2024-09-20T05:22:09.496131Z",
     "shell.execute_reply": "2024-09-20T05:22:09.494633Z",
     "shell.execute_reply.started": "2024-09-20T05:22:07.074243Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "classname = 'division'\n",
    "# classname = 'region'\n",
    "sc.pl.spatial(alladata['Raw'], color=classname, spot_size=0.03,legend_loc=None,title=f'Ground Truth')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "dd20704b-c91a-4ebd-8337-3d690ccc1b8d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-19T04:57:31.428685Z",
     "iopub.status.busy": "2024-09-19T04:57:31.428511Z",
     "iopub.status.idle": "2024-09-19T04:57:31.455958Z",
     "shell.execute_reply": "2024-09-19T04:57:31.454349Z",
     "shell.execute_reply.started": "2024-09-19T04:57:31.428672Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "colors = alladata['Raw'].uns[f'{classname}_colors']\n",
    "colors += colors\n",
    "region_num = len(alladata['Raw'].obs[classname].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "6ef28495-a72b-452f-9b88-a90e3e2787af",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-20T05:27:12.668667Z",
     "iopub.status.busy": "2024-09-20T05:27:12.668104Z",
     "iopub.status.idle": "2024-09-20T05:27:12.712233Z",
     "shell.execute_reply": "2024-09-20T05:27:12.710480Z",
     "shell.execute_reply.started": "2024-09-20T05:27:12.668625Z"
    }
   },
   "outputs": [],
   "source": [
    "mlist = ['Raw',\n",
    " 'Model_R0.1',\n",
    " 'Model_R0.2',\n",
    " 'Model_R0.3',\n",
    " 'ModelFt_R0.1',\n",
    " 'ModelFt_R0.2',\n",
    " 'ModelFt_R0.3',\n",
    " 'GraphST',\n",
    " 'NicheCompass',\n",
    " 'STAGATE']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02e5e413-76b3-4b68-9d9b-cdcd0c6428d6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-09-20T05:38:08.028921Z",
     "iopub.status.busy": "2024-09-20T05:38:08.028320Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from scipy.optimize import linear_sum_assignment\n",
    "from sklearn.metrics.cluster import adjusted_rand_score,adjusted_mutual_info_score\n",
    "fig, axes = plt.subplots(3, 4, figsize=(15, 12))\n",
    "for n,k in enumerate(mlist):\n",
    "    \n",
    "    categories_x = np.unique(alladata[k].obs[f'cluster_{region_num}'])\n",
    "    categories_y = np.unique(alladata[k].obs[classname])\n",
    "\n",
    "    cost_matrix = np.zeros((len(categories_x), len(categories_y)))\n",
    "\n",
    "    for i, x in enumerate(categories_x):\n",
    "        for j, y in enumerate(categories_y):\n",
    "            cost_matrix[i, j] = -((alladata[k].obs[f'cluster_{region_num}'] == x)&(alladata[k].obs[f'region'] == y)).sum()\n",
    "\n",
    "    row_ind, col_ind = linear_sum_assignment(cost_matrix)\n",
    "    color_map = {categories_x[i]: colors[j] for i, j in zip(row_ind, col_ind)}\n",
    "    for x in categories_x:\n",
    "        c=1\n",
    "        if x not in color_map.keys():\n",
    "            color_map[x]=colors[-c]\n",
    "            c+=1\n",
    "    amiscore = adjusted_mutual_info_score(alladata[k].obs[f'cluster_{region_num}'].values,alladata[k].obs[classname].values)\n",
    "    sc.pl.spatial(alladata[k], color=f'cluster_{region_num}', spot_size=0.03, ax=axes[n//4, n%4], show=False, legend_loc=None,title=f\"{k.replace('0.3','0.6').replace('0.2','0.4').replace('0.1','0.2')}({amiscore:.2f})\",palette=color_map)\n",
    "sc.pl.spatial(alladata['Raw'], color=classname, spot_size=0.03,legend_loc=None,title=f'Ground Truth',ax=axes[2,2])\n",
    "fig.delaxes(axes.flatten()[-1])\n",
    "fig.tight_layout()\n",
    "fig.savefig('/stor/usr/sgenetmp/results/figures/nichecluster/Visualization_all.pdf')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c081028-aa44-4ea9-ab98-5cf23138d41a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pt2",
   "language": "python",
   "name": "pt2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
