{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import anndata2ri\n",
    "import logging\n",
    "from scipy.sparse import issparse\n",
    "from CSCORE.CSCORE_IRLS import CSCORE_IRLS\n",
    "\n",
    "import rpy2.rinterface_lib.callbacks as rcb\n",
    "import rpy2.robjects as ro"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rcb.logger.setLevel(logging.ERROR)\n",
    "ro.pandas2ri.activate()\n",
    "anndata2ri.activate()\n",
    "%load_ext rpy2.ipython"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "library(Seurat)\n",
    "library(sctransform)\n",
    "library(Hmisc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tissue_id = \"spleen\"\n",
    "identify = \"637C\"\n",
    "\n",
    "for i in glob.glob(\"./spleen/scrna_*.h5ad\"):\n",
    "    print(i)\n",
    "\n",
    "read_path = f\"./{tissue_id}/scrna_{tissue_id}_{identify}.h5ad\"\n",
    "\n",
    "adata = sc.read_h5ad(read_path)\n",
    "\n",
    "\n",
    "adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'\n",
    "sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)\n",
    "\n",
    "sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')\n",
    "sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')\n",
    "\n",
    "adata = adata[adata.obs.n_genes_by_counts < 4000, :]\n",
    "adata = adata[adata.obs.pct_counts_mt < 20, :]\n",
    "\n",
    "adata = adata[:,adata.var['mt']==False]\n",
    "\n",
    "if issparse(adata.X):\n",
    "    if not adata.X.has_sorted_indices:\n",
    "        adata.X.sort_indices()\n",
    "ro.globalenv[\"adata\"] = adata\n",
    "\n",
    "adata.obs['n_counts'] = np.array(np.sum(adata.X, axis= 1))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "seurat_obj = as.Seurat(adata, counts=\"X\", data = NULL)\n",
    "seurat_obj = RenameAssays(seurat_obj, originalexp = \"RNA\")\n",
    "res = SCTransform(object=seurat_obj, vst.flavor = \"v2\", variable.features.n = 1000 , method = \"glmGamPoi\", verbose = FALSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gene_list = list(ro.r(\"rownames(res@assays$SCT@scale.data)\"))\n",
    "norm_x = ro.r(\"res@assays$SCT@scale.data\")\n",
    "exp_matrix = pd.DataFrame(norm_x, index=gene_list)\n",
    "adata_new = adata[:,gene_list]\n",
    "\n",
    "counts = adata_new.X\n",
    "seq_depth = adata_new.obs['n_counts'].values\n",
    "\n",
    "B_cell_result = CSCORE_IRLS(np.array(counts), seq_depth)\n",
    "p_value = B_cell_result[1]\n",
    "cor_matrix = (p_value<0.005)*1\n",
    "print(cor_matrix)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp_matrix.to_csv(f\"./{tissue_id}_atlas/scrna_{tissue_id}_{identify}\"+\"_rna_expression.csv\")\n",
    "cor_matrix = pd.DataFrame(cor_matrix, index = gene_list, columns = gene_list)\n",
    "cor_matrix.to_csv(f\"./{tissue_id}_atlas/scrna_{tissue_id}_{identify}\"+\"_pvalue.csv\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
