{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scanpy as sc\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import anndata2ri\n",
    "import logging\n",
    "from scipy.sparse import issparse\n",
    "from CSCORE.CSCORE_IRLS import CSCORE_IRLS\n",
    "\n",
    "import rpy2.rinterface_lib.callbacks as rcb\n",
    "import rpy2.robjects as ro"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only let ERROR-level messages through rpy2's callback logger.\n",
    "rcb.logger.setLevel(logging.ERROR)\n",
    "# Activate the pandas2ri and anndata2ri conversions between Python and R objects.\n",
    "ro.pandas2ri.activate()\n",
    "anndata2ri.activate()\n",
    "# Load the rpy2 IPython extension; later cells rely on its %%R cell magic.\n",
    "%load_ext rpy2.ipython"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "# R packages for the later %%R cell (as.Seurat, RenameAssays, SCTransform).\n",
    "# NOTE(review): Hmisc is not referenced in the cells visible here - confirm it is still needed.\n",
    "library(Seurat)\n",
    "library(sctransform)\n",
    "library(Hmisc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset, then apply scanpy's basic cell and gene filters.\n",
    "adata = sc.read_h5ad(\"scatac_heart_674.h5ad\")\n",
    "sc.pp.filter_cells(adata, min_genes=200)\n",
    "sc.pp.filter_genes(adata, min_cells=3)\n",
    "# Keep only the text before the first '.' of every variable name\n",
    "# (presumably strips an Ensembl version suffix - verify against the input file).\n",
    "adata.var_names = [name.partition('.')[0] for name in adata.var_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import mygene\n",
    "\n",
    "# Query MyGene.info for the symbol of every variable name, treating the\n",
    "# names as human Ensembl gene IDs.\n",
    "mg = mygene.MyGeneInfo()\n",
    "geneList = adata.var_names\n",
    "geneSyms = mg.querymany(\n",
    "    geneList,\n",
    "    scopes='ensembl.gene',\n",
    "    fields='symbol',\n",
    "    species='human',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map each queried ID to its gene symbol; hits without a 'symbol' field are skipped.\n",
    "gene_set = {hit['query']: hit['symbol'] for hit in geneSyms if 'symbol' in hit}\n",
    "\n",
    "# Keep only the genes that received a symbol. Copy after subsetting so the rename\n",
    "# below acts on a real object rather than on a view.\n",
    "adata = adata[:, list(gene_set)].copy()\n",
    "adata.var_names = [gene_set[gene_id] for gene_id in adata.var_names]\n",
    "\n",
    "# Human mitochondrial gene symbols carry the 'MT-' prefix. A bare 'MT' prefix would\n",
    "# also flag nuclear genes such as MTOR or MTHFR, which the filter below would then drop.\n",
    "adata.var['mt'] = adata.var_names.str.startswith('MT-')\n",
    "sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)\n",
    "sc.pl.scatter(adata, x='total_counts', y='pct_counts_mt')\n",
    "sc.pl.scatter(adata, x='total_counts', y='n_genes_by_counts')\n",
    "\n",
    "# Drop cells with 6000 or more detected genes.\n",
    "adata = adata[adata.obs.n_genes_by_counts < 6000, :]\n",
    "# Optional, currently disabled: also drop cells with >= 20% mitochondrial counts.\n",
    "# adata = adata[adata.obs.pct_counts_mt < 20, :]\n",
    "# Remove the mitochondrial genes themselves before saving.\n",
    "adata = adata[:, ~adata.var['mt']]\n",
    "adata.write_h5ad(\"scatac_heart_674new.h5ad\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the filtered dataset that the previous cell wrote to disk.\n",
    "adata = sc.read_h5ad(\"scatac_heart_674new.h5ad\")\n",
    "# De-duplicate variable names in place (e.g. if several IDs mapped to the same symbol).\n",
    "adata.var_names_make_unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A sparse X gets its indices sorted in place before the object is handed to R.\n",
    "if issparse(adata.X) and not adata.X.has_sorted_indices:\n",
    "    adata.X.sort_indices()\n",
    "ro.globalenv[\"adata\"] = adata\n",
    "# Row sums of X (one total per observation); added after the transfer, so the\n",
    "# R-side copy does not carry this column.\n",
    "adata.obs['n_counts'] = np.array(adata.X.sum(axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%R\n",
    "# Build a Seurat object from the object passed in from Python, using \"X\" as counts.\n",
    "seurat_obj <- as.Seurat(adata, counts = \"X\", data = NULL)\n",
    "# Rename the \"originalexp\" assay to \"RNA\".\n",
    "seurat_obj <- RenameAssays(seurat_obj, originalexp = \"RNA\")\n",
    "# Run SCTransform (v2 flavor, glmGamPoi method) with 1000 variable features.\n",
    "res <- SCTransform(\n",
    "  object = seurat_obj,\n",
    "  vst.flavor = \"v2\",\n",
    "  variable.features.n = 1000,\n",
    "  method = \"glmGamPoi\",\n",
    "  verbose = FALSE\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the SCT scale.data matrix and its row names (genes) back from R.\n",
    "gene_list = list(ro.r(\"rownames(res@assays$SCT@scale.data)\"))\n",
    "norm_x = ro.r(\"res@assays$SCT@scale.data\")\n",
    "exp_matrix = pd.DataFrame(norm_x, index=gene_list)\n",
    "\n",
    "# Restrict the count data to the same genes.\n",
    "# NOTE(review): this assumes the R-side row names still match adata.var_names exactly - confirm.\n",
    "adata_new = adata[:, gene_list]\n",
    "# X may be sparse or dense (an earlier cell guards with issparse). .todense() exists only\n",
    "# on sparse matrices, so branch here and always end up with a plain 2-D ndarray.\n",
    "raw_x = adata_new.X\n",
    "counts = raw_x.toarray() if issparse(raw_x) else np.asarray(raw_x)\n",
    "seq_depth = adata_new.obs['n_counts'].values\n",
    "\n",
    "B_cell_result = CSCORE_IRLS(counts, seq_depth)\n",
    "\n",
    "# Element [1] of the result is used as the p-value matrix; binarize it at 0.005\n",
    "# (1 where p < 0.005, else 0).\n",
    "p_value = B_cell_result[1]\n",
    "cor_matrix = (p_value < 0.005) * 1\n",
    "print(cor_matrix)\n",
    "\n",
    "# Write the scaled expression matrix and the binarized gene-by-gene matrix.\n",
    "exp_matrix.to_csv(\"./heart_atlas/scatac_heart_674_rna_expression.csv\")\n",
    "cor_matrix = pd.DataFrame(cor_matrix, index=gene_list, columns=gene_list)\n",
    "cor_matrix.to_csv(\"./heart_atlas/scatac_heart_674_pvalue.csv\")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
