{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "import scanpy as sc\n",
    "import anndata as ad\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.cluster import MiniBatchKMeans, KMeans\n",
    "import yaml\n",
    "import os, time\n",
    "\n",
    "def calculate_metacell(yaml_path, output_path, num_metacells=3000, num_pcs=500):\n",
    "    \"\"\"\n",
    "    Calculate metacells through clustering.\n",
    "\n",
    "    All training samples are concatenated, log-normalized if the value range\n",
    "    suggests raw counts, projected with PCA and clustered with MiniBatchKMeans.\n",
    "    Every non-empty cluster becomes one metacell, described by the mean\n",
    "    expression and the mean PCA coordinates of its member cells. The metacells\n",
    "    are then grouped into 5, 10 and 15 coarser clusters with KMeans.\n",
    "\n",
    "    Args:\n",
    "        yaml_path (str): The path to a YAML file whose 'train' entry lists the h5ad files to be processed.\n",
    "        output_path (str): The directory where the results will be saved. It is created if missing; a trailing separator is optional.\n",
    "        num_metacells (int, optional): The number of metacells requested from the clustering. Defaults to 3000.\n",
    "        num_pcs (int, optional): The number of principal components (PCs) to be used in the analysis. Defaults to 500.\n",
    "\n",
    "    Returns:\n",
    "        Anndata: The Anndata object with the metacell index of every cell in obs['metacell']. It is just for testing the mapping accuracy.\n",
    "\n",
    "    Files written to output_path:\n",
    "        integrated.h5ad   concatenated data including the PCA (reused on later calls)\n",
    "        mean.npy          mean of adata.X over all cells\n",
    "        PCs.npy           the first num_pcs columns of varm['PCs']\n",
    "        metacell_expr.npy mean expression per metacell (numeric array, one row per metacell)\n",
    "        metacell_pca.npy  mean PCA coordinates per metacell (numeric array, one row per metacell)\n",
    "        kmdict.npy        KMeans group of every metacell for k = 5, 10, 15 (one column per k)\n",
    "    \"\"\"\n",
    "    integrated_file = os.path.join(output_path, \"integrated.h5ad\")\n",
    "\n",
    "    ## check if previous results exist in the output path\n",
    "    if os.path.exists(integrated_file):\n",
    "        adata = sc.read_h5ad(integrated_file)\n",
    "        print(\"Loaded previous results\")\n",
    "    else:\n",
    "        ## the directory may already exist without the cache file (e.g. after an interrupted run)\n",
    "        os.makedirs(output_path, exist_ok=True)\n",
    "        ## read the data one-by-one and concatenate into one scanpy object\n",
    "        with open(yaml_path, 'r') as f:\n",
    "            sample_name = yaml.safe_load(f)\n",
    "        adata_list = [sc.read_h5ad(s) for s in sample_name['train']]\n",
    "        print(f\"Loaded {len(adata_list)} samples\")\n",
    "        adata = ad.concat(adata_list)\n",
    "        ## check if the data is log-transformed (heuristic: all values lie within [0, 15])\n",
    "        if adata.X.min() >= 0 and adata.X.max() <= 15:\n",
    "            print(\"Original data is log-transformed\")\n",
    "        else:\n",
    "            print(\"Original data is not log-transformed\")\n",
    "            sc.pp.normalize_total(adata, target_sum=1e4)\n",
    "            sc.pp.log1p(adata)\n",
    "        sc.tl.pca(adata, n_comps=num_pcs)\n",
    "        adata.write(integrated_file)\n",
    "\n",
    "    print(f\"{adata.shape[0]} cells and {adata.shape[1]} genes in total\")\n",
    "    ## a cached file may hold no PCA at all or fewer PCs than requested\n",
    "    if 'PCs' not in adata.varm.keys() or adata.varm['PCs'].shape[1] < num_pcs:\n",
    "        sc.tl.pca(adata, n_comps=num_pcs)\n",
    "        adata.write(integrated_file)\n",
    "\n",
    "    ## process the data and perform clustering\n",
    "    print(\"Performing KMeans clustering\")\n",
    "    start = time.time()\n",
    "    cell_pca = adata.obsm['X_pca'][:, :num_pcs]\n",
    "    kmeans = MiniBatchKMeans(n_clusters=num_metacells,\n",
    "                             batch_size=327680, ## recommend smaller batchsize for faster performance\n",
    "                             max_iter=200,\n",
    "                             random_state=0)\n",
    "    kmeans.fit(cell_pca)\n",
    "    ## np.unique lists the labels that are actually used in ascending order and\n",
    "    ## gives every cell its position in that list, so the metacell indices are\n",
    "    ## contiguous (0..k-1) and do not depend on set iteration order\n",
    "    used_labels, cell_to_metacell = np.unique(kmeans.labels_, return_inverse=True)\n",
    "    cell_to_metacell = cell_to_metacell.ravel()\n",
    "    adata.obs['metacell'] = cell_to_metacell.astype(kmeans.labels_.dtype)\n",
    "    num_metacells = len(used_labels)\n",
    "    print(f\"Clustering took {time.time()-start:.2f} seconds\")\n",
    "    print(f\"Yielded {num_metacells} metacells using {num_pcs} PCs.\")\n",
    "\n",
    "    ## calculate the mean expression and pca of the metacells\n",
    "    ## sort the cells by metacell once; bounds[i]:bounds[i+1] then delimits the\n",
    "    ## members of metacell i, which avoids one full boolean mask per metacell\n",
    "    order = np.argsort(cell_to_metacell, kind='stable')\n",
    "    bounds = np.searchsorted(cell_to_metacell[order], np.arange(num_metacells + 1))\n",
    "    expr = adata.X\n",
    "    expr_rows, pca_rows = [], []\n",
    "    for i in range(num_metacells):\n",
    "        members = order[bounds[i]:bounds[i + 1]]\n",
    "        ## np.asarray(...).ravel() flattens the (1, n_genes) matrix a sparse X returns\n",
    "        expr_rows.append(np.asarray(expr[members].mean(axis=0)).ravel())\n",
    "        pca_rows.append(cell_pca[members].mean(axis=0))\n",
    "    ## plain numeric arrays: np.save stores them without pickling, so np.load\n",
    "    ## can read them with its default allow_pickle=False\n",
    "    metacell_expr = np.vstack(expr_rows)\n",
    "    metacell_pca = np.vstack(pca_rows)\n",
    "\n",
    "    ## group the metacells\n",
    "    ## cast to float64 so the grouping runs in double precision regardless of the PCA dtype\n",
    "    group_input = metacell_pca.astype(np.float64)\n",
    "    kmdict = np.array([\n",
    "        KMeans(n_clusters=k, random_state=0, n_init=\"auto\").fit(group_input).labels_\n",
    "        for k in (5, 10, 15)\n",
    "    ]).T\n",
    "\n",
    "    ## save the results\n",
    "    np.save(os.path.join(output_path, \"mean.npy\"), adata.X.mean(0))\n",
    "    np.save(os.path.join(output_path, \"PCs.npy\"), adata.varm['PCs'][:, :num_pcs])\n",
    "    np.save(os.path.join(output_path, \"metacell_expr.npy\"), metacell_expr)\n",
    "    np.save(os.path.join(output_path, \"metacell_pca.npy\"), metacell_pca)\n",
    "    np.save(os.path.join(output_path, \"kmdict.npy\"), kmdict)\n",
    "    print(\"Results saved.\")\n",
    "\n",
    "\n",
    "    ## just for testing\n",
    "    return adata\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## build the metacells for the samples listed in data_merfish.yaml\n",
    "yaml_path = './data_merfish.yaml'\n",
    "output_path = './metacell/'\n",
    "adata = calculate_metacell(yaml_path=yaml_path, output_path=output_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PC0</th>\n",
       "      <th>PC1</th>\n",
       "      <th>PC2</th>\n",
       "      <th>PC3</th>\n",
       "      <th>PC4</th>\n",
       "      <th>PC5</th>\n",
       "      <th>PC6</th>\n",
       "      <th>PC7</th>\n",
       "      <th>PC8</th>\n",
       "      <th>PC9</th>\n",
       "      <th>...</th>\n",
       "      <th>PC490</th>\n",
       "      <th>PC491</th>\n",
       "      <th>PC492</th>\n",
       "      <th>PC493</th>\n",
       "      <th>PC494</th>\n",
       "      <th>PC495</th>\n",
       "      <th>PC496</th>\n",
       "      <th>PC497</th>\n",
       "      <th>PC498</th>\n",
       "      <th>PC499</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>meta_pca_0</th>\n",
       "      <td>-4.528443</td>\n",
       "      <td>-2.764500</td>\n",
       "      <td>0.204084</td>\n",
       "      <td>2.204027</td>\n",
       "      <td>-5.546653</td>\n",
       "      <td>-3.119863</td>\n",
       "      <td>-3.238143</td>\n",
       "      <td>3.798974</td>\n",
       "      <td>-0.430050</td>\n",
       "      <td>1.425821</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.013971</td>\n",
       "      <td>0.044268</td>\n",
       "      <td>-0.025512</td>\n",
       "      <td>0.056957</td>\n",
       "      <td>0.080754</td>\n",
       "      <td>-0.041736</td>\n",
       "      <td>0.020152</td>\n",
       "      <td>-0.029999</td>\n",
       "      <td>-0.021017</td>\n",
       "      <td>-0.013270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_1</th>\n",
       "      <td>-1.184810</td>\n",
       "      <td>5.465216</td>\n",
       "      <td>2.296548</td>\n",
       "      <td>-3.239223</td>\n",
       "      <td>-0.204140</td>\n",
       "      <td>-1.637721</td>\n",
       "      <td>1.593285</td>\n",
       "      <td>-0.189306</td>\n",
       "      <td>1.432100</td>\n",
       "      <td>0.519004</td>\n",
       "      <td>...</td>\n",
       "      <td>0.016356</td>\n",
       "      <td>-0.009680</td>\n",
       "      <td>-0.004169</td>\n",
       "      <td>0.007520</td>\n",
       "      <td>-0.014184</td>\n",
       "      <td>0.003578</td>\n",
       "      <td>0.003657</td>\n",
       "      <td>0.011310</td>\n",
       "      <td>-0.007444</td>\n",
       "      <td>-0.016586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_2</th>\n",
       "      <td>-3.743690</td>\n",
       "      <td>-2.714610</td>\n",
       "      <td>0.045413</td>\n",
       "      <td>2.645315</td>\n",
       "      <td>-4.551899</td>\n",
       "      <td>1.051311</td>\n",
       "      <td>0.167849</td>\n",
       "      <td>-4.500321</td>\n",
       "      <td>1.454891</td>\n",
       "      <td>-1.626905</td>\n",
       "      <td>...</td>\n",
       "      <td>0.119189</td>\n",
       "      <td>0.492453</td>\n",
       "      <td>0.069747</td>\n",
       "      <td>-0.783184</td>\n",
       "      <td>0.785393</td>\n",
       "      <td>-0.025167</td>\n",
       "      <td>0.018348</td>\n",
       "      <td>-0.594667</td>\n",
       "      <td>-0.669811</td>\n",
       "      <td>-0.737063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_3</th>\n",
       "      <td>-1.081812</td>\n",
       "      <td>0.052769</td>\n",
       "      <td>-0.188333</td>\n",
       "      <td>4.373316</td>\n",
       "      <td>1.957066</td>\n",
       "      <td>1.151244</td>\n",
       "      <td>-1.664828</td>\n",
       "      <td>-2.752620</td>\n",
       "      <td>-1.156565</td>\n",
       "      <td>1.481478</td>\n",
       "      <td>...</td>\n",
       "      <td>0.270531</td>\n",
       "      <td>-0.351045</td>\n",
       "      <td>0.257824</td>\n",
       "      <td>0.260153</td>\n",
       "      <td>0.359099</td>\n",
       "      <td>-0.283829</td>\n",
       "      <td>-0.434999</td>\n",
       "      <td>0.516117</td>\n",
       "      <td>0.249920</td>\n",
       "      <td>0.081426</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_4</th>\n",
       "      <td>0.708829</td>\n",
       "      <td>5.461336</td>\n",
       "      <td>-5.595344</td>\n",
       "      <td>-0.014970</td>\n",
       "      <td>-2.635151</td>\n",
       "      <td>-2.701778</td>\n",
       "      <td>-1.691776</td>\n",
       "      <td>1.884805</td>\n",
       "      <td>1.448144</td>\n",
       "      <td>-0.616171</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.188771</td>\n",
       "      <td>-0.326973</td>\n",
       "      <td>0.095400</td>\n",
       "      <td>0.104225</td>\n",
       "      <td>-0.160484</td>\n",
       "      <td>-0.015747</td>\n",
       "      <td>0.357049</td>\n",
       "      <td>-0.097271</td>\n",
       "      <td>-0.473582</td>\n",
       "      <td>-0.193636</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_2995</th>\n",
       "      <td>1.914200</td>\n",
       "      <td>4.198896</td>\n",
       "      <td>-4.603026</td>\n",
       "      <td>-0.448284</td>\n",
       "      <td>-0.278186</td>\n",
       "      <td>0.845025</td>\n",
       "      <td>-0.866848</td>\n",
       "      <td>-0.678358</td>\n",
       "      <td>-0.812415</td>\n",
       "      <td>-1.377833</td>\n",
       "      <td>...</td>\n",
       "      <td>0.043532</td>\n",
       "      <td>-0.048676</td>\n",
       "      <td>-0.040588</td>\n",
       "      <td>0.034951</td>\n",
       "      <td>0.008944</td>\n",
       "      <td>-0.007064</td>\n",
       "      <td>0.011344</td>\n",
       "      <td>-0.032774</td>\n",
       "      <td>0.072806</td>\n",
       "      <td>0.039972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_2996</th>\n",
       "      <td>7.832729</td>\n",
       "      <td>-4.125685</td>\n",
       "      <td>1.358982</td>\n",
       "      <td>-0.804107</td>\n",
       "      <td>0.126665</td>\n",
       "      <td>-0.831557</td>\n",
       "      <td>0.068182</td>\n",
       "      <td>0.456288</td>\n",
       "      <td>0.620467</td>\n",
       "      <td>0.230441</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.043833</td>\n",
       "      <td>0.147033</td>\n",
       "      <td>0.147205</td>\n",
       "      <td>-0.174209</td>\n",
       "      <td>-0.162566</td>\n",
       "      <td>-0.182836</td>\n",
       "      <td>0.120269</td>\n",
       "      <td>-0.270309</td>\n",
       "      <td>-0.090341</td>\n",
       "      <td>0.089590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_2997</th>\n",
       "      <td>5.505001</td>\n",
       "      <td>-1.650820</td>\n",
       "      <td>1.268998</td>\n",
       "      <td>-0.034423</td>\n",
       "      <td>0.044158</td>\n",
       "      <td>0.295871</td>\n",
       "      <td>-0.987080</td>\n",
       "      <td>0.321371</td>\n",
       "      <td>-0.363875</td>\n",
       "      <td>-0.699090</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.001711</td>\n",
       "      <td>0.007770</td>\n",
       "      <td>-0.018306</td>\n",
       "      <td>-0.009216</td>\n",
       "      <td>-0.010985</td>\n",
       "      <td>-0.013905</td>\n",
       "      <td>0.027204</td>\n",
       "      <td>0.006671</td>\n",
       "      <td>-0.002955</td>\n",
       "      <td>0.010073</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_2998</th>\n",
       "      <td>0.496271</td>\n",
       "      <td>4.191075</td>\n",
       "      <td>-7.088961</td>\n",
       "      <td>-3.057764</td>\n",
       "      <td>-0.799768</td>\n",
       "      <td>-0.415481</td>\n",
       "      <td>-0.815531</td>\n",
       "      <td>0.768218</td>\n",
       "      <td>-0.444590</td>\n",
       "      <td>-0.206377</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.033995</td>\n",
       "      <td>-0.004664</td>\n",
       "      <td>0.076158</td>\n",
       "      <td>0.083602</td>\n",
       "      <td>-0.030803</td>\n",
       "      <td>0.131021</td>\n",
       "      <td>0.160362</td>\n",
       "      <td>-0.103967</td>\n",
       "      <td>0.143163</td>\n",
       "      <td>-0.033488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>meta_pca_2999</th>\n",
       "      <td>-5.582205</td>\n",
       "      <td>-4.293712</td>\n",
       "      <td>-0.843856</td>\n",
       "      <td>-4.898518</td>\n",
       "      <td>0.611782</td>\n",
       "      <td>-0.134582</td>\n",
       "      <td>0.182743</td>\n",
       "      <td>-0.173399</td>\n",
       "      <td>0.256055</td>\n",
       "      <td>-1.114440</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.013051</td>\n",
       "      <td>-0.061565</td>\n",
       "      <td>0.023300</td>\n",
       "      <td>-0.016409</td>\n",
       "      <td>-0.048254</td>\n",
       "      <td>0.135349</td>\n",
       "      <td>-0.009455</td>\n",
       "      <td>0.065485</td>\n",
       "      <td>0.091954</td>\n",
       "      <td>-0.062281</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3000 rows × 500 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    PC0       PC1       PC2       PC3       PC4       PC5  \\\n",
       "meta_pca_0    -4.528443 -2.764500  0.204084  2.204027 -5.546653 -3.119863   \n",
       "meta_pca_1    -1.184810  5.465216  2.296548 -3.239223 -0.204140 -1.637721   \n",
       "meta_pca_2    -3.743690 -2.714610  0.045413  2.645315 -4.551899  1.051311   \n",
       "meta_pca_3    -1.081812  0.052769 -0.188333  4.373316  1.957066  1.151244   \n",
       "meta_pca_4     0.708829  5.461336 -5.595344 -0.014970 -2.635151 -2.701778   \n",
       "...                 ...       ...       ...       ...       ...       ...   \n",
       "meta_pca_2995  1.914200  4.198896 -4.603026 -0.448284 -0.278186  0.845025   \n",
       "meta_pca_2996  7.832729 -4.125685  1.358982 -0.804107  0.126665 -0.831557   \n",
       "meta_pca_2997  5.505001 -1.650820  1.268998 -0.034423  0.044158  0.295871   \n",
       "meta_pca_2998  0.496271  4.191075 -7.088961 -3.057764 -0.799768 -0.415481   \n",
       "meta_pca_2999 -5.582205 -4.293712 -0.843856 -4.898518  0.611782 -0.134582   \n",
       "\n",
       "                    PC6       PC7       PC8       PC9  ...     PC490  \\\n",
       "meta_pca_0    -3.238143  3.798974 -0.430050  1.425821  ... -0.013971   \n",
       "meta_pca_1     1.593285 -0.189306  1.432100  0.519004  ...  0.016356   \n",
       "meta_pca_2     0.167849 -4.500321  1.454891 -1.626905  ...  0.119189   \n",
       "meta_pca_3    -1.664828 -2.752620 -1.156565  1.481478  ...  0.270531   \n",
       "meta_pca_4    -1.691776  1.884805  1.448144 -0.616171  ... -0.188771   \n",
       "...                 ...       ...       ...       ...  ...       ...   \n",
       "meta_pca_2995 -0.866848 -0.678358 -0.812415 -1.377833  ...  0.043532   \n",
       "meta_pca_2996  0.068182  0.456288  0.620467  0.230441  ... -0.043833   \n",
       "meta_pca_2997 -0.987080  0.321371 -0.363875 -0.699090  ... -0.001711   \n",
       "meta_pca_2998 -0.815531  0.768218 -0.444590 -0.206377  ... -0.033995   \n",
       "meta_pca_2999  0.182743 -0.173399  0.256055 -1.114440  ... -0.013051   \n",
       "\n",
       "                  PC491     PC492     PC493     PC494     PC495     PC496  \\\n",
       "meta_pca_0     0.044268 -0.025512  0.056957  0.080754 -0.041736  0.020152   \n",
       "meta_pca_1    -0.009680 -0.004169  0.007520 -0.014184  0.003578  0.003657   \n",
       "meta_pca_2     0.492453  0.069747 -0.783184  0.785393 -0.025167  0.018348   \n",
       "meta_pca_3    -0.351045  0.257824  0.260153  0.359099 -0.283829 -0.434999   \n",
       "meta_pca_4    -0.326973  0.095400  0.104225 -0.160484 -0.015747  0.357049   \n",
       "...                 ...       ...       ...       ...       ...       ...   \n",
       "meta_pca_2995 -0.048676 -0.040588  0.034951  0.008944 -0.007064  0.011344   \n",
       "meta_pca_2996  0.147033  0.147205 -0.174209 -0.162566 -0.182836  0.120269   \n",
       "meta_pca_2997  0.007770 -0.018306 -0.009216 -0.010985 -0.013905  0.027204   \n",
       "meta_pca_2998 -0.004664  0.076158  0.083602 -0.030803  0.131021  0.160362   \n",
       "meta_pca_2999 -0.061565  0.023300 -0.016409 -0.048254  0.135349 -0.009455   \n",
       "\n",
       "                  PC497     PC498     PC499  \n",
       "meta_pca_0    -0.029999 -0.021017 -0.013270  \n",
       "meta_pca_1     0.011310 -0.007444 -0.016586  \n",
       "meta_pca_2    -0.594667 -0.669811 -0.737063  \n",
       "meta_pca_3     0.516117  0.249920  0.081426  \n",
       "meta_pca_4    -0.097271 -0.473582 -0.193636  \n",
       "...                 ...       ...       ...  \n",
       "meta_pca_2995 -0.032774  0.072806  0.039972  \n",
       "meta_pca_2996 -0.270309 -0.090341  0.089590  \n",
       "meta_pca_2997  0.006671 -0.002955  0.010073  \n",
       "meta_pca_2998 -0.103967  0.143163 -0.033488  \n",
       "meta_pca_2999  0.065485  0.091954 -0.062281  \n",
       "\n",
       "[3000 rows x 500 columns]"
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca_ori"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>PC0</th>\n",
       "      <th>PC1</th>\n",
       "      <th>PC2</th>\n",
       "      <th>PC3</th>\n",
       "      <th>PC4</th>\n",
       "      <th>PC5</th>\n",
       "      <th>PC6</th>\n",
       "      <th>PC7</th>\n",
       "      <th>PC8</th>\n",
       "      <th>PC9</th>\n",
       "      <th>...</th>\n",
       "      <th>PC490</th>\n",
       "      <th>PC491</th>\n",
       "      <th>PC492</th>\n",
       "      <th>PC493</th>\n",
       "      <th>PC494</th>\n",
       "      <th>PC495</th>\n",
       "      <th>PC496</th>\n",
       "      <th>PC497</th>\n",
       "      <th>PC498</th>\n",
       "      <th>PC499</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>pca_0</th>\n",
       "      <td>-4.528443</td>\n",
       "      <td>-2.764500</td>\n",
       "      <td>0.204084</td>\n",
       "      <td>2.204027</td>\n",
       "      <td>-5.546653</td>\n",
       "      <td>-3.119863</td>\n",
       "      <td>-3.238143</td>\n",
       "      <td>3.798974</td>\n",
       "      <td>-0.430050</td>\n",
       "      <td>1.425821</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.013971</td>\n",
       "      <td>0.044268</td>\n",
       "      <td>-0.025512</td>\n",
       "      <td>0.056957</td>\n",
       "      <td>0.080754</td>\n",
       "      <td>-0.041736</td>\n",
       "      <td>0.020152</td>\n",
       "      <td>-0.029999</td>\n",
       "      <td>-0.021017</td>\n",
       "      <td>-0.013270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_1</th>\n",
       "      <td>-1.184810</td>\n",
       "      <td>5.465216</td>\n",
       "      <td>2.296548</td>\n",
       "      <td>-3.239223</td>\n",
       "      <td>-0.204140</td>\n",
       "      <td>-1.637721</td>\n",
       "      <td>1.593285</td>\n",
       "      <td>-0.189306</td>\n",
       "      <td>1.432100</td>\n",
       "      <td>0.519004</td>\n",
       "      <td>...</td>\n",
       "      <td>0.016356</td>\n",
       "      <td>-0.009680</td>\n",
       "      <td>-0.004169</td>\n",
       "      <td>0.007520</td>\n",
       "      <td>-0.014184</td>\n",
       "      <td>0.003578</td>\n",
       "      <td>0.003657</td>\n",
       "      <td>0.011310</td>\n",
       "      <td>-0.007444</td>\n",
       "      <td>-0.016586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_2</th>\n",
       "      <td>-3.743690</td>\n",
       "      <td>-2.714610</td>\n",
       "      <td>0.045413</td>\n",
       "      <td>2.645315</td>\n",
       "      <td>-4.551899</td>\n",
       "      <td>1.051311</td>\n",
       "      <td>0.167849</td>\n",
       "      <td>-4.500321</td>\n",
       "      <td>1.454891</td>\n",
       "      <td>-1.626905</td>\n",
       "      <td>...</td>\n",
       "      <td>0.119189</td>\n",
       "      <td>0.492453</td>\n",
       "      <td>0.069747</td>\n",
       "      <td>-0.783184</td>\n",
       "      <td>0.785393</td>\n",
       "      <td>-0.025167</td>\n",
       "      <td>0.018348</td>\n",
       "      <td>-0.594667</td>\n",
       "      <td>-0.669811</td>\n",
       "      <td>-0.737063</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_3</th>\n",
       "      <td>-1.081812</td>\n",
       "      <td>0.052769</td>\n",
       "      <td>-0.188333</td>\n",
       "      <td>4.373316</td>\n",
       "      <td>1.957066</td>\n",
       "      <td>1.151244</td>\n",
       "      <td>-1.664828</td>\n",
       "      <td>-2.752620</td>\n",
       "      <td>-1.156565</td>\n",
       "      <td>1.481478</td>\n",
       "      <td>...</td>\n",
       "      <td>0.270531</td>\n",
       "      <td>-0.351045</td>\n",
       "      <td>0.257824</td>\n",
       "      <td>0.260153</td>\n",
       "      <td>0.359099</td>\n",
       "      <td>-0.283829</td>\n",
       "      <td>-0.434999</td>\n",
       "      <td>0.516117</td>\n",
       "      <td>0.249920</td>\n",
       "      <td>0.081426</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_4</th>\n",
       "      <td>0.708829</td>\n",
       "      <td>5.461336</td>\n",
       "      <td>-5.595344</td>\n",
       "      <td>-0.014970</td>\n",
       "      <td>-2.635151</td>\n",
       "      <td>-2.701778</td>\n",
       "      <td>-1.691776</td>\n",
       "      <td>1.884805</td>\n",
       "      <td>1.448144</td>\n",
       "      <td>-0.616171</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.188771</td>\n",
       "      <td>-0.326973</td>\n",
       "      <td>0.095400</td>\n",
       "      <td>0.104225</td>\n",
       "      <td>-0.160484</td>\n",
       "      <td>-0.015747</td>\n",
       "      <td>0.357049</td>\n",
       "      <td>-0.097271</td>\n",
       "      <td>-0.473582</td>\n",
       "      <td>-0.193636</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_2995</th>\n",
       "      <td>1.914200</td>\n",
       "      <td>4.198896</td>\n",
       "      <td>-4.603026</td>\n",
       "      <td>-0.448284</td>\n",
       "      <td>-0.278186</td>\n",
       "      <td>0.845025</td>\n",
       "      <td>-0.866848</td>\n",
       "      <td>-0.678358</td>\n",
       "      <td>-0.812415</td>\n",
       "      <td>-1.377833</td>\n",
       "      <td>...</td>\n",
       "      <td>0.043532</td>\n",
       "      <td>-0.048676</td>\n",
       "      <td>-0.040588</td>\n",
       "      <td>0.034951</td>\n",
       "      <td>0.008944</td>\n",
       "      <td>-0.007064</td>\n",
       "      <td>0.011344</td>\n",
       "      <td>-0.032774</td>\n",
       "      <td>0.072806</td>\n",
       "      <td>0.039972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_2996</th>\n",
       "      <td>7.832729</td>\n",
       "      <td>-4.125685</td>\n",
       "      <td>1.358982</td>\n",
       "      <td>-0.804107</td>\n",
       "      <td>0.126665</td>\n",
       "      <td>-0.831557</td>\n",
       "      <td>0.068182</td>\n",
       "      <td>0.456288</td>\n",
       "      <td>0.620467</td>\n",
       "      <td>0.230441</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.043833</td>\n",
       "      <td>0.147033</td>\n",
       "      <td>0.147205</td>\n",
       "      <td>-0.174209</td>\n",
       "      <td>-0.162566</td>\n",
       "      <td>-0.182836</td>\n",
       "      <td>0.120269</td>\n",
       "      <td>-0.270309</td>\n",
       "      <td>-0.090341</td>\n",
       "      <td>0.089590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_2997</th>\n",
       "      <td>5.505002</td>\n",
       "      <td>-1.650820</td>\n",
       "      <td>1.268998</td>\n",
       "      <td>-0.034423</td>\n",
       "      <td>0.044158</td>\n",
       "      <td>0.295871</td>\n",
       "      <td>-0.987080</td>\n",
       "      <td>0.321371</td>\n",
       "      <td>-0.363875</td>\n",
       "      <td>-0.699090</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.001711</td>\n",
       "      <td>0.007770</td>\n",
       "      <td>-0.018306</td>\n",
       "      <td>-0.009216</td>\n",
       "      <td>-0.010985</td>\n",
       "      <td>-0.013905</td>\n",
       "      <td>0.027204</td>\n",
       "      <td>0.006671</td>\n",
       "      <td>-0.002955</td>\n",
       "      <td>0.010073</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_2998</th>\n",
       "      <td>0.496271</td>\n",
       "      <td>4.191075</td>\n",
       "      <td>-7.088961</td>\n",
       "      <td>-3.057764</td>\n",
       "      <td>-0.799768</td>\n",
       "      <td>-0.415481</td>\n",
       "      <td>-0.815531</td>\n",
       "      <td>0.768218</td>\n",
       "      <td>-0.444590</td>\n",
       "      <td>-0.206377</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.033995</td>\n",
       "      <td>-0.004664</td>\n",
       "      <td>0.076158</td>\n",
       "      <td>0.083602</td>\n",
       "      <td>-0.030803</td>\n",
       "      <td>0.131021</td>\n",
       "      <td>0.160362</td>\n",
       "      <td>-0.103967</td>\n",
       "      <td>0.143163</td>\n",
       "      <td>-0.033488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pca_2999</th>\n",
       "      <td>-5.582205</td>\n",
       "      <td>-4.293712</td>\n",
       "      <td>-0.843856</td>\n",
       "      <td>-4.898518</td>\n",
       "      <td>0.611782</td>\n",
       "      <td>-0.134582</td>\n",
       "      <td>0.182743</td>\n",
       "      <td>-0.173399</td>\n",
       "      <td>0.256055</td>\n",
       "      <td>-1.114440</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.013051</td>\n",
       "      <td>-0.061565</td>\n",
       "      <td>0.023300</td>\n",
       "      <td>-0.016409</td>\n",
       "      <td>-0.048254</td>\n",
       "      <td>0.135349</td>\n",
       "      <td>-0.009455</td>\n",
       "      <td>0.065485</td>\n",
       "      <td>0.091954</td>\n",
       "      <td>-0.062281</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3000 rows × 500 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               PC0       PC1       PC2       PC3       PC4       PC5  \\\n",
       "pca_0    -4.528443 -2.764500  0.204084  2.204027 -5.546653 -3.119863   \n",
       "pca_1    -1.184810  5.465216  2.296548 -3.239223 -0.204140 -1.637721   \n",
       "pca_2    -3.743690 -2.714610  0.045413  2.645315 -4.551899  1.051311   \n",
       "pca_3    -1.081812  0.052769 -0.188333  4.373316  1.957066  1.151244   \n",
       "pca_4     0.708829  5.461336 -5.595344 -0.014970 -2.635151 -2.701778   \n",
       "...            ...       ...       ...       ...       ...       ...   \n",
       "pca_2995  1.914200  4.198896 -4.603026 -0.448284 -0.278186  0.845025   \n",
       "pca_2996  7.832729 -4.125685  1.358982 -0.804107  0.126665 -0.831557   \n",
       "pca_2997  5.505002 -1.650820  1.268998 -0.034423  0.044158  0.295871   \n",
       "pca_2998  0.496271  4.191075 -7.088961 -3.057764 -0.799768 -0.415481   \n",
       "pca_2999 -5.582205 -4.293712 -0.843856 -4.898518  0.611782 -0.134582   \n",
       "\n",
       "               PC6       PC7       PC8       PC9  ...     PC490     PC491  \\\n",
       "pca_0    -3.238143  3.798974 -0.430050  1.425821  ... -0.013971  0.044268   \n",
       "pca_1     1.593285 -0.189306  1.432100  0.519004  ...  0.016356 -0.009680   \n",
       "pca_2     0.167849 -4.500321  1.454891 -1.626905  ...  0.119189  0.492453   \n",
       "pca_3    -1.664828 -2.752620 -1.156565  1.481478  ...  0.270531 -0.351045   \n",
       "pca_4    -1.691776  1.884805  1.448144 -0.616171  ... -0.188771 -0.326973   \n",
       "...            ...       ...       ...       ...  ...       ...       ...   \n",
       "pca_2995 -0.866848 -0.678358 -0.812415 -1.377833  ...  0.043532 -0.048676   \n",
       "pca_2996  0.068182  0.456288  0.620467  0.230441  ... -0.043833  0.147033   \n",
       "pca_2997 -0.987080  0.321371 -0.363875 -0.699090  ... -0.001711  0.007770   \n",
       "pca_2998 -0.815531  0.768218 -0.444590 -0.206377  ... -0.033995 -0.004664   \n",
       "pca_2999  0.182743 -0.173399  0.256055 -1.114440  ... -0.013051 -0.061565   \n",
       "\n",
       "             PC492     PC493     PC494     PC495     PC496     PC497  \\\n",
       "pca_0    -0.025512  0.056957  0.080754 -0.041736  0.020152 -0.029999   \n",
       "pca_1    -0.004169  0.007520 -0.014184  0.003578  0.003657  0.011310   \n",
       "pca_2     0.069747 -0.783184  0.785393 -0.025167  0.018348 -0.594667   \n",
       "pca_3     0.257824  0.260153  0.359099 -0.283829 -0.434999  0.516117   \n",
       "pca_4     0.095400  0.104225 -0.160484 -0.015747  0.357049 -0.097271   \n",
       "...            ...       ...       ...       ...       ...       ...   \n",
       "pca_2995 -0.040588  0.034951  0.008944 -0.007064  0.011344 -0.032774   \n",
       "pca_2996  0.147205 -0.174209 -0.162566 -0.182836  0.120269 -0.270309   \n",
       "pca_2997 -0.018306 -0.009216 -0.010985 -0.013905  0.027204  0.006671   \n",
       "pca_2998  0.076158  0.083602 -0.030803  0.131021  0.160362 -0.103967   \n",
       "pca_2999  0.023300 -0.016409 -0.048254  0.135349 -0.009455  0.065485   \n",
       "\n",
       "             PC498     PC499  \n",
       "pca_0    -0.021017 -0.013270  \n",
       "pca_1    -0.007444 -0.016586  \n",
       "pca_2    -0.669811 -0.737063  \n",
       "pca_3     0.249920  0.081426  \n",
       "pca_4    -0.473582 -0.193636  \n",
       "...            ...       ...  \n",
       "pca_2995  0.072806  0.039972  \n",
       "pca_2996 -0.090341  0.089590  \n",
       "pca_2997 -0.002955  0.010073  \n",
       "pca_2998  0.143163 -0.033488  \n",
       "pca_2999  0.091954 -0.062281  \n",
       "\n",
       "[3000 rows x 500 columns]"
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Wrap `pca` in a labelled DataFrame: rows are named pca_<i>, columns PC<j>.\n",
    "# The sizes are read from `pca` itself; hard-coded 3000/500 labels make the\n",
    "# constructor fail with a shape mismatch as soon as `pca` has any other size.\n",
    "# Assumes `pca` is 2-D (rows x components) - TODO confirm against the cell that builds it.\n",
    "n_rows, n_pcs = np.shape(pca)\n",
    "pca_df = pd.DataFrame(\n",
    "    data=pca,\n",
    "    index=[f\"pca_{i}\" for i in range(n_rows)],\n",
    "    columns=[f\"PC{i}\" for i in range(n_pcs)],\n",
    "    dtype=float,\n",
    ")\n",
    "pca_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "# Read an .h5ad file into `adata_ori`.\n",
    "# NOTE(review): 'PATH' looks like a placeholder directory - confirm the real location before running.\n",
    "adata_ori = sc.read_h5ad(\n",
    "    'PATH/integrated_no142_pc500.h5ad'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.7576424 ,  3.6522322 ,  0.1599261 , ..., -0.02995351,\n",
       "        -0.28867042,  0.1329108 ],\n",
       "       [ 0.8895304 ,  3.5486174 , -0.6535454 , ..., -0.22012334,\n",
       "         0.01136652,  0.08548696],\n",
       "       [ 0.8094026 ,  3.5737336 , -0.41581962, ...,  0.12190228,\n",
       "         0.3234079 ,  0.49335164],\n",
       "       ...,\n",
       "       [-0.47301754,  1.5433978 ,  2.0638123 , ...,  0.31233504,\n",
       "        -0.4480625 ,  0.35856634],\n",
       "       [ 8.0414715 , -1.8221056 ,  0.2796873 , ..., -0.12187874,\n",
       "         0.19143249, -0.11392555],\n",
       "       [-0.52842337,  2.8675208 ,  1.5929552 , ..., -0.44244486,\n",
       "        -0.46519822, -0.03274676]], dtype=float32)"
      ]
     },
     "execution_count": 90,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the array stored under the 'X_pca' key of `adata.obsm`.\n",
    "adata.obsm[\"X_pca\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.7576424 ,  3.6522322 ,  0.1599261 , ..., -0.02995351,\n",
       "        -0.28867042,  0.1329108 ],\n",
       "       [ 0.8895304 ,  3.5486174 , -0.6535454 , ..., -0.22012334,\n",
       "         0.01136652,  0.08548696],\n",
       "       [ 0.8094026 ,  3.5737336 , -0.41581962, ...,  0.12190228,\n",
       "         0.3234079 ,  0.49335164],\n",
       "       ...,\n",
       "       [-0.47301754,  1.5433978 ,  2.0638123 , ...,  0.31233504,\n",
       "        -0.4480625 ,  0.35856634],\n",
       "       [ 8.0414715 , -1.8221056 ,  0.2796873 , ..., -0.12187874,\n",
       "         0.19143249, -0.11392555],\n",
       "       [-0.52842337,  2.8675208 ,  1.5929552 , ..., -0.44244486,\n",
       "        -0.46519822, -0.03274676]], dtype=float32)"
      ]
     },
     "execution_count": 91,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the array stored under the 'X_pca' key of `adata_ori.obsm`.\n",
    "adata_ori.obsm[\"X_pca\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
